diff --git a/Extras/vectormathlibrary/Makefile b/Extras/vectormathlibrary/Makefile
index d56d929ec..49b5523a3 100644
--- a/Extras/vectormathlibrary/Makefile
+++ b/Extras/vectormathlibrary/Makefile
@@ -1,128 +1,119 @@
-# Makefile for testsuite for the PPU SIMD math library
-#   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-#   All rights reserved.
-#
-#   Redistribution and use in source and binary forms,
-#   with or without modification, are permitted provided that the
-#   following conditions are met:
-#    * Redistributions of source code must retain the above copyright
-#      notice, this list of conditions and the following disclaimer.
-#    * Redistributions in binary form must reproduce the above copyright
-#      notice, this list of conditions and the following disclaimer in the
-#      documentation and/or other materials provided with the distribution.
-#    * Neither the name of the Sony Computer Entertainment Inc nor the names
-#      of its contributors may be used to endorse or promote products derived
-#      from this software without specific prior written permission.
-#
-#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-#   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-#   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-#   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-#   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-#   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-#   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-#   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-#   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-#   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-#   POSSIBILITY OF SUCH DAMAGE.
-
-TESTS = main_vmtest
-
-STATIC_TESTS = $(TESTS)
-SHARED_TESTS = $(TESTS:=.shared)
-ALL_TESTS = $(STATIC_TESTS) $(SHARED_TESTS)
-
-INCLUDES_PPU = -I../simdmathlibrary/common -Iother/rs6000
-#ARCH_PPU can also be 64
-ARCH_PPU = 32 
-CROSS_PPU = ppu-
-AR_PPU = $(CROSS_PPU)ar
-CC_PPU = $(CROSS_PPU)gcc
-CXX_PPU = $(CROSS_PPU)g++
-TEST_CMD_PPU =
-
-ARCH_CFLAGS_PPU = -m$(ARCH_PPU) -maltivec -mabi=altivec
-CFLAGS_PPU = $(INCLUDES_PPU) -O2 -W -Wall $(ARCH_CFLAGS_PPU)
-STATIC_LDFLAGS_PPU = -static
-SHARED_LDFLAGS_PPU = -Wl,-rpath=..
-LDFLAGS_PPU = $(ARCH_CFLAGS_PPU) -L../simdmathlibrary/ppu -l$(LIB_BASE) -lm -lstdc++
-
-MAKE_DEFS = \
-	LIB_BASE='$(LIB_BASE)' \
-	LIB_NAME='$(LIB_NAME)' \
-	STATIC_LIB='$(STATIC_LIB)' \
-	SHARED_LIB='$(SHARED_LIB)' \
-	ARCH_PPU='$(ARCH_PPU)' \
-	ARCH_CFLAGS_PPU='$(ARCH_CFLAGS_PPU)' \
-	CROSS_PPU='$(CROSS_PPU)' \
-	AR_PPU='$(AR_PPU)' \
-	CC_PPU='$(CC_PPU)' \
-	CXX_PPU='$(CXX_PPU)' \
-	TEST_CMD_PPU='$(TEST_CMD_PPU)'
-
-LIB_BASE = simdmath
-LIB_NAME = lib$(LIB_BASE)
-STATIC_LIB = $(LIB_NAME).a
-SHARED_LIB = $(LIB_NAME).so
-
-TEST_CMD = $(TEST_CMD_PPU)
-
-#COMMON_OBJS = testutils.o
-
-
-all: $(ALL_TESTS)
-
-
-$(STATIC_TESTS): %: %.o ../simdmathlibrary/ppu/$(STATIC_LIB) $(COMMON_OBJS)
-	$(CC_PPU) $*.o $(COMMON_OBJS) $(LDFLAGS_PPU) $(STATIC_LDFLAGS_PPU) -o $@
-
-$(SHARED_TESTS): %.shared: %.o ../simdmathlibrary/ppu/$(SHARED_LIB) $(COMMON_OBJS)
-	$(CC_PPU) $*.o $(COMMON_OBJS) $(LDFLAGS_PPU) $(SHARED_LDFLAGS_PPU) -o $@
-
-clean:
-	rm -f *.o
-	rm -f $(STATIC_TESTS) $(SHARED_TESTS)
-	rm -f core*
-
-check: $(ALL_TESTS)
-	for test in $(ALL_TESTS); do	\
-		echo "TEST $${test}";		\
-		if $(TEST_CMD) ./$${test}; then \
-			pass="$$pass $$test"; \
-		else \
-			fail="$$fail $$test"; \
-		fi \
-	done; \
-	echo; echo "PASS:$$pass"; echo "FAIL:$$fail"; \
-	test -z "$$fail"
-
-static_check:
-	$(MAKE) $(MAKE_DEFS) ALL_TESTS="$(STATIC_TESTS)" check
-
-shared_check:
-	$(MAKE) $(MAKE_DEFS) ALL_TESTS="$(SHARED_TESTS)" check
-
-../$(STATIC_LIB):
-	cd ../;$(MAKE) $(MAKE_DEFS) $(STATIC_LIB)
-
-../$(SHARED_LIB):
-	cd ../;$(MAKE) $(MAKE_DEFS) $(SHARED_LIB)
-
-%.o: %.c  common-test.h testutils.h
-	$(CC_PPU) $(CFLAGS_PPU) -c $<
-
-#----------
-#   C++
-#----------
-%.o: %.C
-	$(CXX_PPU) $(CFLAGS_PPU) -c $<
-
-%.o: %.cpp
-	$(CXX_PPU) $(CFLAGS_PPU) -c $<
-
-%.o: %.cc
-	$(CXX_PPU) $(CFLAGS_PPU) -c $<
-
-%.o: %.cxx
-	$(CXX_PPU) $(CFLAGS_PPU) -c $<
-
+# Makefile for vector math library.
+#
+#   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms,
+#   with or without modification, are permitted provided that the
+#   following conditions are met:
+#    * Redistributions of source code must retain the above copyright
+#      notice, this list of conditions and the following disclaimer.
+#    * Redistributions in binary form must reproduce the above copyright
+#      notice, this list of conditions and the following disclaimer in the
+#      documentation and/or other materials provided with the distribution.
+#    * Neither the name of the Sony Computer Entertainment Inc nor the names
+#      of its contributors may be used to endorse or promote products derived
+#      from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+#   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+#   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+#   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+#   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+#   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+#   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+#   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+#   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+#   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+#   POSSIBILITY OF SUCH DAMAGE.
+
+# How to build:
+#
+#   Nothing to do (the entire library is implemented as inline functions).
+#
+# How to install:
+#
+#   To install the library:
+#
+#      make ARCH=<ARCHITECTURE> install
+#
+#   where <ARCHITECTURE> must be one of:
+#
+#      ppu (PowerPC)
+#      spu
+#      SSE
+#      scalar (generic)
+#
+#   e.g.) make ARCH=ppu install
+#
+
+topdir = .
+ARCH = scalar
+
+prefix_spu = /usr/spu
+
+ARCH_DIRS = $(ARCH)
+ARCH_INSTALL= $(ARCH_INSTALL_$(ARCH))
+ARCH_CHECK= $(ARCH_CHECK_$(ARCH))
+
+prefix = $(if $(prefix_$(ARCH)),$(prefix_$(ARCH)),/usr)
+DESTDIR =
+
+COMMON_DIRS = scalar
+
+INSTALL = install
+
+LIB_MAJOR_VERSION = 1
+LIB_MINOR_VERSION = 0
+LIB_RELEASE = 1
+LIB_FULL_VERSION = $(LIB_MAJOR_VERSION).$(LIB_MINOR_VERSION).$(LIB_RELEASE)
+
+LIB_BASE = vectormath
+
+TAR_NAME = $(LIB_BASE)-$(LIB_FULL_VERSION)
+TAR_BALL = $(TAR_NAME).tar.gz
+
+.PHONY: all install check clean distclean dist  # command targets, never files
+all: ; @true
+
+# Install the top-level c/ and cpp/ headers, then every existing per-architecture header directory (each once).
+install:
+	$(INSTALL) -m 755 -d $(DESTDIR)$(prefix)/include/vectormath/c
+	$(INSTALL) -m 755 -d $(DESTDIR)$(prefix)/include/vectormath/cpp
+	$(INSTALL) -m 644 include/vectormath/c/*.h $(DESTDIR)$(prefix)/include/vectormath/c/
+	$(INSTALL) -m 644 include/vectormath/cpp/*.h $(DESTDIR)$(prefix)/include/vectormath/cpp/
+	for _d in $(sort $(ARCH_DIRS) $(COMMON_DIRS)); do \
+		if test -d include/vectormath/$$_d/c; then \
+			$(INSTALL) -m 755 -d $(DESTDIR)$(prefix)/include/vectormath/$$_d/c && \
+			$(INSTALL) -m 644 include/vectormath/$$_d/c/*.h \
+				$(DESTDIR)$(prefix)/include/vectormath/$$_d/c/ || exit 1; \
+		fi; \
+		if test -d include/vectormath/$$_d/cpp; then \
+			$(INSTALL) -m 755 -d $(DESTDIR)$(prefix)/include/vectormath/$$_d/cpp && \
+			$(INSTALL) -m 644 include/vectormath/$$_d/cpp/*.h \
+				$(DESTDIR)$(prefix)/include/vectormath/$$_d/cpp/ || exit 1; \
+		fi; \
+	done
+
+check:
+	$(MAKE) -C tests ARCH=$(ARCH) check
+
+clean:
+	$(MAKE) -C tests clean
+	-rm -f $(TAR_BALL)
+
+distclean: clean  # run 'clean' first so the dist tarball is removed as well
+	$(MAKE) -C tests distclean
+
+# Build $(TAR_BALL): copy the matching source/doc files into ./.dist/$(TAR_NAME), then archive that tree.
+dist:
+	-rm -rf .dist
+	mkdir -p .dist/$(TAR_NAME)
+	find . -name .dist -prune -o -name CVS -prune -o -name .svn -prune -o \
+		-name .pc -prune  -o -name patches -prune -o \
+		'(' -name README -o -name LICENSE -o \
+			-name Makefile -o -name '*.[ch]' -o -name '*.cpp' -o \
+			-name '*.pl' -o -name '*.txt' -o -name '*.pdf' -o -name '*.spec' ')' \
+		-print | tar -T - -cf - | tar xf - -C .dist/$(TAR_NAME)
+	tar zcf $(TAR_BALL) -C .dist $(TAR_NAME)
+	-rm -rf .dist
diff --git a/Extras/vectormathlibrary/README b/Extras/vectormathlibrary/README
index 842caaea1..934829d66 100644
--- a/Extras/vectormathlibrary/README
+++ b/Extras/vectormathlibrary/README
@@ -1,86 +1,13 @@
-			 Vector math library
-
-
-* Overview
-
-  The Vector math library provides 3-D/4-D vector operations including
-  addition, outer product, multiply by a matrix, etc.
-
-
-* License
-
-  This library is licensed under the terms in the file 'LICENSE' in
-  this directory.
-
-
-* Installing
-
-  To install this library, run following commands:
-
-  - PowerPC with VMX (fully supported)
-
-    $ make ARCH=ppu install
-
-  - SPU (Cell Broadband Engine Synergistic Processor Unit)
-
-    $ make ARCH=spu install
-
-  - x86 with SSE (partially supported)
-
-    $ make ARCH=SSE install
-
-  - Other architectures (partially supported)
-
-    $ make install
-
-
-  By default, files in the library will be placed as below:
-
-  - headers
-  
-    /usr/include/vectormath/c/       (C headers)
-    /usr/include/vectormath/cpp/     (C++ headers)
-
-  - SPU headers
-  
-    /usr/spu/include/vectormath/c/   (C headers)
-    /usr/spu/include/vectormath/cpp/ (C++ headers)
-
-  No shared library, static library nor executable is installed,
-  because all functions in this library are provided as inline
-  functions.
-
-
-* Packaging
-
-  By running following command in this directory, a source tarball
-  'vectormath-<VERSION>.tar.gz' can be created:
-
-    $ make dist
-  
-  You can also create RPM packages by executing the command below with
-  the tarball:
-
-    $ rpmbuild -tb vectormath-1.0.1.tar.gz --target=ppc
-
-  One or two packages, vectormath-devel (and, in addition,
-  spu-vectormath-devel for PowerPC target), will be created.
-
-
-* Usage
-
-  See the documents `doc/*.pdf'.
-
-
-* Contacting the project
-
-  Module maintainer: Erwin Coumans
-
-  Feedback and patches:
-    http://www.bulletphysics.com/Bullet/phpBB2/viewforum.php?f=18
-
-  Main repository URL:
-    http://bullet.svn.sourceforge.net/viewvc/bullet/trunk/Extras/vectormathlibrary
-
----
-EOF
+Vector Math library for 3-D linear algebra (vector, matrix, quaternion)
+SIMD support for SSE, PowerPC (PPU) and the SPU.
+Also includes generic multi-platform scalar version.
+
+Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+Open Source under the new BSD license, see LICENSE
+
+Module maintainer: Erwin Coumans
+Feedback and patches:
+http://www.bulletphysics.com/Bullet/phpBB2/viewforum.php?f=18
+Main repository URL:
+http://bullet.svn.sourceforge.net/viewvc/bullet/trunk/Extras/vectormathlibrary
+
diff --git a/Extras/vectormathlibrary/include/vectormath/SSE/cpp/vectormath_aos.h b/Extras/vectormathlibrary/include/vectormath/SSE/cpp/vectormath_aos.h
index d927f4bc8..94825a583 100644
--- a/Extras/vectormathlibrary/include/vectormath/SSE/cpp/vectormath_aos.h
+++ b/Extras/vectormathlibrary/include/vectormath/SSE/cpp/vectormath_aos.h
@@ -27,8 +27,8 @@
    POSSIBILITY OF SUCH DAMAGE.
 */
 
-#ifndef _VECTORMATH_AOS_CPP_H
-#define _VECTORMATH_AOS_CPP_H
+#ifndef _VECTORMATH_AOS_CPP_SSE_H
+#define _VECTORMATH_AOS_CPP_SSE_H
 
 #include <math.h>
 #include <xmmintrin.h>
@@ -113,8 +113,8 @@ static inline __m128 vec_ctf(__m128 x, int a)
 	SSE64 sse64;
 	sse64.m128 = x;
 	__m128 result =_mm_movelh_ps(
-		_mm_cvt_pi2ps(_mm_setzero_ps(), sse64.m64.m01), 
-		_mm_cvt_pi2ps(_mm_setzero_ps(), sse64.m64.m23)); 
+		_mm_cvt_pi2ps(_mm_setzero_ps(), sse64.m64.m01),
+		_mm_cvt_pi2ps(_mm_setzero_ps(), sse64.m64.m23));
 	_mm_empty();
 	return result;
 }
diff --git a/Extras/vectormathlibrary/include/vectormath/c/vectormath_aos.h b/Extras/vectormathlibrary/include/vectormath/c/vectormath_aos.h
new file mode 100644
index 000000000..329f07abc
--- /dev/null
+++ b/Extras/vectormathlibrary/include/vectormath/c/vectormath_aos.h
@@ -0,0 +1,41 @@
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_AOS_C_H
+#define _VECTORMATH_AOS_C_H
+
+#if defined(__SPU__)
+#  include "../spu/c/vectormath_aos.h"
+#elif defined(__ALTIVEC__)
+#  include "../ppu/c/vectormath_aos.h"
+#else
+#  include "../scalar/c/vectormath_aos.h"
+#endif
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/c/vectormath_aos_v.h b/Extras/vectormathlibrary/include/vectormath/c/vectormath_aos_v.h
new file mode 100644
index 000000000..83368fafa
--- /dev/null
+++ b/Extras/vectormathlibrary/include/vectormath/c/vectormath_aos_v.h
@@ -0,0 +1,41 @@
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_AOS_C_V_H
+#define _VECTORMATH_AOS_C_V_H
+
+#if defined(__SPU__)
+#  include "../spu/c/vectormath_aos_v.h"
+#elif defined(__ALTIVEC__)
+#  include "../ppu/c/vectormath_aos_v.h"
+#else
+#  include "../scalar/c/vectormath_aos_v.h"
+#endif
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/c/vectormath_soa.h b/Extras/vectormathlibrary/include/vectormath/c/vectormath_soa.h
new file mode 100644
index 000000000..42d079220
--- /dev/null
+++ b/Extras/vectormathlibrary/include/vectormath/c/vectormath_soa.h
@@ -0,0 +1,41 @@
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_SOA_C_H
+#define _VECTORMATH_SOA_C_H
+
+#if defined(__SPU__)
+#  include "../spu/c/vectormath_soa.h"
+#elif defined(__ALTIVEC__)
+#  include "../ppu/c/vectormath_soa.h"
+#else /* only the SPU and PPU (AltiVec) variants are dispatched from here */
+#  error "vectormath SoA C API is only implemented for SPU and PPU (AltiVec) targets."
+#endif
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/c/vectormath_soa_v.h b/Extras/vectormathlibrary/include/vectormath/c/vectormath_soa_v.h
new file mode 100644
index 000000000..ade2eeea0
--- /dev/null
+++ b/Extras/vectormathlibrary/include/vectormath/c/vectormath_soa_v.h
@@ -0,0 +1,41 @@
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_SOA_C_V_H
+#define _VECTORMATH_SOA_C_V_H
+
+#if defined(__SPU__)
+#  include "../spu/c/vectormath_soa_v.h"
+#elif defined(__ALTIVEC__)
+#  include "../ppu/c/vectormath_soa_v.h"
+#else /* only the SPU and PPU (AltiVec) variants are dispatched from here */
+#  error "vectormath SoA C API (pass-by-value) is only implemented for SPU and PPU (AltiVec) targets."
+#endif
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/cpp/vectormath_aos.h b/Extras/vectormathlibrary/include/vectormath/cpp/vectormath_aos.h
new file mode 100644
index 000000000..1d5088f8f
--- /dev/null
+++ b/Extras/vectormathlibrary/include/vectormath/cpp/vectormath_aos.h
@@ -0,0 +1,43 @@
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_AOS_CPP_H
+#define _VECTORMATH_AOS_CPP_H
+
+#if defined(__SPU__)
+#  include "../spu/cpp/vectormath_aos.h"
+#elif defined(__ALTIVEC__)
+#  include "../ppu/cpp/vectormath_aos.h"
+#elif defined(__SSE__)
+#  include "../SSE/cpp/vectormath_aos.h"
+#else
+#  include "../scalar/cpp/vectormath_aos.h"
+#endif
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/cpp/vectormath_soa.h b/Extras/vectormathlibrary/include/vectormath/cpp/vectormath_soa.h
new file mode 100644
index 000000000..287915f0f
--- /dev/null
+++ b/Extras/vectormathlibrary/include/vectormath/cpp/vectormath_soa.h
@@ -0,0 +1,41 @@
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_SOA_CPP_H
+#define _VECTORMATH_SOA_CPP_H
+
+#if defined(__SPU__)
+#  include "../spu/cpp/vectormath_soa.h"
+#elif defined(__ALTIVEC__)
+#  include "../ppu/cpp/vectormath_soa.h"
+#else /* only the SPU and PPU (AltiVec) variants are dispatched from here */
+#  error "vectormath SoA C++ API is only implemented for SPU and PPU (AltiVec) targets."
+#endif
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/ppu/c/mat_aos.h b/Extras/vectormathlibrary/include/vectormath/ppu/c/mat_aos.h
index 13f482150..f6443a587 100644
--- a/Extras/vectormathlibrary/include/vectormath/ppu/c/mat_aos.h
+++ b/Extras/vectormathlibrary/include/vectormath/ppu/c/mat_aos.h
@@ -1,1833 +1,1833 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_MAT_AOS_C_H
-#define _VECTORMATH_MAT_AOS_C_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/*-----------------------------------------------------------------------------
- * Constants
- * for shuffles, words are labeled [x,y,z,w] [a,b,c,d]
- */
-#define _VECTORMATH_PERM_ZBWX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B, _VECTORMATH_PERM_W, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PERM_XCYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_C, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PERM_XYAB ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A, _VECTORMATH_PERM_B })
-#define _VECTORMATH_PERM_ZWCD ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_W, _VECTORMATH_PERM_C, _VECTORMATH_PERM_D })
-#define _VECTORMATH_PERM_XZBX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B, _VECTORMATH_PERM_X })     
-#define _VECTORMATH_PERM_CXXX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PERM_YAXX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PERM_XAZC ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_A, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_C })
-#define _VECTORMATH_PERM_YXWZ ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X, _VECTORMATH_PERM_W, _VECTORMATH_PERM_Z })
-#define _VECTORMATH_PERM_YBWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_B, _VECTORMATH_PERM_W, _VECTORMATH_PERM_D })
-#define _VECTORMATH_PERM_XYCX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_C, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PERM_YCXY ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y })
-#define _VECTORMATH_PERM_CXYC ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_C })
-#define _VECTORMATH_PERM_ZAYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PERM_BZXX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_B, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PERM_XZYA ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A })
-#define _VECTORMATH_PERM_ZXXB ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X, _VECTORMATH_PERM_B })
-#define _VECTORMATH_PERM_YXXC ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X, _VECTORMATH_PERM_C })
-#define _VECTORMATH_PERM_BBYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_B, _VECTORMATH_PERM_B, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PI_OVER_2 1.570796327f
-
-/*-----------------------------------------------------------------------------
- * Definitions
- */
-static inline void vmathM3Copy( VmathMatrix3 *result, const VmathMatrix3 *mat )
-{
-    vmathV3Copy( &result->col0, &mat->col0 );
-    vmathV3Copy( &result->col1, &mat->col1 );
-    vmathV3Copy( &result->col2, &mat->col2 );
-}
-
-static inline void vmathM3MakeFromScalar( VmathMatrix3 *result, float scalar )
-{
-    vmathV3MakeFromScalar( &result->col0, scalar );
-    vmathV3MakeFromScalar( &result->col1, scalar );
-    vmathV3MakeFromScalar( &result->col2, scalar );
-}
-
-static inline void vmathM3MakeFromQ( VmathMatrix3 *result, const VmathQuat *unitQuat )
-{
-    vec_float4 xyzw_2, wwww, yzxw, zxyw, yzxw_2, zxyw_2;
-    vec_float4 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
-    vec_uint4 select_x = _VECTORMATH_MASK_0xF000;
-    vec_uint4 select_z = _VECTORMATH_MASK_0x00F0;
-    xyzw_2 = vec_add( unitQuat->vec128, unitQuat->vec128 );
-    wwww = vec_splat( unitQuat->vec128, 3 );
-    yzxw = vec_perm( unitQuat->vec128, unitQuat->vec128, _VECTORMATH_PERM_YZXW );
-    zxyw = vec_perm( unitQuat->vec128, unitQuat->vec128, _VECTORMATH_PERM_ZXYW );
-    yzxw_2 = vec_perm( xyzw_2, xyzw_2, _VECTORMATH_PERM_YZXW );
-    zxyw_2 = vec_perm( xyzw_2, xyzw_2, _VECTORMATH_PERM_ZXYW );
-    tmp0 = vec_madd( yzxw_2, wwww, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    tmp1 = vec_nmsub( yzxw, yzxw_2, ((vec_float4){1.0f,1.0f,1.0f,1.0f}) );
-    tmp2 = vec_madd( yzxw, xyzw_2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    tmp0 = vec_madd( zxyw, xyzw_2, tmp0 );
-    tmp1 = vec_nmsub( zxyw, zxyw_2, tmp1 );
-    tmp2 = vec_nmsub( zxyw_2, wwww, tmp2 );
-    tmp3 = vec_sel( tmp0, tmp1, select_x );
-    tmp4 = vec_sel( tmp1, tmp2, select_x );
-    tmp5 = vec_sel( tmp2, tmp0, select_x );
-    result->col0.vec128 = vec_sel( tmp3, tmp2, select_z );
-    result->col1.vec128 = vec_sel( tmp4, tmp0, select_z );
-    result->col2.vec128 = vec_sel( tmp5, tmp1, select_z );
-}
-
-static inline void vmathM3MakeFromCols( VmathMatrix3 *result, const VmathVector3 *_col0, const VmathVector3 *_col1, const VmathVector3 *_col2 )
-{
-    vmathV3Copy( &result->col0, _col0 );
-    vmathV3Copy( &result->col1, _col1 );
-    vmathV3Copy( &result->col2, _col2 );
-}
-
-static inline void vmathM3SetCol0( VmathMatrix3 *result, const VmathVector3 *_col0 )
-{
-    vmathV3Copy( &result->col0, _col0 );
-}
-
-static inline void vmathM3SetCol1( VmathMatrix3 *result, const VmathVector3 *_col1 )
-{
-    vmathV3Copy( &result->col1, _col1 );
-}
-
-static inline void vmathM3SetCol2( VmathMatrix3 *result, const VmathVector3 *_col2 )
-{
-    vmathV3Copy( &result->col2, _col2 );
-}
-
-static inline void vmathM3SetCol( VmathMatrix3 *result, int col, const VmathVector3 *vec )
-{
-    vmathV3Copy( (&result->col0 + col), vec );
-}
-
-static inline void vmathM3SetRow( VmathMatrix3 *result, int row, const VmathVector3 *vec )
-{
-    vmathV3SetElem( &result->col0, row, vmathV3GetElem( vec, 0 ) );
-    vmathV3SetElem( &result->col1, row, vmathV3GetElem( vec, 1 ) );
-    vmathV3SetElem( &result->col2, row, vmathV3GetElem( vec, 2 ) );
-}
-
-static inline void vmathM3SetElem( VmathMatrix3 *result, int col, int row, float val )
-{
-    VmathVector3 tmpV3_0;
-    vmathM3GetCol( &tmpV3_0, result, col );
-    vmathV3SetElem( &tmpV3_0, row, val );
-    vmathM3SetCol( result, col, &tmpV3_0 );
-}
-
-static inline float vmathM3GetElem( const VmathMatrix3 *mat, int col, int row )
-{
-    VmathVector3 tmpV3_0;
-    vmathM3GetCol( &tmpV3_0, mat, col );
-    return vmathV3GetElem( &tmpV3_0, row );
-}
-
-static inline void vmathM3GetCol0( VmathVector3 *result, const VmathMatrix3 *mat )
-{
-    vmathV3Copy( result, &mat->col0 );
-}
-
-static inline void vmathM3GetCol1( VmathVector3 *result, const VmathMatrix3 *mat )
-{
-    vmathV3Copy( result, &mat->col1 );
-}
-
-static inline void vmathM3GetCol2( VmathVector3 *result, const VmathMatrix3 *mat )
-{
-    vmathV3Copy( result, &mat->col2 );
-}
-
-static inline void vmathM3GetCol( VmathVector3 *result, const VmathMatrix3 *mat, int col )
-{
-    vmathV3Copy( result, (&mat->col0 + col) );
-}
-
-static inline void vmathM3GetRow( VmathVector3 *result, const VmathMatrix3 *mat, int row )
-{
-    vmathV3MakeFromElems( result, vmathV3GetElem( &mat->col0, row ), vmathV3GetElem( &mat->col1, row ), vmathV3GetElem( &mat->col2, row ) );
-}
-
-static inline void vmathM3Transpose( VmathMatrix3 *result, const VmathMatrix3 *mat )
-{
-    vec_float4 tmp0, tmp1, res0, res1, res2;
-    tmp0 = vec_mergeh( mat->col0.vec128, mat->col2.vec128 );
-    tmp1 = vec_mergel( mat->col0.vec128, mat->col2.vec128 );
-    res0 = vec_mergeh( tmp0, mat->col1.vec128 );
-    res1 = vec_perm( tmp0, mat->col1.vec128, _VECTORMATH_PERM_ZBWX );
-    res2 = vec_perm( tmp1, mat->col1.vec128, _VECTORMATH_PERM_XCYX );
-    result->col0.vec128 = res0;
-    result->col1.vec128 = res1;
-    result->col2.vec128 = res2;
-}
-
-static inline void vmathM3Inverse( VmathMatrix3 *result, const VmathMatrix3 *mat )
-{
-    vec_float4 tmp0, tmp1, tmp2, tmp3, tmp4, dot, invdet, inv0, inv1, inv2;
-    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    tmp2 = _vmathVfCross( mat->col0.vec128, mat->col1.vec128 );
-    tmp0 = _vmathVfCross( mat->col1.vec128, mat->col2.vec128 );
-    tmp1 = _vmathVfCross( mat->col2.vec128, mat->col0.vec128 );
-    dot = _vmathVfDot3( tmp2, mat->col2.vec128 );
-    dot = vec_splat( dot, 0 );
-    invdet = recipf4( dot );
-    tmp3 = vec_mergeh( tmp0, tmp2 );
-    tmp4 = vec_mergel( tmp0, tmp2 );
-    inv0 = vec_mergeh( tmp3, tmp1 );
-    inv1 = vec_perm( tmp3, tmp1, _VECTORMATH_PERM_ZBWX );
-    inv2 = vec_perm( tmp4, tmp1, _VECTORMATH_PERM_XCYX );
-    inv0 = vec_madd( inv0, invdet, zero );
-    inv1 = vec_madd( inv1, invdet, zero );
-    inv2 = vec_madd( inv2, invdet, zero );
-    result->col0.vec128 = inv0;
-    result->col1.vec128 = inv1;
-    result->col2.vec128 = inv2;
-}
-
-static inline float vmathM3Determinant( const VmathMatrix3 *mat )
-{
-    VmathVector3 tmpV3_0;
-    vmathV3Cross( &tmpV3_0, &mat->col0, &mat->col1 );
-    return vmathV3Dot( &mat->col2, &tmpV3_0 );
-}
-
-static inline void vmathM3Add( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 )
-{
-    vmathV3Add( &result->col0, &mat0->col0, &mat1->col0 );
-    vmathV3Add( &result->col1, &mat0->col1, &mat1->col1 );
-    vmathV3Add( &result->col2, &mat0->col2, &mat1->col2 );
-}
-
-static inline void vmathM3Sub( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 )
-{
-    vmathV3Sub( &result->col0, &mat0->col0, &mat1->col0 );
-    vmathV3Sub( &result->col1, &mat0->col1, &mat1->col1 );
-    vmathV3Sub( &result->col2, &mat0->col2, &mat1->col2 );
-}
-
-static inline void vmathM3Neg( VmathMatrix3 *result, const VmathMatrix3 *mat )
-{
-    vmathV3Neg( &result->col0, &mat->col0 );
-    vmathV3Neg( &result->col1, &mat->col1 );
-    vmathV3Neg( &result->col2, &mat->col2 );
-}
-
-static inline void vmathM3AbsPerElem( VmathMatrix3 *result, const VmathMatrix3 *mat )
-{
-    vmathV3AbsPerElem( &result->col0, &mat->col0 );
-    vmathV3AbsPerElem( &result->col1, &mat->col1 );
-    vmathV3AbsPerElem( &result->col2, &mat->col2 );
-}
-
-static inline void vmathM3ScalarMul( VmathMatrix3 *result, const VmathMatrix3 *mat, float scalar )
-{
-    vmathV3ScalarMul( &result->col0, &mat->col0, scalar );
-    vmathV3ScalarMul( &result->col1, &mat->col1, scalar );
-    vmathV3ScalarMul( &result->col2, &mat->col2, scalar );
-}
-
-static inline void vmathM3MulV3( VmathVector3 *result, const VmathMatrix3 *mat, const VmathVector3 *vec )
-{
-    vec_float4 res;
-    vec_float4 xxxx, yyyy, zzzz;
-    xxxx = vec_splat( vec->vec128, 0 );
-    yyyy = vec_splat( vec->vec128, 1 );
-    zzzz = vec_splat( vec->vec128, 2 );
-    res = vec_madd( mat->col0.vec128, xxxx, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    res = vec_madd( mat->col1.vec128, yyyy, res );
-    res = vec_madd( mat->col2.vec128, zzzz, res );
-    result->vec128 = res;
-}
-
-static inline void vmathM3Mul( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 )
-{
-    VmathMatrix3 tmpResult;
-    vmathM3MulV3( &tmpResult.col0, mat0, &mat1->col0 );
-    vmathM3MulV3( &tmpResult.col1, mat0, &mat1->col1 );
-    vmathM3MulV3( &tmpResult.col2, mat0, &mat1->col2 );
-    vmathM3Copy( result, &tmpResult );
-}
-
-static inline void vmathM3MulPerElem( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 )
-{
-    vmathV3MulPerElem( &result->col0, &mat0->col0, &mat1->col0 );
-    vmathV3MulPerElem( &result->col1, &mat0->col1, &mat1->col1 );
-    vmathV3MulPerElem( &result->col2, &mat0->col2, &mat1->col2 );
-}
-
-static inline void vmathM3MakeIdentity( VmathMatrix3 *result )
-{
-    vmathV3MakeXAxis( &result->col0 );
-    vmathV3MakeYAxis( &result->col1 );
-    vmathV3MakeZAxis( &result->col2 );
-}
-
-static inline void vmathM3MakeRotationX( VmathMatrix3 *result, float radians )
-{
-    vec_float4 s, c, res1, res2;
-    vec_uint4 select_y, select_z;
-    vec_float4 zero;
-    select_y = _VECTORMATH_MASK_0x0F00;
-    select_z = _VECTORMATH_MASK_0x00F0;
-    zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    sincosf4( _vmathVfSplatScalar(radians), &s, &c );
-    res1 = vec_sel( zero, c, select_y );
-    res1 = vec_sel( res1, s, select_z );
-    res2 = vec_sel( zero, negatef4(s), select_y );
-    res2 = vec_sel( res2, c, select_z );
-    vmathV3MakeXAxis( &result->col0 );
-    result->col1.vec128 = res1;
-    result->col2.vec128 = res2;
-}
-
-static inline void vmathM3MakeRotationY( VmathMatrix3 *result, float radians )
-{
-    vec_float4 s, c, res0, res2;
-    vec_uint4 select_x, select_z;
-    vec_float4 zero;
-    select_x = _VECTORMATH_MASK_0xF000;
-    select_z = _VECTORMATH_MASK_0x00F0;
-    zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    sincosf4( _vmathVfSplatScalar(radians), &s, &c );
-    res0 = vec_sel( zero, c, select_x );
-    res0 = vec_sel( res0, negatef4(s), select_z );
-    res2 = vec_sel( zero, s, select_x );
-    res2 = vec_sel( res2, c, select_z );
-    result->col0.vec128 = res0;
-    vmathV3MakeYAxis( &result->col1 );
-    result->col2.vec128 = res2;
-}
-
-static inline void vmathM3MakeRotationZ( VmathMatrix3 *result, float radians )
-{
-    vec_float4 s, c, res0, res1;
-    vec_uint4 select_x, select_y;
-    vec_float4 zero;
-    select_x = _VECTORMATH_MASK_0xF000;
-    select_y = _VECTORMATH_MASK_0x0F00;
-    zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    sincosf4( _vmathVfSplatScalar(radians), &s, &c );
-    res0 = vec_sel( zero, c, select_x );
-    res0 = vec_sel( res0, s, select_y );
-    res1 = vec_sel( zero, negatef4(s), select_x );
-    res1 = vec_sel( res1, c, select_y );
-    result->col0.vec128 = res0;
-    result->col1.vec128 = res1;
-    vmathV3MakeZAxis( &result->col2 );
-}
-
-static inline void vmathM3MakeRotationZYX( VmathMatrix3 *result, const VmathVector3 *radiansXYZ )
-{
-    VmathVector4 tmpV4_0;
-    vec_float4 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
-    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    vmathV4MakeFromV3Scalar( &tmpV4_0, radiansXYZ, 0.0f );
-    angles = tmpV4_0.vec128;
-    sincosf4( angles, &s, &c );
-    negS = negatef4( s );
-    Z0 = vec_mergel( c, s );
-    Z1 = vec_mergel( negS, c );
-    Z1 = vec_andc( Z1, (vec_float4)_VECTORMATH_MASK_0x000F );
-    Y0 = vec_perm( negS, c, _VECTORMATH_PERM_BBYX );
-    Y1 = vec_perm( c, s, _VECTORMATH_PERM_BBYX );
-    X0 = vec_splat( s, 0 );
-    X1 = vec_splat( c, 0 );
-    tmp = vec_madd( Z0, Y1, zero );
-    result->col0.vec128 = vec_madd( Z0, Y0, zero );
-    result->col1.vec128 = vec_madd( Z1, X1, vec_madd( tmp, X0, zero ) );
-    result->col2.vec128 = vec_nmsub( Z1, X0, vec_madd( tmp, X1, zero ) );
-}
-
-static inline void vmathM3MakeRotationAxis( VmathMatrix3 *result, float radians, const VmathVector3 *unitVec )
-{
-    vec_float4 axis, s, c, oneMinusC, axisS, negAxisS, xxxx, yyyy, zzzz, tmp0, tmp1, tmp2;
-    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    axis = unitVec->vec128;
-    sincosf4( (vec_float4){radians,radians,radians,radians}, &s, &c );
-    xxxx = vec_splat( axis, 0 );
-    yyyy = vec_splat( axis, 1 );
-    zzzz = vec_splat( axis, 2 );
-    oneMinusC = vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), c );
-    axisS = vec_madd( axis, s, zero );
-    negAxisS = negatef4( axisS );
-    tmp0 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_XZBX );
-    tmp1 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_CXXX );
-    tmp2 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_YAXX );
-    tmp0 = vec_sel( tmp0, c, _VECTORMATH_MASK_0xF000 );
-    tmp1 = vec_sel( tmp1, c, _VECTORMATH_MASK_0x0F00 );
-    tmp2 = vec_sel( tmp2, c, _VECTORMATH_MASK_0x00F0 );
-    result->col0.vec128 = vec_madd( vec_madd( axis, xxxx, zero ), oneMinusC, tmp0 );
-    result->col1.vec128 = vec_madd( vec_madd( axis, yyyy, zero ), oneMinusC, tmp1 );
-    result->col2.vec128 = vec_madd( vec_madd( axis, zzzz, zero ), oneMinusC, tmp2 );
-}
-
-static inline void vmathM3MakeRotationQ( VmathMatrix3 *result, const VmathQuat *unitQuat )
-{
-    vmathM3MakeFromQ( result, unitQuat );
-}
-
-static inline void vmathM3MakeScale( VmathMatrix3 *result, const VmathVector3 *scaleVec )
-{
-    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    result->col0.vec128 = vec_sel( zero, scaleVec->vec128, _VECTORMATH_MASK_0xF000 );
-    result->col1.vec128 = vec_sel( zero, scaleVec->vec128, _VECTORMATH_MASK_0x0F00 );
-    result->col2.vec128 = vec_sel( zero, scaleVec->vec128, _VECTORMATH_MASK_0x00F0 );
-}
-
-static inline void vmathM3AppendScale( VmathMatrix3 *result, const VmathMatrix3 *mat, const VmathVector3 *scaleVec )
-{
-    vmathV3ScalarMul( &result->col0, &mat->col0, vmathV3GetX( scaleVec ) );
-    vmathV3ScalarMul( &result->col1, &mat->col1, vmathV3GetY( scaleVec ) );
-    vmathV3ScalarMul( &result->col2, &mat->col2, vmathV3GetZ( scaleVec ) );
-}
-
-static inline void vmathM3PrependScale( VmathMatrix3 *result, const VmathVector3 *scaleVec, const VmathMatrix3 *mat )
-{
-    vmathV3MulPerElem( &result->col0, &mat->col0, scaleVec );
-    vmathV3MulPerElem( &result->col1, &mat->col1, scaleVec );
-    vmathV3MulPerElem( &result->col2, &mat->col2, scaleVec );
-}
-
-static inline void vmathM3Select( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1, unsigned int select1 )
-{
-    vmathV3Select( &result->col0, &mat0->col0, &mat1->col0, select1 );
-    vmathV3Select( &result->col1, &mat0->col1, &mat1->col1, select1 );
-    vmathV3Select( &result->col2, &mat0->col2, &mat1->col2, select1 );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathM3Print( const VmathMatrix3 *mat )
-{
-    VmathVector3 tmpV3_0, tmpV3_1, tmpV3_2;
-    vmathM3GetRow( &tmpV3_0, mat, 0 );
-    vmathV3Print( &tmpV3_0 );
-    vmathM3GetRow( &tmpV3_1, mat, 1 );
-    vmathV3Print( &tmpV3_1 );
-    vmathM3GetRow( &tmpV3_2, mat, 2 );
-    vmathV3Print( &tmpV3_2 );
-}
-
-static inline void vmathM3Prints( const VmathMatrix3 *mat, const char *name )
-{
-    printf("%s:\n", name);
-    vmathM3Print( mat );
-}
-
-#endif
-
-static inline void vmathM4Copy( VmathMatrix4 *result, const VmathMatrix4 *mat )
-{
-    vmathV4Copy( &result->col0, &mat->col0 );
-    vmathV4Copy( &result->col1, &mat->col1 );
-    vmathV4Copy( &result->col2, &mat->col2 );
-    vmathV4Copy( &result->col3, &mat->col3 );
-}
-
-static inline void vmathM4MakeFromScalar( VmathMatrix4 *result, float scalar )
-{
-    vmathV4MakeFromScalar( &result->col0, scalar );
-    vmathV4MakeFromScalar( &result->col1, scalar );
-    vmathV4MakeFromScalar( &result->col2, scalar );
-    vmathV4MakeFromScalar( &result->col3, scalar );
-}
-
-static inline void vmathM4MakeFromT3( VmathMatrix4 *result, const VmathTransform3 *mat )
-{
-    vmathV4MakeFromV3Scalar( &result->col0, &mat->col0, 0.0f );
-    vmathV4MakeFromV3Scalar( &result->col1, &mat->col1, 0.0f );
-    vmathV4MakeFromV3Scalar( &result->col2, &mat->col2, 0.0f );
-    vmathV4MakeFromV3Scalar( &result->col3, &mat->col3, 1.0f );
-}
-
-static inline void vmathM4MakeFromCols( VmathMatrix4 *result, const VmathVector4 *_col0, const VmathVector4 *_col1, const VmathVector4 *_col2, const VmathVector4 *_col3 )
-{
-    vmathV4Copy( &result->col0, _col0 );
-    vmathV4Copy( &result->col1, _col1 );
-    vmathV4Copy( &result->col2, _col2 );
-    vmathV4Copy( &result->col3, _col3 );
-}
-
-static inline void vmathM4MakeFromM3V3( VmathMatrix4 *result, const VmathMatrix3 *mat, const VmathVector3 *translateVec )
-{
-    vmathV4MakeFromV3Scalar( &result->col0, &mat->col0, 0.0f );
-    vmathV4MakeFromV3Scalar( &result->col1, &mat->col1, 0.0f );
-    vmathV4MakeFromV3Scalar( &result->col2, &mat->col2, 0.0f );
-    vmathV4MakeFromV3Scalar( &result->col3, translateVec, 1.0f );
-}
-
-static inline void vmathM4MakeFromQV3( VmathMatrix4 *result, const VmathQuat *unitQuat, const VmathVector3 *translateVec )
-{
-    VmathMatrix3 mat;
-    vmathM3MakeFromQ( &mat, unitQuat );
-    vmathV4MakeFromV3Scalar( &result->col0, &mat.col0, 0.0f );
-    vmathV4MakeFromV3Scalar( &result->col1, &mat.col1, 0.0f );
-    vmathV4MakeFromV3Scalar( &result->col2, &mat.col2, 0.0f );
-    vmathV4MakeFromV3Scalar( &result->col3, translateVec, 1.0f );
-}
-
-static inline void vmathM4SetCol0( VmathMatrix4 *result, const VmathVector4 *_col0 )
-{
-    vmathV4Copy( &result->col0, _col0 );
-}
-
-static inline void vmathM4SetCol1( VmathMatrix4 *result, const VmathVector4 *_col1 )
-{
-    vmathV4Copy( &result->col1, _col1 );
-}
-
-static inline void vmathM4SetCol2( VmathMatrix4 *result, const VmathVector4 *_col2 )
-{
-    vmathV4Copy( &result->col2, _col2 );
-}
-
-static inline void vmathM4SetCol3( VmathMatrix4 *result, const VmathVector4 *_col3 )
-{
-    vmathV4Copy( &result->col3, _col3 );
-}
-
-static inline void vmathM4SetCol( VmathMatrix4 *result, int col, const VmathVector4 *vec )
-{
-    vmathV4Copy( (&result->col0 + col), vec );
-}
-
-static inline void vmathM4SetRow( VmathMatrix4 *result, int row, const VmathVector4 *vec )
-{
-    vmathV4SetElem( &result->col0, row, vmathV4GetElem( vec, 0 ) );
-    vmathV4SetElem( &result->col1, row, vmathV4GetElem( vec, 1 ) );
-    vmathV4SetElem( &result->col2, row, vmathV4GetElem( vec, 2 ) );
-    vmathV4SetElem( &result->col3, row, vmathV4GetElem( vec, 3 ) );
-}
-
-static inline void vmathM4SetElem( VmathMatrix4 *result, int col, int row, float val )
-{
-    VmathVector4 tmpV3_0;
-    vmathM4GetCol( &tmpV3_0, result, col );
-    vmathV4SetElem( &tmpV3_0, row, val );
-    vmathM4SetCol( result, col, &tmpV3_0 );
-}
-
-static inline float vmathM4GetElem( const VmathMatrix4 *mat, int col, int row )
-{
-    VmathVector4 tmpV4_0;
-    vmathM4GetCol( &tmpV4_0, mat, col );
-    return vmathV4GetElem( &tmpV4_0, row );
-}
-
-static inline void vmathM4GetCol0( VmathVector4 *result, const VmathMatrix4 *mat )
-{
-    vmathV4Copy( result, &mat->col0 );
-}
-
-static inline void vmathM4GetCol1( VmathVector4 *result, const VmathMatrix4 *mat )
-{
-    vmathV4Copy( result, &mat->col1 );
-}
-
-static inline void vmathM4GetCol2( VmathVector4 *result, const VmathMatrix4 *mat )
-{
-    vmathV4Copy( result, &mat->col2 );
-}
-
-static inline void vmathM4GetCol3( VmathVector4 *result, const VmathMatrix4 *mat )
-{
-    vmathV4Copy( result, &mat->col3 );
-}
-
-static inline void vmathM4GetCol( VmathVector4 *result, const VmathMatrix4 *mat, int col )
-{
-    vmathV4Copy( result, (&mat->col0 + col) );
-}
-
-static inline void vmathM4GetRow( VmathVector4 *result, const VmathMatrix4 *mat, int row )
-{
-    vmathV4MakeFromElems( result, vmathV4GetElem( &mat->col0, row ), vmathV4GetElem( &mat->col1, row ), vmathV4GetElem( &mat->col2, row ), vmathV4GetElem( &mat->col3, row ) );
-}
-
-static inline void vmathM4Transpose( VmathMatrix4 *result, const VmathMatrix4 *mat )
-{
-    vec_float4 tmp0, tmp1, tmp2, tmp3, res0, res1, res2, res3;
-    tmp0 = vec_mergeh( mat->col0.vec128, mat->col2.vec128 );
-    tmp1 = vec_mergeh( mat->col1.vec128, mat->col3.vec128 );
-    tmp2 = vec_mergel( mat->col0.vec128, mat->col2.vec128 );
-    tmp3 = vec_mergel( mat->col1.vec128, mat->col3.vec128 );
-    res0 = vec_mergeh( tmp0, tmp1 );
-    res1 = vec_mergel( tmp0, tmp1 );
-    res2 = vec_mergeh( tmp2, tmp3 );
-    res3 = vec_mergel( tmp2, tmp3 );
-    result->col0.vec128 = res0;
-    result->col1.vec128 = res1;
-    result->col2.vec128 = res2;
-    result->col3.vec128 = res3;
-}
-
-static inline void vmathM4Inverse( VmathMatrix4 *result, const VmathMatrix4 *mat )
-{
-    /* function implementation based on code from STIDC SDK:           */
-    /* --------------------------------------------------------------  */
-    /* PLEASE DO NOT MODIFY THIS SECTION                               */
-    /* This prolog section is automatically generated.                 */
-    /*                                                                 */
-    /* (C)Copyright                                                    */
-    /* Sony Computer Entertainment, Inc.,                              */
-    /* Toshiba Corporation,                                            */
-    /* International Business Machines Corporation,                    */
-    /* 2001,2002.                                                      */
-    /* S/T/I Confidential Information                                  */
-    /* --------------------------------------------------------------  */
-    vector float in0, in1, in2, in3;
-    vector float tmp0, tmp1, tmp2, tmp3;
-    vector float cof0, cof1, cof2, cof3;
-    vector float t0, t1, t2, t3;
-    vector float t01, t02, t03, t12, t23;
-    vector float t1r, t2r;
-    vector float t01r, t02r, t03r, t12r, t23r;
-    vector float t1r3, t1r3r;
-    vector float det, det0, det1, det2, det3, invdet;
-    vector float vzero = (vector float){0.0};
-    in0 = mat->col0.vec128;
-    in1 = mat->col1.vec128;
-    in2 = mat->col2.vec128;
-    in3 = mat->col3.vec128;
-    /* Perform transform of the input matrix of the form:
-     *    A B C D
-     *    E F G H
-     *    I J K L
-     *    M N O P
-     *
-     * The pseudo transpose of the input matrix is trans:
-     *    A E I M
-     *    J N B F
-     *    C G K O
-     *    L P D H
-     */
-    tmp0 = vec_perm(in0, in1, _VECTORMATH_PERM_XAZC);	/* A E C G */
-    tmp1 = vec_perm(in2, in3, _VECTORMATH_PERM_XAZC);	/* I M K O */
-    tmp2 = vec_perm(in0, in1, _VECTORMATH_PERM_YBWD);	/* B F D H */
-    tmp3 = vec_perm(in2, in3, _VECTORMATH_PERM_YBWD);	/* J N L P */
-    t0 = vec_perm(tmp0, tmp1, _VECTORMATH_PERM_XYAB);	/* A E I M */
-    t1 = vec_perm(tmp3, tmp2, _VECTORMATH_PERM_XYAB);	/* J N B F */
-    t2 = vec_perm(tmp0, tmp1, _VECTORMATH_PERM_ZWCD);	/* C G K O */
-    t3 = vec_perm(tmp3, tmp2, _VECTORMATH_PERM_ZWCD);	/* L P D H */
-    /* Generate a cofactor matrix. The computed cofactors reside in
-     * cof0, cof1, cof2, cof3.
-     */
-    t23 = vec_madd(t2, t3, vzero);		/* CL GP KD OH */
-    t23 = vec_perm(t23, t23, _VECTORMATH_PERM_YXWZ);	/* GP CL OH KD */
-    cof0 = vec_nmsub(t1, t23, vzero);		/* -(JGP NCL FOH BKD) */
-    cof1 = vec_nmsub(t0, t23, vzero);		/* -(AGP ECL IOH MKD) */
-    t23r = vec_sld(t23, t23, 8);			/* OH KD GP CL */
-    cof0 = vec_madd(t1, t23r, cof0);		/* JOH NKD BGP FCL + cof0 */
-    cof1 = vec_madd(t0, t23r, cof1);		/* AOH EKD IGP MCL + cof1 */
-    cof1 = vec_sld(cof1, cof1, 8);		/* IGP MCL AOH EKD - IOH MKD AGP ECL */
-    t12 = vec_madd(t1, t2, vzero);		/* JC NG BK FO */
-    t12 = vec_perm(t12, t12, _VECTORMATH_PERM_YXWZ);	/* NG JC FO BK */
-    cof0 = vec_madd(t3, t12, cof0);		/* LNG PJC DFO HBK + cof0 */
-    cof3 = vec_madd(t0, t12, vzero);		/* ANG EJC IFO MBK */
-    t12r = vec_sld(t12, t12, 8);			/* FO BK NG JC */
-    cof0 = vec_nmsub(t3, t12r, cof0);		/* cof0 - LFO PBK DNG HJC */
-    cof3 = vec_nmsub(t0, t12r, cof3);		/* cof3 - AFO EBK ING MJC */
-    cof3 = vec_sld(cof3, cof3, 8);		/* ING MJC AFO EBK - IFO MBK ANG EJC */
-    t1r = vec_sld(t1, t1, 8);			/* B F J N */
-    t2r = vec_sld(t2, t2, 8);			/* K O C G */
-    t1r3 = vec_madd(t1r, t3, vzero);		/* BL FP JD NH */
-    t1r3 = vec_perm(t1r3, t1r3, _VECTORMATH_PERM_YXWZ);	/* FP BL NH JD */
-    cof0 = vec_madd(t2r, t1r3, cof0);		/* KFP OBL CNH GJD + cof0 */
-    cof2 = vec_madd(t0, t1r3, vzero);		/* AFP EBL INH MJD */
-    t1r3r = vec_sld(t1r3, t1r3, 8);		/* NH JD FP BL */
-    cof0 = vec_nmsub(t2r, t1r3r, cof0);		/* cof0 - KNH OJD CFP GBL */
-    cof2 = vec_nmsub(t0, t1r3r, cof2);		/* cof2 - ANH EJD IFP MBL */
-    cof2 = vec_sld(cof2, cof2, 8);		/* IFP MBL ANH EJD - INH MJD AFP EBL */
-    t01 = vec_madd(t0, t1, vzero);		/* AJ EN IB MF */
-    t01 = vec_perm(t01, t01, _VECTORMATH_PERM_YXWZ);	/* EN AJ MF IB */
-    cof2 = vec_nmsub(t3, t01, cof2);		/* cof2 - LEN PAJ DMF HIB */
-    cof3 = vec_madd(t2r, t01, cof3);		/* KEN OAJ CMF GIB + cof3 */ 
-    t01r = vec_sld(t01, t01, 8);			/* MF IB EN AJ */
-    cof2 = vec_madd(t3, t01r, cof2);		/* LMF PIB DEN HAJ + cof2 */
-    cof3 = vec_nmsub(t2r, t01r, cof3);		/* cof3 - KMF OIB CEN GAJ */
-    t03 = vec_madd(t0, t3, vzero);		/* AL EP ID MH */
-    t03 = vec_perm(t03, t03, _VECTORMATH_PERM_YXWZ);	/* EP AL MH ID */
-    cof1 = vec_nmsub(t2r, t03, cof1);		/* cof1 - KEP OAL CMH GID */
-    cof2 = vec_madd(t1, t03, cof2);		/* JEP NAL BMH FID + cof2 */
-    t03r = vec_sld(t03, t03, 8);			/* MH ID EP AL */
-    cof1 = vec_madd(t2r, t03r, cof1);		/* KMH OID CEP GAL + cof1 */
-    cof2 = vec_nmsub(t1, t03r, cof2);		/* cof2 - JMH NID BEP FAL */ 
-    t02 = vec_madd(t0, t2r, vzero);		/* AK EO IC MG */
-    t02 = vec_perm(t02, t02, _VECTORMATH_PERM_YXWZ);	/* E0 AK MG IC */
-    cof1 = vec_madd(t3, t02, cof1);		/* LEO PAK DMG HIC + cof1 */
-    cof3 = vec_nmsub(t1, t02, cof3);		/* cof3 - JEO NAK BMG FIC */
-    t02r = vec_sld(t02, t02, 8);			/* MG IC EO AK */
-    cof1 = vec_nmsub(t3, t02r, cof1);		/* cof1 - LMG PIC DEO HAK */
-    cof3 = vec_madd(t1, t02r, cof3);		/* JMG NIC BEO FAK + cof3 */
-    /* Compute the determinant of the matrix 
-     *
-     * det = sum_across(t0 * cof0);
-     *
-     * We perform a sum across the entire vector so that 
-     * we don't have to splat the result when multiplying the
-     * cofactors by the inverse of the determinant.
-     */
-    det  = vec_madd(t0, cof0, vzero);
-    det0 = vec_splat(det, 0);
-    det1 = vec_splat(det, 1);
-    det2 = vec_splat(det, 2);
-    det3 = vec_splat(det, 3);
-    det  = vec_add(det0, det1);
-    det2 = vec_add(det2, det3);
-    det  = vec_add(det, det2);
-    /* Compute the reciprocal of the determinant.
-     */
-    invdet = recipf4(det);
-    /* Multiply the cofactors by the reciprocal of the determinant.
-     */ 
-    result->col0.vec128 = vec_madd(cof0, invdet, vzero);
-    result->col1.vec128 = vec_madd(cof1, invdet, vzero);
-    result->col2.vec128 = vec_madd(cof2, invdet, vzero);
-    result->col3.vec128 = vec_madd(cof3, invdet, vzero);
-}
-
-static inline void vmathM4AffineInverse( VmathMatrix4 *result, const VmathMatrix4 *mat )
-{
-    VmathTransform3 affineMat, tmpT3_0;
-    VmathVector3 tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3;
-    vmathV4GetXYZ( &tmpV3_0, &mat->col0 );
-    vmathT3SetCol0( &affineMat, &tmpV3_0 );
-    vmathV4GetXYZ( &tmpV3_1, &mat->col1 );
-    vmathT3SetCol1( &affineMat, &tmpV3_1 );
-    vmathV4GetXYZ( &tmpV3_2, &mat->col2 );
-    vmathT3SetCol2( &affineMat, &tmpV3_2 );
-    vmathV4GetXYZ( &tmpV3_3, &mat->col3 );
-    vmathT3SetCol3( &affineMat, &tmpV3_3 );
-    vmathT3Inverse( &tmpT3_0, &affineMat );
-    vmathM4MakeFromT3( result, &tmpT3_0 );
-}
-
-static inline void vmathM4OrthoInverse( VmathMatrix4 *result, const VmathMatrix4 *mat )
-{
-    VmathTransform3 affineMat, tmpT3_0;
-    VmathVector3 tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3;
-    vmathV4GetXYZ( &tmpV3_0, &mat->col0 );
-    vmathT3SetCol0( &affineMat, &tmpV3_0 );
-    vmathV4GetXYZ( &tmpV3_1, &mat->col1 );
-    vmathT3SetCol1( &affineMat, &tmpV3_1 );
-    vmathV4GetXYZ( &tmpV3_2, &mat->col2 );
-    vmathT3SetCol2( &affineMat, &tmpV3_2 );
-    vmathV4GetXYZ( &tmpV3_3, &mat->col3 );
-    vmathT3SetCol3( &affineMat, &tmpV3_3 );
-    vmathT3OrthoInverse( &tmpT3_0, &affineMat );
-    vmathM4MakeFromT3( result, &tmpT3_0 );
-}
-
-static inline float vmathM4Determinant( const VmathMatrix4 *mat )
-{
-    /* function implementation based on code from STIDC SDK:           */
-    /* --------------------------------------------------------------  */
-    /* PLEASE DO NOT MODIFY THIS SECTION                               */
-    /* This prolog section is automatically generated.                 */
-    /*                                                                 */
-    /* (C)Copyright                                                    */
-    /* Sony Computer Entertainment, Inc.,                              */
-    /* Toshiba Corporation,                                            */
-    /* International Business Machines Corporation,                    */
-    /* 2001,2002.                                                      */
-    /* S/T/I Confidential Information                                  */
-    /* --------------------------------------------------------------  */
-    vector float in0, in1, in2, in3;
-    vector float tmp0, tmp1, tmp2, tmp3;
-    vector float cof0;
-    vector float t0, t1, t2, t3;
-    vector float t12, t23;
-    vector float t1r, t2r;
-    vector float t12r, t23r;
-    vector float t1r3, t1r3r;
-    vector float vzero = (vector float){0.0};
-    union { vec_float4 v; float s[4]; } tmp;
-    in0 = mat->col0.vec128;
-    in1 = mat->col1.vec128;
-    in2 = mat->col2.vec128;
-    in3 = mat->col3.vec128;
-    /* Perform transform of the input matrix of the form:
-     *    A B C D
-     *    E F G H
-     *    I J K L
-     *    M N O P
-     *
-     * The pseudo transpose of the input matrix is trans:
-     *    A E I M
-     *    J N B F
-     *    C G K O
-     *    L P D H
-     */
-    tmp0 = vec_perm(in0, in1, _VECTORMATH_PERM_XAZC);	/* A E C G */
-    tmp1 = vec_perm(in2, in3, _VECTORMATH_PERM_XAZC);	/* I M K O */
-    tmp2 = vec_perm(in0, in1, _VECTORMATH_PERM_YBWD);	/* B F D H */
-    tmp3 = vec_perm(in2, in3, _VECTORMATH_PERM_YBWD);	/* J N L P */
-    t0 = vec_perm(tmp0, tmp1, _VECTORMATH_PERM_XYAB);	/* A E I M */
-    t1 = vec_perm(tmp3, tmp2, _VECTORMATH_PERM_XYAB);	/* J N B F */
-    t2 = vec_perm(tmp0, tmp1, _VECTORMATH_PERM_ZWCD);	/* C G K O */
-    t3 = vec_perm(tmp3, tmp2, _VECTORMATH_PERM_ZWCD);	/* L P D H */
-    /* Generate a cofactor matrix. The computed cofactors reside in
-     * cof0, cof1, cof2, cof3.
-     */
-    t23 = vec_madd(t2, t3, vzero);		/* CL GP KD OH */
-    t23 = vec_perm(t23, t23, _VECTORMATH_PERM_YXWZ);	/* GP CL OH KD */
-    cof0 = vec_nmsub(t1, t23, vzero);		/* -(JGP NCL FOH BKD) */
-    t23r = vec_sld(t23, t23, 8);			/* OH KD GP CL */
-    cof0 = vec_madd(t1, t23r, cof0);		/* JOH NKD BGP FCL + cof0 */
-    t12 = vec_madd(t1, t2, vzero);		/* JC NG BK FO */
-    t12 = vec_perm(t12, t12, _VECTORMATH_PERM_YXWZ);	/* NG JC FO BK */
-    cof0 = vec_madd(t3, t12, cof0);		/* LNG PJC DFO HBK + cof0 */
-    t12r = vec_sld(t12, t12, 8);			/* FO BK NG JC */
-    cof0 = vec_nmsub(t3, t12r, cof0);		/* cof0 - LFO PBK DNG HJC */
-    t1r = vec_sld(t1, t1, 8);			/* B F J N */
-    t2r = vec_sld(t2, t2, 8);			/* K O C G */
-    t1r3 = vec_madd(t1r, t3, vzero);		/* BL FP JD NH */
-    t1r3 = vec_perm(t1r3, t1r3, _VECTORMATH_PERM_YXWZ);	/* FP BL NH JD */
-    cof0 = vec_madd(t2r, t1r3, cof0);		/* KFP OBL CNH GJD + cof0 */
-    t1r3r = vec_sld(t1r3, t1r3, 8);		/* NH JD FP BL */
-    cof0 = vec_nmsub(t2r, t1r3r, cof0);		/* cof0 - KNH OJD CFP GBL */
-    tmp.v = _vmathVfDot4(t0,cof0);
-    return tmp.s[0];
-}
-
-static inline void vmathM4Add( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 )
-{
-    vmathV4Add( &result->col0, &mat0->col0, &mat1->col0 );
-    vmathV4Add( &result->col1, &mat0->col1, &mat1->col1 );
-    vmathV4Add( &result->col2, &mat0->col2, &mat1->col2 );
-    vmathV4Add( &result->col3, &mat0->col3, &mat1->col3 );
-}
-
-static inline void vmathM4Sub( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 )
-{
-    vmathV4Sub( &result->col0, &mat0->col0, &mat1->col0 );
-    vmathV4Sub( &result->col1, &mat0->col1, &mat1->col1 );
-    vmathV4Sub( &result->col2, &mat0->col2, &mat1->col2 );
-    vmathV4Sub( &result->col3, &mat0->col3, &mat1->col3 );
-}
-
-static inline void vmathM4Neg( VmathMatrix4 *result, const VmathMatrix4 *mat )
-{
-    vmathV4Neg( &result->col0, &mat->col0 );
-    vmathV4Neg( &result->col1, &mat->col1 );
-    vmathV4Neg( &result->col2, &mat->col2 );
-    vmathV4Neg( &result->col3, &mat->col3 );
-}
-
-static inline void vmathM4AbsPerElem( VmathMatrix4 *result, const VmathMatrix4 *mat )
-{
-    vmathV4AbsPerElem( &result->col0, &mat->col0 );
-    vmathV4AbsPerElem( &result->col1, &mat->col1 );
-    vmathV4AbsPerElem( &result->col2, &mat->col2 );
-    vmathV4AbsPerElem( &result->col3, &mat->col3 );
-}
-
-static inline void vmathM4ScalarMul( VmathMatrix4 *result, const VmathMatrix4 *mat, float scalar )
-{
-    vmathV4ScalarMul( &result->col0, &mat->col0, scalar );
-    vmathV4ScalarMul( &result->col1, &mat->col1, scalar );
-    vmathV4ScalarMul( &result->col2, &mat->col2, scalar );
-    vmathV4ScalarMul( &result->col3, &mat->col3, scalar );
-}
-
-static inline void vmathM4MulV4( VmathVector4 *result, const VmathMatrix4 *mat, const VmathVector4 *vec )
-{
-    vec_float4 tmp0, tmp1, res;
-    vec_float4 xxxx, yyyy, zzzz, wwww;
-    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    xxxx = vec_splat( vec->vec128, 0 );
-    yyyy = vec_splat( vec->vec128, 1 );
-    zzzz = vec_splat( vec->vec128, 2 );
-    wwww = vec_splat( vec->vec128, 3 );
-    tmp0 = vec_madd( mat->col0.vec128, xxxx, zero );
-    tmp1 = vec_madd( mat->col1.vec128, yyyy, zero );
-    tmp0 = vec_madd( mat->col2.vec128, zzzz, tmp0 );
-    tmp1 = vec_madd( mat->col3.vec128, wwww, tmp1 );
-    res = vec_add( tmp0, tmp1 );
-    result->vec128 = res;
-}
-
-static inline void vmathM4MulV3( VmathVector4 *result, const VmathMatrix4 *mat, const VmathVector3 *vec )
-{
-    vec_float4 res;
-    vec_float4 xxxx, yyyy, zzzz;
-    xxxx = vec_splat( vec->vec128, 0 );
-    yyyy = vec_splat( vec->vec128, 1 );
-    zzzz = vec_splat( vec->vec128, 2 );
-    res = vec_madd( mat->col0.vec128, xxxx, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    res = vec_madd( mat->col1.vec128, yyyy, res );
-    res = vec_madd( mat->col2.vec128, zzzz, res );
-    result->vec128 = res;
-}
-
-static inline void vmathM4MulP3( VmathVector4 *result, const VmathMatrix4 *mat, const VmathPoint3 *pnt )
-{
-    vec_float4 tmp0, tmp1, res;
-    vec_float4 xxxx, yyyy, zzzz;
-    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    xxxx = vec_splat( pnt->vec128, 0 );
-    yyyy = vec_splat( pnt->vec128, 1 );
-    zzzz = vec_splat( pnt->vec128, 2 );
-    tmp0 = vec_madd( mat->col0.vec128, xxxx, zero );
-    tmp1 = vec_madd( mat->col1.vec128, yyyy, zero );
-    tmp0 = vec_madd( mat->col2.vec128, zzzz, tmp0 );
-    tmp1 = vec_add( mat->col3.vec128, tmp1 );
-    res = vec_add( tmp0, tmp1 );
-    result->vec128 = res;
-}
-
-static inline void vmathM4Mul( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 )
-{
-    VmathMatrix4 tmpResult;
-    vmathM4MulV4( &tmpResult.col0, mat0, &mat1->col0 );
-    vmathM4MulV4( &tmpResult.col1, mat0, &mat1->col1 );
-    vmathM4MulV4( &tmpResult.col2, mat0, &mat1->col2 );
-    vmathM4MulV4( &tmpResult.col3, mat0, &mat1->col3 );
-    vmathM4Copy( result, &tmpResult );
-}
-
-static inline void vmathM4MulT3( VmathMatrix4 *result, const VmathMatrix4 *mat, const VmathTransform3 *tfrm1 )
-{
-    VmathMatrix4 tmpResult;
-    VmathPoint3 tmpP3_0;
-    vmathM4MulV3( &tmpResult.col0, mat, &tfrm1->col0 );
-    vmathM4MulV3( &tmpResult.col1, mat, &tfrm1->col1 );
-    vmathM4MulV3( &tmpResult.col2, mat, &tfrm1->col2 );
-    vmathP3MakeFromV3( &tmpP3_0, &tfrm1->col3 );
-    vmathM4MulP3( &tmpResult.col3, mat, &tmpP3_0 );
-    vmathM4Copy( result, &tmpResult );
-}
-
-static inline void vmathM4MulPerElem( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 )
-{
-    vmathV4MulPerElem( &result->col0, &mat0->col0, &mat1->col0 );
-    vmathV4MulPerElem( &result->col1, &mat0->col1, &mat1->col1 );
-    vmathV4MulPerElem( &result->col2, &mat0->col2, &mat1->col2 );
-    vmathV4MulPerElem( &result->col3, &mat0->col3, &mat1->col3 );
-}
-
-static inline void vmathM4MakeIdentity( VmathMatrix4 *result )
-{
-    vmathV4MakeXAxis( &result->col0 );
-    vmathV4MakeYAxis( &result->col1 );
-    vmathV4MakeZAxis( &result->col2 );
-    vmathV4MakeWAxis( &result->col3 );
-}
-
-static inline void vmathM4SetUpper3x3( VmathMatrix4 *result, const VmathMatrix3 *mat3 )
-{
-    vmathV4SetXYZ( &result->col0, &mat3->col0 );
-    vmathV4SetXYZ( &result->col1, &mat3->col1 );
-    vmathV4SetXYZ( &result->col2, &mat3->col2 );
-}
-
-static inline void vmathM4GetUpper3x3( VmathMatrix3 *result, const VmathMatrix4 *mat )
-{
-    vmathV4GetXYZ( &result->col0, &mat->col0 );
-    vmathV4GetXYZ( &result->col1, &mat->col1 );
-    vmathV4GetXYZ( &result->col2, &mat->col2 );
-}
-
-static inline void vmathM4SetTranslation( VmathMatrix4 *result, const VmathVector3 *translateVec )
-{
-    vmathV4SetXYZ( &result->col3, translateVec );
-}
-
-static inline void vmathM4GetTranslation( VmathVector3 *result, const VmathMatrix4 *mat )
-{
-    vmathV4GetXYZ( result, &mat->col3 );
-}
-
-static inline void vmathM4MakeRotationX( VmathMatrix4 *result, float radians )
-{
-    vec_float4 s, c, res1, res2;
-    vec_uint4 select_y, select_z;
-    vec_float4 zero;
-    select_y = _VECTORMATH_MASK_0x0F00;
-    select_z = _VECTORMATH_MASK_0x00F0;
-    zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    sincosf4( _vmathVfSplatScalar(radians), &s, &c );
-    res1 = vec_sel( zero, c, select_y );
-    res1 = vec_sel( res1, s, select_z );
-    res2 = vec_sel( zero, negatef4(s), select_y );
-    res2 = vec_sel( res2, c, select_z );
-    vmathV4MakeXAxis( &result->col0 );
-    result->col1.vec128 = res1;
-    result->col2.vec128 = res2;
-    vmathV4MakeWAxis( &result->col3 );
-}
-
-static inline void vmathM4MakeRotationY( VmathMatrix4 *result, float radians )
-{
-    vec_float4 s, c, res0, res2;
-    vec_uint4 select_x, select_z;
-    vec_float4 zero;
-    select_x = _VECTORMATH_MASK_0xF000;
-    select_z = _VECTORMATH_MASK_0x00F0;
-    zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    sincosf4( _vmathVfSplatScalar(radians), &s, &c );
-    res0 = vec_sel( zero, c, select_x );
-    res0 = vec_sel( res0, negatef4(s), select_z );
-    res2 = vec_sel( zero, s, select_x );
-    res2 = vec_sel( res2, c, select_z );
-    result->col0.vec128 = res0;
-    vmathV4MakeYAxis( &result->col1 );
-    result->col2.vec128 = res2;
-    vmathV4MakeWAxis( &result->col3 );
-}
-
-static inline void vmathM4MakeRotationZ( VmathMatrix4 *result, float radians )
-{
-    vec_float4 s, c, res0, res1;
-    vec_uint4 select_x, select_y;
-    vec_float4 zero;
-    select_x = _VECTORMATH_MASK_0xF000;
-    select_y = _VECTORMATH_MASK_0x0F00;
-    zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    sincosf4( _vmathVfSplatScalar(radians), &s, &c );
-    res0 = vec_sel( zero, c, select_x );
-    res0 = vec_sel( res0, s, select_y );
-    res1 = vec_sel( zero, negatef4(s), select_x );
-    res1 = vec_sel( res1, c, select_y );
-    result->col0.vec128 = res0;
-    result->col1.vec128 = res1;
-    vmathV4MakeZAxis( &result->col2 );
-    vmathV4MakeWAxis( &result->col3 );
-}
-
-static inline void vmathM4MakeRotationZYX( VmathMatrix4 *result, const VmathVector3 *radiansXYZ )
-{
-    VmathVector4 tmpV4_0;
-    vec_float4 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
-    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    vmathV4MakeFromV3Scalar( &tmpV4_0, radiansXYZ, 0.0f );
-    angles = tmpV4_0.vec128;
-    sincosf4( angles, &s, &c );
-    negS = negatef4( s );
-    Z0 = vec_mergel( c, s );
-    Z1 = vec_mergel( negS, c );
-    Z1 = vec_andc( Z1, (vec_float4)_VECTORMATH_MASK_0x000F );
-    Y0 = vec_perm( negS, c, _VECTORMATH_PERM_BBYX );
-    Y1 = vec_perm( c, s, _VECTORMATH_PERM_BBYX );
-    X0 = vec_splat( s, 0 );
-    X1 = vec_splat( c, 0 );
-    tmp = vec_madd( Z0, Y1, zero );
-    result->col0.vec128 = vec_madd( Z0, Y0, zero );
-    result->col1.vec128 = vec_madd( Z1, X1, vec_madd( tmp, X0, zero ) );
-    result->col2.vec128 = vec_nmsub( Z1, X0, vec_madd( tmp, X1, zero ) );
-    vmathV4MakeWAxis( &result->col3 );
-}
-
-static inline void vmathM4MakeRotationAxis( VmathMatrix4 *result, float radians, const VmathVector3 *unitVec )
-{
-    vec_float4 axis, s, c, oneMinusC, axisS, negAxisS, xxxx, yyyy, zzzz, tmp0, tmp1, tmp2, zeroW;
-    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    axis = unitVec->vec128;
-    sincosf4( (vec_float4){radians,radians,radians,radians}, &s, &c );
-    xxxx = vec_splat( axis, 0 );
-    yyyy = vec_splat( axis, 1 );
-    zzzz = vec_splat( axis, 2 );
-    oneMinusC = vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), c );
-    axisS = vec_madd( axis, s, zero );
-    negAxisS = negatef4( axisS );
-    tmp0 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_XZBX );
-    tmp1 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_CXXX );
-    tmp2 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_YAXX );
-    tmp0 = vec_sel( tmp0, c, _VECTORMATH_MASK_0xF000 );
-    tmp1 = vec_sel( tmp1, c, _VECTORMATH_MASK_0x0F00 );
-    tmp2 = vec_sel( tmp2, c, _VECTORMATH_MASK_0x00F0 );
-    zeroW = (vec_float4)_VECTORMATH_MASK_0x000F;
-    axis = vec_andc( axis, zeroW );
-    tmp0 = vec_andc( tmp0, zeroW );
-    tmp1 = vec_andc( tmp1, zeroW );
-    tmp2 = vec_andc( tmp2, zeroW );
-    result->col0.vec128 = vec_madd( vec_madd( axis, xxxx, zero ), oneMinusC, tmp0 );
-    result->col1.vec128 = vec_madd( vec_madd( axis, yyyy, zero ), oneMinusC, tmp1 );
-    result->col2.vec128 = vec_madd( vec_madd( axis, zzzz, zero ), oneMinusC, tmp2 );
-    vmathV4MakeWAxis( &result->col3 );
-}
-
-static inline void vmathM4MakeRotationQ( VmathMatrix4 *result, const VmathQuat *unitQuat )
-{
-    VmathTransform3 tmpT3_0;
-    vmathT3MakeRotationQ( &tmpT3_0, unitQuat );
-    vmathM4MakeFromT3( result, &tmpT3_0 );
-}
-
-static inline void vmathM4MakeScale( VmathMatrix4 *result, const VmathVector3 *scaleVec )
-{
-    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    result->col0.vec128 = vec_sel( zero, scaleVec->vec128, _VECTORMATH_MASK_0xF000 );
-    result->col1.vec128 = vec_sel( zero, scaleVec->vec128, _VECTORMATH_MASK_0x0F00 );
-    result->col2.vec128 = vec_sel( zero, scaleVec->vec128, _VECTORMATH_MASK_0x00F0 );
-    vmathV4MakeWAxis( &result->col3 );
-}
-
-static inline void vmathM4AppendScale( VmathMatrix4 *result, const VmathMatrix4 *mat, const VmathVector3 *scaleVec )
-{
-    vmathV4ScalarMul( &result->col0, &mat->col0, vmathV3GetX( scaleVec ) );
-    vmathV4ScalarMul( &result->col1, &mat->col1, vmathV3GetY( scaleVec ) );
-    vmathV4ScalarMul( &result->col2, &mat->col2, vmathV3GetZ( scaleVec ) );
-    vmathV4Copy( &result->col3, &mat->col3 );
-}
-
-static inline void vmathM4PrependScale( VmathMatrix4 *result, const VmathVector3 *scaleVec, const VmathMatrix4 *mat )
-{
-    VmathVector4 scale4;
-    vmathV4MakeFromV3Scalar( &scale4, scaleVec, 1.0f );
-    vmathV4MulPerElem( &result->col0, &mat->col0, &scale4 );
-    vmathV4MulPerElem( &result->col1, &mat->col1, &scale4 );
-    vmathV4MulPerElem( &result->col2, &mat->col2, &scale4 );
-    vmathV4MulPerElem( &result->col3, &mat->col3, &scale4 );
-}
-
-static inline void vmathM4MakeTranslation( VmathMatrix4 *result, const VmathVector3 *translateVec )
-{
-    vmathV4MakeXAxis( &result->col0 );
-    vmathV4MakeYAxis( &result->col1 );
-    vmathV4MakeZAxis( &result->col2 );
-    vmathV4MakeFromV3Scalar( &result->col3, translateVec, 1.0f );
-}
-
-static inline void vmathM4MakeLookAt( VmathMatrix4 *result, const VmathPoint3 *eyePos, const VmathPoint3 *lookAtPos, const VmathVector3 *upVec )
-{
-    VmathMatrix4 m4EyeFrame;
-    VmathVector3 v3X, v3Y, v3Z, tmpV3_0, tmpV3_1;
-    VmathVector4 tmpV4_0, tmpV4_1, tmpV4_2, tmpV4_3;
-    vmathV3Normalize( &v3Y, upVec );
-    vmathP3Sub( &tmpV3_0, eyePos, lookAtPos );
-    vmathV3Normalize( &v3Z, &tmpV3_0 );
-    vmathV3Cross( &tmpV3_1, &v3Y, &v3Z );
-    vmathV3Normalize( &v3X, &tmpV3_1 );
-    vmathV3Cross( &v3Y, &v3Z, &v3X );
-    vmathV4MakeFromV3( &tmpV4_0, &v3X );
-    vmathV4MakeFromV3( &tmpV4_1, &v3Y );
-    vmathV4MakeFromV3( &tmpV4_2, &v3Z );
-    vmathV4MakeFromP3( &tmpV4_3, eyePos );
-    vmathM4MakeFromCols( &m4EyeFrame, &tmpV4_0, &tmpV4_1, &tmpV4_2, &tmpV4_3 );
-    vmathM4OrthoInverse( result, &m4EyeFrame );
-}
-
-static inline void vmathM4MakePerspective( VmathMatrix4 *result, float fovyRadians, float aspect, float zNear, float zFar )
-{
-    float f, rangeInv;
-    vec_float4 zero, col0, col1, col2, col3;
-    union { vec_float4 v; float s[4]; } tmp;
-    f = tanf( _VECTORMATH_PI_OVER_2 - fovyRadians * 0.5f );
-    rangeInv = 1.0f / ( zNear - zFar );
-    zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    tmp.v = zero;
-    tmp.s[0] = f / aspect;
-    col0 = tmp.v;
-    tmp.v = zero;
-    tmp.s[1] = f;
-    col1 = tmp.v;
-    tmp.v = zero;
-    tmp.s[2] = ( zNear + zFar ) * rangeInv;
-    tmp.s[3] = -1.0f;
-    col2 = tmp.v;
-    tmp.v = zero;
-    tmp.s[2] = zNear * zFar * rangeInv * 2.0f;
-    col3 = tmp.v;
-    result->col0.vec128 = col0;
-    result->col1.vec128 = col1;
-    result->col2.vec128 = col2;
-    result->col3.vec128 = col3;
-}
-
-static inline void vmathM4MakeFrustum( VmathMatrix4 *result, float left, float right, float bottom, float top, float zNear, float zFar )
-{
-    /* function implementation based on code from STIDC SDK:           */
-    /* --------------------------------------------------------------  */
-    /* PLEASE DO NOT MODIFY THIS SECTION                               */
-    /* This prolog section is automatically generated.                 */
-    /*                                                                 */
-    /* (C)Copyright                                                    */
-    /* Sony Computer Entertainment, Inc.,                              */
-    /* Toshiba Corporation,                                            */
-    /* International Business Machines Corporation,                    */
-    /* 2001,2002.                                                      */
-    /* S/T/I Confidential Information                                  */
-    /* --------------------------------------------------------------  */
-    vec_float4 lbf, rtn;
-    vec_float4 diff, sum, inv_diff;
-    vec_float4 diagonal, column, near2;
-    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    union { vec_float4 v; float s[4]; } l, f, r, n, b, t;
-    l.s[0] = left;
-    f.s[0] = zFar;
-    r.s[0] = right;
-    n.s[0] = zNear;
-    b.s[0] = bottom;
-    t.s[0] = top;
-    lbf = vec_mergeh( l.v, f.v );
-    rtn = vec_mergeh( r.v, n.v );
-    lbf = vec_mergeh( lbf, b.v );
-    rtn = vec_mergeh( rtn, t.v );
-    diff = vec_sub( rtn, lbf );
-    sum  = vec_add( rtn, lbf );
-    inv_diff = recipf4( diff );
-    near2 = vec_splat( n.v, 0 );
-    near2 = vec_add( near2, near2 );
-    diagonal = vec_madd( near2, inv_diff, zero );
-    column = vec_madd( sum, inv_diff, zero );
-    result->col0.vec128 = vec_sel( zero, diagonal, _VECTORMATH_MASK_0xF000 );
-    result->col1.vec128 = vec_sel( zero, diagonal, _VECTORMATH_MASK_0x0F00 );
-    result->col2.vec128 = vec_sel( column, ((vec_float4){-1.0f,-1.0f,-1.0f,-1.0f}), _VECTORMATH_MASK_0x000F );
-    result->col3.vec128 = vec_sel( zero, vec_madd( diagonal, vec_splat( f.v, 0 ), zero ), _VECTORMATH_MASK_0x00F0 );
-}
-
-static inline void vmathM4MakeOrthographic( VmathMatrix4 *result, float left, float right, float bottom, float top, float zNear, float zFar )
-{
-    /* function implementation based on code from STIDC SDK:           */
-    /* --------------------------------------------------------------  */
-    /* PLEASE DO NOT MODIFY THIS SECTION                               */
-    /* This prolog section is automatically generated.                 */
-    /*                                                                 */
-    /* (C)Copyright                                                    */
-    /* Sony Computer Entertainment, Inc.,                              */
-    /* Toshiba Corporation,                                            */
-    /* International Business Machines Corporation,                    */
-    /* 2001,2002.                                                      */
-    /* S/T/I Confidential Information                                  */
-    /* --------------------------------------------------------------  */
-    vec_float4 lbf, rtn;
-    vec_float4 diff, sum, inv_diff, neg_inv_diff;
-    vec_float4 diagonal, column;
-    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    union { vec_float4 v; float s[4]; } l, f, r, n, b, t;
-    l.s[0] = left;
-    f.s[0] = zFar;
-    r.s[0] = right;
-    n.s[0] = zNear;
-    b.s[0] = bottom;
-    t.s[0] = top;
-    lbf = vec_mergeh( l.v, f.v );
-    rtn = vec_mergeh( r.v, n.v );
-    lbf = vec_mergeh( lbf, b.v );
-    rtn = vec_mergeh( rtn, t.v );
-    diff = vec_sub( rtn, lbf );
-    sum  = vec_add( rtn, lbf );
-    inv_diff = recipf4( diff );
-    neg_inv_diff = negatef4( inv_diff );
-    diagonal = vec_add( inv_diff, inv_diff );
-    column = vec_madd( sum, vec_sel( neg_inv_diff, inv_diff, _VECTORMATH_MASK_0x00F0 ), zero );
-    result->col0.vec128 = vec_sel( zero, diagonal, _VECTORMATH_MASK_0xF000 );
-    result->col1.vec128 = vec_sel( zero, diagonal, _VECTORMATH_MASK_0x0F00 );
-    result->col2.vec128 = vec_sel( zero, diagonal, _VECTORMATH_MASK_0x00F0 );
-    result->col3.vec128 = vec_sel( column, ((vec_float4){1.0f,1.0f,1.0f,1.0f}), _VECTORMATH_MASK_0x000F );
-}
-
-static inline void vmathM4Select( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1, unsigned int select1 )
-{
-    vmathV4Select( &result->col0, &mat0->col0, &mat1->col0, select1 );
-    vmathV4Select( &result->col1, &mat0->col1, &mat1->col1, select1 );
-    vmathV4Select( &result->col2, &mat0->col2, &mat1->col2, select1 );
-    vmathV4Select( &result->col3, &mat0->col3, &mat1->col3, select1 );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathM4Print( const VmathMatrix4 *mat )
-{
-    VmathVector4 tmpV4_0, tmpV4_1, tmpV4_2, tmpV4_3;
-    vmathM4GetRow( &tmpV4_0, mat, 0 );
-    vmathV4Print( &tmpV4_0 );
-    vmathM4GetRow( &tmpV4_1, mat, 1 );
-    vmathV4Print( &tmpV4_1 );
-    vmathM4GetRow( &tmpV4_2, mat, 2 );
-    vmathV4Print( &tmpV4_2 );
-    vmathM4GetRow( &tmpV4_3, mat, 3 );
-    vmathV4Print( &tmpV4_3 );
-}
-
-static inline void vmathM4Prints( const VmathMatrix4 *mat, const char *name )
-{
-    printf("%s:\n", name);
-    vmathM4Print( mat );
-}
-
-#endif
-
-static inline void vmathT3Copy( VmathTransform3 *result, const VmathTransform3 *tfrm )
-{
-    vmathV3Copy( &result->col0, &tfrm->col0 );
-    vmathV3Copy( &result->col1, &tfrm->col1 );
-    vmathV3Copy( &result->col2, &tfrm->col2 );
-    vmathV3Copy( &result->col3, &tfrm->col3 );
-}
-
-static inline void vmathT3MakeFromScalar( VmathTransform3 *result, float scalar )
-{
-    vmathV3MakeFromScalar( &result->col0, scalar );
-    vmathV3MakeFromScalar( &result->col1, scalar );
-    vmathV3MakeFromScalar( &result->col2, scalar );
-    vmathV3MakeFromScalar( &result->col3, scalar );
-}
-
-static inline void vmathT3MakeFromCols( VmathTransform3 *result, const VmathVector3 *_col0, const VmathVector3 *_col1, const VmathVector3 *_col2, const VmathVector3 *_col3 )
-{
-    vmathV3Copy( &result->col0, _col0 );
-    vmathV3Copy( &result->col1, _col1 );
-    vmathV3Copy( &result->col2, _col2 );
-    vmathV3Copy( &result->col3, _col3 );
-}
-
-static inline void vmathT3MakeFromM3V3( VmathTransform3 *result, const VmathMatrix3 *tfrm, const VmathVector3 *translateVec )
-{
-    vmathT3SetUpper3x3( result, tfrm );
-    vmathT3SetTranslation( result, translateVec );
-}
-
-static inline void vmathT3MakeFromQV3( VmathTransform3 *result, const VmathQuat *unitQuat, const VmathVector3 *translateVec )
-{
-    VmathMatrix3 tmpM3_0;
-    vmathM3MakeFromQ( &tmpM3_0, unitQuat );
-    vmathT3SetUpper3x3( result, &tmpM3_0 );
-    vmathT3SetTranslation( result, translateVec );
-}
-
-static inline void vmathT3SetCol0( VmathTransform3 *result, const VmathVector3 *_col0 )
-{
-    vmathV3Copy( &result->col0, _col0 );
-}
-
-static inline void vmathT3SetCol1( VmathTransform3 *result, const VmathVector3 *_col1 )
-{
-    vmathV3Copy( &result->col1, _col1 );
-}
-
-static inline void vmathT3SetCol2( VmathTransform3 *result, const VmathVector3 *_col2 )
-{
-    vmathV3Copy( &result->col2, _col2 );
-}
-
-static inline void vmathT3SetCol3( VmathTransform3 *result, const VmathVector3 *_col3 )
-{
-    vmathV3Copy( &result->col3, _col3 );
-}
-
-static inline void vmathT3SetCol( VmathTransform3 *result, int col, const VmathVector3 *vec )
-{
-    vmathV3Copy( (&result->col0 + col), vec );
-}
-
-static inline void vmathT3SetRow( VmathTransform3 *result, int row, const VmathVector4 *vec )
-{
-    vmathV3SetElem( &result->col0, row, vmathV4GetElem( vec, 0 ) );
-    vmathV3SetElem( &result->col1, row, vmathV4GetElem( vec, 1 ) );
-    vmathV3SetElem( &result->col2, row, vmathV4GetElem( vec, 2 ) );
-    vmathV3SetElem( &result->col3, row, vmathV4GetElem( vec, 3 ) );
-}
-
-static inline void vmathT3SetElem( VmathTransform3 *result, int col, int row, float val )
-{
-    VmathVector3 tmpV3_0;
-    vmathT3GetCol( &tmpV3_0, result, col );
-    vmathV3SetElem( &tmpV3_0, row, val );
-    vmathT3SetCol( result, col, &tmpV3_0 );
-}
-
-static inline float vmathT3GetElem( const VmathTransform3 *tfrm, int col, int row )
-{
-    VmathVector3 tmpV3_0;
-    vmathT3GetCol( &tmpV3_0, tfrm, col );
-    return vmathV3GetElem( &tmpV3_0, row );
-}
-
-static inline void vmathT3GetCol0( VmathVector3 *result, const VmathTransform3 *tfrm )
-{
-    vmathV3Copy( result, &tfrm->col0 );
-}
-
-static inline void vmathT3GetCol1( VmathVector3 *result, const VmathTransform3 *tfrm )
-{
-    vmathV3Copy( result, &tfrm->col1 );
-}
-
-static inline void vmathT3GetCol2( VmathVector3 *result, const VmathTransform3 *tfrm )
-{
-    vmathV3Copy( result, &tfrm->col2 );
-}
-
-static inline void vmathT3GetCol3( VmathVector3 *result, const VmathTransform3 *tfrm )
-{
-    vmathV3Copy( result, &tfrm->col3 );
-}
-
-static inline void vmathT3GetCol( VmathVector3 *result, const VmathTransform3 *tfrm, int col )
-{
-    vmathV3Copy( result, (&tfrm->col0 + col) );
-}
-
-static inline void vmathT3GetRow( VmathVector4 *result, const VmathTransform3 *tfrm, int row )
-{
-    vmathV4MakeFromElems( result, vmathV3GetElem( &tfrm->col0, row ), vmathV3GetElem( &tfrm->col1, row ), vmathV3GetElem( &tfrm->col2, row ), vmathV3GetElem( &tfrm->col3, row ) );
-}
-
-static inline void vmathT3Inverse( VmathTransform3 *result, const VmathTransform3 *tfrm )
-{
-    vec_float4 inv0, inv1, inv2, inv3;
-    vec_float4 tmp0, tmp1, tmp2, tmp3, tmp4, dot, invdet;
-    vec_float4 xxxx, yyyy, zzzz;
-    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    tmp2 = _vmathVfCross( tfrm->col0.vec128, tfrm->col1.vec128 );
-    tmp0 = _vmathVfCross( tfrm->col1.vec128, tfrm->col2.vec128 );
-    tmp1 = _vmathVfCross( tfrm->col2.vec128, tfrm->col0.vec128 );
-    inv3 = negatef4( tfrm->col3.vec128 );
-    dot = _vmathVfDot3( tmp2, tfrm->col2.vec128 );
-    dot = vec_splat( dot, 0 );
-    invdet = recipf4( dot );
-    tmp3 = vec_mergeh( tmp0, tmp2 );
-    tmp4 = vec_mergel( tmp0, tmp2 );
-    inv0 = vec_mergeh( tmp3, tmp1 );
-    xxxx = vec_splat( inv3, 0 );
-    inv1 = vec_perm( tmp3, tmp1, _VECTORMATH_PERM_ZBWX );
-    inv2 = vec_perm( tmp4, tmp1, _VECTORMATH_PERM_XCYX );
-    yyyy = vec_splat( inv3, 1 );
-    zzzz = vec_splat( inv3, 2 );
-    inv3 = vec_madd( inv0, xxxx, zero );
-    inv3 = vec_madd( inv1, yyyy, inv3 );
-    inv3 = vec_madd( inv2, zzzz, inv3 );
-    inv0 = vec_madd( inv0, invdet, zero );
-    inv1 = vec_madd( inv1, invdet, zero );
-    inv2 = vec_madd( inv2, invdet, zero );
-    inv3 = vec_madd( inv3, invdet, zero );
-    result->col0.vec128 = inv0;
-    result->col1.vec128 = inv1;
-    result->col2.vec128 = inv2;
-    result->col3.vec128 = inv3;
-}
-
-static inline void vmathT3OrthoInverse( VmathTransform3 *result, const VmathTransform3 *tfrm )
-{
-    vec_float4 inv0, inv1, inv2, inv3;
-    vec_float4 tmp0, tmp1;
-    vec_float4 xxxx, yyyy, zzzz;
-    tmp0 = vec_mergeh( tfrm->col0.vec128, tfrm->col2.vec128 );
-    tmp1 = vec_mergel( tfrm->col0.vec128, tfrm->col2.vec128 );
-    inv3 = negatef4( tfrm->col3.vec128 );
-    inv0 = vec_mergeh( tmp0, tfrm->col1.vec128 );
-    xxxx = vec_splat( inv3, 0 );
-    inv1 = vec_perm( tmp0, tfrm->col1.vec128, _VECTORMATH_PERM_ZBWX );
-    inv2 = vec_perm( tmp1, tfrm->col1.vec128, _VECTORMATH_PERM_XCYX );
-    yyyy = vec_splat( inv3, 1 );
-    zzzz = vec_splat( inv3, 2 );
-    inv3 = vec_madd( inv0, xxxx, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    inv3 = vec_madd( inv1, yyyy, inv3 );
-    inv3 = vec_madd( inv2, zzzz, inv3 );
-    result->col0.vec128 = inv0;
-    result->col1.vec128 = inv1;
-    result->col2.vec128 = inv2;
-    result->col3.vec128 = inv3;
-}
-
-static inline void vmathT3AbsPerElem( VmathTransform3 *result, const VmathTransform3 *tfrm )
-{
-    vmathV3AbsPerElem( &result->col0, &tfrm->col0 );
-    vmathV3AbsPerElem( &result->col1, &tfrm->col1 );
-    vmathV3AbsPerElem( &result->col2, &tfrm->col2 );
-    vmathV3AbsPerElem( &result->col3, &tfrm->col3 );
-}
-
-static inline void vmathT3MulV3( VmathVector3 *result, const VmathTransform3 *tfrm, const VmathVector3 *vec )
-{
-    vec_float4 res;
-    vec_float4 xxxx, yyyy, zzzz;
-    xxxx = vec_splat( vec->vec128, 0 );
-    yyyy = vec_splat( vec->vec128, 1 );
-    zzzz = vec_splat( vec->vec128, 2 );
-    res = vec_madd( tfrm->col0.vec128, xxxx, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    res = vec_madd( tfrm->col1.vec128, yyyy, res );
-    res = vec_madd( tfrm->col2.vec128, zzzz, res );
-    result->vec128 = res;
-}
-
-static inline void vmathT3MulP3( VmathPoint3 *result, const VmathTransform3 *tfrm, const VmathPoint3 *pnt )
-{
-    vec_float4 tmp0, tmp1, res;
-    vec_float4 xxxx, yyyy, zzzz;
-    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    xxxx = vec_splat( pnt->vec128, 0 );
-    yyyy = vec_splat( pnt->vec128, 1 );
-    zzzz = vec_splat( pnt->vec128, 2 );
-    tmp0 = vec_madd( tfrm->col0.vec128, xxxx, zero );
-    tmp1 = vec_madd( tfrm->col1.vec128, yyyy, zero );
-    tmp0 = vec_madd( tfrm->col2.vec128, zzzz, tmp0 );
-    tmp1 = vec_add( tfrm->col3.vec128, tmp1 );
-    res = vec_add( tmp0, tmp1 );
-    result->vec128 = res;
-}
-
-static inline void vmathT3Mul( VmathTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1 )
-{
-    VmathTransform3 tmpResult;
-    VmathPoint3 tmpP3_0, tmpP3_1;
-    vmathT3MulV3( &tmpResult.col0, tfrm0, &tfrm1->col0 );
-    vmathT3MulV3( &tmpResult.col1, tfrm0, &tfrm1->col1 );
-    vmathT3MulV3( &tmpResult.col2, tfrm0, &tfrm1->col2 );
-    vmathP3MakeFromV3( &tmpP3_0, &tfrm1->col3 );
-    vmathT3MulP3( &tmpP3_1, tfrm0, &tmpP3_0 );
-    vmathV3MakeFromP3( &tmpResult.col3, &tmpP3_1 );
-    vmathT3Copy( result, &tmpResult );
-}
-
-static inline void vmathT3MulPerElem( VmathTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1 )
-{
-    vmathV3MulPerElem( &result->col0, &tfrm0->col0, &tfrm1->col0 );
-    vmathV3MulPerElem( &result->col1, &tfrm0->col1, &tfrm1->col1 );
-    vmathV3MulPerElem( &result->col2, &tfrm0->col2, &tfrm1->col2 );
-    vmathV3MulPerElem( &result->col3, &tfrm0->col3, &tfrm1->col3 );
-}
-
-static inline void vmathT3MakeIdentity( VmathTransform3 *result )
-{
-    vmathV3MakeXAxis( &result->col0 );
-    vmathV3MakeYAxis( &result->col1 );
-    vmathV3MakeZAxis( &result->col2 );
-    vmathV3MakeFromScalar( &result->col3, 0.0f );
-}
-
-static inline void vmathT3SetUpper3x3( VmathTransform3 *result, const VmathMatrix3 *tfrm )
-{
-    vmathV3Copy( &result->col0, &tfrm->col0 );
-    vmathV3Copy( &result->col1, &tfrm->col1 );
-    vmathV3Copy( &result->col2, &tfrm->col2 );
-}
-
-static inline void vmathT3GetUpper3x3( VmathMatrix3 *result, const VmathTransform3 *tfrm )
-{
-    vmathM3MakeFromCols( result, &tfrm->col0, &tfrm->col1, &tfrm->col2 );
-}
-
-static inline void vmathT3SetTranslation( VmathTransform3 *result, const VmathVector3 *translateVec )
-{
-    vmathV3Copy( &result->col3, translateVec );
-}
-
-static inline void vmathT3GetTranslation( VmathVector3 *result, const VmathTransform3 *tfrm )
-{
-    vmathV3Copy( result, &tfrm->col3 );
-}
-
-static inline void vmathT3MakeRotationX( VmathTransform3 *result, float radians )
-{
-    vec_float4 s, c, res1, res2;
-    vec_uint4 select_y, select_z;
-    vec_float4 zero;
-    select_y = _VECTORMATH_MASK_0x0F00;
-    select_z = _VECTORMATH_MASK_0x00F0;
-    zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    sincosf4( _vmathVfSplatScalar(radians), &s, &c );
-    res1 = vec_sel( zero, c, select_y );
-    res1 = vec_sel( res1, s, select_z );
-    res2 = vec_sel( zero, negatef4(s), select_y );
-    res2 = vec_sel( res2, c, select_z );
-    vmathV3MakeXAxis( &result->col0 );
-    result->col1.vec128 = res1;
-    result->col2.vec128 = res2;
-    vmathV3MakeFromScalar( &result->col3, 0.0f );
-}
-
-static inline void vmathT3MakeRotationY( VmathTransform3 *result, float radians )
-{
-    vec_float4 s, c, res0, res2;
-    vec_uint4 select_x, select_z;
-    vec_float4 zero;
-    select_x = _VECTORMATH_MASK_0xF000;
-    select_z = _VECTORMATH_MASK_0x00F0;
-    zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    sincosf4( _vmathVfSplatScalar(radians), &s, &c );
-    res0 = vec_sel( zero, c, select_x );
-    res0 = vec_sel( res0, negatef4(s), select_z );
-    res2 = vec_sel( zero, s, select_x );
-    res2 = vec_sel( res2, c, select_z );
-    result->col0.vec128 = res0;
-    vmathV3MakeYAxis( &result->col1 );
-    result->col2.vec128 = res2;
-    vmathV3MakeFromScalar( &result->col3, 0.0f );
-}
-
-static inline void vmathT3MakeRotationZ( VmathTransform3 *result, float radians )
-{
-    vec_float4 s, c, res0, res1;
-    vec_uint4 select_x, select_y;
-    vec_float4 zero;
-    select_x = _VECTORMATH_MASK_0xF000;
-    select_y = _VECTORMATH_MASK_0x0F00;
-    zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    sincosf4( _vmathVfSplatScalar(radians), &s, &c );
-    res0 = vec_sel( zero, c, select_x );
-    res0 = vec_sel( res0, s, select_y );
-    res1 = vec_sel( zero, negatef4(s), select_x );
-    res1 = vec_sel( res1, c, select_y );
-    result->col0.vec128 = res0;
-    result->col1.vec128 = res1;
-    vmathV3MakeZAxis( &result->col2 );
-    vmathV3MakeFromScalar( &result->col3, 0.0f );
-}
-
-static inline void vmathT3MakeRotationZYX( VmathTransform3 *result, const VmathVector3 *radiansXYZ )
-{
-    VmathVector4 tmpV4_0;
-    vec_float4 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
-    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    vmathV4MakeFromV3Scalar( &tmpV4_0, radiansXYZ, 0.0f );
-    angles = tmpV4_0.vec128;
-    sincosf4( angles, &s, &c );
-    negS = negatef4( s );
-    Z0 = vec_mergel( c, s );
-    Z1 = vec_mergel( negS, c );
-    Z1 = vec_andc( Z1, (vec_float4)_VECTORMATH_MASK_0x000F );
-    Y0 = vec_perm( negS, c, _VECTORMATH_PERM_BBYX );
-    Y1 = vec_perm( c, s, _VECTORMATH_PERM_BBYX );
-    X0 = vec_splat( s, 0 );
-    X1 = vec_splat( c, 0 );
-    tmp = vec_madd( Z0, Y1, zero );
-    result->col0.vec128 = vec_madd( Z0, Y0, zero );
-    result->col1.vec128 = vec_madd( Z1, X1, vec_madd( tmp, X0, zero ) );
-    result->col2.vec128 = vec_nmsub( Z1, X0, vec_madd( tmp, X1, zero ) );
-    vmathV3MakeFromScalar( &result->col3, 0.0f );
-}
-
-static inline void vmathT3MakeRotationAxis( VmathTransform3 *result, float radians, const VmathVector3 *unitVec )
-{
-    VmathMatrix3 tmpM3_0;
-    VmathVector3 tmpV3_0;
-    vmathM3MakeRotationAxis( &tmpM3_0, radians, unitVec );
-    vmathV3MakeFromScalar( &tmpV3_0, 0.0f );
-    vmathT3MakeFromM3V3( result, &tmpM3_0, &tmpV3_0 );
-}
-
-static inline void vmathT3MakeRotationQ( VmathTransform3 *result, const VmathQuat *unitQuat )
-{
-    VmathMatrix3 tmpM3_0;
-    VmathVector3 tmpV3_0;
-    vmathM3MakeFromQ( &tmpM3_0, unitQuat );
-    vmathV3MakeFromScalar( &tmpV3_0, 0.0f );
-    vmathT3MakeFromM3V3( result, &tmpM3_0, &tmpV3_0 );
-}
-
-static inline void vmathT3MakeScale( VmathTransform3 *result, const VmathVector3 *scaleVec )
-{
-    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    result->col0.vec128 = vec_sel( zero, scaleVec->vec128, _VECTORMATH_MASK_0xF000 );
-    result->col1.vec128 = vec_sel( zero, scaleVec->vec128, _VECTORMATH_MASK_0x0F00 );
-    result->col2.vec128 = vec_sel( zero, scaleVec->vec128, _VECTORMATH_MASK_0x00F0 );
-    vmathV3MakeFromScalar( &result->col3, 0.0f );
-}
-
-static inline void vmathT3AppendScale( VmathTransform3 *result, const VmathTransform3 *tfrm, const VmathVector3 *scaleVec )
-{
-    vmathV3ScalarMul( &result->col0, &tfrm->col0, vmathV3GetX( scaleVec ) );
-    vmathV3ScalarMul( &result->col1, &tfrm->col1, vmathV3GetY( scaleVec ) );
-    vmathV3ScalarMul( &result->col2, &tfrm->col2, vmathV3GetZ( scaleVec ) );
-    vmathV3Copy( &result->col3, &tfrm->col3 );
-}
-
-static inline void vmathT3PrependScale( VmathTransform3 *result, const VmathVector3 *scaleVec, const VmathTransform3 *tfrm )
-{
-    vmathV3MulPerElem( &result->col0, &tfrm->col0, scaleVec );
-    vmathV3MulPerElem( &result->col1, &tfrm->col1, scaleVec );
-    vmathV3MulPerElem( &result->col2, &tfrm->col2, scaleVec );
-    vmathV3MulPerElem( &result->col3, &tfrm->col3, scaleVec );
-}
-
-static inline void vmathT3MakeTranslation( VmathTransform3 *result, const VmathVector3 *translateVec )
-{
-    vmathV3MakeXAxis( &result->col0 );
-    vmathV3MakeYAxis( &result->col1 );
-    vmathV3MakeZAxis( &result->col2 );
-    vmathV3Copy( &result->col3, translateVec );
-}
-
-static inline void vmathT3Select( VmathTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1, unsigned int select1 )
-{
-    vmathV3Select( &result->col0, &tfrm0->col0, &tfrm1->col0, select1 );
-    vmathV3Select( &result->col1, &tfrm0->col1, &tfrm1->col1, select1 );
-    vmathV3Select( &result->col2, &tfrm0->col2, &tfrm1->col2, select1 );
-    vmathV3Select( &result->col3, &tfrm0->col3, &tfrm1->col3, select1 );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathT3Print( const VmathTransform3 *tfrm )
-{
-    VmathVector4 tmpV4_0, tmpV4_1, tmpV4_2;
-    vmathT3GetRow( &tmpV4_0, tfrm, 0 );
-    vmathV4Print( &tmpV4_0 );
-    vmathT3GetRow( &tmpV4_1, tfrm, 1 );
-    vmathV4Print( &tmpV4_1 );
-    vmathT3GetRow( &tmpV4_2, tfrm, 2 );
-    vmathV4Print( &tmpV4_2 );
-}
-
-static inline void vmathT3Prints( const VmathTransform3 *tfrm, const char *name )
-{
-    printf("%s:\n", name);
-    vmathT3Print( tfrm );
-}
-
-#endif
-
-static inline void vmathQMakeFromM3( VmathQuat *result, const VmathMatrix3 *tfrm )
-{
-    vec_float4 res;
-    vec_float4 col0, col1, col2;
-    vec_float4 xx_yy, xx_yy_zz_xx, yy_zz_xx_yy, zz_xx_yy_zz, diagSum, diagDiff;
-    vec_float4 zy_xz_yx, yz_zx_xy, sum, diff;
-    vec_float4 radicand, invSqrt, scale;
-    vec_float4 res0, res1, res2, res3;
-    vec_float4 xx, yy, zz;
-    vec_uint4 select_x = _VECTORMATH_MASK_0xF000;
-    vec_uint4 select_y = _VECTORMATH_MASK_0x0F00;
-    vec_uint4 select_z = _VECTORMATH_MASK_0x00F0;
-    vec_uint4 select_w = _VECTORMATH_MASK_0x000F;
-    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-
-    col0 = tfrm->col0.vec128;
-    col1 = tfrm->col1.vec128;
-    col2 = tfrm->col2.vec128;
-
-    /* four cases: */
-    /* trace > 0 */
-    /* else */
-    /*    xx largest diagonal element */
-    /*    yy largest diagonal element */
-    /*    zz largest diagonal element */
-
-    /* compute quaternion for each case */
-
-    xx_yy = vec_sel( col0, col1, select_y );
-    xx_yy_zz_xx = vec_perm( xx_yy, col2, _VECTORMATH_PERM_XYCX );
-    yy_zz_xx_yy = vec_perm( xx_yy, col2, _VECTORMATH_PERM_YCXY );
-    zz_xx_yy_zz = vec_perm( xx_yy, col2, _VECTORMATH_PERM_CXYC );
-
-    diagSum = vec_add( vec_add( xx_yy_zz_xx, yy_zz_xx_yy ), zz_xx_yy_zz );
-    diagDiff = vec_sub( vec_sub( xx_yy_zz_xx, yy_zz_xx_yy ), zz_xx_yy_zz );
-    radicand = vec_add( vec_sel( diagDiff, diagSum, select_w ), ((vec_float4){1.0f,1.0f,1.0f,1.0f}) );
-    invSqrt = rsqrtf4( radicand );
-
-    zy_xz_yx = vec_sel( col0, col1, select_z );
-    zy_xz_yx = vec_perm( zy_xz_yx, col2, _VECTORMATH_PERM_ZAYX );
-    yz_zx_xy = vec_sel( col0, col1, select_x );
-    yz_zx_xy = vec_perm( yz_zx_xy, col2, _VECTORMATH_PERM_BZXX );
-
-    sum = vec_add( zy_xz_yx, yz_zx_xy );
-    diff = vec_sub( zy_xz_yx, yz_zx_xy );
-
-    scale = vec_madd( invSqrt, ((vec_float4){0.5f,0.5f,0.5f,0.5f}), zero );
-    res0 = vec_perm( sum, diff, _VECTORMATH_PERM_XZYA );
-    res1 = vec_perm( sum, diff, _VECTORMATH_PERM_ZXXB );
-    res2 = vec_perm( sum, diff, _VECTORMATH_PERM_YXXC );
-    res3 = diff;
-    res0 = vec_sel( res0, radicand, select_x );
-    res1 = vec_sel( res1, radicand, select_y );
-    res2 = vec_sel( res2, radicand, select_z );
-    res3 = vec_sel( res3, radicand, select_w );
-    res0 = vec_madd( res0, vec_splat( scale, 0 ), zero );
-    res1 = vec_madd( res1, vec_splat( scale, 1 ), zero );
-    res2 = vec_madd( res2, vec_splat( scale, 2 ), zero );
-    res3 = vec_madd( res3, vec_splat( scale, 3 ), zero );
-
-    /* determine case and select answer */
-
-    xx = vec_splat( col0, 0 );
-    yy = vec_splat( col1, 1 );
-    zz = vec_splat( col2, 2 );
-    res = vec_sel( res0, res1, vec_cmpgt( yy, xx ) );
-    res = vec_sel( res, res2, vec_and( vec_cmpgt( zz, xx ), vec_cmpgt( zz, yy ) ) );
-    res = vec_sel( res, res3, vec_cmpgt( vec_splat( diagSum, 0 ), zero ) );
-    result->vec128 = res;
-}
-
-static inline void vmathV3Outer( VmathMatrix3 *result, const VmathVector3 *tfrm0, const VmathVector3 *tfrm1 )
-{
-    vmathV3ScalarMul( &result->col0, tfrm0, vmathV3GetX( tfrm1 ) );
-    vmathV3ScalarMul( &result->col1, tfrm0, vmathV3GetY( tfrm1 ) );
-    vmathV3ScalarMul( &result->col2, tfrm0, vmathV3GetZ( tfrm1 ) );
-}
-
-static inline void vmathV4Outer( VmathMatrix4 *result, const VmathVector4 *tfrm0, const VmathVector4 *tfrm1 )
-{
-    vmathV4ScalarMul( &result->col0, tfrm0, vmathV4GetX( tfrm1 ) );
-    vmathV4ScalarMul( &result->col1, tfrm0, vmathV4GetY( tfrm1 ) );
-    vmathV4ScalarMul( &result->col2, tfrm0, vmathV4GetZ( tfrm1 ) );
-    vmathV4ScalarMul( &result->col3, tfrm0, vmathV4GetW( tfrm1 ) );
-}
-
-static inline void vmathV3RowMul( VmathVector3 *result, const VmathVector3 *vec, const VmathMatrix3 *mat )
-{
-    vec_float4 tmp0, tmp1, mcol0, mcol1, mcol2, res;
-    vec_float4 xxxx, yyyy, zzzz;
-    tmp0 = vec_mergeh( mat->col0.vec128, mat->col2.vec128 );
-    tmp1 = vec_mergel( mat->col0.vec128, mat->col2.vec128 );
-    xxxx = vec_splat( vec->vec128, 0 );
-    mcol0 = vec_mergeh( tmp0, mat->col1.vec128 );
-    mcol1 = vec_perm( tmp0, mat->col1.vec128, _VECTORMATH_PERM_ZBWX );
-    mcol2 = vec_perm( tmp1, mat->col1.vec128, _VECTORMATH_PERM_XCYX );
-    yyyy = vec_splat( vec->vec128, 1 );
-    res = vec_madd( mcol0, xxxx, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    zzzz = vec_splat( vec->vec128, 2 );
-    res = vec_madd( mcol1, yyyy, res );
-    res = vec_madd( mcol2, zzzz, res );
-    result->vec128 = res;
-}
-
-static inline void vmathV3CrossMatrix( VmathMatrix3 *result, const VmathVector3 *vec )
-{
-    vec_float4 neg, res0, res1, res2;
-    neg = negatef4( vec->vec128 );
-    res0 = vec_perm( vec->vec128, neg, _VECTORMATH_PERM_XZBX );
-    res1 = vec_perm( vec->vec128, neg, _VECTORMATH_PERM_CXXX );
-    res2 = vec_perm( vec->vec128, neg, _VECTORMATH_PERM_YAXX );
-    res0 = vec_andc( res0, (vec_float4)_VECTORMATH_MASK_0xF000 );
-    res1 = vec_andc( res1, (vec_float4)_VECTORMATH_MASK_0x0F00 );
-    res2 = vec_andc( res2, (vec_float4)_VECTORMATH_MASK_0x00F0 );
-    result->col0.vec128 = res0;
-    result->col1.vec128 = res1;
-    result->col2.vec128 = res2;
-}
-
-static inline void vmathV3CrossMatrixMul( VmathMatrix3 *result, const VmathVector3 *vec, const VmathMatrix3 *mat )
-{
-    VmathVector3 tmpV3_0, tmpV3_1, tmpV3_2;
-    vmathV3Cross( &tmpV3_0, vec, &mat->col0 );
-    vmathV3Cross( &tmpV3_1, vec, &mat->col1 );
-    vmathV3Cross( &tmpV3_2, vec, &mat->col2 );
-    vmathM3MakeFromCols( result, &tmpV3_0, &tmpV3_1, &tmpV3_2 );
-}
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_MAT_AOS_C_H
+#define _VECTORMATH_MAT_AOS_C_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*-----------------------------------------------------------------------------
+ * Constants
+ * for shuffles, words of the first vec_perm operand are labeled [x,y,z,w] and words of the second operand [a,b,c,d]
+ */
+#define _VECTORMATH_PERM_ZBWX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B, _VECTORMATH_PERM_W, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_XCYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_C, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_XYAB ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A, _VECTORMATH_PERM_B })
+#define _VECTORMATH_PERM_ZWCD ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_W, _VECTORMATH_PERM_C, _VECTORMATH_PERM_D })
+#define _VECTORMATH_PERM_XZBX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B, _VECTORMATH_PERM_X })     
+#define _VECTORMATH_PERM_CXXX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_YAXX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_XAZC ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_A, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_C })
+#define _VECTORMATH_PERM_YXWZ ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X, _VECTORMATH_PERM_W, _VECTORMATH_PERM_Z })
+#define _VECTORMATH_PERM_YBWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_B, _VECTORMATH_PERM_W, _VECTORMATH_PERM_D })
+#define _VECTORMATH_PERM_XYCX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_C, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_YCXY ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y })
+#define _VECTORMATH_PERM_CXYC ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_C })
+#define _VECTORMATH_PERM_ZAYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_BZXX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_B, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_XZYA ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A })
+#define _VECTORMATH_PERM_ZXXB ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X, _VECTORMATH_PERM_B })
+#define _VECTORMATH_PERM_YXXC ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X, _VECTORMATH_PERM_C })
+#define _VECTORMATH_PERM_BBYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_B, _VECTORMATH_PERM_B, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PI_OVER_2 1.570796327f
+
+/*-----------------------------------------------------------------------------
+ * Definitions
+ */
+static inline void vmathM3Copy( VmathMatrix3 *result, const VmathMatrix3 *mat )
+{   /* Copy mat into result column by column (col0, then col1, then col2). */
+    vmathV3Copy( &result->col0, &mat->col0 );
+    vmathV3Copy( &result->col1, &mat->col1 );
+    vmathV3Copy( &result->col2, &mat->col2 );
+}
+
+static inline void vmathM3MakeFromScalar( VmathMatrix3 *result, float scalar )
+{   /* Build each of the three columns from the same scalar via vmathV3MakeFromScalar. */
+    vmathV3MakeFromScalar( &result->col0, scalar );
+    vmathV3MakeFromScalar( &result->col1, scalar );
+    vmathV3MakeFromScalar( &result->col2, scalar );
+}
+
+static inline void vmathM3MakeFromQ( VmathMatrix3 *result, const VmathQuat *unitQuat )
+{   /* Build a 3x3 matrix from unitQuat (as the name says, presumably expected to be unit length). */
+    vec_float4 xyzw_2, wwww, yzxw, zxyw, yzxw_2, zxyw_2;
+    vec_float4 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
+    vec_uint4 select_x = _VECTORMATH_MASK_0xF000;
+    vec_uint4 select_z = _VECTORMATH_MASK_0x00F0;
+    xyzw_2 = vec_add( unitQuat->vec128, unitQuat->vec128 );   /* q + q, i.e. 2*q */
+    wwww = vec_splat( unitQuat->vec128, 3 );   /* word 3 (w) of q in every lane */
+    yzxw = vec_perm( unitQuat->vec128, unitQuat->vec128, _VECTORMATH_PERM_YZXW );   /* lane order presumably as the mask name says */
+    zxyw = vec_perm( unitQuat->vec128, unitQuat->vec128, _VECTORMATH_PERM_ZXYW );
+    yzxw_2 = vec_perm( xyzw_2, xyzw_2, _VECTORMATH_PERM_YZXW );   /* same two shuffles applied to 2*q */
+    zxyw_2 = vec_perm( xyzw_2, xyzw_2, _VECTORMATH_PERM_ZXYW );
+    tmp0 = vec_madd( yzxw_2, wwww, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );   /* yzxw_2 * w */
+    tmp1 = vec_nmsub( yzxw, yzxw_2, ((vec_float4){1.0f,1.0f,1.0f,1.0f}) );   /* 1 - yzxw * yzxw_2 */
+    tmp2 = vec_madd( yzxw, xyzw_2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );   /* yzxw * xyzw_2 */
+    tmp0 = vec_madd( zxyw, xyzw_2, tmp0 );   /* tmp0 + zxyw * xyzw_2 */
+    tmp1 = vec_nmsub( zxyw, zxyw_2, tmp1 );   /* tmp1 - zxyw * zxyw_2 */
+    tmp2 = vec_nmsub( zxyw_2, wwww, tmp2 );   /* tmp2 - zxyw_2 * w */
+    tmp3 = vec_sel( tmp0, tmp1, select_x );   /* the three term vectors are then interleaved lane-wise ... */
+    tmp4 = vec_sel( tmp1, tmp2, select_x );
+    tmp5 = vec_sel( tmp2, tmp0, select_x );
+    result->col0.vec128 = vec_sel( tmp3, tmp2, select_z );   /* ... so each column takes one lane from each of tmp0/tmp1/tmp2 */
+    result->col1.vec128 = vec_sel( tmp4, tmp0, select_z );
+    result->col2.vec128 = vec_sel( tmp5, tmp1, select_z );
+}
+
+static inline void vmathM3MakeFromCols( VmathMatrix3 *result, const VmathVector3 *_col0, const VmathVector3 *_col1, const VmathVector3 *_col2 )
+{   /* Assemble result from three column vectors, copied in the order col0, col1, col2. */
+    vmathV3Copy( &result->col0, _col0 );
+    vmathV3Copy( &result->col1, _col1 );
+    vmathV3Copy( &result->col2, _col2 );
+}
+
+static inline void vmathM3SetCol0( VmathMatrix3 *result, const VmathVector3 *_col0 )
+{   /* Overwrite column 0 of result with *_col0; the other columns are not touched. */
+    vmathV3Copy( &result->col0, _col0 );
+}
+
+static inline void vmathM3SetCol1( VmathMatrix3 *result, const VmathVector3 *_col1 )
+{   /* Overwrite column 1 of result with *_col1; the other columns are not touched. */
+    vmathV3Copy( &result->col1, _col1 );
+}
+
+static inline void vmathM3SetCol2( VmathMatrix3 *result, const VmathVector3 *_col2 )
+{   /* Overwrite column 2 of result with *_col2; the other columns are not touched. */
+    vmathV3Copy( &result->col2, _col2 );
+}
+
+static inline void vmathM3SetCol( VmathMatrix3 *result, int col, const VmathVector3 *vec )
+{   /* Overwrite column `col` with *vec. col is not range-checked here. */
+    vmathV3Copy( (&result->col0 + col), vec );   /* indexes from col0; assumes col0..col2 are laid out contiguously -- TODO confirm in the struct definition */
+}
+
+static inline void vmathM3SetRow( VmathMatrix3 *result, int row, const VmathVector3 *vec )
+{   /* Write vec's elements 0, 1, 2 into element `row` of col0, col1, col2 respectively. */
+    vmathV3SetElem( &result->col0, row, vmathV3GetElem( vec, 0 ) );
+    vmathV3SetElem( &result->col1, row, vmathV3GetElem( vec, 1 ) );
+    vmathV3SetElem( &result->col2, row, vmathV3GetElem( vec, 2 ) );
+}
+
+static inline void vmathM3SetElem( VmathMatrix3 *result, int col, int row, float val )
+{
+    VmathVector3 column;                     /* scratch copy of the addressed column */
+    vmathM3GetCol( &column, result, col );   /* read the column out      */
+    vmathV3SetElem( &column, row, val );     /* patch the one element    */
+    vmathM3SetCol( result, col, &column );   /* write the column back    */
+}
+
+static inline float vmathM3GetElem( const VmathMatrix3 *mat, int col, int row )
+{
+    VmathVector3 column;   /* copy of column `col`; element `row` is read from it */
+    vmathM3GetCol( &column, mat, col );
+    return vmathV3GetElem( &column, row );
+}
+
+static inline void vmathM3GetCol0( VmathVector3 *result, const VmathMatrix3 *mat )
+{   /* Copy column 0 of mat into *result. */
+    vmathV3Copy( result, &mat->col0 );
+}
+
+static inline void vmathM3GetCol1( VmathVector3 *result, const VmathMatrix3 *mat )
+{   /* Copy column 1 of mat into *result. */
+    vmathV3Copy( result, &mat->col1 );
+}
+
+static inline void vmathM3GetCol2( VmathVector3 *result, const VmathMatrix3 *mat )
+{   /* Copy column 2 of mat into *result. */
+    vmathV3Copy( result, &mat->col2 );
+}
+
+static inline void vmathM3GetCol( VmathVector3 *result, const VmathMatrix3 *mat, int col )
+{   /* Copy column `col` of mat into *result. col is not range-checked here. */
+    vmathV3Copy( result, (&mat->col0 + col) );   /* indexes from col0; assumes col0..col2 are laid out contiguously -- TODO confirm in the struct definition */
+}
+
+static inline void vmathM3GetRow( VmathVector3 *result, const VmathMatrix3 *mat, int row )
+{   /* Gather element `row` of col0, col1 and col2 into one vector. */
+    vmathV3MakeFromElems( result, vmathV3GetElem( &mat->col0, row ), vmathV3GetElem( &mat->col1, row ), vmathV3GetElem( &mat->col2, row ) );
+}
+
+static inline void vmathM3Transpose( VmathMatrix3 *result, const VmathMatrix3 *mat )
+{
+    /* col0 and col2 are interleaved first; col1 is then woven in. All reads of mat finish before result is written. */
+    vec_float4 hi02 = vec_mergeh( mat->col0.vec128, mat->col2.vec128 );
+    vec_float4 lo02 = vec_mergel( mat->col0.vec128, mat->col2.vec128 );
+    vec_float4 row0 = vec_mergeh( hi02, mat->col1.vec128 );
+    vec_float4 row1 = vec_perm( hi02, mat->col1.vec128, _VECTORMATH_PERM_ZBWX );
+    vec_float4 row2 = vec_perm( lo02, mat->col1.vec128, _VECTORMATH_PERM_XCYX );
+    result->col0.vec128 = row0;
+    result->col1.vec128 = row1;
+    result->col2.vec128 = row2;
+}
+
+static inline void vmathM3Inverse( VmathMatrix3 *result, const VmathMatrix3 *mat )
+{
+    /* Inverse via cross products: the pairwise cross products of the columns
+     * are transposed (same merge/perm pattern as vmathM3Transpose) and scaled
+     * by the reciprocal of the determinant. A zero determinant is not checked. */
+    vec_float4 cross01, cross12, cross20, det, rcpDet, mergedHi, mergedLo, row0, row1, row2;
+    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    cross01 = _vmathVfCross( mat->col0.vec128, mat->col1.vec128 );
+    cross12 = _vmathVfCross( mat->col1.vec128, mat->col2.vec128 );
+    cross20 = _vmathVfCross( mat->col2.vec128, mat->col0.vec128 );
+    det = vec_splat( _vmathVfDot3( cross01, mat->col2.vec128 ), 0 );
+    rcpDet = recipf4( det );
+    mergedHi = vec_mergeh( cross12, cross01 );
+    mergedLo = vec_mergel( cross12, cross01 );
+    row0 = vec_mergeh( mergedHi, cross20 );
+    row1 = vec_perm( mergedHi, cross20, _VECTORMATH_PERM_ZBWX );
+    row2 = vec_perm( mergedLo, cross20, _VECTORMATH_PERM_XCYX );
+    /* scale each transposed row by 1/det as it is stored */
+    result->col0.vec128 = vec_madd( row0, rcpDet, zero );
+    result->col1.vec128 = vec_madd( row1, rcpDet, zero );
+    result->col2.vec128 = vec_madd( row2, rcpDet, zero );
+}
+
+static inline float vmathM3Determinant( const VmathMatrix3 *mat )
+{
+    VmathVector3 cross01;   /* receives col0 x col1 */
+    vmathV3Cross( &cross01, &mat->col0, &mat->col1 );
+    return vmathV3Dot( &mat->col2, &cross01 );   /* triple product col2 . (col0 x col1) */
+}
+
+static inline void vmathM3Add( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 )
+{   /* Column-wise sum: result column i = vmathV3Add of mat0 and mat1 column i. */
+    vmathV3Add( &result->col0, &mat0->col0, &mat1->col0 );
+    vmathV3Add( &result->col1, &mat0->col1, &mat1->col1 );
+    vmathV3Add( &result->col2, &mat0->col2, &mat1->col2 );
+}
+
+static inline void vmathM3Sub( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 )
+{   /* Column-wise difference: result column i = vmathV3Sub of mat0 and mat1 column i (mat0 is the first operand). */
+    vmathV3Sub( &result->col0, &mat0->col0, &mat1->col0 );
+    vmathV3Sub( &result->col1, &mat0->col1, &mat1->col1 );
+    vmathV3Sub( &result->col2, &mat0->col2, &mat1->col2 );
+}
+
+static inline void vmathM3Neg( VmathMatrix3 *result, const VmathMatrix3 *mat )
+{   /* Apply vmathV3Neg to each column of mat and store the results in result. */
+    vmathV3Neg( &result->col0, &mat->col0 );
+    vmathV3Neg( &result->col1, &mat->col1 );
+    vmathV3Neg( &result->col2, &mat->col2 );
+}
+
+static inline void vmathM3AbsPerElem( VmathMatrix3 *result, const VmathMatrix3 *mat )
+{   /* Apply vmathV3AbsPerElem to each column of mat and store the results in result. */
+    vmathV3AbsPerElem( &result->col0, &mat->col0 );
+    vmathV3AbsPerElem( &result->col1, &mat->col1 );
+    vmathV3AbsPerElem( &result->col2, &mat->col2 );
+}
+
+static inline void vmathM3ScalarMul( VmathMatrix3 *result, const VmathMatrix3 *mat, float scalar )
+{   /* Multiply every column of mat by the same scalar via vmathV3ScalarMul. */
+    vmathV3ScalarMul( &result->col0, &mat->col0, scalar );
+    vmathV3ScalarMul( &result->col1, &mat->col1, scalar );
+    vmathV3ScalarMul( &result->col2, &mat->col2, scalar );
+}
+
+static inline void vmathM3MulV3( VmathVector3 *result, const VmathMatrix3 *mat, const VmathVector3 *vec )
+{
+    /* Matrix * vector as a linear combination of the columns:
+     *   result = col0 * vec[0] + col1 * vec[1] + col2 * vec[2]
+     * accumulated through a chain of vec_madd calls that starts from zero. */
+    vec_float4 vx = vec_splat( vec->vec128, 0 );
+    vec_float4 vy = vec_splat( vec->vec128, 1 );
+    vec_float4 vz = vec_splat( vec->vec128, 2 );
+    vec_float4 acc = vec_madd( mat->col0.vec128, vx, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    acc = vec_madd( mat->col1.vec128, vy, acc );
+    acc = vec_madd( mat->col2.vec128, vz, acc );
+    result->vec128 = acc;
+}
+
+static inline void vmathM3Mul( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 )
+{
+    VmathMatrix3 product;   /* staged locally: result is written only after both inputs have been fully read */
+    vmathM3MulV3( &product.col0, mat0, &mat1->col0 );   /* column i of the product = mat0 * (column i of mat1) */
+    vmathM3MulV3( &product.col1, mat0, &mat1->col1 );
+    vmathM3MulV3( &product.col2, mat0, &mat1->col2 );
+    vmathM3Copy( result, &product );
+}
+
+static inline void vmathM3MulPerElem( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 )
+{   /* Element-wise (not matrix) product: each column pair goes through vmathV3MulPerElem. */
+    vmathV3MulPerElem( &result->col0, &mat0->col0, &mat1->col0 );
+    vmathV3MulPerElem( &result->col1, &mat0->col1, &mat1->col1 );
+    vmathV3MulPerElem( &result->col2, &mat0->col2, &mat1->col2 );
+}
+
+static inline void vmathM3MakeIdentity( VmathMatrix3 *result )
+{   /* Columns are set to the X, Y and Z axis vectors respectively. */
+    vmathV3MakeXAxis( &result->col0 );
+    vmathV3MakeYAxis( &result->col1 );
+    vmathV3MakeZAxis( &result->col2 );
+}
+
+static inline void vmathM3MakeRotationX( VmathMatrix3 *result, float radians )
+{
+    /* Rotation about the X axis by `radians`.
+     *   col0 = X axis (via vmathV3MakeXAxis)
+     *   col1 = ( 0,  cos, sin ) and col2 = ( 0, -sin, cos ), assuming the
+     *   0x0F00 / 0x00F0 masks pick the y / z lanes; all other lanes stay zero. */
+    vec_float4 sinA, cosA, colY, colZ;
+    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vec_uint4 maskY = _VECTORMATH_MASK_0x0F00;
+    vec_uint4 maskZ = _VECTORMATH_MASK_0x00F0;
+    sincosf4( _vmathVfSplatScalar(radians), &sinA, &cosA );
+    colY = vec_sel( vec_sel( zero, cosA, maskY ), sinA, maskZ );
+    colZ = vec_sel( vec_sel( zero, negatef4(sinA), maskY ), cosA, maskZ );
+    vmathV3MakeXAxis( &result->col0 );
+    result->col1.vec128 = colY;
+    result->col2.vec128 = colZ;
+}
+
+static inline void vmathM3MakeRotationY( VmathMatrix3 *result, float radians )
+{
+    /* Rotation about the Y axis by `radians`.
+     *   col0 = ( cos, 0, -sin ) and col2 = ( sin, 0, cos ), assuming the
+     *   0xF000 / 0x00F0 masks pick the x / z lanes; all other lanes stay zero.
+     *   col1 = Y axis (via vmathV3MakeYAxis) */
+    vec_float4 sinA, cosA, colX, colZ;
+    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vec_uint4 maskX = _VECTORMATH_MASK_0xF000;
+    vec_uint4 maskZ = _VECTORMATH_MASK_0x00F0;
+    sincosf4( _vmathVfSplatScalar(radians), &sinA, &cosA );
+    colX = vec_sel( vec_sel( zero, cosA, maskX ), negatef4(sinA), maskZ );
+    colZ = vec_sel( vec_sel( zero, sinA, maskX ), cosA, maskZ );
+    result->col0.vec128 = colX;
+    vmathV3MakeYAxis( &result->col1 );
+    result->col2.vec128 = colZ;
+}
+
+static inline void vmathM3MakeRotationZ( VmathMatrix3 *result, float radians )
+{
+    /* Rotation about the Z axis by `radians`.
+     *   col0 = ( cos, sin, 0 ) and col1 = ( -sin, cos, 0 ), assuming the
+     *   0xF000 / 0x0F00 masks pick the x / y lanes; all other lanes stay zero.
+     *   col2 = Z axis (via vmathV3MakeZAxis) */
+    vec_float4 sinA, cosA, colX, colY;
+    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vec_uint4 maskX = _VECTORMATH_MASK_0xF000;
+    vec_uint4 maskY = _VECTORMATH_MASK_0x0F00;
+    sincosf4( _vmathVfSplatScalar(radians), &sinA, &cosA );
+    colX = vec_sel( vec_sel( zero, cosA, maskX ), sinA, maskY );
+    colY = vec_sel( vec_sel( zero, negatef4(sinA), maskX ), cosA, maskY );
+    result->col0.vec128 = colX;
+    result->col1.vec128 = colY;
+    vmathV3MakeZAxis( &result->col2 );
+}
+
+static inline void vmathM3MakeRotationZYX( VmathMatrix3 *result, const VmathVector3 *radiansXYZ )
+{   /* Build a rotation from three angles (X, Y, Z in radiansXYZ) with one sincosf4 call over all of them. */
+    VmathVector4 tmpV4_0;
+    vec_float4 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
+    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vmathV4MakeFromV3Scalar( &tmpV4_0, radiansXYZ, 0.0f );   /* pad with a 0 angle in the 4th word */
+    angles = tmpV4_0.vec128;
+    sincosf4( angles, &s, &c );   /* presumably s = sin(angles), c = cos(angles) per lane */
+    negS = negatef4( s );
+    Z0 = vec_mergel( c, s );      /* ( cZ, sZ, c[3], s[3] ) */
+    Z1 = vec_mergel( negS, c );   /* ( -sZ, cZ, -s[3], c[3] ) */
+    Z1 = vec_andc( Z1, (vec_float4)_VECTORMATH_MASK_0x000F );   /* clear the last word */
+    Y0 = vec_perm( negS, c, _VECTORMATH_PERM_BBYX );   /* ( cY, cY, -sY, -sX ) */
+    Y1 = vec_perm( c, s, _VECTORMATH_PERM_BBYX );      /* ( sY, sY, cY, cX ) */
+    X0 = vec_splat( s, 0 );   /* sX in every lane */
+    X1 = vec_splat( c, 0 );   /* cX in every lane */
+    tmp = vec_madd( Z0, Y1, zero );   /* Z0 * Y1 */
+    result->col0.vec128 = vec_madd( Z0, Y0, zero );   /* Z0 * Y0 */
+    result->col1.vec128 = vec_madd( Z1, X1, vec_madd( tmp, X0, zero ) );   /* Z1*X1 + tmp*X0 */
+    result->col2.vec128 = vec_nmsub( Z1, X0, vec_madd( tmp, X1, zero ) );   /* tmp*X1 - Z1*X0 */
+}
+
+static inline void vmathM3MakeRotationAxis( VmathMatrix3 *result, float radians, const VmathVector3 *unitVec )
+{   /* Rotation by `radians` about unitVec (by its name, presumably unit length): column i = axis * axis[i] * (1 - cos) + a cos/sin term. */
+    vec_float4 axis, s, c, oneMinusC, axisS, negAxisS, xxxx, yyyy, zzzz, tmp0, tmp1, tmp2;
+    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    axis = unitVec->vec128;
+    sincosf4( _vmathVfSplatScalar(radians), &s, &c );   /* splat through the shared helper, as the X/Y/Z rotation builders do */
+    xxxx = vec_splat( axis, 0 );
+    yyyy = vec_splat( axis, 1 );
+    zzzz = vec_splat( axis, 2 );
+    oneMinusC = vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), c );
+    axisS = vec_madd( axis, s, zero );   /* axis * sin */
+    negAxisS = negatef4( axisS );
+    tmp0 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_XZBX );   /* ( -, z*s, -y*s, - ) */
+    tmp1 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_CXXX );   /* ( -z*s, -, x*s, - ) */
+    tmp2 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_YAXX );   /* ( y*s, -x*s, -, - ) */
+    tmp0 = vec_sel( tmp0, c, _VECTORMATH_MASK_0xF000 );   /* cos replaces the diagonal lane of each column */
+    tmp1 = vec_sel( tmp1, c, _VECTORMATH_MASK_0x0F00 );
+    tmp2 = vec_sel( tmp2, c, _VECTORMATH_MASK_0x00F0 );
+    result->col0.vec128 = vec_madd( vec_madd( axis, xxxx, zero ), oneMinusC, tmp0 );
+    result->col1.vec128 = vec_madd( vec_madd( axis, yyyy, zero ), oneMinusC, tmp1 );
+    result->col2.vec128 = vec_madd( vec_madd( axis, zzzz, zero ), oneMinusC, tmp2 );
+}
+
+static inline void vmathM3MakeRotationQ( VmathMatrix3 *result, const VmathQuat *unitQuat )
+{   /* Thin alias: forwards unchanged to vmathM3MakeFromQ. */
+    vmathM3MakeFromQ( result, unitQuat );
+}
+
+static inline void vmathM3MakeScale( VmathMatrix3 *result, const VmathVector3 *scaleVec )
+{   /* Diagonal scale matrix: column i keeps only one word of scaleVec, every other lane is zero. */
+    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    result->col0.vec128 = vec_sel( zero, scaleVec->vec128, _VECTORMATH_MASK_0xF000 );   /* presumably the x word */
+    result->col1.vec128 = vec_sel( zero, scaleVec->vec128, _VECTORMATH_MASK_0x0F00 );   /* presumably the y word */
+    result->col2.vec128 = vec_sel( zero, scaleVec->vec128, _VECTORMATH_MASK_0x00F0 );   /* presumably the z word */
+}
+
+static inline void vmathM3AppendScale( VmathMatrix3 *result, const VmathMatrix3 *mat, const VmathVector3 *scaleVec )
+{   /* Scale whole columns: col0 by scaleVec X, col1 by Y, col2 by Z (equivalent to mat * diag(scaleVec)). */
+    vmathV3ScalarMul( &result->col0, &mat->col0, vmathV3GetX( scaleVec ) );
+    vmathV3ScalarMul( &result->col1, &mat->col1, vmathV3GetY( scaleVec ) );
+    vmathV3ScalarMul( &result->col2, &mat->col2, vmathV3GetZ( scaleVec ) );
+}
+
+/*
+ * Multiply every column of mat element-by-element with scaleVec
+ * (vmathV3MulPerElem) and store the three products in result.
+ */
+static inline void vmathM3PrependScale( VmathMatrix3 *result, const VmathVector3 *scaleVec, const VmathMatrix3 *mat )
+{
+    vmathV3MulPerElem(&result->col0, &mat->col0, scaleVec);
+    vmathV3MulPerElem(&result->col1, &mat->col1, scaleVec);
+    vmathV3MulPerElem(&result->col2, &mat->col2, scaleVec);
+}
+
+/*
+ * Column-wise select between mat0 and mat1: every column of result is
+ * produced by vmathV3Select with the same select1 flag.
+ * NOTE(review): presumably mat1 is chosen when select1 is non-zero -
+ * confirm against vmathV3Select.
+ */
+static inline void vmathM3Select( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1, unsigned int select1 )
+{
+    vmathV3Select(&result->col0, &mat0->col0, &mat1->col0, select1);
+    vmathV3Select(&result->col1, &mat0->col1, &mat1->col1, select1);
+    vmathV3Select(&result->col2, &mat0->col2, &mat1->col2, select1);
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Debug helper: print mat one row at a time (rows 0, 1, 2), each row
+ * being extracted with vmathM3GetRow and printed with vmathV3Print.
+ */
+static inline void vmathM3Print( const VmathMatrix3 *mat )
+{
+    VmathVector3 rowVec;
+    int row;
+    for ( row = 0; row < 3; row++ ) {
+        vmathM3GetRow( &rowVec, mat, row );
+        vmathV3Print( &rowVec );
+    }
+}
+
+/*
+ * Debug helper: print the label name followed by a colon and newline,
+ * then the matrix itself via vmathM3Print.
+ */
+static inline void vmathM3Prints( const VmathMatrix3 *mat, const char *name )
+{
+    printf( "%s:\n", name );
+    vmathM3Print(mat);
+}
+
+#endif
+
+/*
+ * Copy all four columns of mat into result.
+ */
+static inline void vmathM4Copy( VmathMatrix4 *result, const VmathMatrix4 *mat )
+{
+    vmathV4Copy(&result->col0, &mat->col0);
+    vmathV4Copy(&result->col1, &mat->col1);
+    vmathV4Copy(&result->col2, &mat->col2);
+    vmathV4Copy(&result->col3, &mat->col3);
+}
+
+/*
+ * Initialize every column of result from the same scalar
+ * (vmathV4MakeFromScalar is applied to each of the four columns).
+ */
+static inline void vmathM4MakeFromScalar( VmathMatrix4 *result, float scalar )
+{
+    vmathV4MakeFromScalar(&result->col0, scalar);
+    vmathV4MakeFromScalar(&result->col1, scalar);
+    vmathV4MakeFromScalar(&result->col2, scalar);
+    vmathV4MakeFromScalar(&result->col3, scalar);
+}
+
+/*
+ * Expand a 3x4 transform into a 4x4 matrix.
+ * Columns 0..2 of mat are extended with a fourth element of 0.0f and
+ * column 3 with 1.0f.
+ */
+static inline void vmathM4MakeFromT3( VmathMatrix4 *result, const VmathTransform3 *mat )
+{
+    const float axisW = 0.0f;
+    const float originW = 1.0f;
+    vmathV4MakeFromV3Scalar( &result->col0, &mat->col0, axisW );
+    vmathV4MakeFromV3Scalar( &result->col1, &mat->col1, axisW );
+    vmathV4MakeFromV3Scalar( &result->col2, &mat->col2, axisW );
+    vmathV4MakeFromV3Scalar( &result->col3, &mat->col3, originW );
+}
+
+/*
+ * Assemble a 4x4 matrix from four column vectors; _colN is copied into
+ * column N of result.
+ */
+static inline void vmathM4MakeFromCols( VmathMatrix4 *result, const VmathVector4 *_col0, const VmathVector4 *_col1, const VmathVector4 *_col2, const VmathVector4 *_col3 )
+{
+    vmathV4Copy(&result->col0, _col0);
+    vmathV4Copy(&result->col1, _col1);
+    vmathV4Copy(&result->col2, _col2);
+    vmathV4Copy(&result->col3, _col3);
+}
+
+/*
+ * Build a 4x4 matrix from a 3x3 matrix and a translation vector.
+ * The three columns of mat get a fourth element of 0.0f; translateVec
+ * becomes column 3 with a fourth element of 1.0f.
+ */
+static inline void vmathM4MakeFromM3V3( VmathMatrix4 *result, const VmathMatrix3 *mat, const VmathVector3 *translateVec )
+{
+    const float axisW = 0.0f;
+    const float originW = 1.0f;
+    vmathV4MakeFromV3Scalar( &result->col0, &mat->col0, axisW );
+    vmathV4MakeFromV3Scalar( &result->col1, &mat->col1, axisW );
+    vmathV4MakeFromV3Scalar( &result->col2, &mat->col2, axisW );
+    vmathV4MakeFromV3Scalar( &result->col3, translateVec, originW );
+}
+
+/*
+ * Build a 4x4 matrix from a quaternion and a translation vector.
+ * The quaternion is first converted to a 3x3 matrix with vmathM3MakeFromQ;
+ * its columns get a fourth element of 0.0f, and translateVec becomes
+ * column 3 with a fourth element of 1.0f.
+ */
+static inline void vmathM4MakeFromQV3( VmathMatrix4 *result, const VmathQuat *unitQuat, const VmathVector3 *translateVec )
+{
+    VmathMatrix3 rotation;
+    const float axisW = 0.0f;
+    const float originW = 1.0f;
+    vmathM3MakeFromQ( &rotation, unitQuat );
+    vmathV4MakeFromV3Scalar( &result->col0, &rotation.col0, axisW );
+    vmathV4MakeFromV3Scalar( &result->col1, &rotation.col1, axisW );
+    vmathV4MakeFromV3Scalar( &result->col2, &rotation.col2, axisW );
+    vmathV4MakeFromV3Scalar( &result->col3, translateVec, originW );
+}
+
+/*
+ * Overwrite column 0 of result with _col0.
+ */
+static inline void vmathM4SetCol0( VmathMatrix4 *result, const VmathVector4 *_col0 )
+{
+    VmathVector4 *dstCol = &result->col0;
+    vmathV4Copy( dstCol, _col0 );
+}
+
+/*
+ * Overwrite column 1 of result with _col1.
+ */
+static inline void vmathM4SetCol1( VmathMatrix4 *result, const VmathVector4 *_col1 )
+{
+    VmathVector4 *dstCol = &result->col1;
+    vmathV4Copy( dstCol, _col1 );
+}
+
+/*
+ * Overwrite column 2 of result with _col2.
+ */
+static inline void vmathM4SetCol2( VmathMatrix4 *result, const VmathVector4 *_col2 )
+{
+    VmathVector4 *dstCol = &result->col2;
+    vmathV4Copy( dstCol, _col2 );
+}
+
+/*
+ * Overwrite column 3 of result with _col3.
+ */
+static inline void vmathM4SetCol3( VmathMatrix4 *result, const VmathVector4 *_col3 )
+{
+    VmathVector4 *dstCol = &result->col3;
+    vmathV4Copy( dstCol, _col3 );
+}
+
+/*
+ * Overwrite the column of result selected by index col with vec.
+ * The column is addressed as an offset from col0; col is not range
+ * checked here.
+ */
+static inline void vmathM4SetCol( VmathMatrix4 *result, int col, const VmathVector4 *vec )
+{
+    VmathVector4 *dstCol = &result->col0 + col;
+    vmathV4Copy( dstCol, vec );
+}
+
+/*
+ * Overwrite one row of result: element N of vec is stored at index row
+ * of column N, for N = 0..3.
+ */
+static inline void vmathM4SetRow( VmathMatrix4 *result, int row, const VmathVector4 *vec )
+{
+    float elem;
+    /* each element is fetched right before it is stored */
+    elem = vmathV4GetElem( vec, 0 );
+    vmathV4SetElem( &result->col0, row, elem );
+    elem = vmathV4GetElem( vec, 1 );
+    vmathV4SetElem( &result->col1, row, elem );
+    elem = vmathV4GetElem( vec, 2 );
+    vmathV4SetElem( &result->col2, row, elem );
+    elem = vmathV4GetElem( vec, 3 );
+    vmathV4SetElem( &result->col3, row, elem );
+}
+
+/*
+ * Set a single element of result, addressed by column and row.
+ * Works on a copy of the column: fetch it, patch the element, then
+ * write the whole column back.
+ */
+static inline void vmathM4SetElem( VmathMatrix4 *result, int col, int row, float val )
+{
+    VmathVector4 column;
+    vmathM4GetCol( &column, result, col );
+    vmathV4SetElem( &column, row, val );
+    vmathM4SetCol( result, col, &column );
+}
+
+/*
+ * Return a single element of mat, addressed by column and row.
+ * The column is copied out first and the element is read from the copy.
+ */
+static inline float vmathM4GetElem( const VmathMatrix4 *mat, int col, int row )
+{
+    VmathVector4 column;
+    float elem;
+    vmathM4GetCol( &column, mat, col );
+    elem = vmathV4GetElem( &column, row );
+    return elem;
+}
+
+/*
+ * Copy column 0 of mat into result.
+ */
+static inline void vmathM4GetCol0( VmathVector4 *result, const VmathMatrix4 *mat )
+{
+    const VmathVector4 *srcCol = &mat->col0;
+    vmathV4Copy( result, srcCol );
+}
+
+/*
+ * Copy column 1 of mat into result.
+ */
+static inline void vmathM4GetCol1( VmathVector4 *result, const VmathMatrix4 *mat )
+{
+    const VmathVector4 *srcCol = &mat->col1;
+    vmathV4Copy( result, srcCol );
+}
+
+/*
+ * Copy column 2 of mat into result.
+ */
+static inline void vmathM4GetCol2( VmathVector4 *result, const VmathMatrix4 *mat )
+{
+    const VmathVector4 *srcCol = &mat->col2;
+    vmathV4Copy( result, srcCol );
+}
+
+/*
+ * Copy column 3 of mat into result.
+ */
+static inline void vmathM4GetCol3( VmathVector4 *result, const VmathMatrix4 *mat )
+{
+    const VmathVector4 *srcCol = &mat->col3;
+    vmathV4Copy( result, srcCol );
+}
+
+/*
+ * Copy the column of mat selected by index col into result.
+ * The column is addressed as an offset from col0; col is not range
+ * checked here.
+ */
+static inline void vmathM4GetCol( VmathVector4 *result, const VmathMatrix4 *mat, int col )
+{
+    const VmathVector4 *srcCol = &mat->col0 + col;
+    vmathV4Copy( result, srcCol );
+}
+
+/*
+ * Extract one row of mat: the element at index row of each of the four
+ * columns is gathered into result. All four elements are read before
+ * result is written.
+ */
+static inline void vmathM4GetRow( VmathVector4 *result, const VmathMatrix4 *mat, int row )
+{
+    float fromCol0, fromCol1, fromCol2, fromCol3;
+    fromCol0 = vmathV4GetElem( &mat->col0, row );
+    fromCol1 = vmathV4GetElem( &mat->col1, row );
+    fromCol2 = vmathV4GetElem( &mat->col2, row );
+    fromCol3 = vmathV4GetElem( &mat->col3, row );
+    vmathV4MakeFromElems( result, fromCol0, fromCol1, fromCol2, fromCol3 );
+}
+
+/*
+ * Store the transpose of mat in result, using two rounds of
+ * vec_mergeh / vec_mergel interleaves.
+ * All input columns are read before any column of result is written.
+ */
+static inline void vmathM4Transpose( VmathMatrix4 *result, const VmathMatrix4 *mat )
+{
+    vec_float4 c0, c1, c2, c3;
+    vec_float4 hi02, hi13, lo02, lo13;
+    c0 = mat->col0.vec128;
+    c1 = mat->col1.vec128;
+    c2 = mat->col2.vec128;
+    c3 = mat->col3.vec128;
+    /* first round: pair column 0 with 2 and column 1 with 3 */
+    hi02 = vec_mergeh( c0, c2 );
+    hi13 = vec_mergeh( c1, c3 );
+    lo02 = vec_mergel( c0, c2 );
+    lo13 = vec_mergel( c1, c3 );
+    /* second round: combine the pairs into the transposed columns */
+    c0 = vec_mergeh( hi02, hi13 );
+    c1 = vec_mergel( hi02, hi13 );
+    c2 = vec_mergeh( lo02, lo13 );
+    c3 = vec_mergel( lo02, lo13 );
+    result->col0.vec128 = c0;
+    result->col1.vec128 = c1;
+    result->col2.vec128 = c2;
+    result->col3.vec128 = c3;
+}
+
+/*
+ * Compute the inverse of a general 4x4 matrix and store it in result.
+ *
+ * Steps, as laid out below:
+ *   1. load the four columns and shuffle them into a "pseudo transpose"
+ *      (t0..t3);
+ *   2. accumulate the cofactor vectors cof0..cof3 from products of the
+ *      shuffled rows;
+ *   3. form the determinant as the sum of the elements of t0 * cof0,
+ *      replicated across a whole vector;
+ *   4. multiply each cofactor vector by recipf4(det).
+ *
+ * No test for a zero determinant is made in this function.
+ * The input columns are loaded up front and result is written only at
+ * the very end.
+ */
+static inline void vmathM4Inverse( VmathMatrix4 *result, const VmathMatrix4 *mat )
+{
+    /* function implementation based on code from STIDC SDK:           */
+    /* --------------------------------------------------------------  */
+    /* PLEASE DO NOT MODIFY THIS SECTION                               */
+    /* This prolog section is automatically generated.                 */
+    /*                                                                 */
+    /* (C)Copyright                                                    */
+    /* Sony Computer Entertainment, Inc.,                              */
+    /* Toshiba Corporation,                                            */
+    /* International Business Machines Corporation,                    */
+    /* 2001,2002.                                                      */
+    /* S/T/I Confidential Information                                  */
+    /* --------------------------------------------------------------  */
+    vector float in0, in1, in2, in3;
+    vector float tmp0, tmp1, tmp2, tmp3;
+    vector float cof0, cof1, cof2, cof3;
+    vector float t0, t1, t2, t3;
+    vector float t01, t02, t03, t12, t23;
+    vector float t1r, t2r;
+    vector float t01r, t02r, t03r, t12r, t23r;
+    vector float t1r3, t1r3r;
+    vector float det, det0, det1, det2, det3, invdet;
+    vector float vzero = (vector float){0.0};
+    in0 = mat->col0.vec128;
+    in1 = mat->col1.vec128;
+    in2 = mat->col2.vec128;
+    in3 = mat->col3.vec128;
+    /* Perform transform of the input matrix of the form:
+     *    A B C D
+     *    E F G H
+     *    I J K L
+     *    M N O P
+     *
+     * The pseudo transpose of the input matrix is trans:
+     *    A E I M
+     *    J N B F
+     *    C G K O
+     *    L P D H
+     */
+    tmp0 = vec_perm(in0, in1, _VECTORMATH_PERM_XAZC);	/* A E C G */
+    tmp1 = vec_perm(in2, in3, _VECTORMATH_PERM_XAZC);	/* I M K O */
+    tmp2 = vec_perm(in0, in1, _VECTORMATH_PERM_YBWD);	/* B F D H */
+    tmp3 = vec_perm(in2, in3, _VECTORMATH_PERM_YBWD);	/* J N L P */
+    t0 = vec_perm(tmp0, tmp1, _VECTORMATH_PERM_XYAB);	/* A E I M */
+    t1 = vec_perm(tmp3, tmp2, _VECTORMATH_PERM_XYAB);	/* J N B F */
+    t2 = vec_perm(tmp0, tmp1, _VECTORMATH_PERM_ZWCD);	/* C G K O */
+    t3 = vec_perm(tmp3, tmp2, _VECTORMATH_PERM_ZWCD);	/* L P D H */
+    /* Generate a cofactor matrix. The computed cofactors reside in
+     * cof0, cof1, cof2, cof3.
+     */
+    t23 = vec_madd(t2, t3, vzero);		/* CL GP KD OH */
+    t23 = vec_perm(t23, t23, _VECTORMATH_PERM_YXWZ);	/* GP CL OH KD */
+    cof0 = vec_nmsub(t1, t23, vzero);		/* -(JGP NCL FOH BKD) */
+    cof1 = vec_nmsub(t0, t23, vzero);		/* -(AGP ECL IOH MKD) */
+    t23r = vec_sld(t23, t23, 8);			/* OH KD GP CL */
+    cof0 = vec_madd(t1, t23r, cof0);		/* JOH NKD BGP FCL + cof0 */
+    cof1 = vec_madd(t0, t23r, cof1);		/* AOH EKD IGP MCL + cof1 */
+    cof1 = vec_sld(cof1, cof1, 8);		/* IGP MCL AOH EKD - IOH MKD AGP ECL */
+    t12 = vec_madd(t1, t2, vzero);		/* JC NG BK FO */
+    t12 = vec_perm(t12, t12, _VECTORMATH_PERM_YXWZ);	/* NG JC FO BK */
+    cof0 = vec_madd(t3, t12, cof0);		/* LNG PJC DFO HBK + cof0 */
+    cof3 = vec_madd(t0, t12, vzero);		/* ANG EJC IFO MBK */
+    t12r = vec_sld(t12, t12, 8);			/* FO BK NG JC */
+    cof0 = vec_nmsub(t3, t12r, cof0);		/* cof0 - LFO PBK DNG HJC */
+    cof3 = vec_nmsub(t0, t12r, cof3);		/* cof3 - AFO EBK ING MJC */
+    cof3 = vec_sld(cof3, cof3, 8);		/* ING MJC AFO EBK - IFO MBK ANG EJC */
+    t1r = vec_sld(t1, t1, 8);			/* B F J N */
+    t2r = vec_sld(t2, t2, 8);			/* K O C G */
+    t1r3 = vec_madd(t1r, t3, vzero);		/* BL FP JD NH */
+    t1r3 = vec_perm(t1r3, t1r3, _VECTORMATH_PERM_YXWZ);	/* FP BL NH JD */
+    cof0 = vec_madd(t2r, t1r3, cof0);		/* KFP OBL CNH GJD + cof0 */
+    cof2 = vec_madd(t0, t1r3, vzero);		/* AFP EBL INH MJD */
+    t1r3r = vec_sld(t1r3, t1r3, 8);		/* NH JD FP BL */
+    cof0 = vec_nmsub(t2r, t1r3r, cof0);		/* cof0 - KNH OJD CFP GBL */
+    cof2 = vec_nmsub(t0, t1r3r, cof2);		/* cof2 - ANH EJD IFP MBL */
+    cof2 = vec_sld(cof2, cof2, 8);		/* IFP MBL ANH EJD - INH MJD AFP EBL */
+    t01 = vec_madd(t0, t1, vzero);		/* AJ EN IB MF */
+    t01 = vec_perm(t01, t01, _VECTORMATH_PERM_YXWZ);	/* EN AJ MF IB */
+    cof2 = vec_nmsub(t3, t01, cof2);		/* cof2 - LEN PAJ DMF HIB */
+    cof3 = vec_madd(t2r, t01, cof3);		/* KEN OAJ CMF GIB + cof3 */
+    t01r = vec_sld(t01, t01, 8);			/* MF IB EN AJ */
+    cof2 = vec_madd(t3, t01r, cof2);		/* LMF PIB DEN HAJ + cof2 */
+    cof3 = vec_nmsub(t2r, t01r, cof3);		/* cof3 - KMF OIB CEN GAJ */
+    t03 = vec_madd(t0, t3, vzero);		/* AL EP ID MH */
+    t03 = vec_perm(t03, t03, _VECTORMATH_PERM_YXWZ);	/* EP AL MH ID */
+    cof1 = vec_nmsub(t2r, t03, cof1);		/* cof1 - KEP OAL CMH GID */
+    cof2 = vec_madd(t1, t03, cof2);		/* JEP NAL BMH FID + cof2 */
+    t03r = vec_sld(t03, t03, 8);			/* MH ID EP AL */
+    cof1 = vec_madd(t2r, t03r, cof1);		/* KMH OID CEP GAL + cof1 */
+    cof2 = vec_nmsub(t1, t03r, cof2);		/* cof2 - JMH NID BEP FAL */
+    t02 = vec_madd(t0, t2r, vzero);		/* AK EO IC MG */
+    t02 = vec_perm(t02, t02, _VECTORMATH_PERM_YXWZ);	/* EO AK MG IC */
+    cof1 = vec_madd(t3, t02, cof1);		/* LEO PAK DMG HIC + cof1 */
+    cof3 = vec_nmsub(t1, t02, cof3);		/* cof3 - JEO NAK BMG FIC */
+    t02r = vec_sld(t02, t02, 8);			/* MG IC EO AK */
+    cof1 = vec_nmsub(t3, t02r, cof1);		/* cof1 - LMG PIC DEO HAK */
+    cof3 = vec_madd(t1, t02r, cof3);		/* JMG NIC BEO FAK + cof3 */
+    /* Compute the determinant of the matrix
+     *
+     * det = sum_across(t0 * cof0);
+     *
+     * We perform a sum across the entire vector so that
+     * we don't have to splat the result when multiplying the
+     * cofactors by the inverse of the determinant.
+     */
+    det  = vec_madd(t0, cof0, vzero);
+    det0 = vec_splat(det, 0);
+    det1 = vec_splat(det, 1);
+    det2 = vec_splat(det, 2);
+    det3 = vec_splat(det, 3);
+    det  = vec_add(det0, det1);
+    det2 = vec_add(det2, det3);
+    det  = vec_add(det, det2);
+    /* Compute the reciprocal of the determinant.
+     */
+    invdet = recipf4(det);
+    /* Multiply the cofactors by the reciprocal of the determinant.
+     */
+    result->col0.vec128 = vec_madd(cof0, invdet, vzero);
+    result->col1.vec128 = vec_madd(cof1, invdet, vzero);
+    result->col2.vec128 = vec_madd(cof2, invdet, vzero);
+    result->col3.vec128 = vec_madd(cof3, invdet, vzero);
+}
+
+/*
+ * Invert mat as an affine transform.
+ * Only the xyz part of each column is used: the four xyz vectors are
+ * packed into a VmathTransform3, inverted with vmathT3Inverse, and the
+ * result is expanded back to 4x4 by vmathM4MakeFromT3.
+ */
+static inline void vmathM4AffineInverse( VmathMatrix4 *result, const VmathMatrix4 *mat )
+{
+    VmathTransform3 affine, affineInv;
+    VmathVector3 xyz;
+    /* one scratch vector is reused for all four columns */
+    vmathV4GetXYZ( &xyz, &mat->col0 );
+    vmathT3SetCol0( &affine, &xyz );
+    vmathV4GetXYZ( &xyz, &mat->col1 );
+    vmathT3SetCol1( &affine, &xyz );
+    vmathV4GetXYZ( &xyz, &mat->col2 );
+    vmathT3SetCol2( &affine, &xyz );
+    vmathV4GetXYZ( &xyz, &mat->col3 );
+    vmathT3SetCol3( &affine, &xyz );
+    vmathT3Inverse( &affineInv, &affine );
+    vmathM4MakeFromT3( result, &affineInv );
+}
+
+/*
+ * Invert mat using vmathT3OrthoInverse.
+ * Only the xyz part of each column is used: the four xyz vectors are
+ * packed into a VmathTransform3, inverted with vmathT3OrthoInverse, and
+ * the result is expanded back to 4x4 by vmathM4MakeFromT3.
+ * NOTE(review): presumably valid only when the upper-left 3x3 is
+ * orthogonal - confirm against vmathT3OrthoInverse.
+ */
+static inline void vmathM4OrthoInverse( VmathMatrix4 *result, const VmathMatrix4 *mat )
+{
+    VmathTransform3 affine, affineInv;
+    VmathVector3 xyz;
+    /* one scratch vector is reused for all four columns */
+    vmathV4GetXYZ( &xyz, &mat->col0 );
+    vmathT3SetCol0( &affine, &xyz );
+    vmathV4GetXYZ( &xyz, &mat->col1 );
+    vmathT3SetCol1( &affine, &xyz );
+    vmathV4GetXYZ( &xyz, &mat->col2 );
+    vmathT3SetCol2( &affine, &xyz );
+    vmathV4GetXYZ( &xyz, &mat->col3 );
+    vmathT3SetCol3( &affine, &xyz );
+    vmathT3OrthoInverse( &affineInv, &affine );
+    vmathM4MakeFromT3( result, &affineInv );
+}
+
+/*
+ * Return the determinant of a 4x4 matrix.
+ *
+ * Uses the same pseudo-transpose shuffle as the inverse routine, but
+ * accumulates only the first cofactor vector (cof0). The determinant is
+ * the 4-element dot product of t0 and cof0, computed by _vmathVfDot4
+ * and read back from element 0 through a union.
+ */
+static inline float vmathM4Determinant( const VmathMatrix4 *mat )
+{
+    /* function implementation based on code from STIDC SDK:           */
+    /* --------------------------------------------------------------  */
+    /* PLEASE DO NOT MODIFY THIS SECTION                               */
+    /* This prolog section is automatically generated.                 */
+    /*                                                                 */
+    /* (C)Copyright                                                    */
+    /* Sony Computer Entertainment, Inc.,                              */
+    /* Toshiba Corporation,                                            */
+    /* International Business Machines Corporation,                    */
+    /* 2001,2002.                                                      */
+    /* S/T/I Confidential Information                                  */
+    /* --------------------------------------------------------------  */
+    vector float in0, in1, in2, in3;
+    vector float tmp0, tmp1, tmp2, tmp3;
+    vector float cof0;
+    vector float t0, t1, t2, t3;
+    vector float t12, t23;
+    vector float t1r, t2r;
+    vector float t12r, t23r;
+    vector float t1r3, t1r3r;
+    vector float vzero = (vector float){0.0};
+    union { vec_float4 v; float s[4]; } tmp;
+    in0 = mat->col0.vec128;
+    in1 = mat->col1.vec128;
+    in2 = mat->col2.vec128;
+    in3 = mat->col3.vec128;
+    /* Perform transform of the input matrix of the form:
+     *    A B C D
+     *    E F G H
+     *    I J K L
+     *    M N O P
+     *
+     * The pseudo transpose of the input matrix is trans:
+     *    A E I M
+     *    J N B F
+     *    C G K O
+     *    L P D H
+     */
+    tmp0 = vec_perm(in0, in1, _VECTORMATH_PERM_XAZC);	/* A E C G */
+    tmp1 = vec_perm(in2, in3, _VECTORMATH_PERM_XAZC);	/* I M K O */
+    tmp2 = vec_perm(in0, in1, _VECTORMATH_PERM_YBWD);	/* B F D H */
+    tmp3 = vec_perm(in2, in3, _VECTORMATH_PERM_YBWD);	/* J N L P */
+    t0 = vec_perm(tmp0, tmp1, _VECTORMATH_PERM_XYAB);	/* A E I M */
+    t1 = vec_perm(tmp3, tmp2, _VECTORMATH_PERM_XYAB);	/* J N B F */
+    t2 = vec_perm(tmp0, tmp1, _VECTORMATH_PERM_ZWCD);	/* C G K O */
+    t3 = vec_perm(tmp3, tmp2, _VECTORMATH_PERM_ZWCD);	/* L P D H */
+    /* Generate the first cofactor vector only. Unlike the full inverse,
+     * the determinant needs just cof0, which is accumulated below.
+     */
+    t23 = vec_madd(t2, t3, vzero);		/* CL GP KD OH */
+    t23 = vec_perm(t23, t23, _VECTORMATH_PERM_YXWZ);	/* GP CL OH KD */
+    cof0 = vec_nmsub(t1, t23, vzero);		/* -(JGP NCL FOH BKD) */
+    t23r = vec_sld(t23, t23, 8);			/* OH KD GP CL */
+    cof0 = vec_madd(t1, t23r, cof0);		/* JOH NKD BGP FCL + cof0 */
+    t12 = vec_madd(t1, t2, vzero);		/* JC NG BK FO */
+    t12 = vec_perm(t12, t12, _VECTORMATH_PERM_YXWZ);	/* NG JC FO BK */
+    cof0 = vec_madd(t3, t12, cof0);		/* LNG PJC DFO HBK + cof0 */
+    t12r = vec_sld(t12, t12, 8);			/* FO BK NG JC */
+    cof0 = vec_nmsub(t3, t12r, cof0);		/* cof0 - LFO PBK DNG HJC */
+    t1r = vec_sld(t1, t1, 8);			/* B F J N */
+    t2r = vec_sld(t2, t2, 8);			/* K O C G */
+    t1r3 = vec_madd(t1r, t3, vzero);		/* BL FP JD NH */
+    t1r3 = vec_perm(t1r3, t1r3, _VECTORMATH_PERM_YXWZ);	/* FP BL NH JD */
+    cof0 = vec_madd(t2r, t1r3, cof0);		/* KFP OBL CNH GJD + cof0 */
+    t1r3r = vec_sld(t1r3, t1r3, 8);		/* NH JD FP BL */
+    cof0 = vec_nmsub(t2r, t1r3r, cof0);		/* cof0 - KNH OJD CFP GBL */
+    /* determinant = dot(t0, cof0); the scalar is taken from element 0 */
+    tmp.v = _vmathVfDot4(t0,cof0);
+    return tmp.s[0];
+}
+
+/*
+ * Add two 4x4 matrices column by column (vmathV4Add on each column).
+ */
+static inline void vmathM4Add( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 )
+{
+    vmathV4Add(&result->col0, &mat0->col0, &mat1->col0);
+    vmathV4Add(&result->col1, &mat0->col1, &mat1->col1);
+    vmathV4Add(&result->col2, &mat0->col2, &mat1->col2);
+    vmathV4Add(&result->col3, &mat0->col3, &mat1->col3);
+}
+
+/*
+ * Subtract mat1 from mat0 column by column (vmathV4Sub on each column).
+ */
+static inline void vmathM4Sub( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 )
+{
+    vmathV4Sub(&result->col0, &mat0->col0, &mat1->col0);
+    vmathV4Sub(&result->col1, &mat0->col1, &mat1->col1);
+    vmathV4Sub(&result->col2, &mat0->col2, &mat1->col2);
+    vmathV4Sub(&result->col3, &mat0->col3, &mat1->col3);
+}
+
+/*
+ * Negate a 4x4 matrix column by column (vmathV4Neg on each column).
+ */
+static inline void vmathM4Neg( VmathMatrix4 *result, const VmathMatrix4 *mat )
+{
+    vmathV4Neg(&result->col0, &mat->col0);
+    vmathV4Neg(&result->col1, &mat->col1);
+    vmathV4Neg(&result->col2, &mat->col2);
+    vmathV4Neg(&result->col3, &mat->col3);
+}
+
+/*
+ * Apply vmathV4AbsPerElem to each of the four columns of mat and store
+ * the results in the matching columns of result.
+ */
+static inline void vmathM4AbsPerElem( VmathMatrix4 *result, const VmathMatrix4 *mat )
+{
+    vmathV4AbsPerElem(&result->col0, &mat->col0);
+    vmathV4AbsPerElem(&result->col1, &mat->col1);
+    vmathV4AbsPerElem(&result->col2, &mat->col2);
+    vmathV4AbsPerElem(&result->col3, &mat->col3);
+}
+
+/*
+ * Multiply a 4x4 matrix by a scalar, column by column
+ * (vmathV4ScalarMul on each column).
+ */
+static inline void vmathM4ScalarMul( VmathMatrix4 *result, const VmathMatrix4 *mat, float scalar )
+{
+    vmathV4ScalarMul(&result->col0, &mat->col0, scalar);
+    vmathV4ScalarMul(&result->col1, &mat->col1, scalar);
+    vmathV4ScalarMul(&result->col2, &mat->col2, scalar);
+    vmathV4ScalarMul(&result->col3, &mat->col3, scalar);
+}
+
+/*
+ * Multiply a 4x4 matrix by a 4-D vector.
+ * Each element of vec is splatted and multiplied with the matching
+ * column; columns 0 and 2 accumulate in one chain, columns 1 and 3 in a
+ * second chain, and the two partial sums are added at the end.
+ */
+static inline void vmathM4MulV4( VmathVector4 *result, const VmathMatrix4 *mat, const VmathVector4 *vec )
+{
+    vec_float4 sumCols02, sumCols13;
+    vec_float4 splatX, splatY, splatZ, splatW;
+    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vec_float4 v = vec->vec128;
+    splatX = vec_splat( v, 0 );
+    splatY = vec_splat( v, 1 );
+    splatZ = vec_splat( v, 2 );
+    splatW = vec_splat( v, 3 );
+    sumCols02 = vec_madd( mat->col0.vec128, splatX, zero );
+    sumCols13 = vec_madd( mat->col1.vec128, splatY, zero );
+    sumCols02 = vec_madd( mat->col2.vec128, splatZ, sumCols02 );
+    sumCols13 = vec_madd( mat->col3.vec128, splatW, sumCols13 );
+    result->vec128 = vec_add( sumCols02, sumCols13 );
+}
+
+/*
+ * Multiply a 4x4 matrix by a 3-D vector.
+ * Only columns 0..2 take part: the result is
+ * col0 * x + col1 * y + col2 * z, accumulated in that order.
+ * Column 3 of mat is not used.
+ */
+static inline void vmathM4MulV3( VmathVector4 *result, const VmathMatrix4 *mat, const VmathVector3 *vec )
+{
+    vec_float4 acc;
+    vec_float4 splatX, splatY, splatZ;
+    vec_float4 v = vec->vec128;
+    splatX = vec_splat( v, 0 );
+    splatY = vec_splat( v, 1 );
+    splatZ = vec_splat( v, 2 );
+    acc = vec_madd( mat->col0.vec128, splatX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    acc = vec_madd( mat->col1.vec128, splatY, acc );
+    acc = vec_madd( mat->col2.vec128, splatZ, acc );
+    result->vec128 = acc;
+}
+
+/*
+ * Multiply a 4x4 matrix by a 3-D point.
+ * Computes col0 * x + col1 * y + col2 * z and adds column 3 unscaled.
+ * Columns 0 and 2 accumulate in one chain, column 1 plus column 3 in a
+ * second chain, and the two partial sums are added at the end.
+ */
+static inline void vmathM4MulP3( VmathVector4 *result, const VmathMatrix4 *mat, const VmathPoint3 *pnt )
+{
+    vec_float4 sumCols02, sumCols13;
+    vec_float4 splatX, splatY, splatZ;
+    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vec_float4 p = pnt->vec128;
+    splatX = vec_splat( p, 0 );
+    splatY = vec_splat( p, 1 );
+    splatZ = vec_splat( p, 2 );
+    sumCols02 = vec_madd( mat->col0.vec128, splatX, zero );
+    sumCols13 = vec_madd( mat->col1.vec128, splatY, zero );
+    sumCols02 = vec_madd( mat->col2.vec128, splatZ, sumCols02 );
+    sumCols13 = vec_add( mat->col3.vec128, sumCols13 );
+    result->vec128 = vec_add( sumCols02, sumCols13 );
+}
+
+/*
+ * Multiply two 4x4 matrices: column N of the product is mat0 times
+ * column N of mat1.
+ * The product is built in a local matrix and copied out at the end, so
+ * result may be the same object as mat0 or mat1.
+ */
+static inline void vmathM4Mul( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 )
+{
+    VmathMatrix4 product;
+    vmathM4MulV4( &product.col0, mat0, &mat1->col0 );
+    vmathM4MulV4( &product.col1, mat0, &mat1->col1 );
+    vmathM4MulV4( &product.col2, mat0, &mat1->col2 );
+    vmathM4MulV4( &product.col3, mat0, &mat1->col3 );
+    vmathM4Copy( result, &product );
+}
+
+/*
+ * Multiply a 4x4 matrix by a 3x4 transform.
+ * Columns 0..2 of tfrm1 are multiplied as 3-D vectors (vmathM4MulV3);
+ * column 3 is converted to a point and multiplied with vmathM4MulP3.
+ * The product is built in a local matrix and copied out at the end.
+ */
+static inline void vmathM4MulT3( VmathMatrix4 *result, const VmathMatrix4 *mat, const VmathTransform3 *tfrm1 )
+{
+    VmathMatrix4 product;
+    VmathPoint3 origin;
+    vmathM4MulV3( &product.col0, mat, &tfrm1->col0 );
+    vmathM4MulV3( &product.col1, mat, &tfrm1->col1 );
+    vmathM4MulV3( &product.col2, mat, &tfrm1->col2 );
+    vmathP3MakeFromV3( &origin, &tfrm1->col3 );
+    vmathM4MulP3( &product.col3, mat, &origin );
+    vmathM4Copy( result, &product );
+}
+
+/*
+ * Multiply two 4x4 matrices element by element
+ * (vmathV4MulPerElem on each pair of matching columns).
+ */
+static inline void vmathM4MulPerElem( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 )
+{
+    vmathV4MulPerElem(&result->col0, &mat0->col0, &mat1->col0);
+    vmathV4MulPerElem(&result->col1, &mat0->col1, &mat1->col1);
+    vmathV4MulPerElem(&result->col2, &mat0->col2, &mat1->col2);
+    vmathV4MulPerElem(&result->col3, &mat0->col3, &mat1->col3);
+}
+
+/*
+ * Set result to the identity matrix: columns 0..3 are filled by
+ * vmathV4MakeXAxis, MakeYAxis, MakeZAxis and MakeWAxis respectively.
+ */
+static inline void vmathM4MakeIdentity( VmathMatrix4 *result )
+{
+    vmathV4MakeXAxis(&result->col0);
+    vmathV4MakeYAxis(&result->col1);
+    vmathV4MakeZAxis(&result->col2);
+    vmathV4MakeWAxis(&result->col3);
+}
+
+/*
+ * Overwrite the xyz part of columns 0..2 of result with the columns of
+ * mat3 (vmathV4SetXYZ). Column 3 of result is not touched here.
+ */
+static inline void vmathM4SetUpper3x3( VmathMatrix4 *result, const VmathMatrix3 *mat3 )
+{
+    vmathV4SetXYZ(&result->col0, &mat3->col0);
+    vmathV4SetXYZ(&result->col1, &mat3->col1);
+    vmathV4SetXYZ(&result->col2, &mat3->col2);
+}
+
+/*
+ * Extract the xyz part of columns 0..2 of mat into the three columns
+ * of result (vmathV4GetXYZ).
+ */
+static inline void vmathM4GetUpper3x3( VmathMatrix3 *result, const VmathMatrix4 *mat )
+{
+    vmathV4GetXYZ(&result->col0, &mat->col0);
+    vmathV4GetXYZ(&result->col1, &mat->col1);
+    vmathV4GetXYZ(&result->col2, &mat->col2);
+}
+
+/*
+ * Overwrite the xyz part of column 3 of result with translateVec.
+ * Columns 0..2 are not touched here.
+ */
+static inline void vmathM4SetTranslation( VmathMatrix4 *result, const VmathVector3 *translateVec )
+{
+    VmathVector4 *translationCol = &result->col3;
+    vmathV4SetXYZ( translationCol, translateVec );
+}
+
+/*
+ * Extract the xyz part of column 3 of mat into result.
+ */
+static inline void vmathM4GetTranslation( VmathVector3 *result, const VmathMatrix4 *mat )
+{
+    const VmathVector4 *translationCol = &mat->col3;
+    vmathV4GetXYZ( result, translationCol );
+}
+
+/*
+ * Build a 4x4 matrix for a rotation of `radians` about the x axis.
+ * Column 0 and column 3 are the x and w unit axes; column 1 carries
+ * (cos, sin) and column 2 carries (-sin, cos) in the lanes picked by
+ * the 0x0F00 and 0x00F0 masks, with zeros elsewhere.
+ */
+static inline void vmathM4MakeRotationX( VmathMatrix4 *result, float radians )
+{
+    vec_float4 sinV, cosV, col1, col2;
+    vec_uint4 maskY = _VECTORMATH_MASK_0x0F00;
+    vec_uint4 maskZ = _VECTORMATH_MASK_0x00F0;
+    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    sincosf4( _vmathVfSplatScalar(radians), &sinV, &cosV );
+    col1 = vec_sel( vec_sel( zero, cosV, maskY ), sinV, maskZ );
+    col2 = vec_sel( vec_sel( zero, negatef4(sinV), maskY ), cosV, maskZ );
+    vmathV4MakeXAxis( &result->col0 );
+    result->col1.vec128 = col1;
+    result->col2.vec128 = col2;
+    vmathV4MakeWAxis( &result->col3 );
+}
+
+/*
+ * Build a 4x4 matrix for a rotation of `radians` about the y axis.
+ * Column 1 and column 3 are the y and w unit axes; column 0 carries
+ * (cos, -sin) and column 2 carries (sin, cos) in the lanes picked by
+ * the 0xF000 and 0x00F0 masks, with zeros elsewhere.
+ */
+static inline void vmathM4MakeRotationY( VmathMatrix4 *result, float radians )
+{
+    vec_float4 sinV, cosV, col0, col2;
+    vec_uint4 maskX = _VECTORMATH_MASK_0xF000;
+    vec_uint4 maskZ = _VECTORMATH_MASK_0x00F0;
+    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    sincosf4( _vmathVfSplatScalar(radians), &sinV, &cosV );
+    col0 = vec_sel( vec_sel( zero, cosV, maskX ), negatef4(sinV), maskZ );
+    col2 = vec_sel( vec_sel( zero, sinV, maskX ), cosV, maskZ );
+    result->col0.vec128 = col0;
+    vmathV4MakeYAxis( &result->col1 );
+    result->col2.vec128 = col2;
+    vmathV4MakeWAxis( &result->col3 );
+}
+
+/*
+ * Build a 4x4 matrix for a rotation of `radians` about the z axis.
+ * Column 2 and column 3 are the z and w unit axes; column 0 carries
+ * (cos, sin) and column 1 carries (-sin, cos) in the lanes picked by
+ * the 0xF000 and 0x0F00 masks, with zeros elsewhere.
+ */
+static inline void vmathM4MakeRotationZ( VmathMatrix4 *result, float radians )
+{
+    vec_float4 sinV, cosV, col0, col1;
+    vec_uint4 maskX = _VECTORMATH_MASK_0xF000;
+    vec_uint4 maskY = _VECTORMATH_MASK_0x0F00;
+    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    sincosf4( _vmathVfSplatScalar(radians), &sinV, &cosV );
+    col0 = vec_sel( vec_sel( zero, cosV, maskX ), sinV, maskY );
+    col1 = vec_sel( vec_sel( zero, negatef4(sinV), maskX ), cosV, maskY );
+    result->col0.vec128 = col0;
+    result->col1.vec128 = col1;
+    vmathV4MakeZAxis( &result->col2 );
+    vmathV4MakeWAxis( &result->col3 );
+}
+
+/*
+ * Build a 4x4 rotation matrix from three angles held in radiansXYZ.
+ * The angles are widened to four lanes (fourth lane 0.0f), sine and
+ * cosine of all lanes are taken in one sincosf4 call, and the three
+ * rotation columns are assembled from shuffled sin/cos products.
+ * Column 3 is the w unit axis.
+ * NOTE(review): the name suggests the composition order Z * Y * X -
+ * confirm against the library documentation.
+ */
+static inline void vmathM4MakeRotationZYX( VmathMatrix4 *result, const VmathVector3 *radiansXYZ )
+{
+    VmathVector4 angles4;
+    vec_float4 sinV, negSinV, cosV;
+    vec_float4 sinX, cosX, yTermA, yTermB, zTermA, zTermB, zy;
+    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vmathV4MakeFromV3Scalar( &angles4, radiansXYZ, 0.0f );
+    sincosf4( angles4.vec128, &sinV, &cosV );
+    negSinV = negatef4( sinV );
+    zTermA = vec_mergel( cosV, sinV );
+    zTermB = vec_mergel( negSinV, cosV );
+    /* clear the lane selected by the 0x000F mask */
+    zTermB = vec_andc( zTermB, (vec_float4)_VECTORMATH_MASK_0x000F );
+    yTermA = vec_perm( negSinV, cosV, _VECTORMATH_PERM_BBYX );
+    yTermB = vec_perm( cosV, sinV, _VECTORMATH_PERM_BBYX );
+    sinX = vec_splat( sinV, 0 );
+    cosX = vec_splat( cosV, 0 );
+    zy = vec_madd( zTermA, yTermB, zero );
+    result->col0.vec128 = vec_madd( zTermA, yTermA, zero );
+    result->col1.vec128 = vec_madd( zTermB, cosX, vec_madd( zy, sinX, zero ) );
+    result->col2.vec128 = vec_nmsub( zTermB, sinX, vec_madd( zy, cosX, zero ) );
+    vmathV4MakeWAxis( &result->col3 );
+}
+
+/*
+ * Build a 4x4 matrix for a rotation of `radians` about the axis unitVec.
+ * Each rotation column is (axis * axis[k]) * (1 - cos) plus a vector
+ * made of cos on the diagonal lane and +/- sin * axis terms elsewhere.
+ * The lane selected by the 0x000F mask is cleared in the axis and in
+ * the additive terms before the final multiply-adds, and column 3 is
+ * the w unit axis.
+ * NOTE(review): the parameter name suggests the axis must be
+ * normalized - confirm with callers.
+ */
+static inline void vmathM4MakeRotationAxis( VmathMatrix4 *result, float radians, const VmathVector3 *unitVec )
+{
+    vec_float4 axisVec, sinV, cosV, oneMinusCos, sinAxis, negSinAxis;
+    vec_float4 splatX, splatY, splatZ, add0, add1, add2, wLane;
+    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    axisVec = unitVec->vec128;
+    sincosf4( (vec_float4){radians,radians,radians,radians}, &sinV, &cosV );
+    splatX = vec_splat( axisVec, 0 );
+    splatY = vec_splat( axisVec, 1 );
+    splatZ = vec_splat( axisVec, 2 );
+    oneMinusCos = vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), cosV );
+    sinAxis = vec_madd( axisVec, sinV, zero );
+    negSinAxis = negatef4( sinAxis );
+    wLane = (vec_float4)_VECTORMATH_MASK_0x000F;
+    /* additive term per column: shuffle, insert cos, clear the masked lane */
+    add0 = vec_perm( sinAxis, negSinAxis, _VECTORMATH_PERM_XZBX );
+    add0 = vec_sel( add0, cosV, _VECTORMATH_MASK_0xF000 );
+    add0 = vec_andc( add0, wLane );
+    add1 = vec_perm( sinAxis, negSinAxis, _VECTORMATH_PERM_CXXX );
+    add1 = vec_sel( add1, cosV, _VECTORMATH_MASK_0x0F00 );
+    add1 = vec_andc( add1, wLane );
+    add2 = vec_perm( sinAxis, negSinAxis, _VECTORMATH_PERM_YAXX );
+    add2 = vec_sel( add2, cosV, _VECTORMATH_MASK_0x00F0 );
+    add2 = vec_andc( add2, wLane );
+    axisVec = vec_andc( axisVec, wLane );
+    result->col0.vec128 = vec_madd( vec_madd( axisVec, splatX, zero ), oneMinusCos, add0 );
+    result->col1.vec128 = vec_madd( vec_madd( axisVec, splatY, zero ), oneMinusCos, add1 );
+    result->col2.vec128 = vec_madd( vec_madd( axisVec, splatZ, zero ), oneMinusCos, add2 );
+    vmathV4MakeWAxis( &result->col3 );
+}
+
+/*
+ * Build a 4x4 rotation matrix from the quaternion unitQuat.
+ * The rotation is first produced as a 3x4 transform by
+ * vmathT3MakeRotationQ and then expanded with vmathM4MakeFromT3.
+ */
+static inline void vmathM4MakeRotationQ( VmathMatrix4 *result, const VmathQuat *unitQuat )
+{
+    VmathTransform3 rotation;
+    vmathT3MakeRotationQ( &rotation, unitQuat );
+    vmathM4MakeFromT3( result, &rotation );
+}
+
+/*
+ * Build a 4x4 scale matrix from scaleVec.
+ * Columns 0..2 start as all zeros and take from scaleVec only the lanes
+ * enabled by their select mask (0xF000 / 0x0F00 / 0x00F0); column 3 is
+ * the w unit axis.
+ */
+static inline void vmathM4MakeScale( VmathMatrix4 *result, const VmathVector3 *scaleVec )
+{
+    vec_uint4 pickCol0 = _VECTORMATH_MASK_0xF000;
+    vec_uint4 pickCol1 = _VECTORMATH_MASK_0x0F00;
+    vec_uint4 pickCol2 = _VECTORMATH_MASK_0x00F0;
+    vec_float4 zeros = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    /* scaleVec is re-read for every column, exactly as the columns are written */
+    result->col0.vec128 = vec_sel( zeros, scaleVec->vec128, pickCol0 );
+    result->col1.vec128 = vec_sel( zeros, scaleVec->vec128, pickCol1 );
+    result->col2.vec128 = vec_sel( zeros, scaleVec->vec128, pickCol2 );
+    vmathV4MakeWAxis( &result->col3 );
+}
+
+/*
+ * Scale the columns of mat: column 0 by the x element of scaleVec,
+ * column 1 by y, column 2 by z. Column 3 is copied unchanged.
+ */
+static inline void vmathM4AppendScale( VmathMatrix4 *result, const VmathMatrix4 *mat, const VmathVector3 *scaleVec )
+{
+    float factor;
+    /* each factor is fetched right before its column is written */
+    factor = vmathV3GetX( scaleVec );
+    vmathV4ScalarMul( &result->col0, &mat->col0, factor );
+    factor = vmathV3GetY( scaleVec );
+    vmathV4ScalarMul( &result->col1, &mat->col1, factor );
+    factor = vmathV3GetZ( scaleVec );
+    vmathV4ScalarMul( &result->col2, &mat->col2, factor );
+    vmathV4Copy( &result->col3, &mat->col3 );
+}
+
+/*
+ * Multiply every column of mat element-by-element with
+ * (scaleVec, 1.0f), i.e. scaleVec widened with a fourth element of 1.0f
+ * so that the fourth element of each column is multiplied by one.
+ */
+static inline void vmathM4PrependScale( VmathMatrix4 *result, const VmathVector3 *scaleVec, const VmathMatrix4 *mat )
+{
+    VmathVector4 scaleXYZ1;
+    vmathV4MakeFromV3Scalar( &scaleXYZ1, scaleVec, 1.0f );
+    vmathV4MulPerElem( &result->col0, &mat->col0, &scaleXYZ1 );
+    vmathV4MulPerElem( &result->col1, &mat->col1, &scaleXYZ1 );
+    vmathV4MulPerElem( &result->col2, &mat->col2, &scaleXYZ1 );
+    vmathV4MulPerElem( &result->col3, &mat->col3, &scaleXYZ1 );
+}
+
+/*
+ * Build a 4x4 translation matrix: columns 0..2 are the x, y and z unit
+ * axes and column 3 is translateVec with a fourth element of 1.0f.
+ */
+static inline void vmathM4MakeTranslation( VmathMatrix4 *result, const VmathVector3 *translateVec )
+{
+    const float originW = 1.0f;
+    vmathV4MakeXAxis( &result->col0 );
+    vmathV4MakeYAxis( &result->col1 );
+    vmathV4MakeZAxis( &result->col2 );
+    vmathV4MakeFromV3Scalar( &result->col3, translateVec, originW );
+}
+
+/*
+ * Build a viewing matrix from an eye position, a target position and an
+ * up vector.
+ * An eye frame is assembled first: z = normalize(eyePos - lookAtPos),
+ * x = normalize(cross(normalize(upVec), z)), y = cross(z, x), with
+ * eyePos as the fourth column. The result is the inverse of that frame,
+ * computed with vmathM4OrthoInverse.
+ */
+static inline void vmathM4MakeLookAt( VmathMatrix4 *result, const VmathPoint3 *eyePos, const VmathPoint3 *lookAtPos, const VmathVector3 *upVec )
+{
+    VmathMatrix4 eyeFrame;
+    VmathVector3 axisX, axisY, axisZ, targetToEye, side;
+    VmathVector4 colX, colY, colZ, colEye;
+    vmathV3Normalize( &axisY, upVec );
+    vmathP3Sub( &targetToEye, eyePos, lookAtPos );
+    vmathV3Normalize( &axisZ, &targetToEye );
+    vmathV3Cross( &side, &axisY, &axisZ );
+    vmathV3Normalize( &axisX, &side );
+    /* recompute y from z and x; this replaces the normalized up vector */
+    vmathV3Cross( &axisY, &axisZ, &axisX );
+    vmathV4MakeFromV3( &colX, &axisX );
+    vmathV4MakeFromV3( &colY, &axisY );
+    vmathV4MakeFromV3( &colZ, &axisZ );
+    vmathV4MakeFromP3( &colEye, eyePos );
+    vmathM4MakeFromCols( &eyeFrame, &colX, &colY, &colZ, &colEye );
+    vmathM4OrthoInverse( result, &eyeFrame );
+}
+
+/*
+ * Build a perspective projection matrix.
+ *   f        = tanf( pi/2 - fovyRadians / 2 )
+ *   rangeInv = 1 / ( zNear - zFar )
+ *   col0 = ( f / aspect, 0, 0, 0 )
+ *   col1 = ( 0, f, 0, 0 )
+ *   col2 = ( 0, 0, ( zNear + zFar ) * rangeInv, -1 )
+ *   col3 = ( 0, 0, zNear * zFar * rangeInv * 2, 0 )
+ * No check is made here for aspect == 0 or zNear == zFar.
+ */
+static inline void vmathM4MakePerspective( VmathMatrix4 *result, float fovyRadians, float aspect, float zNear, float zFar )
+{
+    float f, rangeInv;
+    float xScale, zScale, zOffset;
+    f = tanf( _VECTORMATH_PI_OVER_2 - fovyRadians * 0.5f );
+    rangeInv = 1.0f / ( zNear - zFar );
+    xScale = f / aspect;
+    zScale = ( zNear + zFar ) * rangeInv;
+    zOffset = zNear * zFar * rangeInv * 2.0f;
+    result->col0.vec128 = ((vec_float4){xScale,0.0f,0.0f,0.0f});
+    result->col1.vec128 = ((vec_float4){0.0f,f,0.0f,0.0f});
+    result->col2.vec128 = ((vec_float4){0.0f,0.0f,zScale,-1.0f});
+    result->col3.vec128 = ((vec_float4){0.0f,0.0f,zOffset,0.0f});
+}
+
+/*
+ * Build a perspective projection matrix from the six frustum planes.
+ * The scalars are packed into two vectors so that the differences, sums and
+ * reciprocals for all three axes are computed with one SIMD op each.
+ */
+static inline void vmathM4MakeFrustum( VmathMatrix4 *result, float left, float right, float bottom, float top, float zNear, float zFar )
+{
+    /* function implementation based on code from STIDC SDK:           */
+    /* --------------------------------------------------------------  */
+    /* PLEASE DO NOT MODIFY THIS SECTION                               */
+    /* This prolog section is automatically generated.                 */
+    /*                                                                 */
+    /* (C)Copyright                                                    */
+    /* Sony Computer Entertainment, Inc.,                              */
+    /* Toshiba Corporation,                                            */
+    /* International Business Machines Corporation,                    */
+    /* 2001,2002.                                                      */
+    /* S/T/I Confidential Information                                  */
+    /* --------------------------------------------------------------  */
+    vec_float4 lbf, rtn;
+    vec_float4 diff, sum, inv_diff;
+    vec_float4 diagonal, column, near2;
+    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    /* Only lane 0 of each union is written; lanes 1-3 stay uninitialized. */
+    /* NOTE(review): those lanes flow into the w lane of lbf/rtn, which the */
+    /* final selects below never copy into the result.                     */
+    union { vec_float4 v; float s[4]; } l, f, r, n, b, t;
+    l.s[0] = left;
+    f.s[0] = zFar;
+    r.s[0] = right;
+    n.s[0] = zNear;
+    b.s[0] = bottom;
+    t.s[0] = top;
+    /* After the two merges: lbf = { left, bottom, zFar, - }, */
+    /*                       rtn = { right, top, zNear, - }.  */
+    lbf = vec_mergeh( l.v, f.v );
+    rtn = vec_mergeh( r.v, n.v );
+    lbf = vec_mergeh( lbf, b.v );
+    rtn = vec_mergeh( rtn, t.v );
+    diff = vec_sub( rtn, lbf );
+    sum  = vec_add( rtn, lbf );
+    /* recipf4 is presumably a per-lane reciprocal (1 / diff). */
+    inv_diff = recipf4( diff );
+    /* near2 = 2 * zNear in every lane. */
+    near2 = vec_splat( n.v, 0 );
+    near2 = vec_add( near2, near2 );
+    /* diagonal = 2*zNear / diff, column = sum / diff (per lane). */
+    diagonal = vec_madd( near2, inv_diff, zero );
+    column = vec_madd( sum, inv_diff, zero );
+    /* col0/col1 keep only the x / y lane of diagonal.                    */
+    result->col0.vec128 = vec_sel( zero, diagonal, _VECTORMATH_MASK_0xF000 );
+    result->col1.vec128 = vec_sel( zero, diagonal, _VECTORMATH_MASK_0x0F00 );
+    /* col2 = column with its w lane replaced by -1.                      */
+    result->col2.vec128 = vec_sel( column, ((vec_float4){-1.0f,-1.0f,-1.0f,-1.0f}), _VECTORMATH_MASK_0x000F );
+    /* col3 keeps only the z lane of diagonal * zFar.                     */
+    result->col3.vec128 = vec_sel( zero, vec_madd( diagonal, vec_splat( f.v, 0 ), zero ), _VECTORMATH_MASK_0x00F0 );
+}
+
+/*
+ * Build an orthographic projection matrix from the six clipping planes.
+ * Uses the same packing as vmathM4MakeFrustum: per-axis differences and sums
+ * are computed in the x/y/z lanes of a single vector.
+ */
+static inline void vmathM4MakeOrthographic( VmathMatrix4 *result, float left, float right, float bottom, float top, float zNear, float zFar )
+{
+    /* function implementation based on code from STIDC SDK:           */
+    /* --------------------------------------------------------------  */
+    /* PLEASE DO NOT MODIFY THIS SECTION                               */
+    /* This prolog section is automatically generated.                 */
+    /*                                                                 */
+    /* (C)Copyright                                                    */
+    /* Sony Computer Entertainment, Inc.,                              */
+    /* Toshiba Corporation,                                            */
+    /* International Business Machines Corporation,                    */
+    /* 2001,2002.                                                      */
+    /* S/T/I Confidential Information                                  */
+    /* --------------------------------------------------------------  */
+    vec_float4 lbf, rtn;
+    vec_float4 diff, sum, inv_diff, neg_inv_diff;
+    vec_float4 diagonal, column;
+    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    /* Only lane 0 of each union is written; lanes 1-3 stay uninitialized. */
+    union { vec_float4 v; float s[4]; } l, f, r, n, b, t;
+    l.s[0] = left;
+    f.s[0] = zFar;
+    r.s[0] = right;
+    n.s[0] = zNear;
+    b.s[0] = bottom;
+    t.s[0] = top;
+    /* After the two merges: lbf = { left, bottom, zFar, - }, */
+    /*                       rtn = { right, top, zNear, - }.  */
+    lbf = vec_mergeh( l.v, f.v );
+    rtn = vec_mergeh( r.v, n.v );
+    lbf = vec_mergeh( lbf, b.v );
+    rtn = vec_mergeh( rtn, t.v );
+    diff = vec_sub( rtn, lbf );
+    sum  = vec_add( rtn, lbf );
+    /* recipf4 / negatef4 are presumably per-lane reciprocal and negation. */
+    inv_diff = recipf4( diff );
+    neg_inv_diff = negatef4( inv_diff );
+    /* diagonal = 2 / diff (per lane). */
+    diagonal = vec_add( inv_diff, inv_diff );
+    /* column = sum * -1/diff in x and y, sum * +1/diff in z. */
+    column = vec_madd( sum, vec_sel( neg_inv_diff, inv_diff, _VECTORMATH_MASK_0x00F0 ), zero );
+    /* col0..col2 each keep one lane of diagonal (x, y, z respectively). */
+    result->col0.vec128 = vec_sel( zero, diagonal, _VECTORMATH_MASK_0xF000 );
+    result->col1.vec128 = vec_sel( zero, diagonal, _VECTORMATH_MASK_0x0F00 );
+    result->col2.vec128 = vec_sel( zero, diagonal, _VECTORMATH_MASK_0x00F0 );
+    /* col3 = column with its w lane replaced by 1. */
+    result->col3.vec128 = vec_sel( column, ((vec_float4){1.0f,1.0f,1.0f,1.0f}), _VECTORMATH_MASK_0x000F );
+}
+
+/*
+ * Column-wise select between two 4x4 matrices: each result column is
+ * vmathV4Select of the matching columns of mat0 and mat1 with select1.
+ */
+static inline void vmathM4Select( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1, unsigned int select1 )
+{
+    VmathVector4 *outCols[4] = { &result->col0, &result->col1, &result->col2, &result->col3 };
+    const VmathVector4 *aCols[4] = { &mat0->col0, &mat0->col1, &mat0->col2, &mat0->col3 };
+    const VmathVector4 *bCols[4] = { &mat1->col0, &mat1->col1, &mat1->col2, &mat1->col3 };
+    int i;
+
+    for ( i = 0; i < 4; i++ ) {
+        vmathV4Select( outCols[i], aCols[i], bCols[i], select1 );
+    }
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Debug helper: print a 4x4 matrix one row at a time (rows 0..3), fetching
+ * each row with vmathM4GetRow and printing it with vmathV4Print.
+ */
+static inline void vmathM4Print( const VmathMatrix4 *mat )
+{
+    VmathVector4 rowVec;
+    int row;
+
+    for ( row = 0; row < 4; row++ ) {
+        vmathM4GetRow( &rowVec, mat, row );
+        vmathV4Print( &rowVec );
+    }
+}
+
+/*
+ * Debug helper: print "<name>:" on its own line, then the matrix rows.
+ */
+static inline void vmathM4Prints( const VmathMatrix4 *mat, const char *name )
+{
+    /* Label line first, matrix body second. */
+    printf("%s:\n", name);
+
+    vmathM4Print( mat );
+}
+
+#endif
+
+/*
+ * Copy all four columns of tfrm into result, in column order 0..3.
+ */
+static inline void vmathT3Copy( VmathTransform3 *result, const VmathTransform3 *tfrm )
+{
+    VmathVector3 *dstCols[4] = { &result->col0, &result->col1, &result->col2, &result->col3 };
+    const VmathVector3 *srcCols[4] = { &tfrm->col0, &tfrm->col1, &tfrm->col2, &tfrm->col3 };
+    int i;
+
+    for ( i = 0; i < 4; i++ ) {
+        vmathV3Copy( dstCols[i], srcCols[i] );
+    }
+}
+
+/*
+ * Initialise every column of the transform from the same scalar via
+ * vmathV3MakeFromScalar.
+ */
+static inline void vmathT3MakeFromScalar( VmathTransform3 *result, float scalar )
+{
+    VmathVector3 *cols[4] = { &result->col0, &result->col1, &result->col2, &result->col3 };
+    int i;
+
+    for ( i = 0; i < 4; i++ ) {
+        vmathV3MakeFromScalar( cols[i], scalar );
+    }
+}
+
+/*
+ * Assemble a transform from four explicit column vectors
+ * (copied in the order _col0, _col1, _col2, _col3).
+ */
+static inline void vmathT3MakeFromCols( VmathTransform3 *result, const VmathVector3 *_col0, const VmathVector3 *_col1, const VmathVector3 *_col2, const VmathVector3 *_col3 )
+{
+    VmathVector3 *dstCols[4] = { &result->col0, &result->col1, &result->col2, &result->col3 };
+    const VmathVector3 *srcCols[4] = { _col0, _col1, _col2, _col3 };
+    int i;
+
+    for ( i = 0; i < 4; i++ ) {
+        vmathV3Copy( dstCols[i], srcCols[i] );
+    }
+}
+
+/*
+ * Build a transform from a 3x3 matrix (stored as the upper 3x3 part) and a
+ * translation vector (stored as the translation part).
+ */
+static inline void vmathT3MakeFromM3V3( VmathTransform3 *result, const VmathMatrix3 *tfrm, const VmathVector3 *translateVec )
+{
+    /* 3x3 part first ... */
+    vmathT3SetUpper3x3( result, tfrm );
+
+    /* ... then the translation. */
+    vmathT3SetTranslation( result, translateVec );
+}
+
+/*
+ * Build a transform from a quaternion and a translation vector.
+ * The quaternion is first converted to a 3x3 matrix with vmathM3MakeFromQ.
+ */
+static inline void vmathT3MakeFromQV3( VmathTransform3 *result, const VmathQuat *unitQuat, const VmathVector3 *translateVec )
+{
+    VmathMatrix3 rotation;
+
+    vmathM3MakeFromQ( &rotation, unitQuat );
+    vmathT3SetUpper3x3( result, &rotation );
+    vmathT3SetTranslation( result, translateVec );
+}
+
+/* Overwrite column 0 of the transform with _col0. */
+static inline void vmathT3SetCol0( VmathTransform3 *result, const VmathVector3 *_col0 )
+{
+    VmathVector3 *target = &result->col0;
+
+    vmathV3Copy( target, _col0 );
+}
+
+/* Overwrite column 1 of the transform with _col1. */
+static inline void vmathT3SetCol1( VmathTransform3 *result, const VmathVector3 *_col1 )
+{
+    VmathVector3 *target = &result->col1;
+
+    vmathV3Copy( target, _col1 );
+}
+
+/* Overwrite column 2 of the transform with _col2. */
+static inline void vmathT3SetCol2( VmathTransform3 *result, const VmathVector3 *_col2 )
+{
+    VmathVector3 *target = &result->col2;
+
+    vmathV3Copy( target, _col2 );
+}
+
+/* Overwrite column 3 of the transform with _col3. */
+static inline void vmathT3SetCol3( VmathTransform3 *result, const VmathVector3 *_col3 )
+{
+    VmathVector3 *target = &result->col3;
+
+    vmathV3Copy( target, _col3 );
+}
+
+/*
+ * Overwrite the column selected by index `col` with vec.
+ * The column is addressed as an offset from col0; no range check is done.
+ */
+static inline void vmathT3SetCol( VmathTransform3 *result, int col, const VmathVector3 *vec )
+{
+    VmathVector3 *target = &result->col0 + col;
+
+    vmathV3Copy( target, vec );
+}
+
+/*
+ * Overwrite one row of the transform: element i of vec (i = 0..3) is stored
+ * at index `row` of column i.
+ */
+static inline void vmathT3SetRow( VmathTransform3 *result, int row, const VmathVector4 *vec )
+{
+    VmathVector3 *cols[4] = { &result->col0, &result->col1, &result->col2, &result->col3 };
+    int i;
+
+    for ( i = 0; i < 4; i++ ) {
+        vmathV3SetElem( cols[i], row, vmathV4GetElem( vec, i ) );
+    }
+}
+
+/*
+ * Set a single element addressed by (col, row): the column is read out,
+ * patched, and written back.
+ */
+static inline void vmathT3SetElem( VmathTransform3 *result, int col, int row, float val )
+{
+    VmathVector3 column;
+
+    vmathT3GetCol( &column, result, col );
+    vmathV3SetElem( &column, row, val );
+    vmathT3SetCol( result, col, &column );
+}
+
+/*
+ * Return the element addressed by (col, row): fetch the column, then read
+ * the requested component from it.
+ */
+static inline float vmathT3GetElem( const VmathTransform3 *tfrm, int col, int row )
+{
+    VmathVector3 column;
+    float elem;
+
+    vmathT3GetCol( &column, tfrm, col );
+    elem = vmathV3GetElem( &column, row );
+    return elem;
+}
+
+/* Copy column 0 of the transform into result. */
+static inline void vmathT3GetCol0( VmathVector3 *result, const VmathTransform3 *tfrm )
+{
+    const VmathVector3 *source = &tfrm->col0;
+
+    vmathV3Copy( result, source );
+}
+
+/* Copy column 1 of the transform into result. */
+static inline void vmathT3GetCol1( VmathVector3 *result, const VmathTransform3 *tfrm )
+{
+    const VmathVector3 *source = &tfrm->col1;
+
+    vmathV3Copy( result, source );
+}
+
+/* Copy column 2 of the transform into result. */
+static inline void vmathT3GetCol2( VmathVector3 *result, const VmathTransform3 *tfrm )
+{
+    const VmathVector3 *source = &tfrm->col2;
+
+    vmathV3Copy( result, source );
+}
+
+/* Copy column 3 of the transform into result. */
+static inline void vmathT3GetCol3( VmathVector3 *result, const VmathTransform3 *tfrm )
+{
+    const VmathVector3 *source = &tfrm->col3;
+
+    vmathV3Copy( result, source );
+}
+
+/*
+ * Copy the column selected by index `col` into result.
+ * The column is addressed as an offset from col0; no range check is done.
+ */
+static inline void vmathT3GetCol( VmathVector3 *result, const VmathTransform3 *tfrm, int col )
+{
+    const VmathVector3 *source = &tfrm->col0 + col;
+
+    vmathV3Copy( result, source );
+}
+
+/*
+ * Gather one row of the transform: element `row` of each of the four
+ * columns becomes the x, y, z, w of the 4-component result.
+ */
+static inline void vmathT3GetRow( VmathVector4 *result, const VmathTransform3 *tfrm, int row )
+{
+    const float fromCol0 = vmathV3GetElem( &tfrm->col0, row );
+    const float fromCol1 = vmathV3GetElem( &tfrm->col1, row );
+    const float fromCol2 = vmathV3GetElem( &tfrm->col2, row );
+    const float fromCol3 = vmathV3GetElem( &tfrm->col3, row );
+
+    vmathV4MakeFromElems( result, fromCol0, fromCol1, fromCol2, fromCol3 );
+}
+
+/*
+ * General inverse of a 3x4 transform.
+ * The upper 3x3 is inverted through cross products of its columns scaled by
+ * the reciprocal of their triple product; the translation column is then
+ * negated and multiplied by the inverted 3x3.
+ * No check is made for a zero determinant.
+ */
+static inline void vmathT3Inverse( VmathTransform3 *result, const VmathTransform3 *tfrm )
+{
+    vec_float4 inv0, inv1, inv2, inv3;
+    vec_float4 tmp0, tmp1, tmp2, tmp3, tmp4, dot, invdet;
+    vec_float4 xxxx, yyyy, zzzz;
+    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    /* Pairwise cross products of the three 3x3 columns. */
+    tmp2 = _vmathVfCross( tfrm->col0.vec128, tfrm->col1.vec128 );
+    tmp0 = _vmathVfCross( tfrm->col1.vec128, tfrm->col2.vec128 );
+    tmp1 = _vmathVfCross( tfrm->col2.vec128, tfrm->col0.vec128 );
+    /* inv3 temporarily holds the negated translation. */
+    inv3 = negatef4( tfrm->col3.vec128 );
+    /* dot = (col0 x col1) . col2, splatted to all lanes; invdet = 1/dot  */
+    /* (recipf4 is presumably a per-lane reciprocal).                     */
+    dot = _vmathVfDot3( tmp2, tfrm->col2.vec128 );
+    dot = vec_splat( dot, 0 );
+    invdet = recipf4( dot );
+    /* Interleave tmp0/tmp1/tmp2 so that inv0..inv2 hold the x, y and z   */
+    /* components of the three cross products respectively (a transpose). */
+    tmp3 = vec_mergeh( tmp0, tmp2 );
+    tmp4 = vec_mergel( tmp0, tmp2 );
+    inv0 = vec_mergeh( tmp3, tmp1 );
+    xxxx = vec_splat( inv3, 0 );
+    inv1 = vec_perm( tmp3, tmp1, _VECTORMATH_PERM_ZBWX );
+    inv2 = vec_perm( tmp4, tmp1, _VECTORMATH_PERM_XCYX );
+    yyyy = vec_splat( inv3, 1 );
+    zzzz = vec_splat( inv3, 2 );
+    /* inv3 = inv0*(-tx) + inv1*(-ty) + inv2*(-tz), still unscaled. */
+    inv3 = vec_madd( inv0, xxxx, zero );
+    inv3 = vec_madd( inv1, yyyy, inv3 );
+    inv3 = vec_madd( inv2, zzzz, inv3 );
+    /* Scale every column by 1/determinant. */
+    inv0 = vec_madd( inv0, invdet, zero );
+    inv1 = vec_madd( inv1, invdet, zero );
+    inv2 = vec_madd( inv2, invdet, zero );
+    inv3 = vec_madd( inv3, invdet, zero );
+    result->col0.vec128 = inv0;
+    result->col1.vec128 = inv1;
+    result->col2.vec128 = inv2;
+    result->col3.vec128 = inv3;
+}
+
+/*
+ * Inverse of a transform computed by transposing the upper 3x3 and
+ * multiplying the negated translation by that transpose. No determinant is
+ * computed, so this presumably expects an orthonormal upper 3x3 (per the
+ * function name) - the code itself does not verify it.
+ */
+static inline void vmathT3OrthoInverse( VmathTransform3 *result, const VmathTransform3 *tfrm )
+{
+    vec_float4 inv0, inv1, inv2, inv3;
+    vec_float4 tmp0, tmp1;
+    vec_float4 xxxx, yyyy, zzzz;
+    /* Merge/permute steps below transpose the 3x3: inv0, inv1, inv2 get  */
+    /* the x, y and z components of col0..col2 in their x/y/z lanes.      */
+    tmp0 = vec_mergeh( tfrm->col0.vec128, tfrm->col2.vec128 );
+    tmp1 = vec_mergel( tfrm->col0.vec128, tfrm->col2.vec128 );
+    /* inv3 temporarily holds the negated translation. */
+    inv3 = negatef4( tfrm->col3.vec128 );
+    inv0 = vec_mergeh( tmp0, tfrm->col1.vec128 );
+    xxxx = vec_splat( inv3, 0 );
+    inv1 = vec_perm( tmp0, tfrm->col1.vec128, _VECTORMATH_PERM_ZBWX );
+    inv2 = vec_perm( tmp1, tfrm->col1.vec128, _VECTORMATH_PERM_XCYX );
+    yyyy = vec_splat( inv3, 1 );
+    zzzz = vec_splat( inv3, 2 );
+    /* inv3 = inv0*(-tx) + inv1*(-ty) + inv2*(-tz). */
+    inv3 = vec_madd( inv0, xxxx, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    inv3 = vec_madd( inv1, yyyy, inv3 );
+    inv3 = vec_madd( inv2, zzzz, inv3 );
+    result->col0.vec128 = inv0;
+    result->col1.vec128 = inv1;
+    result->col2.vec128 = inv2;
+    result->col3.vec128 = inv3;
+}
+
+/*
+ * Apply vmathV3AbsPerElem to each of the four columns of tfrm, writing the
+ * matching column of result.
+ */
+static inline void vmathT3AbsPerElem( VmathTransform3 *result, const VmathTransform3 *tfrm )
+{
+    VmathVector3 *dstCols[4] = { &result->col0, &result->col1, &result->col2, &result->col3 };
+    const VmathVector3 *srcCols[4] = { &tfrm->col0, &tfrm->col1, &tfrm->col2, &tfrm->col3 };
+    int i;
+
+    for ( i = 0; i < 4; i++ ) {
+        vmathV3AbsPerElem( dstCols[i], srcCols[i] );
+    }
+}
+
+/*
+ * Transform a vector: result = col0*x + col1*y + col2*z.
+ * The translation column (col3) is not applied.
+ */
+static inline void vmathT3MulV3( VmathVector3 *result, const VmathTransform3 *tfrm, const VmathVector3 *vec )
+{
+    vec_float4 splatX, splatY, splatZ;
+    vec_float4 accum;
+
+    /* Broadcast each input component across a full vector. */
+    splatX = vec_splat( vec->vec128, 0 );
+    splatY = vec_splat( vec->vec128, 1 );
+    splatZ = vec_splat( vec->vec128, 2 );
+
+    /* Accumulate the three scaled columns with fused multiply-adds. */
+    accum = vec_madd( tfrm->col0.vec128, splatX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    accum = vec_madd( tfrm->col1.vec128, splatY, accum );
+    accum = vec_madd( tfrm->col2.vec128, splatZ, accum );
+
+    result->vec128 = accum;
+}
+
+/*
+ * Transform a point: result = col0*x + col1*y + col2*z + col3.
+ * The sum is formed as two partial sums that are added at the end.
+ */
+static inline void vmathT3MulP3( VmathPoint3 *result, const VmathTransform3 *tfrm, const VmathPoint3 *pnt )
+{
+    vec_float4 splatX, splatY, splatZ;
+    vec_float4 partXZ, partYT;
+    vec_float4 zeroVec = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+
+    splatX = vec_splat( pnt->vec128, 0 );
+    splatY = vec_splat( pnt->vec128, 1 );
+    splatZ = vec_splat( pnt->vec128, 2 );
+
+    /* partXZ collects the x and z terms, partYT the y term and translation. */
+    partXZ = vec_madd( tfrm->col0.vec128, splatX, zeroVec );
+    partYT = vec_madd( tfrm->col1.vec128, splatY, zeroVec );
+    partXZ = vec_madd( tfrm->col2.vec128, splatZ, partXZ );
+    partYT = vec_add( tfrm->col3.vec128, partYT );
+
+    result->vec128 = vec_add( partXZ, partYT );
+}
+
+/*
+ * Compose two transforms: the first three columns of tfrm1 are transformed
+ * as vectors by tfrm0, and its fourth column is transformed as a point.
+ * The product is built in a temporary and copied to result at the end, so
+ * result may be the same object as either input.
+ */
+static inline void vmathT3Mul( VmathTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1 )
+{
+    VmathTransform3 product;
+    VmathPoint3 originIn, originOut;
+
+    /* Rotation/scale part: vector multiplies (no translation). */
+    vmathT3MulV3( &product.col0, tfrm0, &tfrm1->col0 );
+    vmathT3MulV3( &product.col1, tfrm0, &tfrm1->col1 );
+    vmathT3MulV3( &product.col2, tfrm0, &tfrm1->col2 );
+
+    /* Translation part: treat tfrm1's col3 as a point. */
+    vmathP3MakeFromV3( &originIn, &tfrm1->col3 );
+    vmathT3MulP3( &originOut, tfrm0, &originIn );
+    vmathV3MakeFromP3( &product.col3, &originOut );
+
+    vmathT3Copy( result, &product );
+}
+
+/*
+ * Element-wise product of two transforms, done column by column with
+ * vmathV3MulPerElem.
+ */
+static inline void vmathT3MulPerElem( VmathTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1 )
+{
+    VmathVector3 *outCols[4] = { &result->col0, &result->col1, &result->col2, &result->col3 };
+    const VmathVector3 *aCols[4] = { &tfrm0->col0, &tfrm0->col1, &tfrm0->col2, &tfrm0->col3 };
+    const VmathVector3 *bCols[4] = { &tfrm1->col0, &tfrm1->col1, &tfrm1->col2, &tfrm1->col3 };
+    int i;
+
+    for ( i = 0; i < 4; i++ ) {
+        vmathV3MulPerElem( outCols[i], aCols[i], bCols[i] );
+    }
+}
+
+/*
+ * Build the identity transform: columns 0-2 from the X/Y/Z axis
+ * constructors, column 3 from the scalar 0.0f.
+ */
+static inline void vmathT3MakeIdentity( VmathTransform3 *result )
+{
+    VmathVector3 *axisX = &result->col0;
+    VmathVector3 *axisY = &result->col1;
+    VmathVector3 *axisZ = &result->col2;
+    VmathVector3 *translation = &result->col3;
+
+    vmathV3MakeXAxis( axisX );
+    vmathV3MakeYAxis( axisY );
+    vmathV3MakeZAxis( axisZ );
+    vmathV3MakeFromScalar( translation, 0.0f );
+}
+
+/*
+ * Replace columns 0-2 of the transform with the columns of a 3x3 matrix.
+ * Column 3 is left untouched.
+ */
+static inline void vmathT3SetUpper3x3( VmathTransform3 *result, const VmathMatrix3 *tfrm )
+{
+    VmathVector3 *dstCols[3] = { &result->col0, &result->col1, &result->col2 };
+    const VmathVector3 *srcCols[3] = { &tfrm->col0, &tfrm->col1, &tfrm->col2 };
+    int i;
+
+    for ( i = 0; i < 3; i++ ) {
+        vmathV3Copy( dstCols[i], srcCols[i] );
+    }
+}
+
+/* Extract columns 0-2 of the transform as a 3x3 matrix. */
+static inline void vmathT3GetUpper3x3( VmathMatrix3 *result, const VmathTransform3 *tfrm )
+{
+    const VmathVector3 *first = &tfrm->col0;
+    const VmathVector3 *second = &tfrm->col1;
+    const VmathVector3 *third = &tfrm->col2;
+
+    vmathM3MakeFromCols( result, first, second, third );
+}
+
+/* Store translateVec as the translation column (col3) of the transform. */
+static inline void vmathT3SetTranslation( VmathTransform3 *result, const VmathVector3 *translateVec )
+{
+    VmathVector3 *translation = &result->col3;
+
+    vmathV3Copy( translation, translateVec );
+}
+
+/* Copy the translation column (col3) of the transform into result. */
+static inline void vmathT3GetTranslation( VmathVector3 *result, const VmathTransform3 *tfrm )
+{
+    const VmathVector3 *translation = &tfrm->col3;
+
+    vmathV3Copy( result, translation );
+}
+
+/*
+ * Build a rotation about the X axis by `radians`, with zero translation.
+ *   col0 = X axis, col1 = ( 0, c, s ), col2 = ( 0, -s, c )
+ */
+static inline void vmathT3MakeRotationX( VmathTransform3 *result, float radians )
+{
+    vec_float4 sinV, cosV;
+    vec_float4 yCol, zCol;
+    vec_float4 zeroVec = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+
+    /* Sine and cosine of the angle, replicated in every lane. */
+    sincosf4( _vmathVfSplatScalar(radians), &sinV, &cosV );
+
+    /* Place c/s (resp. -s/c) in the y and z lanes of otherwise-zero vectors. */
+    yCol = vec_sel( vec_sel( zeroVec, cosV, _VECTORMATH_MASK_0x0F00 ), sinV, _VECTORMATH_MASK_0x00F0 );
+    zCol = vec_sel( vec_sel( zeroVec, negatef4(sinV), _VECTORMATH_MASK_0x0F00 ), cosV, _VECTORMATH_MASK_0x00F0 );
+
+    vmathV3MakeXAxis( &result->col0 );
+    result->col1.vec128 = yCol;
+    result->col2.vec128 = zCol;
+    vmathV3MakeFromScalar( &result->col3, 0.0f );
+}
+
+/*
+ * Build a rotation about the Y axis by `radians`, with zero translation.
+ *   col0 = ( c, 0, -s ), col1 = Y axis, col2 = ( s, 0, c )
+ */
+static inline void vmathT3MakeRotationY( VmathTransform3 *result, float radians )
+{
+    vec_float4 sinV, cosV;
+    vec_float4 xCol, zCol;
+    vec_float4 zeroVec = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+
+    /* Sine and cosine of the angle, replicated in every lane. */
+    sincosf4( _vmathVfSplatScalar(radians), &sinV, &cosV );
+
+    /* Place c/-s (resp. s/c) in the x and z lanes of otherwise-zero vectors. */
+    xCol = vec_sel( vec_sel( zeroVec, cosV, _VECTORMATH_MASK_0xF000 ), negatef4(sinV), _VECTORMATH_MASK_0x00F0 );
+    zCol = vec_sel( vec_sel( zeroVec, sinV, _VECTORMATH_MASK_0xF000 ), cosV, _VECTORMATH_MASK_0x00F0 );
+
+    result->col0.vec128 = xCol;
+    vmathV3MakeYAxis( &result->col1 );
+    result->col2.vec128 = zCol;
+    vmathV3MakeFromScalar( &result->col3, 0.0f );
+}
+
+/*
+ * Build a rotation about the Z axis by `radians`, with zero translation.
+ *   col0 = ( c, s, 0 ), col1 = ( -s, c, 0 ), col2 = Z axis
+ */
+static inline void vmathT3MakeRotationZ( VmathTransform3 *result, float radians )
+{
+    vec_float4 sinV, cosV;
+    vec_float4 xCol, yCol;
+    vec_float4 zeroVec = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+
+    /* Sine and cosine of the angle, replicated in every lane. */
+    sincosf4( _vmathVfSplatScalar(radians), &sinV, &cosV );
+
+    /* Place c/s (resp. -s/c) in the x and y lanes of otherwise-zero vectors. */
+    xCol = vec_sel( vec_sel( zeroVec, cosV, _VECTORMATH_MASK_0xF000 ), sinV, _VECTORMATH_MASK_0x0F00 );
+    yCol = vec_sel( vec_sel( zeroVec, negatef4(sinV), _VECTORMATH_MASK_0xF000 ), cosV, _VECTORMATH_MASK_0x0F00 );
+
+    result->col0.vec128 = xCol;
+    result->col1.vec128 = yCol;
+    vmathV3MakeZAxis( &result->col2 );
+    vmathV3MakeFromScalar( &result->col3, 0.0f );
+}
+
+/*
+ * Build a rotation transform from three angles (radiansXYZ holds the X, Y
+ * and Z angles), with zero translation. Per the function name this is
+ * presumably the product Rz * Ry * Rx - NOTE(review): confirm against the
+ * library documentation. Sines and cosines of all three angles are computed
+ * with a single sincosf4 call and then shuffled into the needed positions.
+ */
+static inline void vmathT3MakeRotationZYX( VmathTransform3 *result, const VmathVector3 *radiansXYZ )
+{
+    VmathVector4 tmpV4_0;
+    vec_float4 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
+    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    /* Pad the three angles with a fourth value of 0.0f. */
+    vmathV4MakeFromV3Scalar( &tmpV4_0, radiansXYZ, 0.0f );
+    angles = tmpV4_0.vec128;
+    sincosf4( angles, &s, &c );
+    negS = negatef4( s );
+    /* Z0/Z1 interleave the z and w lanes of (c, s) and (-s, c); the w    */
+    /* lane of Z1 is then cleared.                                        */
+    Z0 = vec_mergel( c, s );
+    Z1 = vec_mergel( negS, c );
+    Z1 = vec_andc( Z1, (vec_float4)_VECTORMATH_MASK_0x000F );
+    /* Y0/Y1 gather Y-angle terms via the BBYX permute. */
+    Y0 = vec_perm( negS, c, _VECTORMATH_PERM_BBYX );
+    Y1 = vec_perm( c, s, _VECTORMATH_PERM_BBYX );
+    /* X0/X1 broadcast sin and cos of the X angle (lane 0). */
+    X0 = vec_splat( s, 0 );
+    X1 = vec_splat( c, 0 );
+    tmp = vec_madd( Z0, Y1, zero );
+    /* col0 = Z0*Y0; col1 = Z1*X1 + tmp*X0; col2 = tmp*X1 - Z1*X0. */
+    result->col0.vec128 = vec_madd( Z0, Y0, zero );
+    result->col1.vec128 = vec_madd( Z1, X1, vec_madd( tmp, X0, zero ) );
+    result->col2.vec128 = vec_nmsub( Z1, X0, vec_madd( tmp, X1, zero ) );
+    vmathV3MakeFromScalar( &result->col3, 0.0f );
+}
+
+/*
+ * Build a rotation transform about an axis: the 3x3 part comes from
+ * vmathM3MakeRotationAxis and the translation is made from the scalar 0.0f.
+ */
+static inline void vmathT3MakeRotationAxis( VmathTransform3 *result, float radians, const VmathVector3 *unitVec )
+{
+    VmathMatrix3 rotation;
+    VmathVector3 noTranslation;
+
+    vmathM3MakeRotationAxis( &rotation, radians, unitVec );
+    vmathV3MakeFromScalar( &noTranslation, 0.0f );
+    vmathT3MakeFromM3V3( result, &rotation, &noTranslation );
+}
+
+/*
+ * Build a rotation transform from a quaternion: the 3x3 part comes from
+ * vmathM3MakeFromQ and the translation is made from the scalar 0.0f.
+ */
+static inline void vmathT3MakeRotationQ( VmathTransform3 *result, const VmathQuat *unitQuat )
+{
+    VmathMatrix3 rotation;
+    VmathVector3 noTranslation;
+
+    vmathM3MakeFromQ( &rotation, unitQuat );
+    vmathV3MakeFromScalar( &noTranslation, 0.0f );
+    vmathT3MakeFromM3V3( result, &rotation, &noTranslation );
+}
+
+/*
+ * Build a scale transform: column i keeps only lane i of scaleVec (all
+ * other lanes zero); the translation is made from the scalar 0.0f.
+ */
+static inline void vmathT3MakeScale( VmathTransform3 *result, const VmathVector3 *scaleVec )
+{
+    vec_uint4 keepX = _VECTORMATH_MASK_0xF000;
+    vec_uint4 keepY = _VECTORMATH_MASK_0x0F00;
+    vec_uint4 keepZ = _VECTORMATH_MASK_0x00F0;
+    vec_float4 zeroVec = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+
+    /* scaleVec is deliberately re-read for every column. */
+    result->col0.vec128 = vec_sel( zeroVec, scaleVec->vec128, keepX );
+    result->col1.vec128 = vec_sel( zeroVec, scaleVec->vec128, keepY );
+    result->col2.vec128 = vec_sel( zeroVec, scaleVec->vec128, keepZ );
+    vmathV3MakeFromScalar( &result->col3, 0.0f );
+}
+
+/*
+ * Scale columns 0, 1, 2 of tfrm by the X, Y, Z components of scaleVec
+ * respectively; column 3 is copied unchanged.
+ */
+static inline void vmathT3AppendScale( VmathTransform3 *result, const VmathTransform3 *tfrm, const VmathVector3 *scaleVec )
+{
+    float factor;
+
+    /* Each factor is fetched immediately before the column it scales. */
+    factor = vmathV3GetX( scaleVec );
+    vmathV3ScalarMul( &result->col0, &tfrm->col0, factor );
+
+    factor = vmathV3GetY( scaleVec );
+    vmathV3ScalarMul( &result->col1, &tfrm->col1, factor );
+
+    factor = vmathV3GetZ( scaleVec );
+    vmathV3ScalarMul( &result->col2, &tfrm->col2, factor );
+
+    vmathV3Copy( &result->col3, &tfrm->col3 );
+}
+
+/*
+ * Multiply every column of tfrm (including the translation column)
+ * element-wise by scaleVec.
+ */
+static inline void vmathT3PrependScale( VmathTransform3 *result, const VmathVector3 *scaleVec, const VmathTransform3 *tfrm )
+{
+    VmathVector3 *dstCols[4] = { &result->col0, &result->col1, &result->col2, &result->col3 };
+    const VmathVector3 *srcCols[4] = { &tfrm->col0, &tfrm->col1, &tfrm->col2, &tfrm->col3 };
+    int i;
+
+    for ( i = 0; i < 4; i++ ) {
+        vmathV3MulPerElem( dstCols[i], srcCols[i], scaleVec );
+    }
+}
+
+/*
+ * Build a pure translation transform: columns 0-2 from the X/Y/Z axis
+ * constructors, column 3 copied from translateVec.
+ */
+static inline void vmathT3MakeTranslation( VmathTransform3 *result, const VmathVector3 *translateVec )
+{
+    VmathVector3 *axisX = &result->col0;
+    VmathVector3 *axisY = &result->col1;
+    VmathVector3 *axisZ = &result->col2;
+    VmathVector3 *translation = &result->col3;
+
+    vmathV3MakeXAxis( axisX );
+    vmathV3MakeYAxis( axisY );
+    vmathV3MakeZAxis( axisZ );
+    vmathV3Copy( translation, translateVec );
+}
+
+/*
+ * Column-wise select between two transforms: each result column is
+ * vmathV3Select of the matching columns of tfrm0 and tfrm1 with select1.
+ */
+static inline void vmathT3Select( VmathTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1, unsigned int select1 )
+{
+    VmathVector3 *outCols[4] = { &result->col0, &result->col1, &result->col2, &result->col3 };
+    const VmathVector3 *aCols[4] = { &tfrm0->col0, &tfrm0->col1, &tfrm0->col2, &tfrm0->col3 };
+    const VmathVector3 *bCols[4] = { &tfrm1->col0, &tfrm1->col1, &tfrm1->col2, &tfrm1->col3 };
+    int i;
+
+    for ( i = 0; i < 4; i++ ) {
+        vmathV3Select( outCols[i], aCols[i], bCols[i], select1 );
+    }
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Debug helper: print the transform one row at a time (rows 0..2), fetching
+ * each row with vmathT3GetRow and printing it with vmathV4Print.
+ */
+static inline void vmathT3Print( const VmathTransform3 *tfrm )
+{
+    VmathVector4 rowVec;
+    int row;
+
+    for ( row = 0; row < 3; row++ ) {
+        vmathT3GetRow( &rowVec, tfrm, row );
+        vmathV4Print( &rowVec );
+    }
+}
+
+/*
+ * Debug helper: print "<name>:" on its own line, then the transform rows.
+ */
+static inline void vmathT3Prints( const VmathTransform3 *tfrm, const char *name )
+{
+    /* Label line first, transform body second. */
+    printf("%s:\n", name);
+
+    vmathT3Print( tfrm );
+}
+
+#endif
+
+/*
+ * Convert a 3x3 matrix to a quaternion without branching.
+ * All four candidate quaternions (one per case listed below) are computed
+ * in parallel, and the final answer is picked with vector selects driven by
+ * comparisons of the diagonal elements and the trace.
+ * NOTE(review): presumably tfrm is expected to be a rotation matrix; the
+ * code does not check this.
+ */
+static inline void vmathQMakeFromM3( VmathQuat *result, const VmathMatrix3 *tfrm )
+{
+    vec_float4 res;
+    vec_float4 col0, col1, col2;
+    vec_float4 xx_yy, xx_yy_zz_xx, yy_zz_xx_yy, zz_xx_yy_zz, diagSum, diagDiff;
+    vec_float4 zy_xz_yx, yz_zx_xy, sum, diff;
+    vec_float4 radicand, invSqrt, scale;
+    vec_float4 res0, res1, res2, res3;
+    vec_float4 xx, yy, zz;
+    vec_uint4 select_x = _VECTORMATH_MASK_0xF000;
+    vec_uint4 select_y = _VECTORMATH_MASK_0x0F00;
+    vec_uint4 select_z = _VECTORMATH_MASK_0x00F0;
+    vec_uint4 select_w = _VECTORMATH_MASK_0x000F;
+    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+
+    col0 = tfrm->col0.vec128;
+    col1 = tfrm->col1.vec128;
+    col2 = tfrm->col2.vec128;
+
+    /* four cases: */
+    /* trace > 0 */
+    /* else */
+    /*    xx largest diagonal element */
+    /*    yy largest diagonal element */
+    /*    zz largest diagonal element */
+
+    /* compute quaternion for each case */
+
+    /* Gather the diagonal (xx, yy, zz) in three rotated orders so that    */
+    /* lane-wise add/sub yields every sign combination needed.             */
+    xx_yy = vec_sel( col0, col1, select_y );
+    xx_yy_zz_xx = vec_perm( xx_yy, col2, _VECTORMATH_PERM_XYCX );
+    yy_zz_xx_yy = vec_perm( xx_yy, col2, _VECTORMATH_PERM_YCXY );
+    zz_xx_yy_zz = vec_perm( xx_yy, col2, _VECTORMATH_PERM_CXYC );
+
+    /* radicand: x/y/z lanes take 1 + diagDiff, the w lane takes 1 + diagSum. */
+    diagSum = vec_add( vec_add( xx_yy_zz_xx, yy_zz_xx_yy ), zz_xx_yy_zz );
+    diagDiff = vec_sub( vec_sub( xx_yy_zz_xx, yy_zz_xx_yy ), zz_xx_yy_zz );
+    radicand = vec_add( vec_sel( diagDiff, diagSum, select_w ), ((vec_float4){1.0f,1.0f,1.0f,1.0f}) );
+    /* rsqrtf4 is presumably a per-lane reciprocal square root. */
+    invSqrt = rsqrtf4( radicand );
+
+    /* Gather the off-diagonal elements, then form their sums/differences. */
+    zy_xz_yx = vec_sel( col0, col1, select_z );
+    zy_xz_yx = vec_perm( zy_xz_yx, col2, _VECTORMATH_PERM_ZAYX );
+    yz_zx_xy = vec_sel( col0, col1, select_x );
+    yz_zx_xy = vec_perm( yz_zx_xy, col2, _VECTORMATH_PERM_BZXX );
+
+    sum = vec_add( zy_xz_yx, yz_zx_xy );
+    diff = vec_sub( zy_xz_yx, yz_zx_xy );
+
+    /* scale = 0.5 * invSqrt; each candidate uses its own lane of scale.   */
+    scale = vec_madd( invSqrt, ((vec_float4){0.5f,0.5f,0.5f,0.5f}), zero );
+    res0 = vec_perm( sum, diff, _VECTORMATH_PERM_XZYA );
+    res1 = vec_perm( sum, diff, _VECTORMATH_PERM_ZXXB );
+    res2 = vec_perm( sum, diff, _VECTORMATH_PERM_YXXC );
+    res3 = diff;
+    /* Candidate N gets radicand in lane N (x, y, z, w respectively). */
+    res0 = vec_sel( res0, radicand, select_x );
+    res1 = vec_sel( res1, radicand, select_y );
+    res2 = vec_sel( res2, radicand, select_z );
+    res3 = vec_sel( res3, radicand, select_w );
+    res0 = vec_madd( res0, vec_splat( scale, 0 ), zero );
+    res1 = vec_madd( res1, vec_splat( scale, 1 ), zero );
+    res2 = vec_madd( res2, vec_splat( scale, 2 ), zero );
+    res3 = vec_madd( res3, vec_splat( scale, 3 ), zero );
+
+    /* determine case and select answer */
+
+    xx = vec_splat( col0, 0 );
+    yy = vec_splat( col1, 1 );
+    zz = vec_splat( col2, 2 );
+    /* Start from res0; later selects override it: res1 if yy > xx, res2   */
+    /* if zz exceeds both, and res3 whenever the trace is positive.        */
+    res = vec_sel( res0, res1, vec_cmpgt( yy, xx ) );
+    res = vec_sel( res, res2, vec_and( vec_cmpgt( zz, xx ), vec_cmpgt( zz, yy ) ) );
+    res = vec_sel( res, res3, vec_cmpgt( vec_splat( diagSum, 0 ), zero ) );
+    result->vec128 = res;
+}
+
+/*
+ * Outer product of two 3-vectors: column i of the result is tfrm0 scaled
+ * by component i (X, Y, Z) of tfrm1.
+ */
+static inline void vmathV3Outer( VmathMatrix3 *result, const VmathVector3 *tfrm0, const VmathVector3 *tfrm1 )
+{
+    float weight;
+
+    /* Each weight is fetched immediately before the column it scales. */
+    weight = vmathV3GetX( tfrm1 );
+    vmathV3ScalarMul( &result->col0, tfrm0, weight );
+
+    weight = vmathV3GetY( tfrm1 );
+    vmathV3ScalarMul( &result->col1, tfrm0, weight );
+
+    weight = vmathV3GetZ( tfrm1 );
+    vmathV3ScalarMul( &result->col2, tfrm0, weight );
+}
+
+/*
+ * Outer product of two 4-vectors: column i of the result is tfrm0 scaled
+ * by component i (X, Y, Z, W) of tfrm1.
+ */
+static inline void vmathV4Outer( VmathMatrix4 *result, const VmathVector4 *tfrm0, const VmathVector4 *tfrm1 )
+{
+    float weight;
+
+    /* Each weight is fetched immediately before the column it scales. */
+    weight = vmathV4GetX( tfrm1 );
+    vmathV4ScalarMul( &result->col0, tfrm0, weight );
+
+    weight = vmathV4GetY( tfrm1 );
+    vmathV4ScalarMul( &result->col1, tfrm0, weight );
+
+    weight = vmathV4GetZ( tfrm1 );
+    vmathV4ScalarMul( &result->col2, tfrm0, weight );
+
+    weight = vmathV4GetW( tfrm1 );
+    vmathV4ScalarMul( &result->col3, tfrm0, weight );
+}
+
+/*
+ * Multiply a row vector by a 3x3 matrix (result = vec * mat).
+ * The matrix is transposed on the fly with merges/permutes, after which the
+ * product is an ordinary sum of the transposed columns weighted by the x, y
+ * and z components of vec.
+ */
+static inline void vmathV3RowMul( VmathVector3 *result, const VmathVector3 *vec, const VmathMatrix3 *mat )
+{
+    vec_float4 tmp0, tmp1, mcol0, mcol1, mcol2, res;
+    vec_float4 xxxx, yyyy, zzzz;
+    /* mcol0/mcol1/mcol2 receive the x, y and z components of the three   */
+    /* matrix columns in their x/y/z lanes (i.e. the matrix rows).        */
+    tmp0 = vec_mergeh( mat->col0.vec128, mat->col2.vec128 );
+    tmp1 = vec_mergel( mat->col0.vec128, mat->col2.vec128 );
+    xxxx = vec_splat( vec->vec128, 0 );
+    mcol0 = vec_mergeh( tmp0, mat->col1.vec128 );
+    mcol1 = vec_perm( tmp0, mat->col1.vec128, _VECTORMATH_PERM_ZBWX );
+    mcol2 = vec_perm( tmp1, mat->col1.vec128, _VECTORMATH_PERM_XCYX );
+    yyyy = vec_splat( vec->vec128, 1 );
+    /* res = mcol0*x + mcol1*y + mcol2*z, accumulated with multiply-adds. */
+    res = vec_madd( mcol0, xxxx, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    zzzz = vec_splat( vec->vec128, 2 );
+    res = vec_madd( mcol1, yyyy, res );
+    res = vec_madd( mcol2, zzzz, res );
+    result->vec128 = res;
+}
+
+/*
+ * Build the cross-product (skew-symmetric) matrix of vec:
+ *   col0 = ( 0,  z, -y ), col1 = ( -z, 0,  x ), col2 = ( y, -x, 0 )
+ * Each column is gathered from vec and its negation with one permute, then
+ * its diagonal lane is cleared with an and-with-complement of a lane mask.
+ */
+static inline void vmathV3CrossMatrix( VmathMatrix3 *result, const VmathVector3 *vec )
+{
+    vec_float4 neg, res0, res1, res2;
+    neg = negatef4( vec->vec128 );
+    /* In the permute names X..W index vec and A..D index neg. */
+    res0 = vec_perm( vec->vec128, neg, _VECTORMATH_PERM_XZBX );
+    res1 = vec_perm( vec->vec128, neg, _VECTORMATH_PERM_CXXX );
+    res2 = vec_perm( vec->vec128, neg, _VECTORMATH_PERM_YAXX );
+    /* Zero the diagonal: lane x of col0, lane y of col1, lane z of col2. */
+    res0 = vec_andc( res0, (vec_float4)_VECTORMATH_MASK_0xF000 );
+    res1 = vec_andc( res1, (vec_float4)_VECTORMATH_MASK_0x0F00 );
+    res2 = vec_andc( res2, (vec_float4)_VECTORMATH_MASK_0x00F0 );
+    result->col0.vec128 = res0;
+    result->col1.vec128 = res1;
+    result->col2.vec128 = res2;
+}
+
+/*
+ * Cross vec with each column of mat and assemble the three results into a
+ * new matrix. The columns are computed into temporaries first, so result
+ * may be the same object as mat.
+ */
+static inline void vmathV3CrossMatrixMul( VmathMatrix3 *result, const VmathVector3 *vec, const VmathMatrix3 *mat )
+{
+    VmathVector3 crossed0, crossed1, crossed2;
+
+    vmathV3Cross( &crossed0, vec, &mat->col0 );
+    vmathV3Cross( &crossed1, vec, &mat->col1 );
+    vmathV3Cross( &crossed2, vec, &mat->col2 );
+
+    vmathM3MakeFromCols( result, &crossed0, &crossed1, &crossed2 );
+}
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/ppu/c/mat_aos_v.h b/Extras/vectormathlibrary/include/vectormath/ppu/c/mat_aos_v.h
index 47c683abe..f8f869bb8 100644
--- a/Extras/vectormathlibrary/include/vectormath/ppu/c/mat_aos_v.h
+++ b/Extras/vectormathlibrary/include/vectormath/ppu/c/mat_aos_v.h
@@ -1,1026 +1,1026 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_MAT_AOS_V_C_H
-#define _VECTORMATH_MAT_AOS_V_C_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/*-----------------------------------------------------------------------------
- * Constants
- * for shuffles, words are labeled [x,y,z,w] [a,b,c,d]
- */
-#define _VECTORMATH_PERM_ZBWX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B, _VECTORMATH_PERM_W, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PERM_XCYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_C, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PERM_XYAB ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A, _VECTORMATH_PERM_B })
-#define _VECTORMATH_PERM_ZWCD ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_W, _VECTORMATH_PERM_C, _VECTORMATH_PERM_D })
-#define _VECTORMATH_PERM_XZBX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B, _VECTORMATH_PERM_X })     
-#define _VECTORMATH_PERM_CXXX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PERM_YAXX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PERM_XAZC ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_A, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_C })
-#define _VECTORMATH_PERM_YXWZ ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X, _VECTORMATH_PERM_W, _VECTORMATH_PERM_Z })
-#define _VECTORMATH_PERM_YBWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_B, _VECTORMATH_PERM_W, _VECTORMATH_PERM_D })
-#define _VECTORMATH_PERM_XYCX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_C, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PERM_YCXY ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y })
-#define _VECTORMATH_PERM_CXYC ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_C })
-#define _VECTORMATH_PERM_ZAYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PERM_BZXX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_B, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PERM_XZYA ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A })
-#define _VECTORMATH_PERM_ZXXB ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X, _VECTORMATH_PERM_B })
-#define _VECTORMATH_PERM_YXXC ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X, _VECTORMATH_PERM_C })
-#define _VECTORMATH_PERM_BBYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_B, _VECTORMATH_PERM_B, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PI_OVER_2 1.570796327f
-
-/*-----------------------------------------------------------------------------
- * Definitions
- */
-static inline VmathMatrix3 vmathM3MakeFromScalar_V( float scalar )
-{
-    VmathMatrix3 result;
-    vmathM3MakeFromScalar(&result, scalar);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3MakeFromQ_V( VmathQuat unitQuat )
-{
-    VmathMatrix3 result;
-    vmathM3MakeFromQ(&result, &unitQuat);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3MakeFromCols_V( VmathVector3 _col0, VmathVector3 _col1, VmathVector3 _col2 )
-{
-    VmathMatrix3 result;
-    vmathM3MakeFromCols(&result, &_col0, &_col1, &_col2);
-    return result;
-}
-
-static inline void vmathM3SetCol0_V( VmathMatrix3 *result, VmathVector3 _col0 )
-{
-    vmathM3SetCol0(result, &_col0);
-}
-
-static inline void vmathM3SetCol1_V( VmathMatrix3 *result, VmathVector3 _col1 )
-{
-    vmathM3SetCol1(result, &_col1);
-}
-
-static inline void vmathM3SetCol2_V( VmathMatrix3 *result, VmathVector3 _col2 )
-{
-    vmathM3SetCol2(result, &_col2);
-}
-
-static inline void vmathM3SetCol_V( VmathMatrix3 *result, int col, VmathVector3 vec )
-{
-    vmathM3SetCol(result, col, &vec);
-}
-
-static inline void vmathM3SetRow_V( VmathMatrix3 *result, int row, VmathVector3 vec )
-{
-    vmathM3SetRow(result, row, &vec);
-}
-
-static inline void vmathM3SetElem_V( VmathMatrix3 *result, int col, int row, float val )
-{
-    vmathM3SetElem(result, col, row, val);
-}
-
-static inline float vmathM3GetElem_V( VmathMatrix3 mat, int col, int row )
-{
-    return vmathM3GetElem(&mat, col, row);
-}
-
-static inline VmathVector3 vmathM3GetCol0_V( VmathMatrix3 mat )
-{
-    VmathVector3 result;
-    vmathM3GetCol0(&result, &mat);
-    return result;
-}
-
-static inline VmathVector3 vmathM3GetCol1_V( VmathMatrix3 mat )
-{
-    VmathVector3 result;
-    vmathM3GetCol1(&result, &mat);
-    return result;
-}
-
-static inline VmathVector3 vmathM3GetCol2_V( VmathMatrix3 mat )
-{
-    VmathVector3 result;
-    vmathM3GetCol2(&result, &mat);
-    return result;
-}
-
-static inline VmathVector3 vmathM3GetCol_V( VmathMatrix3 mat, int col )
-{
-    VmathVector3 result;
-    vmathM3GetCol(&result, &mat, col);
-    return result;
-}
-
-static inline VmathVector3 vmathM3GetRow_V( VmathMatrix3 mat, int row )
-{
-    VmathVector3 result;
-    vmathM3GetRow(&result, &mat, row);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3Transpose_V( VmathMatrix3 mat )
-{
-    VmathMatrix3 result;
-    vmathM3Transpose(&result, &mat);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3Inverse_V( VmathMatrix3 mat )
-{
-    VmathMatrix3 result;
-    vmathM3Inverse(&result, &mat);
-    return result;
-}
-
-static inline float vmathM3Determinant_V( VmathMatrix3 mat )
-{
-    return vmathM3Determinant(&mat);
-}
-
-static inline VmathMatrix3 vmathM3Add_V( VmathMatrix3 mat0, VmathMatrix3 mat1 )
-{
-    VmathMatrix3 result;
-    vmathM3Add(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3Sub_V( VmathMatrix3 mat0, VmathMatrix3 mat1 )
-{
-    VmathMatrix3 result;
-    vmathM3Sub(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3Neg_V( VmathMatrix3 mat )
-{
-    VmathMatrix3 result;
-    vmathM3Neg(&result, &mat);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3AbsPerElem_V( VmathMatrix3 mat )
-{
-    VmathMatrix3 result;
-    vmathM3AbsPerElem(&result, &mat);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3ScalarMul_V( VmathMatrix3 mat, float scalar )
-{
-    VmathMatrix3 result;
-    vmathM3ScalarMul(&result, &mat, scalar);
-    return result;
-}
-
-static inline VmathVector3 vmathM3MulV3_V( VmathMatrix3 mat, VmathVector3 vec )
-{
-    VmathVector3 result;
-    vmathM3MulV3(&result, &mat, &vec);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3Mul_V( VmathMatrix3 mat0, VmathMatrix3 mat1 )
-{
-    VmathMatrix3 result;
-    vmathM3Mul(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3MulPerElem_V( VmathMatrix3 mat0, VmathMatrix3 mat1 )
-{
-    VmathMatrix3 result;
-    vmathM3MulPerElem(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3MakeIdentity_V( )
-{
-    VmathMatrix3 result;
-    vmathM3MakeIdentity(&result);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3MakeRotationX_V( float radians )
-{
-    VmathMatrix3 result;
-    vmathM3MakeRotationX(&result, radians);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3MakeRotationY_V( float radians )
-{
-    VmathMatrix3 result;
-    vmathM3MakeRotationY(&result, radians);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3MakeRotationZ_V( float radians )
-{
-    VmathMatrix3 result;
-    vmathM3MakeRotationZ(&result, radians);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3MakeRotationZYX_V( VmathVector3 radiansXYZ )
-{
-    VmathMatrix3 result;
-    vmathM3MakeRotationZYX(&result, &radiansXYZ);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3MakeRotationAxis_V( float radians, VmathVector3 unitVec )
-{
-    VmathMatrix3 result;
-    vmathM3MakeRotationAxis(&result, radians, &unitVec);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3MakeRotationQ_V( VmathQuat unitQuat )
-{
-    VmathMatrix3 result;
-    vmathM3MakeRotationQ(&result, &unitQuat);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3MakeScale_V( VmathVector3 scaleVec )
-{
-    VmathMatrix3 result;
-    vmathM3MakeScale(&result, &scaleVec);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3AppendScale_V( VmathMatrix3 mat, VmathVector3 scaleVec )
-{
-    VmathMatrix3 result;
-    vmathM3AppendScale(&result, &mat, &scaleVec);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3PrependScale_V( VmathVector3 scaleVec, VmathMatrix3 mat )
-{
-    VmathMatrix3 result;
-    vmathM3PrependScale(&result, &scaleVec, &mat);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3Select_V( VmathMatrix3 mat0, VmathMatrix3 mat1, unsigned int select1 )
-{
-    VmathMatrix3 result;
-    vmathM3Select(&result, &mat0, &mat1, select1);
-    return result;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathM3Print_V( VmathMatrix3 mat )
-{
-    vmathM3Print(&mat);
-}
-
-static inline void vmathM3Prints_V( VmathMatrix3 mat, const char *name )
-{
-    vmathM3Prints(&mat, name);
-}
-
-#endif
-
-static inline VmathMatrix4 vmathM4MakeFromScalar_V( float scalar )
-{
-    VmathMatrix4 result;
-    vmathM4MakeFromScalar(&result, scalar);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeFromT3_V( VmathTransform3 mat )
-{
-    VmathMatrix4 result;
-    vmathM4MakeFromT3(&result, &mat);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeFromCols_V( VmathVector4 _col0, VmathVector4 _col1, VmathVector4 _col2, VmathVector4 _col3 )
-{
-    VmathMatrix4 result;
-    vmathM4MakeFromCols(&result, &_col0, &_col1, &_col2, &_col3);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeFromM3V3_V( VmathMatrix3 mat, VmathVector3 translateVec )
-{
-    VmathMatrix4 result;
-    vmathM4MakeFromM3V3(&result, &mat, &translateVec);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeFromQV3_V( VmathQuat unitQuat, VmathVector3 translateVec )
-{
-    VmathMatrix4 result;
-    vmathM4MakeFromQV3(&result, &unitQuat, &translateVec);
-    return result;
-}
-
-static inline void vmathM4SetCol0_V( VmathMatrix4 *result, VmathVector4 _col0 )
-{
-    vmathM4SetCol0(result, &_col0);
-}
-
-static inline void vmathM4SetCol1_V( VmathMatrix4 *result, VmathVector4 _col1 )
-{
-    vmathM4SetCol1(result, &_col1);
-}
-
-static inline void vmathM4SetCol2_V( VmathMatrix4 *result, VmathVector4 _col2 )
-{
-    vmathM4SetCol2(result, &_col2);
-}
-
-static inline void vmathM4SetCol3_V( VmathMatrix4 *result, VmathVector4 _col3 )
-{
-    vmathM4SetCol3(result, &_col3);
-}
-
-static inline void vmathM4SetCol_V( VmathMatrix4 *result, int col, VmathVector4 vec )
-{
-    vmathM4SetCol(result, col, &vec);
-}
-
-static inline void vmathM4SetRow_V( VmathMatrix4 *result, int row, VmathVector4 vec )
-{
-    vmathM4SetRow(result, row, &vec);
-}
-
-static inline void vmathM4SetElem_V( VmathMatrix4 *result, int col, int row, float val )
-{
-    vmathM4SetElem(result, col, row, val);
-}
-
-static inline float vmathM4GetElem_V( VmathMatrix4 mat, int col, int row )
-{
-    return vmathM4GetElem(&mat, col, row);
-}
-
-static inline VmathVector4 vmathM4GetCol0_V( VmathMatrix4 mat )
-{
-    VmathVector4 result;
-    vmathM4GetCol0(&result, &mat);
-    return result;
-}
-
-static inline VmathVector4 vmathM4GetCol1_V( VmathMatrix4 mat )
-{
-    VmathVector4 result;
-    vmathM4GetCol1(&result, &mat);
-    return result;
-}
-
-static inline VmathVector4 vmathM4GetCol2_V( VmathMatrix4 mat )
-{
-    VmathVector4 result;
-    vmathM4GetCol2(&result, &mat);
-    return result;
-}
-
-static inline VmathVector4 vmathM4GetCol3_V( VmathMatrix4 mat )
-{
-    VmathVector4 result;
-    vmathM4GetCol3(&result, &mat);
-    return result;
-}
-
-static inline VmathVector4 vmathM4GetCol_V( VmathMatrix4 mat, int col )
-{
-    VmathVector4 result;
-    vmathM4GetCol(&result, &mat, col);
-    return result;
-}
-
-static inline VmathVector4 vmathM4GetRow_V( VmathMatrix4 mat, int row )
-{
-    VmathVector4 result;
-    vmathM4GetRow(&result, &mat, row);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4Transpose_V( VmathMatrix4 mat )
-{
-    VmathMatrix4 result;
-    vmathM4Transpose(&result, &mat);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4Inverse_V( VmathMatrix4 mat )
-{
-    VmathMatrix4 result;
-    vmathM4Inverse(&result, &mat);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4AffineInverse_V( VmathMatrix4 mat )
-{
-    VmathMatrix4 result;
-    vmathM4AffineInverse(&result, &mat);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4OrthoInverse_V( VmathMatrix4 mat )
-{
-    VmathMatrix4 result;
-    vmathM4OrthoInverse(&result, &mat);
-    return result;
-}
-
-static inline float vmathM4Determinant_V( VmathMatrix4 mat )
-{
-    return vmathM4Determinant(&mat);
-}
-
-static inline VmathMatrix4 vmathM4Add_V( VmathMatrix4 mat0, VmathMatrix4 mat1 )
-{
-    VmathMatrix4 result;
-    vmathM4Add(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4Sub_V( VmathMatrix4 mat0, VmathMatrix4 mat1 )
-{
-    VmathMatrix4 result;
-    vmathM4Sub(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4Neg_V( VmathMatrix4 mat )
-{
-    VmathMatrix4 result;
-    vmathM4Neg(&result, &mat);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4AbsPerElem_V( VmathMatrix4 mat )
-{
-    VmathMatrix4 result;
-    vmathM4AbsPerElem(&result, &mat);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4ScalarMul_V( VmathMatrix4 mat, float scalar )
-{
-    VmathMatrix4 result;
-    vmathM4ScalarMul(&result, &mat, scalar);
-    return result;
-}
-
-static inline VmathVector4 vmathM4MulV4_V( VmathMatrix4 mat, VmathVector4 vec )
-{
-    VmathVector4 result;
-    vmathM4MulV4(&result, &mat, &vec);
-    return result;
-}
-
-static inline VmathVector4 vmathM4MulV3_V( VmathMatrix4 mat, VmathVector3 vec )
-{
-    VmathVector4 result;
-    vmathM4MulV3(&result, &mat, &vec);
-    return result;
-}
-
-static inline VmathVector4 vmathM4MulP3_V( VmathMatrix4 mat, VmathPoint3 pnt )
-{
-    VmathVector4 result;
-    vmathM4MulP3(&result, &mat, &pnt);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4Mul_V( VmathMatrix4 mat0, VmathMatrix4 mat1 )
-{
-    VmathMatrix4 result;
-    vmathM4Mul(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MulT3_V( VmathMatrix4 mat, VmathTransform3 tfrm1 )
-{
-    VmathMatrix4 result;
-    vmathM4MulT3(&result, &mat, &tfrm1);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MulPerElem_V( VmathMatrix4 mat0, VmathMatrix4 mat1 )
-{
-    VmathMatrix4 result;
-    vmathM4MulPerElem(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeIdentity_V( )
-{
-    VmathMatrix4 result;
-    vmathM4MakeIdentity(&result);
-    return result;
-}
-
-static inline void vmathM4SetUpper3x3_V( VmathMatrix4 *result, VmathMatrix3 mat3 )
-{
-    vmathM4SetUpper3x3(result, &mat3);
-}
-
-static inline VmathMatrix3 vmathM4GetUpper3x3_V( VmathMatrix4 mat )
-{
-    VmathMatrix3 result;
-    vmathM4GetUpper3x3(&result, &mat);
-    return result;
-}
-
-static inline void vmathM4SetTranslation_V( VmathMatrix4 *result, VmathVector3 translateVec )
-{
-    vmathM4SetTranslation(result, &translateVec);
-}
-
-static inline VmathVector3 vmathM4GetTranslation_V( VmathMatrix4 mat )
-{
-    VmathVector3 result;
-    vmathM4GetTranslation(&result, &mat);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeRotationX_V( float radians )
-{
-    VmathMatrix4 result;
-    vmathM4MakeRotationX(&result, radians);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeRotationY_V( float radians )
-{
-    VmathMatrix4 result;
-    vmathM4MakeRotationY(&result, radians);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeRotationZ_V( float radians )
-{
-    VmathMatrix4 result;
-    vmathM4MakeRotationZ(&result, radians);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeRotationZYX_V( VmathVector3 radiansXYZ )
-{
-    VmathMatrix4 result;
-    vmathM4MakeRotationZYX(&result, &radiansXYZ);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeRotationAxis_V( float radians, VmathVector3 unitVec )
-{
-    VmathMatrix4 result;
-    vmathM4MakeRotationAxis(&result, radians, &unitVec);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeRotationQ_V( VmathQuat unitQuat )
-{
-    VmathMatrix4 result;
-    vmathM4MakeRotationQ(&result, &unitQuat);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeScale_V( VmathVector3 scaleVec )
-{
-    VmathMatrix4 result;
-    vmathM4MakeScale(&result, &scaleVec);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4AppendScale_V( VmathMatrix4 mat, VmathVector3 scaleVec )
-{
-    VmathMatrix4 result;
-    vmathM4AppendScale(&result, &mat, &scaleVec);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4PrependScale_V( VmathVector3 scaleVec, VmathMatrix4 mat )
-{
-    VmathMatrix4 result;
-    vmathM4PrependScale(&result, &scaleVec, &mat);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeTranslation_V( VmathVector3 translateVec )
-{
-    VmathMatrix4 result;
-    vmathM4MakeTranslation(&result, &translateVec);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeLookAt_V( VmathPoint3 eyePos, VmathPoint3 lookAtPos, VmathVector3 upVec )
-{
-    VmathMatrix4 result;
-    vmathM4MakeLookAt(&result, &eyePos, &lookAtPos, &upVec);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakePerspective_V( float fovyRadians, float aspect, float zNear, float zFar )
-{
-    VmathMatrix4 result;
-    vmathM4MakePerspective(&result, fovyRadians, aspect, zNear, zFar);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeFrustum_V( float left, float right, float bottom, float top, float zNear, float zFar )
-{
-    VmathMatrix4 result;
-    vmathM4MakeFrustum(&result, left, right, bottom, top, zNear, zFar);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeOrthographic_V( float left, float right, float bottom, float top, float zNear, float zFar )
-{
-    VmathMatrix4 result;
-    vmathM4MakeOrthographic(&result, left, right, bottom, top, zNear, zFar);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4Select_V( VmathMatrix4 mat0, VmathMatrix4 mat1, unsigned int select1 )
-{
-    VmathMatrix4 result;
-    vmathM4Select(&result, &mat0, &mat1, select1);
-    return result;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathM4Print_V( VmathMatrix4 mat )
-{
-    vmathM4Print(&mat);
-}
-
-static inline void vmathM4Prints_V( VmathMatrix4 mat, const char *name )
-{
-    vmathM4Prints(&mat, name);
-}
-
-#endif
-
-static inline VmathTransform3 vmathT3MakeFromScalar_V( float scalar )
-{
-    VmathTransform3 result;
-    vmathT3MakeFromScalar(&result, scalar);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MakeFromCols_V( VmathVector3 _col0, VmathVector3 _col1, VmathVector3 _col2, VmathVector3 _col3 )
-{
-    VmathTransform3 result;
-    vmathT3MakeFromCols(&result, &_col0, &_col1, &_col2, &_col3);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MakeFromM3V3_V( VmathMatrix3 tfrm, VmathVector3 translateVec )
-{
-    VmathTransform3 result;
-    vmathT3MakeFromM3V3(&result, &tfrm, &translateVec);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MakeFromQV3_V( VmathQuat unitQuat, VmathVector3 translateVec )
-{
-    VmathTransform3 result;
-    vmathT3MakeFromQV3(&result, &unitQuat, &translateVec);
-    return result;
-}
-
-static inline void vmathT3SetCol0_V( VmathTransform3 *result, VmathVector3 _col0 )
-{
-    vmathT3SetCol0(result, &_col0);
-}
-
-static inline void vmathT3SetCol1_V( VmathTransform3 *result, VmathVector3 _col1 )
-{
-    vmathT3SetCol1(result, &_col1);
-}
-
-static inline void vmathT3SetCol2_V( VmathTransform3 *result, VmathVector3 _col2 )
-{
-    vmathT3SetCol2(result, &_col2);
-}
-
-static inline void vmathT3SetCol3_V( VmathTransform3 *result, VmathVector3 _col3 )
-{
-    vmathT3SetCol3(result, &_col3);
-}
-
-static inline void vmathT3SetCol_V( VmathTransform3 *result, int col, VmathVector3 vec )
-{
-    vmathT3SetCol(result, col, &vec);
-}
-
-static inline void vmathT3SetRow_V( VmathTransform3 *result, int row, VmathVector4 vec )
-{
-    vmathT3SetRow(result, row, &vec);
-}
-
-static inline void vmathT3SetElem_V( VmathTransform3 *result, int col, int row, float val )
-{
-    vmathT3SetElem(result, col, row, val);
-}
-
-static inline float vmathT3GetElem_V( VmathTransform3 tfrm, int col, int row )
-{
-    return vmathT3GetElem(&tfrm, col, row);
-}
-
-static inline VmathVector3 vmathT3GetCol0_V( VmathTransform3 tfrm )
-{
-    VmathVector3 result;
-    vmathT3GetCol0(&result, &tfrm);
-    return result;
-}
-
-static inline VmathVector3 vmathT3GetCol1_V( VmathTransform3 tfrm )
-{
-    VmathVector3 result;
-    vmathT3GetCol1(&result, &tfrm);
-    return result;
-}
-
-static inline VmathVector3 vmathT3GetCol2_V( VmathTransform3 tfrm )
-{
-    VmathVector3 result;
-    vmathT3GetCol2(&result, &tfrm);
-    return result;
-}
-
-static inline VmathVector3 vmathT3GetCol3_V( VmathTransform3 tfrm )
-{
-    VmathVector3 result;
-    vmathT3GetCol3(&result, &tfrm);
-    return result;
-}
-
-static inline VmathVector3 vmathT3GetCol_V( VmathTransform3 tfrm, int col )
-{
-    VmathVector3 result;
-    vmathT3GetCol(&result, &tfrm, col);
-    return result;
-}
-
-static inline VmathVector4 vmathT3GetRow_V( VmathTransform3 tfrm, int row )
-{
-    VmathVector4 result;
-    vmathT3GetRow(&result, &tfrm, row);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3Inverse_V( VmathTransform3 tfrm )
-{
-    VmathTransform3 result;
-    vmathT3Inverse(&result, &tfrm);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3OrthoInverse_V( VmathTransform3 tfrm )
-{
-    VmathTransform3 result;
-    vmathT3OrthoInverse(&result, &tfrm);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3AbsPerElem_V( VmathTransform3 tfrm )
-{
-    VmathTransform3 result;
-    vmathT3AbsPerElem(&result, &tfrm);
-    return result;
-}
-
-static inline VmathVector3 vmathT3MulV3_V( VmathTransform3 tfrm, VmathVector3 vec )
-{
-    VmathVector3 result;
-    vmathT3MulV3(&result, &tfrm, &vec);
-    return result;
-}
-
-static inline VmathPoint3 vmathT3MulP3_V( VmathTransform3 tfrm, VmathPoint3 pnt )
-{
-    VmathPoint3 result;
-    vmathT3MulP3(&result, &tfrm, &pnt);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3Mul_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1 )
-{
-    VmathTransform3 result;
-    vmathT3Mul(&result, &tfrm0, &tfrm1);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MulPerElem_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1 )
-{
-    VmathTransform3 result;
-    vmathT3MulPerElem(&result, &tfrm0, &tfrm1);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MakeIdentity_V( )
-{
-    VmathTransform3 result;
-    vmathT3MakeIdentity(&result);
-    return result;
-}
-
-static inline void vmathT3SetUpper3x3_V( VmathTransform3 *result, VmathMatrix3 tfrm )
-{
-    vmathT3SetUpper3x3(result, &tfrm);
-}
-
-static inline VmathMatrix3 vmathT3GetUpper3x3_V( VmathTransform3 tfrm )
-{
-    VmathMatrix3 result;
-    vmathT3GetUpper3x3(&result, &tfrm);
-    return result;
-}
-
-static inline void vmathT3SetTranslation_V( VmathTransform3 *result, VmathVector3 translateVec )
-{
-    vmathT3SetTranslation(result, &translateVec);
-}
-
-static inline VmathVector3 vmathT3GetTranslation_V( VmathTransform3 tfrm )
-{
-    VmathVector3 result;
-    vmathT3GetTranslation(&result, &tfrm);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MakeRotationX_V( float radians )
-{
-    VmathTransform3 result;
-    vmathT3MakeRotationX(&result, radians);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MakeRotationY_V( float radians )
-{
-    VmathTransform3 result;
-    vmathT3MakeRotationY(&result, radians);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MakeRotationZ_V( float radians )
-{
-    VmathTransform3 result;
-    vmathT3MakeRotationZ(&result, radians);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MakeRotationZYX_V( VmathVector3 radiansXYZ )
-{
-    VmathTransform3 result;
-    vmathT3MakeRotationZYX(&result, &radiansXYZ);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MakeRotationAxis_V( float radians, VmathVector3 unitVec )
-{
-    VmathTransform3 result;
-    vmathT3MakeRotationAxis(&result, radians, &unitVec);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MakeRotationQ_V( VmathQuat unitQuat )
-{
-    VmathTransform3 result;
-    vmathT3MakeRotationQ(&result, &unitQuat);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MakeScale_V( VmathVector3 scaleVec )
-{
-    VmathTransform3 result;
-    vmathT3MakeScale(&result, &scaleVec);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3AppendScale_V( VmathTransform3 tfrm, VmathVector3 scaleVec )
-{
-    VmathTransform3 result;
-    vmathT3AppendScale(&result, &tfrm, &scaleVec);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3PrependScale_V( VmathVector3 scaleVec, VmathTransform3 tfrm )
-{
-    VmathTransform3 result;
-    vmathT3PrependScale(&result, &scaleVec, &tfrm);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MakeTranslation_V( VmathVector3 translateVec )
-{
-    VmathTransform3 result;
-    vmathT3MakeTranslation(&result, &translateVec);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3Select_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1, unsigned int select1 )
-{
-    VmathTransform3 result;
-    vmathT3Select(&result, &tfrm0, &tfrm1, select1);
-    return result;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathT3Print_V( VmathTransform3 tfrm )
-{
-    vmathT3Print(&tfrm);
-}
-
-static inline void vmathT3Prints_V( VmathTransform3 tfrm, const char *name )
-{
-    vmathT3Prints(&tfrm, name);
-}
-
-#endif
-
-static inline VmathQuat vmathQMakeFromM3_V( VmathMatrix3 tfrm )
-{
-    VmathQuat result;
-    vmathQMakeFromM3(&result, &tfrm);
-    return result;
-}
-
-static inline VmathMatrix3 vmathV3Outer_V( VmathVector3 tfrm0, VmathVector3 tfrm1 )
-{
-    VmathMatrix3 result;
-    vmathV3Outer(&result, &tfrm0, &tfrm1);
-    return result;
-}
-
-static inline VmathMatrix4 vmathV4Outer_V( VmathVector4 tfrm0, VmathVector4 tfrm1 )
-{
-    VmathMatrix4 result;
-    vmathV4Outer(&result, &tfrm0, &tfrm1);
-    return result;
-}
-
-static inline VmathVector3 vmathV3RowMul_V( VmathVector3 vec, VmathMatrix3 mat )
-{
-    VmathVector3 result;
-    vmathV3RowMul(&result, &vec, &mat);
-    return result;
-}
-
-static inline VmathMatrix3 vmathV3CrossMatrix_V( VmathVector3 vec )
-{
-    VmathMatrix3 result;
-    vmathV3CrossMatrix(&result, &vec);
-    return result;
-}
-
-static inline VmathMatrix3 vmathV3CrossMatrixMul_V( VmathVector3 vec, VmathMatrix3 mat )
-{
-    VmathMatrix3 result;
-    vmathV3CrossMatrixMul(&result, &vec, &mat);
-    return result;
-}
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_MAT_AOS_V_C_H
+#define _VECTORMATH_MAT_AOS_V_C_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*-----------------------------------------------------------------------------
+ * Constants
+ * For shuffles, words are labeled [x,y,z,w] [a,b,c,d]; each pattern name spells its four selected words in order.
+ */
+#define _VECTORMATH_PERM_ZBWX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B, _VECTORMATH_PERM_W, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_XCYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_C, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_XYAB ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A, _VECTORMATH_PERM_B })
+#define _VECTORMATH_PERM_ZWCD ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_W, _VECTORMATH_PERM_C, _VECTORMATH_PERM_D })
+#define _VECTORMATH_PERM_XZBX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B, _VECTORMATH_PERM_X })     
+#define _VECTORMATH_PERM_CXXX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_YAXX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_XAZC ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_A, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_C })
+#define _VECTORMATH_PERM_YXWZ ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X, _VECTORMATH_PERM_W, _VECTORMATH_PERM_Z })
+#define _VECTORMATH_PERM_YBWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_B, _VECTORMATH_PERM_W, _VECTORMATH_PERM_D })
+#define _VECTORMATH_PERM_XYCX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_C, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_YCXY ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y })
+#define _VECTORMATH_PERM_CXYC ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_C })
+#define _VECTORMATH_PERM_ZAYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_BZXX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_B, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_XZYA ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A })
+#define _VECTORMATH_PERM_ZXXB ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X, _VECTORMATH_PERM_B })
+#define _VECTORMATH_PERM_YXXC ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X, _VECTORMATH_PERM_C })
+#define _VECTORMATH_PERM_BBYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_B, _VECTORMATH_PERM_B, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PI_OVER_2 1.570796327f /* pi/2 as a single-precision literal */
+
+/*-----------------------------------------------------------------------------
+ * Definitions
+ */
+static inline VmathMatrix3 vmathM3MakeFromScalar_V( float scalar )
+{   /* By-value wrapper: hands a local to vmathM3MakeFromScalar by address and returns it. */
+    VmathMatrix3 out;
+    vmathM3MakeFromScalar( &out, scalar );
+    return out;
+}
+
+static inline VmathMatrix3 vmathM3MakeFromQ_V( VmathQuat unitQuat )
+{   /* By-value wrapper: passes its unitQuat copy to vmathM3MakeFromQ by address. */
+    VmathMatrix3 out;
+    vmathM3MakeFromQ( &out, &unitQuat );
+    return out;
+}
+
+static inline VmathMatrix3 vmathM3MakeFromCols_V( VmathVector3 _col0, VmathVector3 _col1, VmathVector3 _col2 )
+{   /* By-value wrapper: passes the three column copies to vmathM3MakeFromCols by address. */
+    VmathMatrix3 out;
+    vmathM3MakeFromCols( &out, &_col0, &_col1, &_col2 );
+    return out;
+}
+
+static inline void vmathM3SetCol0_V( VmathMatrix3 *result, VmathVector3 _col0 )
+{   /* By-value wrapper: forwards the address of its _col0 copy to vmathM3SetCol0. */
+    vmathM3SetCol0( result, &_col0 );
+}
+
+static inline void vmathM3SetCol1_V( VmathMatrix3 *result, VmathVector3 _col1 )
+{   /* By-value wrapper: forwards the address of its _col1 copy to vmathM3SetCol1. */
+    vmathM3SetCol1( result, &_col1 );
+}
+
+static inline void vmathM3SetCol2_V( VmathMatrix3 *result, VmathVector3 _col2 )
+{   /* By-value wrapper: forwards the address of its _col2 copy to vmathM3SetCol2. */
+    vmathM3SetCol2( result, &_col2 );
+}
+
+static inline void vmathM3SetCol_V( VmathMatrix3 *result, int col, VmathVector3 vec )
+{   /* By-value wrapper: forwards col and the address of its vec copy to vmathM3SetCol. */
+    vmathM3SetCol( result, col, &vec );
+}
+
+static inline void vmathM3SetRow_V( VmathMatrix3 *result, int row, VmathVector3 vec )
+{   /* By-value wrapper: forwards row and the address of its vec copy to vmathM3SetRow. */
+    vmathM3SetRow( result, row, &vec );
+}
+
+static inline void vmathM3SetElem_V( VmathMatrix3 *result, int col, int row, float val )
+{   /* Plain pass-through to vmathM3SetElem; every argument is forwarded unchanged. */
+    vmathM3SetElem( result, col, row, val );
+}
+
+static inline float vmathM3GetElem_V( VmathMatrix3 mat, int col, int row )
+{   /* By-value wrapper: forwards the address of its mat copy to vmathM3GetElem. */
+    return vmathM3GetElem( &mat, col, row );
+}
+
+static inline VmathVector3 vmathM3GetCol0_V( VmathMatrix3 mat )
+{   /* By-value wrapper: passes its mat copy to vmathM3GetCol0 by address. */
+    VmathVector3 out;
+    vmathM3GetCol0( &out, &mat );
+    return out;
+}
+
+static inline VmathVector3 vmathM3GetCol1_V( VmathMatrix3 mat )
+{   /* By-value wrapper: passes its mat copy to vmathM3GetCol1 by address. */
+    VmathVector3 out;
+    vmathM3GetCol1( &out, &mat );
+    return out;
+}
+
+static inline VmathVector3 vmathM3GetCol2_V( VmathMatrix3 mat )
+{   /* By-value wrapper: passes its mat copy to vmathM3GetCol2 by address. */
+    VmathVector3 out;
+    vmathM3GetCol2( &out, &mat );
+    return out;
+}
+
+static inline VmathVector3 vmathM3GetCol_V( VmathMatrix3 mat, int col )
+{   /* By-value wrapper: passes its mat copy (by address) and col to vmathM3GetCol. */
+    VmathVector3 out;
+    vmathM3GetCol( &out, &mat, col );
+    return out;
+}
+
+static inline VmathVector3 vmathM3GetRow_V( VmathMatrix3 mat, int row )
+{   /* By-value wrapper: passes its mat copy (by address) and row to vmathM3GetRow. */
+    VmathVector3 out;
+    vmathM3GetRow( &out, &mat, row );
+    return out;
+}
+
+static inline VmathMatrix3 vmathM3Transpose_V( VmathMatrix3 mat )
+{   /* By-value wrapper: passes its mat copy to vmathM3Transpose by address. */
+    VmathMatrix3 out;
+    vmathM3Transpose( &out, &mat );
+    return out;
+}
+
+static inline VmathMatrix3 vmathM3Inverse_V( VmathMatrix3 mat )
+{   /* By-value wrapper: passes its mat copy to vmathM3Inverse by address. */
+    VmathMatrix3 out;
+    vmathM3Inverse( &out, &mat );
+    return out;
+}
+
+static inline float vmathM3Determinant_V( VmathMatrix3 mat )
+{   /* By-value wrapper: forwards the address of its mat copy to vmathM3Determinant. */
+    return vmathM3Determinant( &mat );
+}
+
+static inline VmathMatrix3 vmathM3Add_V( VmathMatrix3 mat0, VmathMatrix3 mat1 )
+{   /* By-value wrapper: passes both matrix copies to vmathM3Add by address. */
+    VmathMatrix3 out;
+    vmathM3Add( &out, &mat0, &mat1 );
+    return out;
+}
+
+static inline VmathMatrix3 vmathM3Sub_V( VmathMatrix3 mat0, VmathMatrix3 mat1 )
+{   /* By-value wrapper: passes both matrix copies to vmathM3Sub by address. */
+    VmathMatrix3 out;
+    vmathM3Sub( &out, &mat0, &mat1 );
+    return out;
+}
+
+static inline VmathMatrix3 vmathM3Neg_V( VmathMatrix3 mat )
+{   /* By-value wrapper: passes its mat copy to vmathM3Neg by address. */
+    VmathMatrix3 out;
+    vmathM3Neg( &out, &mat );
+    return out;
+}
+
+static inline VmathMatrix3 vmathM3AbsPerElem_V( VmathMatrix3 mat )
+{   /* By-value wrapper: passes its mat copy to vmathM3AbsPerElem by address. */
+    VmathMatrix3 out;
+    vmathM3AbsPerElem( &out, &mat );
+    return out;
+}
+
+static inline VmathMatrix3 vmathM3ScalarMul_V( VmathMatrix3 mat, float scalar )
+{   /* By-value wrapper: passes its mat copy (by address) and scalar to vmathM3ScalarMul. */
+    VmathMatrix3 out;
+    vmathM3ScalarMul( &out, &mat, scalar );
+    return out;
+}
+
+static inline VmathVector3 vmathM3MulV3_V( VmathMatrix3 mat, VmathVector3 vec )
+{   /* By-value wrapper: passes its mat and vec copies to vmathM3MulV3 by address. */
+    VmathVector3 out;
+    vmathM3MulV3( &out, &mat, &vec );
+    return out;
+}
+
+static inline VmathMatrix3 vmathM3Mul_V( VmathMatrix3 mat0, VmathMatrix3 mat1 )
+{   /* By-value wrapper: passes both matrix copies to vmathM3Mul by address, in the same order. */
+    VmathMatrix3 out;
+    vmathM3Mul( &out, &mat0, &mat1 );
+    return out;
+}
+
+static inline VmathMatrix3 vmathM3MulPerElem_V( VmathMatrix3 mat0, VmathMatrix3 mat1 )
+{   /* By-value wrapper: passes both matrix copies to vmathM3MulPerElem by address. */
+    VmathMatrix3 out;
+    vmathM3MulPerElem( &out, &mat0, &mat1 );
+    return out;
+}
+
+static inline VmathMatrix3 vmathM3MakeIdentity_V( void )
+{   /* ( void ), not ( ): in C an empty list is not a prototype, so stray arguments would go undiagnosed. */
+    VmathMatrix3 result;  /* handed to vmathM3MakeIdentity by address, then returned by value */
+    vmathM3MakeIdentity(&result);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3MakeRotationX_V( float radians )
+{   /* By-value wrapper: hands a local to vmathM3MakeRotationX by address and returns it. */
+    VmathMatrix3 out;
+    vmathM3MakeRotationX( &out, radians );
+    return out;
+}
+
+static inline VmathMatrix3 vmathM3MakeRotationY_V( float radians )
+{   /* By-value wrapper: hands a local to vmathM3MakeRotationY by address and returns it. */
+    VmathMatrix3 out;
+    vmathM3MakeRotationY( &out, radians );
+    return out;
+}
+
+static inline VmathMatrix3 vmathM3MakeRotationZ_V( float radians )
+{   /* By-value wrapper: hands a local to vmathM3MakeRotationZ by address and returns it. */
+    VmathMatrix3 out;
+    vmathM3MakeRotationZ( &out, radians );
+    return out;
+}
+
+static inline VmathMatrix3 vmathM3MakeRotationZYX_V( VmathVector3 radiansXYZ )
+{   /* By-value wrapper: passes its radiansXYZ copy to vmathM3MakeRotationZYX by address. */
+    VmathMatrix3 out;
+    vmathM3MakeRotationZYX( &out, &radiansXYZ );
+    return out;
+}
+
+static inline VmathMatrix3 vmathM3MakeRotationAxis_V( float radians, VmathVector3 unitVec )
+{   /* By-value wrapper: passes radians and the address of its unitVec copy to vmathM3MakeRotationAxis. */
+    VmathMatrix3 out;
+    vmathM3MakeRotationAxis( &out, radians, &unitVec );
+    return out;
+}
+
+static inline VmathMatrix3 vmathM3MakeRotationQ_V( VmathQuat unitQuat )
+{   /* By-value wrapper: passes its unitQuat copy to vmathM3MakeRotationQ by address. */
+    VmathMatrix3 out;
+    vmathM3MakeRotationQ( &out, &unitQuat );
+    return out;
+}
+
+static inline VmathMatrix3 vmathM3MakeScale_V( VmathVector3 scaleVec )
+{   /* By-value wrapper: passes its scaleVec copy to vmathM3MakeScale by address. */
+    VmathMatrix3 out;
+    vmathM3MakeScale( &out, &scaleVec );
+    return out;
+}
+
+static inline VmathMatrix3 vmathM3AppendScale_V( VmathMatrix3 mat, VmathVector3 scaleVec )
+{   /* By-value wrapper: passes its mat and scaleVec copies to vmathM3AppendScale by address. */
+    VmathMatrix3 out;
+    vmathM3AppendScale( &out, &mat, &scaleVec );
+    return out;
+}
+
+static inline VmathMatrix3 vmathM3PrependScale_V( VmathVector3 scaleVec, VmathMatrix3 mat )
+{   /* By-value wrapper: passes its scaleVec and mat copies to vmathM3PrependScale by address. */
+    VmathMatrix3 out;
+    vmathM3PrependScale( &out, &scaleVec, &mat );
+    return out;
+}
+
+static inline VmathMatrix3 vmathM3Select_V( VmathMatrix3 mat0, VmathMatrix3 mat1, unsigned int select1 )
+{   /* By-value wrapper: passes both matrix copies (by address) and select1 to vmathM3Select. */
+    VmathMatrix3 out;
+    vmathM3Select( &out, &mat0, &mat1, select1 );
+    return out;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathM3Print_V( VmathMatrix3 mat )
+{   /* By-value wrapper (debug builds only): forwards the address of its mat copy to vmathM3Print. */
+    vmathM3Print( &mat );
+}
+
+static inline void vmathM3Prints_V( VmathMatrix3 mat, const char *name )
+{   /* By-value wrapper (debug builds only): forwards &mat and name to vmathM3Prints. */
+    vmathM3Prints( &mat, name );
+}
+
+#endif
+
+static inline VmathMatrix4 vmathM4MakeFromScalar_V( float scalar )
+{   /* By-value wrapper: hands a local to vmathM4MakeFromScalar by address and returns it. */
+    VmathMatrix4 out;
+    vmathM4MakeFromScalar( &out, scalar );
+    return out;
+}
+
+static inline VmathMatrix4 vmathM4MakeFromT3_V( VmathTransform3 mat )
+{   /* By-value wrapper: passes its VmathTransform3 copy to vmathM4MakeFromT3 by address. */
+    VmathMatrix4 out;
+    vmathM4MakeFromT3( &out, &mat );
+    return out;
+}
+
+static inline VmathMatrix4 vmathM4MakeFromCols_V( VmathVector4 _col0, VmathVector4 _col1, VmathVector4 _col2, VmathVector4 _col3 )
+{   /* By-value wrapper: passes the four column copies to vmathM4MakeFromCols by address. */
+    VmathMatrix4 out;
+    vmathM4MakeFromCols( &out, &_col0, &_col1, &_col2, &_col3 );
+    return out;
+}
+
+static inline VmathMatrix4 vmathM4MakeFromM3V3_V( VmathMatrix3 mat, VmathVector3 translateVec )
+{   /* By-value wrapper: passes its mat and translateVec copies to vmathM4MakeFromM3V3 by address. */
+    VmathMatrix4 out;
+    vmathM4MakeFromM3V3( &out, &mat, &translateVec );
+    return out;
+}
+
+static inline VmathMatrix4 vmathM4MakeFromQV3_V( VmathQuat unitQuat, VmathVector3 translateVec )
+{   /* By-value wrapper: passes its unitQuat and translateVec copies to vmathM4MakeFromQV3 by address. */
+    VmathMatrix4 out;
+    vmathM4MakeFromQV3( &out, &unitQuat, &translateVec );
+    return out;
+}
+
+static inline void vmathM4SetCol0_V( VmathMatrix4 *result, VmathVector4 _col0 )
+{   /* By-value wrapper: forwards the address of its _col0 copy to vmathM4SetCol0. */
+    vmathM4SetCol0( result, &_col0 );
+}
+
+static inline void vmathM4SetCol1_V( VmathMatrix4 *result, VmathVector4 _col1 )
+{   /* By-value wrapper: forwards the address of its _col1 copy to vmathM4SetCol1. */
+    vmathM4SetCol1( result, &_col1 );
+}
+
+static inline void vmathM4SetCol2_V( VmathMatrix4 *result, VmathVector4 _col2 )
+{   /* By-value wrapper: forwards the address of its _col2 copy to vmathM4SetCol2. */
+    vmathM4SetCol2( result, &_col2 );
+}
+
+static inline void vmathM4SetCol3_V( VmathMatrix4 *result, VmathVector4 _col3 )
+{   /* By-value wrapper: forwards the address of its _col3 copy to vmathM4SetCol3. */
+    vmathM4SetCol3( result, &_col3 );
+}
+
+static inline void vmathM4SetCol_V( VmathMatrix4 *result, int col, VmathVector4 vec )
+{   /* By-value wrapper: forwards col and the address of its vec copy to vmathM4SetCol. */
+    vmathM4SetCol( result, col, &vec );
+}
+
+static inline void vmathM4SetRow_V( VmathMatrix4 *result, int row, VmathVector4 vec )
+{   /* By-value wrapper: forwards row and the address of its vec copy to vmathM4SetRow. */
+    vmathM4SetRow( result, row, &vec );
+}
+
+static inline void vmathM4SetElem_V( VmathMatrix4 *result, int col, int row, float val )
+{   /* Plain pass-through to vmathM4SetElem; every argument is forwarded unchanged. */
+    vmathM4SetElem( result, col, row, val );
+}
+
+static inline float vmathM4GetElem_V( VmathMatrix4 mat, int col, int row )
+{   /* By-value wrapper: forwards the address of its mat copy to vmathM4GetElem. */
+    return vmathM4GetElem( &mat, col, row );
+}
+
+static inline VmathVector4 vmathM4GetCol0_V( VmathMatrix4 mat )
+{   /* By-value wrapper: passes its mat copy to vmathM4GetCol0 by address. */
+    VmathVector4 out;
+    vmathM4GetCol0( &out, &mat );
+    return out;
+}
+
+static inline VmathVector4 vmathM4GetCol1_V( VmathMatrix4 mat )
+{   /* By-value wrapper: passes its mat copy to vmathM4GetCol1 by address. */
+    VmathVector4 out;
+    vmathM4GetCol1( &out, &mat );
+    return out;
+}
+
+static inline VmathVector4 vmathM4GetCol2_V( VmathMatrix4 mat )
+{   /* By-value wrapper: passes its mat copy to vmathM4GetCol2 by address. */
+    VmathVector4 out;
+    vmathM4GetCol2( &out, &mat );
+    return out;
+}
+
+static inline VmathVector4 vmathM4GetCol3_V( VmathMatrix4 mat )
+{   /* By-value wrapper: passes its mat copy to vmathM4GetCol3 by address. */
+    VmathVector4 out;
+    vmathM4GetCol3( &out, &mat );
+    return out;
+}
+
+static inline VmathVector4 vmathM4GetCol_V( VmathMatrix4 mat, int col )
+{   /* By-value wrapper: passes its mat copy (by address) and col to vmathM4GetCol. */
+    VmathVector4 out;
+    vmathM4GetCol( &out, &mat, col );
+    return out;
+}
+
+static inline VmathVector4 vmathM4GetRow_V( VmathMatrix4 mat, int row )
+{   /* By-value wrapper: passes its mat copy (by address) and row to vmathM4GetRow. */
+    VmathVector4 out;
+    vmathM4GetRow( &out, &mat, row );
+    return out;
+}
+
+static inline VmathMatrix4 vmathM4Transpose_V( VmathMatrix4 mat )
+{   /* By-value wrapper: passes its mat copy to vmathM4Transpose by address. */
+    VmathMatrix4 out;
+    vmathM4Transpose( &out, &mat );
+    return out;
+}
+
+static inline VmathMatrix4 vmathM4Inverse_V( VmathMatrix4 mat )
+{   /* By-value wrapper: passes its mat copy to vmathM4Inverse by address. */
+    VmathMatrix4 out;
+    vmathM4Inverse( &out, &mat );
+    return out;
+}
+
+static inline VmathMatrix4 vmathM4AffineInverse_V( VmathMatrix4 mat )
+{   /* By-value wrapper: passes its mat copy to vmathM4AffineInverse by address. */
+    VmathMatrix4 out;
+    vmathM4AffineInverse( &out, &mat );
+    return out;
+}
+
+static inline VmathMatrix4 vmathM4OrthoInverse_V( VmathMatrix4 mat )
+{   /* By-value wrapper: passes its mat copy to vmathM4OrthoInverse by address. */
+    VmathMatrix4 out;
+    vmathM4OrthoInverse( &out, &mat );
+    return out;
+}
+
+static inline float vmathM4Determinant_V( VmathMatrix4 mat )
+{   /* By-value wrapper: forwards the address of its mat copy to vmathM4Determinant. */
+    return vmathM4Determinant( &mat );
+}
+
+static inline VmathMatrix4 vmathM4Add_V( VmathMatrix4 mat0, VmathMatrix4 mat1 )
+{   /* By-value wrapper: passes both matrix copies to vmathM4Add by address. */
+    VmathMatrix4 out;
+    vmathM4Add( &out, &mat0, &mat1 );
+    return out;
+}
+
+static inline VmathMatrix4 vmathM4Sub_V( VmathMatrix4 mat0, VmathMatrix4 mat1 )
+{   /* By-value wrapper: passes both matrix copies to vmathM4Sub by address. */
+    VmathMatrix4 out;
+    vmathM4Sub( &out, &mat0, &mat1 );
+    return out;
+}
+
+static inline VmathMatrix4 vmathM4Neg_V( VmathMatrix4 mat )
+{   /* By-value wrapper: passes its mat copy to vmathM4Neg by address. */
+    VmathMatrix4 out;
+    vmathM4Neg( &out, &mat );
+    return out;
+}
+
+static inline VmathMatrix4 vmathM4AbsPerElem_V( VmathMatrix4 mat )
+{   /* By-value wrapper: passes its mat copy to vmathM4AbsPerElem by address. */
+    VmathMatrix4 out;
+    vmathM4AbsPerElem( &out, &mat );
+    return out;
+}
+
+static inline VmathMatrix4 vmathM4ScalarMul_V( VmathMatrix4 mat, float scalar )
+{   /* By-value wrapper: passes its mat copy (by address) and scalar to vmathM4ScalarMul. */
+    VmathMatrix4 out;
+    vmathM4ScalarMul( &out, &mat, scalar );
+    return out;
+}
+
+static inline VmathVector4 vmathM4MulV4_V( VmathMatrix4 mat, VmathVector4 vec )
+{   /* By-value wrapper: passes its mat and vec copies to vmathM4MulV4 by address. */
+    VmathVector4 out;
+    vmathM4MulV4( &out, &mat, &vec );
+    return out;
+}
+
+static inline VmathVector4 vmathM4MulV3_V( VmathMatrix4 mat, VmathVector3 vec )
+{   /* By-value wrapper: passes its mat and VmathVector3 copies to vmathM4MulV3; the output is a VmathVector4. */
+    VmathVector4 out;
+    vmathM4MulV3( &out, &mat, &vec );
+    return out;
+}
+
+static inline VmathVector4 vmathM4MulP3_V( VmathMatrix4 mat, VmathPoint3 pnt )
+{   /* By-value wrapper: passes its mat and VmathPoint3 copies to vmathM4MulP3; the output is a VmathVector4. */
+    VmathVector4 out;
+    vmathM4MulP3( &out, &mat, &pnt );
+    return out;
+}
+
+static inline VmathMatrix4 vmathM4Mul_V( VmathMatrix4 mat0, VmathMatrix4 mat1 )
+{   /* By-value wrapper: passes both matrix copies to vmathM4Mul by address, in the same order. */
+    VmathMatrix4 out;
+    vmathM4Mul( &out, &mat0, &mat1 );
+    return out;
+}
+
+static inline VmathMatrix4 vmathM4MulT3_V( VmathMatrix4 mat, VmathTransform3 tfrm1 )
+{   /* By-value wrapper: passes its mat and tfrm1 copies to vmathM4MulT3 by address. */
+    VmathMatrix4 out;
+    vmathM4MulT3( &out, &mat, &tfrm1 );
+    return out;
+}
+
+static inline VmathMatrix4 vmathM4MulPerElem_V( VmathMatrix4 mat0, VmathMatrix4 mat1 )
+{   /* By-value wrapper: passes both matrix copies to vmathM4MulPerElem by address. */
+    VmathMatrix4 out;
+    vmathM4MulPerElem( &out, &mat0, &mat1 );
+    return out;
+}
+
+static inline VmathMatrix4 vmathM4MakeIdentity_V( void )
+{   /* ( void ), not ( ): in C an empty list is not a prototype, so stray arguments would go undiagnosed. */
+    VmathMatrix4 result;  /* handed to vmathM4MakeIdentity by address, then returned by value */
+    vmathM4MakeIdentity(&result);
+    return result;
+}
+
+static inline void vmathM4SetUpper3x3_V( VmathMatrix4 *result, VmathMatrix3 mat3 )
+{   /* By-value wrapper: forwards the address of its mat3 copy to vmathM4SetUpper3x3. */
+    vmathM4SetUpper3x3( result, &mat3 );
+}
+
+static inline VmathMatrix3 vmathM4GetUpper3x3_V( VmathMatrix4 mat )
+{   /* By-value wrapper: passes its mat copy to vmathM4GetUpper3x3 by address. */
+    VmathMatrix3 out;
+    vmathM4GetUpper3x3( &out, &mat );
+    return out;
+}
+
+static inline void vmathM4SetTranslation_V( VmathMatrix4 *result, VmathVector3 translateVec )
+{   /* By-value wrapper: forwards the address of its translateVec copy to vmathM4SetTranslation. */
+    vmathM4SetTranslation( result, &translateVec );
+}
+
+static inline VmathVector3 vmathM4GetTranslation_V( VmathMatrix4 mat )
+{   /* By-value wrapper: passes its mat copy to vmathM4GetTranslation by address. */
+    VmathVector3 out;
+    vmathM4GetTranslation( &out, &mat );
+    return out;
+}
+
+static inline VmathMatrix4 vmathM4MakeRotationX_V( float radians )
+{   /* By-value wrapper: hands a local to vmathM4MakeRotationX by address and returns it. */
+    VmathMatrix4 out;
+    vmathM4MakeRotationX( &out, radians );
+    return out;
+}
+
+static inline VmathMatrix4 vmathM4MakeRotationY_V( float radians )
+{   /* By-value wrapper: hands a local to vmathM4MakeRotationY by address and returns it. */
+    VmathMatrix4 out;
+    vmathM4MakeRotationY( &out, radians );
+    return out;
+}
+
+static inline VmathMatrix4 vmathM4MakeRotationZ_V( float radians )
+{   /* By-value wrapper: hands a local to vmathM4MakeRotationZ by address and returns it. */
+    VmathMatrix4 out;
+    vmathM4MakeRotationZ( &out, radians );
+    return out;
+}
+
+static inline VmathMatrix4 vmathM4MakeRotationZYX_V( VmathVector3 radiansXYZ )
+{   /* By-value wrapper: passes its radiansXYZ copy to vmathM4MakeRotationZYX by address. */
+    VmathMatrix4 out;
+    vmathM4MakeRotationZYX( &out, &radiansXYZ );
+    return out;
+}
+
+static inline VmathMatrix4 vmathM4MakeRotationAxis_V( float radians, VmathVector3 unitVec )
+{   /* By-value wrapper: passes radians and the address of its unitVec copy to vmathM4MakeRotationAxis. */
+    VmathMatrix4 out;
+    vmathM4MakeRotationAxis( &out, radians, &unitVec );
+    return out;
+}
+
+static inline VmathMatrix4 vmathM4MakeRotationQ_V( VmathQuat unitQuat )
+{   /* By-value wrapper: passes its unitQuat copy to vmathM4MakeRotationQ by address. */
+    VmathMatrix4 out;
+    vmathM4MakeRotationQ( &out, &unitQuat );
+    return out;
+}
+
+static inline VmathMatrix4 vmathM4MakeScale_V( VmathVector3 scaleVec )
+{   /* By-value wrapper: passes its scaleVec copy to vmathM4MakeScale by address. */
+    VmathMatrix4 out;
+    vmathM4MakeScale( &out, &scaleVec );
+    return out;
+}
+
+static inline VmathMatrix4 vmathM4AppendScale_V( VmathMatrix4 mat, VmathVector3 scaleVec )
+{   /* By-value wrapper: passes its mat and scaleVec copies to vmathM4AppendScale by address. */
+    VmathMatrix4 out;
+    vmathM4AppendScale( &out, &mat, &scaleVec );
+    return out;
+}
+
+static inline VmathMatrix4 vmathM4PrependScale_V( VmathVector3 scaleVec, VmathMatrix4 mat )
+{   /* By-value wrapper: passes its scaleVec and mat copies to vmathM4PrependScale by address. */
+    VmathMatrix4 out;
+    vmathM4PrependScale( &out, &scaleVec, &mat );
+    return out;
+}
+
+static inline VmathMatrix4 vmathM4MakeTranslation_V( VmathVector3 translateVec )
+{   /* By-value wrapper: passes its translateVec copy to vmathM4MakeTranslation by address. */
+    VmathMatrix4 out;
+    vmathM4MakeTranslation( &out, &translateVec );
+    return out;
+}
+
+static inline VmathMatrix4 vmathM4MakeLookAt_V( VmathPoint3 eyePos, VmathPoint3 lookAtPos, VmathVector3 upVec )
+{   /* By-value wrapper: passes its eyePos, lookAtPos and upVec copies to vmathM4MakeLookAt by address. */
+    VmathMatrix4 out;
+    vmathM4MakeLookAt( &out, &eyePos, &lookAtPos, &upVec );
+    return out;
+}
+
+static inline VmathMatrix4 vmathM4MakePerspective_V( float fovyRadians, float aspect, float zNear, float zFar )
+{   /* By-value wrapper: hands a local plus the four scalars, unchanged, to vmathM4MakePerspective. */
+    VmathMatrix4 out;
+    vmathM4MakePerspective( &out, fovyRadians, aspect, zNear, zFar );
+    return out;
+}
+
+static inline VmathMatrix4 vmathM4MakeFrustum_V( float left, float right, float bottom, float top, float zNear, float zFar )
+{   /* By-value wrapper: hands a local plus the six scalars, unchanged, to vmathM4MakeFrustum. */
+    VmathMatrix4 out;
+    vmathM4MakeFrustum( &out, left, right, bottom, top, zNear, zFar );
+    return out;
+}
+
+static inline VmathMatrix4 vmathM4MakeOrthographic_V( float left, float right, float bottom, float top, float zNear, float zFar )
+{   /* By-value wrapper: hands a local plus the six scalars, unchanged, to vmathM4MakeOrthographic. */
+    VmathMatrix4 out;
+    vmathM4MakeOrthographic( &out, left, right, bottom, top, zNear, zFar );
+    return out;
+}
+
+static inline VmathMatrix4 vmathM4Select_V( VmathMatrix4 mat0, VmathMatrix4 mat1, unsigned int select1 )
+{   /* By-value wrapper: passes both matrix copies (by address) and select1 to vmathM4Select. */
+    VmathMatrix4 out;
+    vmathM4Select( &out, &mat0, &mat1, select1 );
+    return out;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathM4Print_V( VmathMatrix4 mat )
+{   /* By-value wrapper (debug builds only): forwards the address of its mat copy to vmathM4Print. */
+    vmathM4Print( &mat );
+}
+
+static inline void vmathM4Prints_V( VmathMatrix4 mat, const char *name )
+{   /* By-value wrapper (debug builds only): forwards &mat and name to vmathM4Prints. */
+    vmathM4Prints( &mat, name );
+}
+
+#endif
+
+static inline VmathTransform3 vmathT3MakeFromScalar_V( float scalar )
+{   /* By-value wrapper: hands a local to vmathT3MakeFromScalar by address and returns it. */
+    VmathTransform3 out;
+    vmathT3MakeFromScalar( &out, scalar );
+    return out;
+}
+
+static inline VmathTransform3 vmathT3MakeFromCols_V( VmathVector3 _col0, VmathVector3 _col1, VmathVector3 _col2, VmathVector3 _col3 )
+{   /* By-value wrapper: passes the four column copies to vmathT3MakeFromCols by address. */
+    VmathTransform3 out;
+    vmathT3MakeFromCols( &out, &_col0, &_col1, &_col2, &_col3 );
+    return out;
+}
+
+static inline VmathTransform3 vmathT3MakeFromM3V3_V( VmathMatrix3 tfrm, VmathVector3 translateVec )
+{   /* By-value wrapper: passes its VmathMatrix3 (tfrm) and translateVec copies to vmathT3MakeFromM3V3 by address. */
+    VmathTransform3 out;
+    vmathT3MakeFromM3V3( &out, &tfrm, &translateVec );
+    return out;
+}
+
+static inline VmathTransform3 vmathT3MakeFromQV3_V( VmathQuat unitQuat, VmathVector3 translateVec )
+{   /* By-value wrapper: passes its unitQuat and translateVec copies to vmathT3MakeFromQV3 by address. */
+    VmathTransform3 out;
+    vmathT3MakeFromQV3( &out, &unitQuat, &translateVec );
+    return out;
+}
+
+static inline void vmathT3SetCol0_V( VmathTransform3 *result, VmathVector3 _col0 )
+{   /* By-value wrapper: forwards the address of its _col0 copy to vmathT3SetCol0. */
+    vmathT3SetCol0( result, &_col0 );
+}
+
+static inline void vmathT3SetCol1_V( VmathTransform3 *result, VmathVector3 _col1 )
+{   /* By-value wrapper: forwards the address of its _col1 copy to vmathT3SetCol1. */
+    vmathT3SetCol1( result, &_col1 );
+}
+
+static inline void vmathT3SetCol2_V( VmathTransform3 *result, VmathVector3 _col2 )
+{   /* By-value wrapper: forwards the address of its _col2 copy to vmathT3SetCol2. */
+    vmathT3SetCol2( result, &_col2 );
+}
+
+static inline void vmathT3SetCol3_V( VmathTransform3 *result, VmathVector3 _col3 )
+{   /* By-value wrapper: forwards the address of its _col3 copy to vmathT3SetCol3. */
+    vmathT3SetCol3( result, &_col3 );
+}
+
+static inline void vmathT3SetCol_V( VmathTransform3 *result, int col, VmathVector3 vec )
+{   /* By-value wrapper: forwards col and the address of its vec copy to vmathT3SetCol. */
+    vmathT3SetCol( result, col, &vec );
+}
+
+static inline void vmathT3SetRow_V( VmathTransform3 *result, int row, VmathVector4 vec )
+{   /* By-value wrapper: forwards row and the address of its VmathVector4 copy to vmathT3SetRow. */
+    vmathT3SetRow( result, row, &vec );
+}
+
+static inline void vmathT3SetElem_V( VmathTransform3 *result, int col, int row, float val )
+{   /* Plain pass-through to vmathT3SetElem; every argument is forwarded unchanged. */
+    vmathT3SetElem( result, col, row, val );
+}
+
+static inline float vmathT3GetElem_V( VmathTransform3 tfrm, int col, int row )
+{   /* By-value wrapper: forwards the address of its tfrm copy to vmathT3GetElem. */
+    return vmathT3GetElem( &tfrm, col, row );
+}
+
+static inline VmathVector3 vmathT3GetCol0_V( VmathTransform3 tfrm )
+{   /* By-value wrapper: passes its tfrm copy to vmathT3GetCol0 by address. */
+    VmathVector3 out;
+    vmathT3GetCol0( &out, &tfrm );
+    return out;
+}
+
+static inline VmathVector3 vmathT3GetCol1_V( VmathTransform3 tfrm )
+{   /* By-value wrapper: passes its tfrm copy to vmathT3GetCol1 by address. */
+    VmathVector3 out;
+    vmathT3GetCol1( &out, &tfrm );
+    return out;
+}
+
+static inline VmathVector3 vmathT3GetCol2_V( VmathTransform3 tfrm )
+{   /* By-value wrapper: passes its tfrm copy to vmathT3GetCol2 by address. */
+    VmathVector3 out;
+    vmathT3GetCol2( &out, &tfrm );
+    return out;
+}
+
+static inline VmathVector3 vmathT3GetCol3_V( VmathTransform3 tfrm )
+{   /* By-value wrapper: passes its tfrm copy to vmathT3GetCol3 by address. */
+    VmathVector3 out;
+    vmathT3GetCol3( &out, &tfrm );
+    return out;
+}
+
+static inline VmathVector3 vmathT3GetCol_V( VmathTransform3 tfrm, int col )
+{   /* By-value wrapper: passes its tfrm copy (by address) and col to vmathT3GetCol. */
+    VmathVector3 out;
+    vmathT3GetCol( &out, &tfrm, col );
+    return out;
+}
+
+static inline VmathVector4 vmathT3GetRow_V( VmathTransform3 tfrm, int row )
+{   /* By-value wrapper: passes its tfrm copy (by address) and row to vmathT3GetRow; the output is a VmathVector4. */
+    VmathVector4 out;
+    vmathT3GetRow( &out, &tfrm, row );
+    return out;
+}
+
+static inline VmathTransform3 vmathT3Inverse_V( VmathTransform3 tfrm )
+{   /* By-value wrapper: passes its tfrm copy to vmathT3Inverse by address. */
+    VmathTransform3 out;
+    vmathT3Inverse( &out, &tfrm );
+    return out;
+}
+
+static inline VmathTransform3 vmathT3OrthoInverse_V( VmathTransform3 tfrm )
+{   /* By-value wrapper: passes its tfrm copy to vmathT3OrthoInverse by address. */
+    VmathTransform3 out;
+    vmathT3OrthoInverse( &out, &tfrm );
+    return out;
+}
+
+static inline VmathTransform3 vmathT3AbsPerElem_V( VmathTransform3 tfrm )
+{   /* By-value wrapper: passes its tfrm copy to vmathT3AbsPerElem by address. */
+    VmathTransform3 out;
+    vmathT3AbsPerElem( &out, &tfrm );
+    return out;
+}
+
+static inline VmathVector3 vmathT3MulV3_V( VmathTransform3 tfrm, VmathVector3 vec )
+{   /* By-value wrapper: passes its tfrm and vec copies to vmathT3MulV3 by address. */
+    VmathVector3 out;
+    vmathT3MulV3( &out, &tfrm, &vec );
+    return out;
+}
+
+static inline VmathPoint3 vmathT3MulP3_V( VmathTransform3 tfrm, VmathPoint3 pnt )
+{   /* By-value wrapper: passes its tfrm and pnt copies to vmathT3MulP3 by address. */
+    VmathPoint3 out;
+    vmathT3MulP3( &out, &tfrm, &pnt );
+    return out;
+}
+
+static inline VmathTransform3 vmathT3Mul_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1 )
+{   /* By-value wrapper: passes both transform copies to vmathT3Mul by address, in the same order. */
+    VmathTransform3 out;
+    vmathT3Mul( &out, &tfrm0, &tfrm1 );
+    return out;
+}
+
+static inline VmathTransform3 vmathT3MulPerElem_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1 )
+{   /* By-value wrapper: passes both transform copies to vmathT3MulPerElem by address. */
+    VmathTransform3 out;
+    vmathT3MulPerElem( &out, &tfrm0, &tfrm1 );
+    return out;
+}
+
+static inline VmathTransform3 vmathT3MakeIdentity_V( void )
+{   /* ( void ), not ( ): in C an empty list is not a prototype, so stray arguments would go undiagnosed. */
+    VmathTransform3 result;  /* handed to vmathT3MakeIdentity by address, then returned by value */
+    vmathT3MakeIdentity(&result);
+    return result;
+}
+
+static inline void vmathT3SetUpper3x3_V( VmathTransform3 *result, VmathMatrix3 tfrm )
+{   /* By-value wrapper: forwards the address of its VmathMatrix3 copy (named tfrm) to vmathT3SetUpper3x3. */
+    vmathT3SetUpper3x3( result, &tfrm );
+}
+
+static inline VmathMatrix3 vmathT3GetUpper3x3_V( VmathTransform3 tfrm )
+{   /* By-value wrapper: passes its tfrm copy to vmathT3GetUpper3x3 by address. */
+    VmathMatrix3 out;
+    vmathT3GetUpper3x3( &out, &tfrm );
+    return out;
+}
+
+static inline void vmathT3SetTranslation_V( VmathTransform3 *result, VmathVector3 translateVec )
+{   /* By-value wrapper: forwards the address of its translateVec copy to vmathT3SetTranslation. */
+    vmathT3SetTranslation( result, &translateVec );
+}
+
+static inline VmathVector3 vmathT3GetTranslation_V( VmathTransform3 tfrm )
+{   /* By-value wrapper: passes its tfrm copy to vmathT3GetTranslation by address. */
+    VmathVector3 out;
+    vmathT3GetTranslation( &out, &tfrm );
+    return out;
+}
+
+static inline VmathTransform3 vmathT3MakeRotationX_V( float radians )
+{   /* By-value wrapper: hands a local to vmathT3MakeRotationX by address and returns it. */
+    VmathTransform3 out;
+    vmathT3MakeRotationX( &out, radians );
+    return out;
+}
+
+static inline VmathTransform3 vmathT3MakeRotationY_V( float radians )
+{   /* By-value wrapper: hands a local to vmathT3MakeRotationY by address and returns it. */
+    VmathTransform3 out;
+    vmathT3MakeRotationY( &out, radians );
+    return out;
+}
+
+static inline VmathTransform3 vmathT3MakeRotationZ_V( float radians )
+{   /* By-value wrapper: hands a local to vmathT3MakeRotationZ by address and returns it. */
+    VmathTransform3 out;
+    vmathT3MakeRotationZ( &out, radians );
+    return out;
+}
+
+static inline VmathTransform3 vmathT3MakeRotationZYX_V( VmathVector3 radiansXYZ )
+{   /* By-value wrapper: passes its radiansXYZ copy to vmathT3MakeRotationZYX by address. */
+    VmathTransform3 out;
+    vmathT3MakeRotationZYX( &out, &radiansXYZ );
+    return out;
+}
+
+static inline VmathTransform3 vmathT3MakeRotationAxis_V( float radians, VmathVector3 unitVec )
+{   /* By-value wrapper: passes radians and the address of its unitVec copy to vmathT3MakeRotationAxis. */
+    VmathTransform3 out;
+    vmathT3MakeRotationAxis( &out, radians, &unitVec );
+    return out;
+}
+
+static inline VmathTransform3 vmathT3MakeRotationQ_V( VmathQuat unitQuat )
+{   /* By-value wrapper: passes its unitQuat copy to vmathT3MakeRotationQ by address. */
+    VmathTransform3 out;
+    vmathT3MakeRotationQ( &out, &unitQuat );
+    return out;
+}
+
+static inline VmathTransform3 vmathT3MakeScale_V( VmathVector3 scaleVec )
+{   /* By-value wrapper: passes its scaleVec copy to vmathT3MakeScale by address. */
+    VmathTransform3 out;
+    vmathT3MakeScale( &out, &scaleVec );
+    return out;
+}
+
+static inline VmathTransform3 vmathT3AppendScale_V( VmathTransform3 tfrm, VmathVector3 scaleVec )
+{   /* By-value wrapper: passes its tfrm and scaleVec copies to vmathT3AppendScale by address. */
+    VmathTransform3 out;
+    vmathT3AppendScale( &out, &tfrm, &scaleVec );
+    return out;
+}
+
+static inline VmathTransform3 vmathT3PrependScale_V( VmathVector3 scaleVec, VmathTransform3 tfrm )
+{   /* By-value wrapper: passes its scaleVec and tfrm copies to vmathT3PrependScale by address. */
+    VmathTransform3 out;
+    vmathT3PrependScale( &out, &scaleVec, &tfrm );
+    return out;
+}
+
+static inline VmathTransform3 vmathT3MakeTranslation_V( VmathVector3 translateVec )
+{   /* By-value wrapper: passes its translateVec copy to vmathT3MakeTranslation by address. */
+    VmathTransform3 out;
+    vmathT3MakeTranslation( &out, &translateVec );
+    return out;
+}
+
+static inline VmathTransform3 vmathT3Select_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1, unsigned int select1 )
+{   /* By-value wrapper: passes both transform copies (by address) and select1 to vmathT3Select. */
+    VmathTransform3 out;
+    vmathT3Select( &out, &tfrm0, &tfrm1, select1 );
+    return out;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathT3Print_V( VmathTransform3 tfrm )
+{   /* By-value wrapper (debug builds only): forwards the address of its tfrm copy to vmathT3Print. */
+    vmathT3Print( &tfrm );
+}
+
+static inline void vmathT3Prints_V( VmathTransform3 tfrm, const char *name )
+{   /* By-value wrapper (debug builds only): forwards &tfrm and name to vmathT3Prints. */
+    vmathT3Prints( &tfrm, name );
+}
+
+#endif
+
+static inline VmathQuat vmathQMakeFromM3_V( VmathMatrix3 tfrm )
+{   /* By-value wrapper: passes its VmathMatrix3 copy (named tfrm) to vmathQMakeFromM3 by address. */
+    VmathQuat out;
+    vmathQMakeFromM3( &out, &tfrm );
+    return out;
+}
+
+static inline VmathMatrix3 vmathV3Outer_V( VmathVector3 tfrm0, VmathVector3 tfrm1 )
+{   /* By-value wrapper: passes both VmathVector3 copies (named tfrm0/tfrm1) to vmathV3Outer by address. */
+    VmathMatrix3 out;
+    vmathV3Outer( &out, &tfrm0, &tfrm1 );
+    return out;
+}
+
+static inline VmathMatrix4 vmathV4Outer_V( VmathVector4 tfrm0, VmathVector4 tfrm1 )
+{   /* By-value wrapper: passes both VmathVector4 copies (named tfrm0/tfrm1) to vmathV4Outer by address. */
+    VmathMatrix4 out;
+    vmathV4Outer( &out, &tfrm0, &tfrm1 );
+    return out;
+}
+
+static inline VmathVector3 vmathV3RowMul_V( VmathVector3 vec, VmathMatrix3 mat )
+{   /* By-value wrapper: passes its vec and mat copies to vmathV3RowMul by address. */
+    VmathVector3 out;
+    vmathV3RowMul( &out, &vec, &mat );
+    return out;
+}
+
+static inline VmathMatrix3 vmathV3CrossMatrix_V( VmathVector3 vec )
+{   /* By-value wrapper: passes its vec copy to vmathV3CrossMatrix by address. */
+    VmathMatrix3 out;
+    vmathV3CrossMatrix( &out, &vec );
+    return out;
+}
+
+static inline VmathMatrix3 vmathV3CrossMatrixMul_V( VmathVector3 vec, VmathMatrix3 mat )
+{   /* By-value wrapper: passes its vec and mat copies to vmathV3CrossMatrixMul by address. */
+    VmathMatrix3 out;
+    vmathV3CrossMatrixMul( &out, &vec, &mat );
+    return out;
+}
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/ppu/c/mat_soa.h b/Extras/vectormathlibrary/include/vectormath/ppu/c/mat_soa.h
index 3b673c4eb..25b7af0aa 100644
--- a/Extras/vectormathlibrary/include/vectormath/ppu/c/mat_soa.h
+++ b/Extras/vectormathlibrary/include/vectormath/ppu/c/mat_soa.h
@@ -1,1493 +1,1493 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_MAT_SOA_C_H
-#define _VECTORMATH_MAT_SOA_C_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/*-----------------------------------------------------------------------------
- * Constants
- */
-#define _VECTORMATH_PI_OVER_2 1.570796327f
-
-/*-----------------------------------------------------------------------------
- * Definitions
- */
-static inline void vmathSoaM3Copy( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat )
-{
-    vmathSoaV3Copy( &result->col0, &mat->col0 );
-    vmathSoaV3Copy( &result->col1, &mat->col1 );
-    vmathSoaV3Copy( &result->col2, &mat->col2 );
-}
-
-static inline void vmathSoaM3MakeFromScalar( VmathSoaMatrix3 *result, vec_float4 scalar )
-{
-    vmathSoaV3MakeFromScalar( &result->col0, scalar );
-    vmathSoaV3MakeFromScalar( &result->col1, scalar );
-    vmathSoaV3MakeFromScalar( &result->col2, scalar );
-}
-
-static inline void vmathSoaM3MakeFromQ( VmathSoaMatrix3 *result, const VmathSoaQuat *unitQuat )
-{
-    vec_float4 qx, qy, qz, qw, qx2, qy2, qz2, qxqx2, qyqy2, qzqz2, qxqy2, qyqz2, qzqw2, qxqz2, qyqw2, qxqw2;
-    qx = unitQuat->x;
-    qy = unitQuat->y;
-    qz = unitQuat->z;
-    qw = unitQuat->w;
-    qx2 = vec_add( qx, qx );
-    qy2 = vec_add( qy, qy );
-    qz2 = vec_add( qz, qz );
-    qxqx2 = vec_madd( qx, qx2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    qxqy2 = vec_madd( qx, qy2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    qxqz2 = vec_madd( qx, qz2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    qxqw2 = vec_madd( qw, qx2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    qyqy2 = vec_madd( qy, qy2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    qyqz2 = vec_madd( qy, qz2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    qyqw2 = vec_madd( qw, qy2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    qzqz2 = vec_madd( qz, qz2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    qzqw2 = vec_madd( qw, qz2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV3MakeFromElems( &result->col0, vec_sub( vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), qyqy2 ), qzqz2 ), vec_add( qxqy2, qzqw2 ), vec_sub( qxqz2, qyqw2 ) );
-    vmathSoaV3MakeFromElems( &result->col1, vec_sub( qxqy2, qzqw2 ), vec_sub( vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), qxqx2 ), qzqz2 ), vec_add( qyqz2, qxqw2 ) );
-    vmathSoaV3MakeFromElems( &result->col2, vec_add( qxqz2, qyqw2 ), vec_sub( qyqz2, qxqw2 ), vec_sub( vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), qxqx2 ), qyqy2 ) );
-}
-
-static inline void vmathSoaM3MakeFromCols( VmathSoaMatrix3 *result, const VmathSoaVector3 *_col0, const VmathSoaVector3 *_col1, const VmathSoaVector3 *_col2 )
-{
-    vmathSoaV3Copy( &result->col0, _col0 );
-    vmathSoaV3Copy( &result->col1, _col1 );
-    vmathSoaV3Copy( &result->col2, _col2 );
-}
-
-static inline void vmathSoaM3MakeFromAos( VmathSoaMatrix3 *result, const VmathMatrix3 *mat )
-{
-    vmathSoaV3MakeFromAos( &result->col0, &mat->col0 );
-    vmathSoaV3MakeFromAos( &result->col1, &mat->col1 );
-    vmathSoaV3MakeFromAos( &result->col2, &mat->col2 );
-}
-
-static inline void vmathSoaM3MakeFrom4Aos( VmathSoaMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1, const VmathMatrix3 *mat2, const VmathMatrix3 *mat3 )
-{
-    vmathSoaV3MakeFrom4Aos( &result->col0, &mat0->col0, &mat1->col0, &mat2->col0, &mat3->col0 );
-    vmathSoaV3MakeFrom4Aos( &result->col1, &mat0->col1, &mat1->col1, &mat2->col1, &mat3->col1 );
-    vmathSoaV3MakeFrom4Aos( &result->col2, &mat0->col2, &mat1->col2, &mat2->col2, &mat3->col2 );
-}
-
-static inline void vmathSoaM3Get4Aos( const VmathSoaMatrix3 *mat, VmathMatrix3 *result0, VmathMatrix3 *result1, VmathMatrix3 *result2, VmathMatrix3 *result3 )
-{
-    vmathSoaV3Get4Aos( &mat->col0, &result0->col0, &result1->col0, &result2->col0, &result3->col0 );
-    vmathSoaV3Get4Aos( &mat->col1, &result0->col1, &result1->col1, &result2->col1, &result3->col1 );
-    vmathSoaV3Get4Aos( &mat->col2, &result0->col2, &result1->col2, &result2->col2, &result3->col2 );
-}
-
-static inline void vmathSoaM3SetCol0( VmathSoaMatrix3 *result, const VmathSoaVector3 *_col0 )
-{
-    vmathSoaV3Copy( &result->col0, _col0 );
-}
-
-static inline void vmathSoaM3SetCol1( VmathSoaMatrix3 *result, const VmathSoaVector3 *_col1 )
-{
-    vmathSoaV3Copy( &result->col1, _col1 );
-}
-
-static inline void vmathSoaM3SetCol2( VmathSoaMatrix3 *result, const VmathSoaVector3 *_col2 )
-{
-    vmathSoaV3Copy( &result->col2, _col2 );
-}
-
-static inline void vmathSoaM3SetCol( VmathSoaMatrix3 *result, int col, const VmathSoaVector3 *vec )
-{
-    vmathSoaV3Copy( (&result->col0 + col), vec );
-}
-
-static inline void vmathSoaM3SetRow( VmathSoaMatrix3 *result, int row, const VmathSoaVector3 *vec )
-{
-    vmathSoaV3SetElem( &result->col0, row, vmathSoaV3GetElem( vec, 0 ) );
-    vmathSoaV3SetElem( &result->col1, row, vmathSoaV3GetElem( vec, 1 ) );
-    vmathSoaV3SetElem( &result->col2, row, vmathSoaV3GetElem( vec, 2 ) );
-}
-
-static inline void vmathSoaM3SetElem( VmathSoaMatrix3 *result, int col, int row, vec_float4 val )
-{
-    VmathSoaVector3 tmpV3_0;
-    vmathSoaM3GetCol( &tmpV3_0, result, col );
-    vmathSoaV3SetElem( &tmpV3_0, row, val );
-    vmathSoaM3SetCol( result, col, &tmpV3_0 );
-}
-
-static inline vec_float4 vmathSoaM3GetElem( const VmathSoaMatrix3 *mat, int col, int row )
-{
-    VmathSoaVector3 tmpV3_0;
-    vmathSoaM3GetCol( &tmpV3_0, mat, col );
-    return vmathSoaV3GetElem( &tmpV3_0, row );
-}
-
-static inline void vmathSoaM3GetCol0( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat )
-{
-    vmathSoaV3Copy( result, &mat->col0 );
-}
-
-static inline void vmathSoaM3GetCol1( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat )
-{
-    vmathSoaV3Copy( result, &mat->col1 );
-}
-
-static inline void vmathSoaM3GetCol2( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat )
-{
-    vmathSoaV3Copy( result, &mat->col2 );
-}
-
-static inline void vmathSoaM3GetCol( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat, int col )
-{
-    vmathSoaV3Copy( result, (&mat->col0 + col) );
-}
-
-static inline void vmathSoaM3GetRow( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat, int row )
-{
-    vmathSoaV3MakeFromElems( result, vmathSoaV3GetElem( &mat->col0, row ), vmathSoaV3GetElem( &mat->col1, row ), vmathSoaV3GetElem( &mat->col2, row ) );
-}
-
-static inline void vmathSoaM3Transpose( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat )
-{
-    VmathSoaMatrix3 tmpResult;
-    vmathSoaV3MakeFromElems( &tmpResult.col0, mat->col0.x, mat->col1.x, mat->col2.x );
-    vmathSoaV3MakeFromElems( &tmpResult.col1, mat->col0.y, mat->col1.y, mat->col2.y );
-    vmathSoaV3MakeFromElems( &tmpResult.col2, mat->col0.z, mat->col1.z, mat->col2.z );
-    vmathSoaM3Copy( result, &tmpResult );
-}
-
-static inline void vmathSoaM3Inverse( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat )
-{
-    VmathSoaVector3 tmp0, tmp1, tmp2;
-    vec_float4 detinv;
-    vmathSoaV3Cross( &tmp0, &mat->col1, &mat->col2 );
-    vmathSoaV3Cross( &tmp1, &mat->col2, &mat->col0 );
-    vmathSoaV3Cross( &tmp2, &mat->col0, &mat->col1 );
-    detinv = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vmathSoaV3Dot( &mat->col2, &tmp2 ) );
-    vmathSoaV3MakeFromElems( &result->col0, vec_madd( tmp0.x, detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp1.x, detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp2.x, detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    vmathSoaV3MakeFromElems( &result->col1, vec_madd( tmp0.y, detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp1.y, detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp2.y, detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    vmathSoaV3MakeFromElems( &result->col2, vec_madd( tmp0.z, detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp1.z, detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp2.z, detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-}
-
-static inline vec_float4 vmathSoaM3Determinant( const VmathSoaMatrix3 *mat )
-{
-    VmathSoaVector3 tmpV3_0;
-    vmathSoaV3Cross( &tmpV3_0, &mat->col0, &mat->col1 );
-    return vmathSoaV3Dot( &mat->col2, &tmpV3_0 );
-}
-
-static inline void vmathSoaM3Add( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1 )
-{
-    vmathSoaV3Add( &result->col0, &mat0->col0, &mat1->col0 );
-    vmathSoaV3Add( &result->col1, &mat0->col1, &mat1->col1 );
-    vmathSoaV3Add( &result->col2, &mat0->col2, &mat1->col2 );
-}
-
-static inline void vmathSoaM3Sub( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1 )
-{
-    vmathSoaV3Sub( &result->col0, &mat0->col0, &mat1->col0 );
-    vmathSoaV3Sub( &result->col1, &mat0->col1, &mat1->col1 );
-    vmathSoaV3Sub( &result->col2, &mat0->col2, &mat1->col2 );
-}
-
-static inline void vmathSoaM3Neg( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat )
-{
-    vmathSoaV3Neg( &result->col0, &mat->col0 );
-    vmathSoaV3Neg( &result->col1, &mat->col1 );
-    vmathSoaV3Neg( &result->col2, &mat->col2 );
-}
-
-static inline void vmathSoaM3AbsPerElem( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat )
-{
-    vmathSoaV3AbsPerElem( &result->col0, &mat->col0 );
-    vmathSoaV3AbsPerElem( &result->col1, &mat->col1 );
-    vmathSoaV3AbsPerElem( &result->col2, &mat->col2 );
-}
-
-static inline void vmathSoaM3ScalarMul( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat, vec_float4 scalar )
-{
-    vmathSoaV3ScalarMul( &result->col0, &mat->col0, scalar );
-    vmathSoaV3ScalarMul( &result->col1, &mat->col1, scalar );
-    vmathSoaV3ScalarMul( &result->col2, &mat->col2, scalar );
-}
-
-static inline void vmathSoaM3MulV3( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat, const VmathSoaVector3 *vec )
-{
-    vec_float4 tmpX, tmpY, tmpZ;
-    tmpX = vec_add( vec_add( vec_madd( mat->col0.x, vec->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mat->col1.x, vec->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mat->col2.x, vec->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmpY = vec_add( vec_add( vec_madd( mat->col0.y, vec->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mat->col1.y, vec->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mat->col2.y, vec->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmpZ = vec_add( vec_add( vec_madd( mat->col0.z, vec->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mat->col1.z, vec->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mat->col2.z, vec->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    vmathSoaV3MakeFromElems( result, tmpX, tmpY, tmpZ );
-}
-
-static inline void vmathSoaM3Mul( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1 )
-{
-    VmathSoaMatrix3 tmpResult;
-    vmathSoaM3MulV3( &tmpResult.col0, mat0, &mat1->col0 );
-    vmathSoaM3MulV3( &tmpResult.col1, mat0, &mat1->col1 );
-    vmathSoaM3MulV3( &tmpResult.col2, mat0, &mat1->col2 );
-    vmathSoaM3Copy( result, &tmpResult );
-}
-
-static inline void vmathSoaM3MulPerElem( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1 )
-{
-    vmathSoaV3MulPerElem( &result->col0, &mat0->col0, &mat1->col0 );
-    vmathSoaV3MulPerElem( &result->col1, &mat0->col1, &mat1->col1 );
-    vmathSoaV3MulPerElem( &result->col2, &mat0->col2, &mat1->col2 );
-}
-
-static inline void vmathSoaM3MakeIdentity( VmathSoaMatrix3 *result )
-{
-    vmathSoaV3MakeXAxis( &result->col0 );
-    vmathSoaV3MakeYAxis( &result->col1 );
-    vmathSoaV3MakeZAxis( &result->col2 );
-}
-
-static inline void vmathSoaM3MakeRotationX( VmathSoaMatrix3 *result, vec_float4 radians )
-{
-    vec_float4 s, c;
-    sincosf4( radians, &s, &c );
-    vmathSoaV3MakeXAxis( &result->col0 );
-    vmathSoaV3MakeFromElems( &result->col1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), c, s );
-    vmathSoaV3MakeFromElems( &result->col2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), negatef4( s ), c );
-}
-
-static inline void vmathSoaM3MakeRotationY( VmathSoaMatrix3 *result, vec_float4 radians )
-{
-    vec_float4 s, c;
-    sincosf4( radians, &s, &c );
-    vmathSoaV3MakeFromElems( &result->col0, c, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), negatef4( s ) );
-    vmathSoaV3MakeYAxis( &result->col1 );
-    vmathSoaV3MakeFromElems( &result->col2, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), c );
-}
-
-static inline void vmathSoaM3MakeRotationZ( VmathSoaMatrix3 *result, vec_float4 radians )
-{
-    vec_float4 s, c;
-    sincosf4( radians, &s, &c );
-    vmathSoaV3MakeFromElems( &result->col0, c, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV3MakeFromElems( &result->col1, negatef4( s ), c, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV3MakeZAxis( &result->col2 );
-}
-
-static inline void vmathSoaM3MakeRotationZYX( VmathSoaMatrix3 *result, const VmathSoaVector3 *radiansXYZ )
-{
-    vec_float4 sX, cX, sY, cY, sZ, cZ, tmp0, tmp1;
-    sincosf4( radiansXYZ->x, &sX, &cX );
-    sincosf4( radiansXYZ->y, &sY, &cY );
-    sincosf4( radiansXYZ->z, &sZ, &cZ );
-    tmp0 = vec_madd( cZ, sY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    tmp1 = vec_madd( sZ, sY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV3MakeFromElems( &result->col0, vec_madd( cZ, cY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( sZ, cY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), negatef4( sY ) );
-    vmathSoaV3MakeFromElems( &result->col1, vec_sub( vec_madd( tmp0, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( sZ, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_add( vec_madd( tmp1, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( cZ, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( cY, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    vmathSoaV3MakeFromElems( &result->col2, vec_add( vec_madd( tmp0, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( sZ, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_sub( vec_madd( tmp1, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( cZ, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( cY, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-}
-
-static inline void vmathSoaM3MakeRotationAxis( VmathSoaMatrix3 *result, vec_float4 radians, const VmathSoaVector3 *unitVec )
-{
-    vec_float4 x, y, z, s, c, oneMinusC, xy, yz, zx;
-    sincosf4( radians, &s, &c );
-    x = unitVec->x;
-    y = unitVec->y;
-    z = unitVec->z;
-    xy = vec_madd( x, y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    yz = vec_madd( y, z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    zx = vec_madd( z, x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    oneMinusC = vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), c );
-    vmathSoaV3MakeFromElems( &result->col0, vec_add( vec_madd( vec_madd( x, x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), c ), vec_add( vec_madd( xy, oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( z, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_sub( vec_madd( zx, oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( y, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ) );
-    vmathSoaV3MakeFromElems( &result->col1, vec_sub( vec_madd( xy, oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( z, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_add( vec_madd( vec_madd( y, y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), c ), vec_add( vec_madd( yz, oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( x, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ) );
-    vmathSoaV3MakeFromElems( &result->col2, vec_add( vec_madd( zx, oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( y, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_sub( vec_madd( yz, oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( x, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_add( vec_madd( vec_madd( z, z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), c ) );
-}
-
-static inline void vmathSoaM3MakeRotationQ( VmathSoaMatrix3 *result, const VmathSoaQuat *unitQuat )
-{
-    vmathSoaM3MakeFromQ( result, unitQuat );
-}
-
-static inline void vmathSoaM3MakeScale( VmathSoaMatrix3 *result, const VmathSoaVector3 *scaleVec )
-{
-    vmathSoaV3MakeFromElems( &result->col0, scaleVec->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV3MakeFromElems( &result->col1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), scaleVec->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV3MakeFromElems( &result->col2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), scaleVec->z );
-}
-
-static inline void vmathSoaM3AppendScale( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat, const VmathSoaVector3 *scaleVec )
-{
-    vmathSoaV3ScalarMul( &result->col0, &mat->col0, vmathSoaV3GetX( scaleVec ) );
-    vmathSoaV3ScalarMul( &result->col1, &mat->col1, vmathSoaV3GetY( scaleVec ) );
-    vmathSoaV3ScalarMul( &result->col2, &mat->col2, vmathSoaV3GetZ( scaleVec ) );
-}
-
-static inline void vmathSoaM3PrependScale( VmathSoaMatrix3 *result, const VmathSoaVector3 *scaleVec, const VmathSoaMatrix3 *mat )
-{
-    vmathSoaV3MulPerElem( &result->col0, &mat->col0, scaleVec );
-    vmathSoaV3MulPerElem( &result->col1, &mat->col1, scaleVec );
-    vmathSoaV3MulPerElem( &result->col2, &mat->col2, scaleVec );
-}
-
-static inline void vmathSoaM3Select( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1, vec_uint4 select1 )
-{
-    vmathSoaV3Select( &result->col0, &mat0->col0, &mat1->col0, select1 );
-    vmathSoaV3Select( &result->col1, &mat0->col1, &mat1->col1, select1 );
-    vmathSoaV3Select( &result->col2, &mat0->col2, &mat1->col2, select1 );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathSoaM3Print( const VmathSoaMatrix3 *mat )
-{
-    VmathMatrix3 mat0, mat1, mat2, mat3;
-    vmathSoaM3Get4Aos( mat, &mat0, &mat1, &mat2, &mat3 );
-    printf("slot 0:\n");
-    vmathM3Print( &mat0 );
-    printf("slot 1:\n");
-    vmathM3Print( &mat1 );
-    printf("slot 2:\n");
-    vmathM3Print( &mat2 );
-    printf("slot 3:\n");
-    vmathM3Print( &mat3 );
-}
-
-static inline void vmathSoaM3Prints( const VmathSoaMatrix3 *mat, const char *name )
-{
-    printf("%s:\n", name);
-    vmathSoaM3Print( mat );
-}
-
-#endif
-
-static inline void vmathSoaM4Copy( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat )
-{
-    vmathSoaV4Copy( &result->col0, &mat->col0 );
-    vmathSoaV4Copy( &result->col1, &mat->col1 );
-    vmathSoaV4Copy( &result->col2, &mat->col2 );
-    vmathSoaV4Copy( &result->col3, &mat->col3 );
-}
-
-static inline void vmathSoaM4MakeFromScalar( VmathSoaMatrix4 *result, vec_float4 scalar )
-{
-    vmathSoaV4MakeFromScalar( &result->col0, scalar );
-    vmathSoaV4MakeFromScalar( &result->col1, scalar );
-    vmathSoaV4MakeFromScalar( &result->col2, scalar );
-    vmathSoaV4MakeFromScalar( &result->col3, scalar );
-}
-
-static inline void vmathSoaM4MakeFromT3( VmathSoaMatrix4 *result, const VmathSoaTransform3 *mat )
-{
-    vmathSoaV4MakeFromV3Scalar( &result->col0, &mat->col0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV4MakeFromV3Scalar( &result->col1, &mat->col1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV4MakeFromV3Scalar( &result->col2, &mat->col2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV4MakeFromV3Scalar( &result->col3, &mat->col3, ((vec_float4){1.0f,1.0f,1.0f,1.0f}) );
-}
-
-static inline void vmathSoaM4MakeFromCols( VmathSoaMatrix4 *result, const VmathSoaVector4 *_col0, const VmathSoaVector4 *_col1, const VmathSoaVector4 *_col2, const VmathSoaVector4 *_col3 )
-{
-    vmathSoaV4Copy( &result->col0, _col0 );
-    vmathSoaV4Copy( &result->col1, _col1 );
-    vmathSoaV4Copy( &result->col2, _col2 );
-    vmathSoaV4Copy( &result->col3, _col3 );
-}
-
-static inline void vmathSoaM4MakeFromM3V3( VmathSoaMatrix4 *result, const VmathSoaMatrix3 *mat, const VmathSoaVector3 *translateVec )
-{
-    vmathSoaV4MakeFromV3Scalar( &result->col0, &mat->col0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV4MakeFromV3Scalar( &result->col1, &mat->col1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV4MakeFromV3Scalar( &result->col2, &mat->col2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV4MakeFromV3Scalar( &result->col3, translateVec, ((vec_float4){1.0f,1.0f,1.0f,1.0f}) );
-}
-
-static inline void vmathSoaM4MakeFromQV3( VmathSoaMatrix4 *result, const VmathSoaQuat *unitQuat, const VmathSoaVector3 *translateVec )
-{
-    VmathSoaMatrix3 mat;
-    vmathSoaM3MakeFromQ( &mat, unitQuat );
-    vmathSoaV4MakeFromV3Scalar( &result->col0, &mat.col0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV4MakeFromV3Scalar( &result->col1, &mat.col1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV4MakeFromV3Scalar( &result->col2, &mat.col2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV4MakeFromV3Scalar( &result->col3, translateVec, ((vec_float4){1.0f,1.0f,1.0f,1.0f}) );
-}
-
-static inline void vmathSoaM4MakeFromAos( VmathSoaMatrix4 *result, const VmathMatrix4 *mat )
-{
-    vmathSoaV4MakeFromAos( &result->col0, &mat->col0 );
-    vmathSoaV4MakeFromAos( &result->col1, &mat->col1 );
-    vmathSoaV4MakeFromAos( &result->col2, &mat->col2 );
-    vmathSoaV4MakeFromAos( &result->col3, &mat->col3 );
-}
-
-static inline void vmathSoaM4MakeFrom4Aos( VmathSoaMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1, const VmathMatrix4 *mat2, const VmathMatrix4 *mat3 )
-{
-    vmathSoaV4MakeFrom4Aos( &result->col0, &mat0->col0, &mat1->col0, &mat2->col0, &mat3->col0 );
-    vmathSoaV4MakeFrom4Aos( &result->col1, &mat0->col1, &mat1->col1, &mat2->col1, &mat3->col1 );
-    vmathSoaV4MakeFrom4Aos( &result->col2, &mat0->col2, &mat1->col2, &mat2->col2, &mat3->col2 );
-    vmathSoaV4MakeFrom4Aos( &result->col3, &mat0->col3, &mat1->col3, &mat2->col3, &mat3->col3 );
-}
-
-static inline void vmathSoaM4Get4Aos( const VmathSoaMatrix4 *mat, VmathMatrix4 *result0, VmathMatrix4 *result1, VmathMatrix4 *result2, VmathMatrix4 *result3 )
-{
-    vmathSoaV4Get4Aos( &mat->col0, &result0->col0, &result1->col0, &result2->col0, &result3->col0 );
-    vmathSoaV4Get4Aos( &mat->col1, &result0->col1, &result1->col1, &result2->col1, &result3->col1 );
-    vmathSoaV4Get4Aos( &mat->col2, &result0->col2, &result1->col2, &result2->col2, &result3->col2 );
-    vmathSoaV4Get4Aos( &mat->col3, &result0->col3, &result1->col3, &result2->col3, &result3->col3 );
-}
-
-static inline void vmathSoaM4SetCol0( VmathSoaMatrix4 *result, const VmathSoaVector4 *_col0 )
-{
-    vmathSoaV4Copy( &result->col0, _col0 );
-}
-
-static inline void vmathSoaM4SetCol1( VmathSoaMatrix4 *result, const VmathSoaVector4 *_col1 )
-{
-    vmathSoaV4Copy( &result->col1, _col1 );
-}
-
-static inline void vmathSoaM4SetCol2( VmathSoaMatrix4 *result, const VmathSoaVector4 *_col2 )
-{
-    vmathSoaV4Copy( &result->col2, _col2 );
-}
-
-static inline void vmathSoaM4SetCol3( VmathSoaMatrix4 *result, const VmathSoaVector4 *_col3 )
-{
-    vmathSoaV4Copy( &result->col3, _col3 );
-}
-
-static inline void vmathSoaM4SetCol( VmathSoaMatrix4 *result, int col, const VmathSoaVector4 *vec )
-{
-    vmathSoaV4Copy( (&result->col0 + col), vec );
-}
-
-static inline void vmathSoaM4SetRow( VmathSoaMatrix4 *result, int row, const VmathSoaVector4 *vec )
-{
-    vmathSoaV4SetElem( &result->col0, row, vmathSoaV4GetElem( vec, 0 ) );
-    vmathSoaV4SetElem( &result->col1, row, vmathSoaV4GetElem( vec, 1 ) );
-    vmathSoaV4SetElem( &result->col2, row, vmathSoaV4GetElem( vec, 2 ) );
-    vmathSoaV4SetElem( &result->col3, row, vmathSoaV4GetElem( vec, 3 ) );
-}
-
-static inline void vmathSoaM4SetElem( VmathSoaMatrix4 *result, int col, int row, vec_float4 val )
-{
-    VmathSoaVector4 tmpV3_0;
-    vmathSoaM4GetCol( &tmpV3_0, result, col );
-    vmathSoaV4SetElem( &tmpV3_0, row, val );
-    vmathSoaM4SetCol( result, col, &tmpV3_0 );
-}
-
-static inline vec_float4 vmathSoaM4GetElem( const VmathSoaMatrix4 *mat, int col, int row )
-{
-    VmathSoaVector4 tmpV4_0;
-    vmathSoaM4GetCol( &tmpV4_0, mat, col );
-    return vmathSoaV4GetElem( &tmpV4_0, row );
-}
-
-static inline void vmathSoaM4GetCol0( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat )
-{
-    vmathSoaV4Copy( result, &mat->col0 );
-}
-
-static inline void vmathSoaM4GetCol1( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat )
-{
-    vmathSoaV4Copy( result, &mat->col1 );
-}
-
-static inline void vmathSoaM4GetCol2( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat )
-{
-    vmathSoaV4Copy( result, &mat->col2 );
-}
-
-static inline void vmathSoaM4GetCol3( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat )
-{
-    vmathSoaV4Copy( result, &mat->col3 );
-}
-
-static inline void vmathSoaM4GetCol( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, int col )
-{
-    vmathSoaV4Copy( result, (&mat->col0 + col) );
-}
-
-static inline void vmathSoaM4GetRow( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, int row )
-{
-    vmathSoaV4MakeFromElems( result, vmathSoaV4GetElem( &mat->col0, row ), vmathSoaV4GetElem( &mat->col1, row ), vmathSoaV4GetElem( &mat->col2, row ), vmathSoaV4GetElem( &mat->col3, row ) );
-}
-
-static inline void vmathSoaM4Transpose( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat )
-{
-    VmathSoaMatrix4 tmpResult;
-    vmathSoaV4MakeFromElems( &tmpResult.col0, mat->col0.x, mat->col1.x, mat->col2.x, mat->col3.x );
-    vmathSoaV4MakeFromElems( &tmpResult.col1, mat->col0.y, mat->col1.y, mat->col2.y, mat->col3.y );
-    vmathSoaV4MakeFromElems( &tmpResult.col2, mat->col0.z, mat->col1.z, mat->col2.z, mat->col3.z );
-    vmathSoaV4MakeFromElems( &tmpResult.col3, mat->col0.w, mat->col1.w, mat->col2.w, mat->col3.w );
-    vmathSoaM4Copy( result, &tmpResult );
-}
-
-static inline void vmathSoaM4Inverse( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat )
-{
-    VmathSoaVector4 res0, res1, res2, res3;
-    vec_float4 mA, mB, mC, mD, mE, mF, mG, mH, mI, mJ, mK, mL, mM, mN, mO, mP, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, detInv;
-    mA = mat->col0.x;
-    mB = mat->col0.y;
-    mC = mat->col0.z;
-    mD = mat->col0.w;
-    mE = mat->col1.x;
-    mF = mat->col1.y;
-    mG = mat->col1.z;
-    mH = mat->col1.w;
-    mI = mat->col2.x;
-    mJ = mat->col2.y;
-    mK = mat->col2.z;
-    mL = mat->col2.w;
-    mM = mat->col3.x;
-    mN = mat->col3.y;
-    mO = mat->col3.z;
-    mP = mat->col3.w;
-    tmp0 = vec_sub( vec_madd( mK, mD, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mC, mL, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmp1 = vec_sub( vec_madd( mO, mH, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mG, mP, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmp2 = vec_sub( vec_madd( mB, mK, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mJ, mC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmp3 = vec_sub( vec_madd( mF, mO, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mN, mG, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmp4 = vec_sub( vec_madd( mJ, mD, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mB, mL, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmp5 = vec_sub( vec_madd( mN, mH, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mF, mP, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    vmathSoaV4SetX( &res0, vec_sub( vec_sub( vec_madd( mJ, tmp1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mL, tmp3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mK, tmp5, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ) );
-    vmathSoaV4SetY( &res0, vec_sub( vec_sub( vec_madd( mN, tmp0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mP, tmp2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mO, tmp4, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ) );
-    vmathSoaV4SetZ( &res0, vec_sub( vec_add( vec_madd( mD, tmp3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mC, tmp5, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mB, tmp1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ) );
-    vmathSoaV4SetW( &res0, vec_sub( vec_add( vec_madd( mH, tmp2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mG, tmp4, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mF, tmp0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ) );
-    detInv = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec_add( vec_add( vec_add( vec_madd( mA, res0.x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mE, res0.y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mI, res0.z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mM, res0.w, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ) );
-    vmathSoaV4SetX( &res1, vec_madd( mI, tmp1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    vmathSoaV4SetY( &res1, vec_madd( mM, tmp0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    vmathSoaV4SetZ( &res1, vec_madd( mA, tmp1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    vmathSoaV4SetW( &res1, vec_madd( mE, tmp0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    vmathSoaV4SetX( &res3, vec_madd( mI, tmp3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    vmathSoaV4SetY( &res3, vec_madd( mM, tmp2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    vmathSoaV4SetZ( &res3, vec_madd( mA, tmp3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    vmathSoaV4SetW( &res3, vec_madd( mE, tmp2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    vmathSoaV4SetX( &res2, vec_madd( mI, tmp5, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    vmathSoaV4SetY( &res2, vec_madd( mM, tmp4, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    vmathSoaV4SetZ( &res2, vec_madd( mA, tmp5, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    vmathSoaV4SetW( &res2, vec_madd( mE, tmp4, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmp0 = vec_sub( vec_madd( mI, mB, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mA, mJ, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmp1 = vec_sub( vec_madd( mM, mF, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mE, mN, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmp2 = vec_sub( vec_madd( mI, mD, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mA, mL, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmp3 = vec_sub( vec_madd( mM, mH, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mE, mP, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmp4 = vec_sub( vec_madd( mI, mC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mA, mK, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmp5 = vec_sub( vec_madd( mM, mG, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mE, mO, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    vmathSoaV4SetX( &res2, vec_add( vec_sub( vec_madd( mL, tmp1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mJ, tmp3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), res2.x ) );
-    vmathSoaV4SetY( &res2, vec_add( vec_sub( vec_madd( mP, tmp0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mN, tmp2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), res2.y ) );
-    vmathSoaV4SetZ( &res2, vec_sub( vec_sub( vec_madd( mB, tmp3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mD, tmp1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), res2.z ) );
-    vmathSoaV4SetW( &res2, vec_sub( vec_sub( vec_madd( mF, tmp2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mH, tmp0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), res2.w ) );
-    vmathSoaV4SetX( &res3, vec_add( vec_sub( vec_madd( mJ, tmp5, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mK, tmp1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), res3.x ) );
-    vmathSoaV4SetY( &res3, vec_add( vec_sub( vec_madd( mN, tmp4, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mO, tmp0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), res3.y ) );
-    vmathSoaV4SetZ( &res3, vec_sub( vec_sub( vec_madd( mC, tmp1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mB, tmp5, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), res3.z ) );
-    vmathSoaV4SetW( &res3, vec_sub( vec_sub( vec_madd( mG, tmp0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mF, tmp4, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), res3.w ) );
-    vmathSoaV4SetX( &res1, vec_sub( vec_sub( vec_madd( mK, tmp3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mL, tmp5, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), res1.x ) );
-    vmathSoaV4SetY( &res1, vec_sub( vec_sub( vec_madd( mO, tmp2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mP, tmp4, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), res1.y ) );
-    vmathSoaV4SetZ( &res1, vec_add( vec_sub( vec_madd( mD, tmp5, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mC, tmp3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), res1.z ) );
-    vmathSoaV4SetW( &res1, vec_add( vec_sub( vec_madd( mH, tmp4, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mG, tmp2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), res1.w ) );
-    vmathSoaV4ScalarMul( &result->col0, &res0, detInv );
-    vmathSoaV4ScalarMul( &result->col1, &res1, detInv );
-    vmathSoaV4ScalarMul( &result->col2, &res2, detInv );
-    vmathSoaV4ScalarMul( &result->col3, &res3, detInv );
-}
-
-static inline void vmathSoaM4AffineInverse( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat )
-{
-    VmathSoaTransform3 affineMat, tmpT3_0;
-    VmathSoaVector3 tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3;
-    vmathSoaV4GetXYZ( &tmpV3_0, &mat->col0 );
-    vmathSoaT3SetCol0( &affineMat, &tmpV3_0 );
-    vmathSoaV4GetXYZ( &tmpV3_1, &mat->col1 );
-    vmathSoaT3SetCol1( &affineMat, &tmpV3_1 );
-    vmathSoaV4GetXYZ( &tmpV3_2, &mat->col2 );
-    vmathSoaT3SetCol2( &affineMat, &tmpV3_2 );
-    vmathSoaV4GetXYZ( &tmpV3_3, &mat->col3 );
-    vmathSoaT3SetCol3( &affineMat, &tmpV3_3 );
-    vmathSoaT3Inverse( &tmpT3_0, &affineMat );
-    vmathSoaM4MakeFromT3( result, &tmpT3_0 );
-}
-
-static inline void vmathSoaM4OrthoInverse( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat )
-{
-    VmathSoaTransform3 affineMat, tmpT3_0;
-    VmathSoaVector3 tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3;
-    vmathSoaV4GetXYZ( &tmpV3_0, &mat->col0 );
-    vmathSoaT3SetCol0( &affineMat, &tmpV3_0 );
-    vmathSoaV4GetXYZ( &tmpV3_1, &mat->col1 );
-    vmathSoaT3SetCol1( &affineMat, &tmpV3_1 );
-    vmathSoaV4GetXYZ( &tmpV3_2, &mat->col2 );
-    vmathSoaT3SetCol2( &affineMat, &tmpV3_2 );
-    vmathSoaV4GetXYZ( &tmpV3_3, &mat->col3 );
-    vmathSoaT3SetCol3( &affineMat, &tmpV3_3 );
-    vmathSoaT3OrthoInverse( &tmpT3_0, &affineMat );
-    vmathSoaM4MakeFromT3( result, &tmpT3_0 );
-}
-
-static inline vec_float4 vmathSoaM4Determinant( const VmathSoaMatrix4 *mat )
-{
-    vec_float4 dx, dy, dz, dw, mA, mB, mC, mD, mE, mF, mG, mH, mI, mJ, mK, mL, mM, mN, mO, mP, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
-    mA = mat->col0.x;
-    mB = mat->col0.y;
-    mC = mat->col0.z;
-    mD = mat->col0.w;
-    mE = mat->col1.x;
-    mF = mat->col1.y;
-    mG = mat->col1.z;
-    mH = mat->col1.w;
-    mI = mat->col2.x;
-    mJ = mat->col2.y;
-    mK = mat->col2.z;
-    mL = mat->col2.w;
-    mM = mat->col3.x;
-    mN = mat->col3.y;
-    mO = mat->col3.z;
-    mP = mat->col3.w;
-    tmp0 = vec_sub( vec_madd( mK, mD, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mC, mL, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmp1 = vec_sub( vec_madd( mO, mH, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mG, mP, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmp2 = vec_sub( vec_madd( mB, mK, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mJ, mC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmp3 = vec_sub( vec_madd( mF, mO, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mN, mG, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmp4 = vec_sub( vec_madd( mJ, mD, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mB, mL, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmp5 = vec_sub( vec_madd( mN, mH, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mF, mP, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    dx = vec_sub( vec_sub( vec_madd( mJ, tmp1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mL, tmp3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mK, tmp5, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    dy = vec_sub( vec_sub( vec_madd( mN, tmp0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mP, tmp2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mO, tmp4, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    dz = vec_sub( vec_add( vec_madd( mD, tmp3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mC, tmp5, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mB, tmp1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    dw = vec_sub( vec_add( vec_madd( mH, tmp2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mG, tmp4, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mF, tmp0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    return vec_add( vec_add( vec_add( vec_madd( mA, dx, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mE, dy, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mI, dz, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mM, dw, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-}
-
-static inline void vmathSoaM4Add( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1 )
-{
-    vmathSoaV4Add( &result->col0, &mat0->col0, &mat1->col0 );
-    vmathSoaV4Add( &result->col1, &mat0->col1, &mat1->col1 );
-    vmathSoaV4Add( &result->col2, &mat0->col2, &mat1->col2 );
-    vmathSoaV4Add( &result->col3, &mat0->col3, &mat1->col3 );
-}
-
-static inline void vmathSoaM4Sub( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1 )
-{
-    vmathSoaV4Sub( &result->col0, &mat0->col0, &mat1->col0 );
-    vmathSoaV4Sub( &result->col1, &mat0->col1, &mat1->col1 );
-    vmathSoaV4Sub( &result->col2, &mat0->col2, &mat1->col2 );
-    vmathSoaV4Sub( &result->col3, &mat0->col3, &mat1->col3 );
-}
-
-static inline void vmathSoaM4Neg( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat )
-{
-    vmathSoaV4Neg( &result->col0, &mat->col0 );
-    vmathSoaV4Neg( &result->col1, &mat->col1 );
-    vmathSoaV4Neg( &result->col2, &mat->col2 );
-    vmathSoaV4Neg( &result->col3, &mat->col3 );
-}
-
-static inline void vmathSoaM4AbsPerElem( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat )
-{
-    vmathSoaV4AbsPerElem( &result->col0, &mat->col0 );
-    vmathSoaV4AbsPerElem( &result->col1, &mat->col1 );
-    vmathSoaV4AbsPerElem( &result->col2, &mat->col2 );
-    vmathSoaV4AbsPerElem( &result->col3, &mat->col3 );
-}
-
-static inline void vmathSoaM4ScalarMul( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat, vec_float4 scalar )
-{
-    vmathSoaV4ScalarMul( &result->col0, &mat->col0, scalar );
-    vmathSoaV4ScalarMul( &result->col1, &mat->col1, scalar );
-    vmathSoaV4ScalarMul( &result->col2, &mat->col2, scalar );
-    vmathSoaV4ScalarMul( &result->col3, &mat->col3, scalar );
-}
-
-static inline void vmathSoaM4MulV4( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, const VmathSoaVector4 *vec )
-{
-    vec_float4 tmpX, tmpY, tmpZ, tmpW;
-    tmpX = vec_add( vec_add( vec_add( vec_madd( mat->col0.x, vec->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mat->col1.x, vec->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mat->col2.x, vec->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mat->col3.x, vec->w, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmpY = vec_add( vec_add( vec_add( vec_madd( mat->col0.y, vec->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mat->col1.y, vec->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mat->col2.y, vec->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mat->col3.y, vec->w, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmpZ = vec_add( vec_add( vec_add( vec_madd( mat->col0.z, vec->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mat->col1.z, vec->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mat->col2.z, vec->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mat->col3.z, vec->w, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmpW = vec_add( vec_add( vec_add( vec_madd( mat->col0.w, vec->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mat->col1.w, vec->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mat->col2.w, vec->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mat->col3.w, vec->w, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    vmathSoaV4MakeFromElems( result, tmpX, tmpY, tmpZ, tmpW );
-}
-
-static inline void vmathSoaM4MulV3( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, const VmathSoaVector3 *vec )
-{
-    result->x = vec_add( vec_add( vec_madd( mat->col0.x, vec->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mat->col1.x, vec->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mat->col2.x, vec->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    result->y = vec_add( vec_add( vec_madd( mat->col0.y, vec->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mat->col1.y, vec->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mat->col2.y, vec->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    result->z = vec_add( vec_add( vec_madd( mat->col0.z, vec->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mat->col1.z, vec->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mat->col2.z, vec->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    result->w = vec_add( vec_add( vec_madd( mat->col0.w, vec->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mat->col1.w, vec->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mat->col2.w, vec->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-}
-
-static inline void vmathSoaM4MulP3( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, const VmathSoaPoint3 *pnt )
-{
-    result->x = vec_add( vec_add( vec_add( vec_madd( mat->col0.x, pnt->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mat->col1.x, pnt->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mat->col2.x, pnt->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), mat->col3.x );
-    result->y = vec_add( vec_add( vec_add( vec_madd( mat->col0.y, pnt->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mat->col1.y, pnt->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mat->col2.y, pnt->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), mat->col3.y );
-    result->z = vec_add( vec_add( vec_add( vec_madd( mat->col0.z, pnt->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mat->col1.z, pnt->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mat->col2.z, pnt->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), mat->col3.z );
-    result->w = vec_add( vec_add( vec_add( vec_madd( mat->col0.w, pnt->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mat->col1.w, pnt->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mat->col2.w, pnt->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), mat->col3.w );
-}
-
-static inline void vmathSoaM4Mul( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1 )
-{
-    VmathSoaMatrix4 tmpResult;
-    vmathSoaM4MulV4( &tmpResult.col0, mat0, &mat1->col0 );
-    vmathSoaM4MulV4( &tmpResult.col1, mat0, &mat1->col1 );
-    vmathSoaM4MulV4( &tmpResult.col2, mat0, &mat1->col2 );
-    vmathSoaM4MulV4( &tmpResult.col3, mat0, &mat1->col3 );
-    vmathSoaM4Copy( result, &tmpResult );
-}
-
-static inline void vmathSoaM4MulT3( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat, const VmathSoaTransform3 *tfrm1 )
-{
-    VmathSoaMatrix4 tmpResult;
-    VmathSoaPoint3 tmpP3_0;
-    vmathSoaM4MulV3( &tmpResult.col0, mat, &tfrm1->col0 );
-    vmathSoaM4MulV3( &tmpResult.col1, mat, &tfrm1->col1 );
-    vmathSoaM4MulV3( &tmpResult.col2, mat, &tfrm1->col2 );
-    vmathSoaP3MakeFromV3( &tmpP3_0, &tfrm1->col3 );
-    vmathSoaM4MulP3( &tmpResult.col3, mat, &tmpP3_0 );
-    vmathSoaM4Copy( result, &tmpResult );
-}
-
-static inline void vmathSoaM4MulPerElem( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1 )
-{
-    vmathSoaV4MulPerElem( &result->col0, &mat0->col0, &mat1->col0 );
-    vmathSoaV4MulPerElem( &result->col1, &mat0->col1, &mat1->col1 );
-    vmathSoaV4MulPerElem( &result->col2, &mat0->col2, &mat1->col2 );
-    vmathSoaV4MulPerElem( &result->col3, &mat0->col3, &mat1->col3 );
-}
-
-static inline void vmathSoaM4MakeIdentity( VmathSoaMatrix4 *result )
-{
-    vmathSoaV4MakeXAxis( &result->col0 );
-    vmathSoaV4MakeYAxis( &result->col1 );
-    vmathSoaV4MakeZAxis( &result->col2 );
-    vmathSoaV4MakeWAxis( &result->col3 );
-}
-
-static inline void vmathSoaM4SetUpper3x3( VmathSoaMatrix4 *result, const VmathSoaMatrix3 *mat3 )
-{
-    vmathSoaV4SetXYZ( &result->col0, &mat3->col0 );
-    vmathSoaV4SetXYZ( &result->col1, &mat3->col1 );
-    vmathSoaV4SetXYZ( &result->col2, &mat3->col2 );
-}
-
-static inline void vmathSoaM4GetUpper3x3( VmathSoaMatrix3 *result, const VmathSoaMatrix4 *mat )
-{
-    vmathSoaV4GetXYZ( &result->col0, &mat->col0 );
-    vmathSoaV4GetXYZ( &result->col1, &mat->col1 );
-    vmathSoaV4GetXYZ( &result->col2, &mat->col2 );
-}
-
-static inline void vmathSoaM4SetTranslation( VmathSoaMatrix4 *result, const VmathSoaVector3 *translateVec )
-{
-    vmathSoaV4SetXYZ( &result->col3, translateVec );
-}
-
-static inline void vmathSoaM4GetTranslation( VmathSoaVector3 *result, const VmathSoaMatrix4 *mat )
-{
-    vmathSoaV4GetXYZ( result, &mat->col3 );
-}
-
-static inline void vmathSoaM4MakeRotationX( VmathSoaMatrix4 *result, vec_float4 radians )
-{
-    vec_float4 s, c;
-    sincosf4( radians, &s, &c );
-    vmathSoaV4MakeXAxis( &result->col0 );
-    vmathSoaV4MakeFromElems( &result->col1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), c, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV4MakeFromElems( &result->col2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), negatef4( s ), c, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV4MakeWAxis( &result->col3 );
-}
-
-static inline void vmathSoaM4MakeRotationY( VmathSoaMatrix4 *result, vec_float4 radians )
-{
-    vec_float4 s, c;
-    sincosf4( radians, &s, &c );
-    vmathSoaV4MakeFromElems( &result->col0, c, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), negatef4( s ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV4MakeYAxis( &result->col1 );
-    vmathSoaV4MakeFromElems( &result->col2, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), c, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV4MakeWAxis( &result->col3 );
-}
-
-static inline void vmathSoaM4MakeRotationZ( VmathSoaMatrix4 *result, vec_float4 radians )
-{
-    vec_float4 s, c;
-    sincosf4( radians, &s, &c );
-    vmathSoaV4MakeFromElems( &result->col0, c, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV4MakeFromElems( &result->col1, negatef4( s ), c, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV4MakeZAxis( &result->col2 );
-    vmathSoaV4MakeWAxis( &result->col3 );
-}
-
-static inline void vmathSoaM4MakeRotationZYX( VmathSoaMatrix4 *result, const VmathSoaVector3 *radiansXYZ )
-{
-    vec_float4 sX, cX, sY, cY, sZ, cZ, tmp0, tmp1;
-    sincosf4( radiansXYZ->x, &sX, &cX );
-    sincosf4( radiansXYZ->y, &sY, &cY );
-    sincosf4( radiansXYZ->z, &sZ, &cZ );
-    tmp0 = vec_madd( cZ, sY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    tmp1 = vec_madd( sZ, sY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV4MakeFromElems( &result->col0, vec_madd( cZ, cY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( sZ, cY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), negatef4( sY ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV4MakeFromElems( &result->col1, vec_sub( vec_madd( tmp0, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( sZ, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_add( vec_madd( tmp1, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( cZ, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( cY, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV4MakeFromElems( &result->col2, vec_add( vec_madd( tmp0, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( sZ, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_sub( vec_madd( tmp1, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( cZ, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( cY, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV4MakeWAxis( &result->col3 );
-}
-
-static inline void vmathSoaM4MakeRotationAxis( VmathSoaMatrix4 *result, vec_float4 radians, const VmathSoaVector3 *unitVec )
-{
-    vec_float4 x, y, z, s, c, oneMinusC, xy, yz, zx;
-    sincosf4( radians, &s, &c );
-    x = unitVec->x;
-    y = unitVec->y;
-    z = unitVec->z;
-    xy = vec_madd( x, y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    yz = vec_madd( y, z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    zx = vec_madd( z, x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    oneMinusC = vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), c );
-    vmathSoaV4MakeFromElems( &result->col0, vec_add( vec_madd( vec_madd( x, x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), c ), vec_add( vec_madd( xy, oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( z, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_sub( vec_madd( zx, oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( y, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV4MakeFromElems( &result->col1, vec_sub( vec_madd( xy, oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( z, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_add( vec_madd( vec_madd( y, y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), c ), vec_add( vec_madd( yz, oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( x, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV4MakeFromElems( &result->col2, vec_add( vec_madd( zx, oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( y, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_sub( vec_madd( yz, oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( x, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_add( vec_madd( vec_madd( z, z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), c ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV4MakeWAxis( &result->col3 );
-}
-
-static inline void vmathSoaM4MakeRotationQ( VmathSoaMatrix4 *result, const VmathSoaQuat *unitQuat )
-{
-    VmathSoaTransform3 tmpT3_0;
-    vmathSoaT3MakeRotationQ( &tmpT3_0, unitQuat );
-    vmathSoaM4MakeFromT3( result, &tmpT3_0 );
-}
-
-static inline void vmathSoaM4MakeScale( VmathSoaMatrix4 *result, const VmathSoaVector3 *scaleVec )
-{
-    vmathSoaV4MakeFromElems( &result->col0, scaleVec->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV4MakeFromElems( &result->col1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), scaleVec->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV4MakeFromElems( &result->col2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), scaleVec->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV4MakeWAxis( &result->col3 );
-}
-
-static inline void vmathSoaM4AppendScale( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat, const VmathSoaVector3 *scaleVec )
-{
-    vmathSoaV4ScalarMul( &result->col0, &mat->col0, vmathSoaV3GetX( scaleVec ) );
-    vmathSoaV4ScalarMul( &result->col1, &mat->col1, vmathSoaV3GetY( scaleVec ) );
-    vmathSoaV4ScalarMul( &result->col2, &mat->col2, vmathSoaV3GetZ( scaleVec ) );
-    vmathSoaV4Copy( &result->col3, &mat->col3 );
-}
-
-static inline void vmathSoaM4PrependScale( VmathSoaMatrix4 *result, const VmathSoaVector3 *scaleVec, const VmathSoaMatrix4 *mat )
-{
-    VmathSoaVector4 scale4;
-    vmathSoaV4MakeFromV3Scalar( &scale4, scaleVec, ((vec_float4){1.0f,1.0f,1.0f,1.0f}) );
-    vmathSoaV4MulPerElem( &result->col0, &mat->col0, &scale4 );
-    vmathSoaV4MulPerElem( &result->col1, &mat->col1, &scale4 );
-    vmathSoaV4MulPerElem( &result->col2, &mat->col2, &scale4 );
-    vmathSoaV4MulPerElem( &result->col3, &mat->col3, &scale4 );
-}
-
-static inline void vmathSoaM4MakeTranslation( VmathSoaMatrix4 *result, const VmathSoaVector3 *translateVec )
-{
-    vmathSoaV4MakeXAxis( &result->col0 );
-    vmathSoaV4MakeYAxis( &result->col1 );
-    vmathSoaV4MakeZAxis( &result->col2 );
-    vmathSoaV4MakeFromV3Scalar( &result->col3, translateVec, ((vec_float4){1.0f,1.0f,1.0f,1.0f}) );
-}
-
-static inline void vmathSoaM4MakeLookAt( VmathSoaMatrix4 *result, const VmathSoaPoint3 *eyePos, const VmathSoaPoint3 *lookAtPos, const VmathSoaVector3 *upVec )
-{
-    VmathSoaMatrix4 m4EyeFrame;
-    VmathSoaVector3 v3X, v3Y, v3Z, tmpV3_0, tmpV3_1;
-    VmathSoaVector4 tmpV4_0, tmpV4_1, tmpV4_2, tmpV4_3;
-    vmathSoaV3Normalize( &v3Y, upVec );
-    vmathSoaP3Sub( &tmpV3_0, eyePos, lookAtPos );
-    vmathSoaV3Normalize( &v3Z, &tmpV3_0 );
-    vmathSoaV3Cross( &tmpV3_1, &v3Y, &v3Z );
-    vmathSoaV3Normalize( &v3X, &tmpV3_1 );
-    vmathSoaV3Cross( &v3Y, &v3Z, &v3X );
-    vmathSoaV4MakeFromV3( &tmpV4_0, &v3X );
-    vmathSoaV4MakeFromV3( &tmpV4_1, &v3Y );
-    vmathSoaV4MakeFromV3( &tmpV4_2, &v3Z );
-    vmathSoaV4MakeFromP3( &tmpV4_3, eyePos );
-    vmathSoaM4MakeFromCols( &m4EyeFrame, &tmpV4_0, &tmpV4_1, &tmpV4_2, &tmpV4_3 );
-    vmathSoaM4OrthoInverse( result, &m4EyeFrame );
-}
-
-static inline void vmathSoaM4MakePerspective( VmathSoaMatrix4 *result, vec_float4 fovyRadians, vec_float4 aspect, vec_float4 zNear, vec_float4 zFar )
-{
-    vec_float4 f, rangeInv;
-    f = tanf4( vec_sub( ((vec_float4){_VECTORMATH_PI_OVER_2,_VECTORMATH_PI_OVER_2,_VECTORMATH_PI_OVER_2,_VECTORMATH_PI_OVER_2}), vec_madd( ((vec_float4){0.5f,0.5f,0.5f,0.5f}), fovyRadians, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ) );
-    rangeInv = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec_sub( zNear, zFar ) );
-    vmathSoaV4MakeFromElems( &result->col0, divf4( f, aspect ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV4MakeFromElems( &result->col1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), f, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV4MakeFromElems( &result->col2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), vec_madd( vec_add( zNear, zFar ), rangeInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), ((vec_float4){-1.0f,-1.0f,-1.0f,-1.0f}) );
-    vmathSoaV4MakeFromElems( &result->col3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), vec_madd( vec_madd( vec_madd( zNear, zFar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), rangeInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), ((vec_float4){2.0f,2.0f,2.0f,2.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-static inline void vmathSoaM4MakeFrustum( VmathSoaMatrix4 *result, vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar )
-{
-    vec_float4 sum_rl, sum_tb, sum_nf, inv_rl, inv_tb, inv_nf, n2;
-    sum_rl = vec_add( right, left );
-    sum_tb = vec_add( top, bottom );
-    sum_nf = vec_add( zNear, zFar );
-    inv_rl = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec_sub( right, left ) );
-    inv_tb = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec_sub( top, bottom ) );
-    inv_nf = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec_sub( zNear, zFar ) );
-    n2 = vec_add( zNear, zNear );
-    vmathSoaV4MakeFromElems( &result->col0, vec_madd( n2, inv_rl, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV4MakeFromElems( &result->col1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), vec_madd( n2, inv_tb, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV4MakeFromElems( &result->col2, vec_madd( sum_rl, inv_rl, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( sum_tb, inv_tb, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( sum_nf, inv_nf, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), ((vec_float4){-1.0f,-1.0f,-1.0f,-1.0f}) );
-    vmathSoaV4MakeFromElems( &result->col3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), vec_madd( vec_madd( n2, inv_nf, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), zFar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-static inline void vmathSoaM4MakeOrthographic( VmathSoaMatrix4 *result, vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar )
-{
-    vec_float4 sum_rl, sum_tb, sum_nf, inv_rl, inv_tb, inv_nf;
-    sum_rl = vec_add( right, left );
-    sum_tb = vec_add( top, bottom );
-    sum_nf = vec_add( zNear, zFar );
-    inv_rl = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec_sub( right, left ) );
-    inv_tb = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec_sub( top, bottom ) );
-    inv_nf = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec_sub( zNear, zFar ) );
-    vmathSoaV4MakeFromElems( &result->col0, vec_add( inv_rl, inv_rl ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV4MakeFromElems( &result->col1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), vec_add( inv_tb, inv_tb ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV4MakeFromElems( &result->col2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), vec_add( inv_nf, inv_nf ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV4MakeFromElems( &result->col3, vec_madd( negatef4( sum_rl ), inv_rl, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( negatef4( sum_tb ), inv_tb, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( sum_nf, inv_nf, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), ((vec_float4){1.0f,1.0f,1.0f,1.0f}) );
-}
-
-static inline void vmathSoaM4Select( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1, vec_uint4 select1 )
-{
-    vmathSoaV4Select( &result->col0, &mat0->col0, &mat1->col0, select1 );
-    vmathSoaV4Select( &result->col1, &mat0->col1, &mat1->col1, select1 );
-    vmathSoaV4Select( &result->col2, &mat0->col2, &mat1->col2, select1 );
-    vmathSoaV4Select( &result->col3, &mat0->col3, &mat1->col3, select1 );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathSoaM4Print( const VmathSoaMatrix4 *mat )
-{
-    VmathMatrix4 mat0, mat1, mat2, mat3;
-    vmathSoaM4Get4Aos( mat, &mat0, &mat1, &mat2, &mat3 );
-    printf("slot 0:\n");
-    vmathM4Print( &mat0 );
-    printf("slot 1:\n");
-    vmathM4Print( &mat1 );
-    printf("slot 2:\n");
-    vmathM4Print( &mat2 );
-    printf("slot 3:\n");
-    vmathM4Print( &mat3 );
-}
-
-static inline void vmathSoaM4Prints( const VmathSoaMatrix4 *mat, const char *name )
-{
-    printf("%s:\n", name);
-    vmathSoaM4Print( mat );
-}
-
-#endif
-
-static inline void vmathSoaT3Copy( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm )
-{
-    vmathSoaV3Copy( &result->col0, &tfrm->col0 );
-    vmathSoaV3Copy( &result->col1, &tfrm->col1 );
-    vmathSoaV3Copy( &result->col2, &tfrm->col2 );
-    vmathSoaV3Copy( &result->col3, &tfrm->col3 );
-}
-
-static inline void vmathSoaT3MakeFromScalar( VmathSoaTransform3 *result, vec_float4 scalar )
-{
-    vmathSoaV3MakeFromScalar( &result->col0, scalar );
-    vmathSoaV3MakeFromScalar( &result->col1, scalar );
-    vmathSoaV3MakeFromScalar( &result->col2, scalar );
-    vmathSoaV3MakeFromScalar( &result->col3, scalar );
-}
-
-static inline void vmathSoaT3MakeFromCols( VmathSoaTransform3 *result, const VmathSoaVector3 *_col0, const VmathSoaVector3 *_col1, const VmathSoaVector3 *_col2, const VmathSoaVector3 *_col3 )
-{
-    vmathSoaV3Copy( &result->col0, _col0 );
-    vmathSoaV3Copy( &result->col1, _col1 );
-    vmathSoaV3Copy( &result->col2, _col2 );
-    vmathSoaV3Copy( &result->col3, _col3 );
-}
-
-static inline void vmathSoaT3MakeFromM3V3( VmathSoaTransform3 *result, const VmathSoaMatrix3 *tfrm, const VmathSoaVector3 *translateVec )
-{
-    vmathSoaT3SetUpper3x3( result, tfrm );
-    vmathSoaT3SetTranslation( result, translateVec );
-}
-
-static inline void vmathSoaT3MakeFromQV3( VmathSoaTransform3 *result, const VmathSoaQuat *unitQuat, const VmathSoaVector3 *translateVec )
-{
-    VmathSoaMatrix3 tmpM3_0;
-    vmathSoaM3MakeFromQ( &tmpM3_0, unitQuat );
-    vmathSoaT3SetUpper3x3( result, &tmpM3_0 );
-    vmathSoaT3SetTranslation( result, translateVec );
-}
-
-static inline void vmathSoaT3MakeFromAos( VmathSoaTransform3 *result, const VmathTransform3 *tfrm )
-{
-    vmathSoaV3MakeFromAos( &result->col0, &tfrm->col0 );
-    vmathSoaV3MakeFromAos( &result->col1, &tfrm->col1 );
-    vmathSoaV3MakeFromAos( &result->col2, &tfrm->col2 );
-    vmathSoaV3MakeFromAos( &result->col3, &tfrm->col3 );
-}
-
-static inline void vmathSoaT3MakeFrom4Aos( VmathSoaTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1, const VmathTransform3 *tfrm2, const VmathTransform3 *tfrm3 )
-{
-    vmathSoaV3MakeFrom4Aos( &result->col0, &tfrm0->col0, &tfrm1->col0, &tfrm2->col0, &tfrm3->col0 );
-    vmathSoaV3MakeFrom4Aos( &result->col1, &tfrm0->col1, &tfrm1->col1, &tfrm2->col1, &tfrm3->col1 );
-    vmathSoaV3MakeFrom4Aos( &result->col2, &tfrm0->col2, &tfrm1->col2, &tfrm2->col2, &tfrm3->col2 );
-    vmathSoaV3MakeFrom4Aos( &result->col3, &tfrm0->col3, &tfrm1->col3, &tfrm2->col3, &tfrm3->col3 );
-}
-
-static inline void vmathSoaT3Get4Aos( const VmathSoaTransform3 *tfrm, VmathTransform3 *result0, VmathTransform3 *result1, VmathTransform3 *result2, VmathTransform3 *result3 )
-{
-    vmathSoaV3Get4Aos( &tfrm->col0, &result0->col0, &result1->col0, &result2->col0, &result3->col0 );
-    vmathSoaV3Get4Aos( &tfrm->col1, &result0->col1, &result1->col1, &result2->col1, &result3->col1 );
-    vmathSoaV3Get4Aos( &tfrm->col2, &result0->col2, &result1->col2, &result2->col2, &result3->col2 );
-    vmathSoaV3Get4Aos( &tfrm->col3, &result0->col3, &result1->col3, &result2->col3, &result3->col3 );
-}
-
-static inline void vmathSoaT3SetCol0( VmathSoaTransform3 *result, const VmathSoaVector3 *_col0 )
-{
-    vmathSoaV3Copy( &result->col0, _col0 );
-}
-
-static inline void vmathSoaT3SetCol1( VmathSoaTransform3 *result, const VmathSoaVector3 *_col1 )
-{
-    vmathSoaV3Copy( &result->col1, _col1 );
-}
-
-static inline void vmathSoaT3SetCol2( VmathSoaTransform3 *result, const VmathSoaVector3 *_col2 )
-{
-    vmathSoaV3Copy( &result->col2, _col2 );
-}
-
-static inline void vmathSoaT3SetCol3( VmathSoaTransform3 *result, const VmathSoaVector3 *_col3 )
-{
-    vmathSoaV3Copy( &result->col3, _col3 );
-}
-
-static inline void vmathSoaT3SetCol( VmathSoaTransform3 *result, int col, const VmathSoaVector3 *vec )
-{
-    vmathSoaV3Copy( (&result->col0 + col), vec );
-}
-
-static inline void vmathSoaT3SetRow( VmathSoaTransform3 *result, int row, const VmathSoaVector4 *vec )
-{
-    vmathSoaV3SetElem( &result->col0, row, vmathSoaV4GetElem( vec, 0 ) );
-    vmathSoaV3SetElem( &result->col1, row, vmathSoaV4GetElem( vec, 1 ) );
-    vmathSoaV3SetElem( &result->col2, row, vmathSoaV4GetElem( vec, 2 ) );
-    vmathSoaV3SetElem( &result->col3, row, vmathSoaV4GetElem( vec, 3 ) );
-}
-
-static inline void vmathSoaT3SetElem( VmathSoaTransform3 *result, int col, int row, vec_float4 val )
-{
-    VmathSoaVector3 tmpV3_0;
-    vmathSoaT3GetCol( &tmpV3_0, result, col );
-    vmathSoaV3SetElem( &tmpV3_0, row, val );
-    vmathSoaT3SetCol( result, col, &tmpV3_0 );
-}
-
-static inline vec_float4 vmathSoaT3GetElem( const VmathSoaTransform3 *tfrm, int col, int row )
-{
-    VmathSoaVector3 tmpV3_0;
-    vmathSoaT3GetCol( &tmpV3_0, tfrm, col );
-    return vmathSoaV3GetElem( &tmpV3_0, row );
-}
-
-static inline void vmathSoaT3GetCol0( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm )
-{
-    vmathSoaV3Copy( result, &tfrm->col0 );
-}
-
-static inline void vmathSoaT3GetCol1( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm )
-{
-    vmathSoaV3Copy( result, &tfrm->col1 );
-}
-
-static inline void vmathSoaT3GetCol2( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm )
-{
-    vmathSoaV3Copy( result, &tfrm->col2 );
-}
-
-static inline void vmathSoaT3GetCol3( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm )
-{
-    vmathSoaV3Copy( result, &tfrm->col3 );
-}
-
-static inline void vmathSoaT3GetCol( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm, int col )
-{
-    vmathSoaV3Copy( result, (&tfrm->col0 + col) );
-}
-
-static inline void vmathSoaT3GetRow( VmathSoaVector4 *result, const VmathSoaTransform3 *tfrm, int row )
-{
-    vmathSoaV4MakeFromElems( result, vmathSoaV3GetElem( &tfrm->col0, row ), vmathSoaV3GetElem( &tfrm->col1, row ), vmathSoaV3GetElem( &tfrm->col2, row ), vmathSoaV3GetElem( &tfrm->col3, row ) );
-}
-
-static inline void vmathSoaT3Inverse( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm )
-{
-    VmathSoaVector3 tmp0, tmp1, tmp2, inv0, inv1, inv2, tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3, tmpV3_4, tmpV3_5;
-    vec_float4 detinv;
-    vmathSoaV3Cross( &tmp0, &tfrm->col1, &tfrm->col2 );
-    vmathSoaV3Cross( &tmp1, &tfrm->col2, &tfrm->col0 );
-    vmathSoaV3Cross( &tmp2, &tfrm->col0, &tfrm->col1 );
-    detinv = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vmathSoaV3Dot( &tfrm->col2, &tmp2 ) );
-    vmathSoaV3MakeFromElems( &inv0, vec_madd( tmp0.x, detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp1.x, detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp2.x, detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    vmathSoaV3MakeFromElems( &inv1, vec_madd( tmp0.y, detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp1.y, detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp2.y, detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    vmathSoaV3MakeFromElems( &inv2, vec_madd( tmp0.z, detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp1.z, detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp2.z, detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    vmathSoaV3Copy( &result->col0, &inv0 );
-    vmathSoaV3Copy( &result->col1, &inv1 );
-    vmathSoaV3Copy( &result->col2, &inv2 );
-    vmathSoaV3ScalarMul( &tmpV3_0, &inv0, tfrm->col3.x );
-    vmathSoaV3ScalarMul( &tmpV3_1, &inv1, tfrm->col3.y );
-    vmathSoaV3ScalarMul( &tmpV3_2, &inv2, tfrm->col3.z );
-    vmathSoaV3Add( &tmpV3_3, &tmpV3_1, &tmpV3_2 );
-    vmathSoaV3Add( &tmpV3_4, &tmpV3_0, &tmpV3_3 );
-    vmathSoaV3Neg( &tmpV3_5, &tmpV3_4 );
-    vmathSoaV3Copy( &result->col3, &tmpV3_5 );
-}
-
-static inline void vmathSoaT3OrthoInverse( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm )
-{
-    VmathSoaVector3 inv0, inv1, inv2, tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3, tmpV3_4, tmpV3_5;
-    vmathSoaV3MakeFromElems( &inv0, tfrm->col0.x, tfrm->col1.x, tfrm->col2.x );
-    vmathSoaV3MakeFromElems( &inv1, tfrm->col0.y, tfrm->col1.y, tfrm->col2.y );
-    vmathSoaV3MakeFromElems( &inv2, tfrm->col0.z, tfrm->col1.z, tfrm->col2.z );
-    vmathSoaV3Copy( &result->col0, &inv0 );
-    vmathSoaV3Copy( &result->col1, &inv1 );
-    vmathSoaV3Copy( &result->col2, &inv2 );
-    vmathSoaV3ScalarMul( &tmpV3_0, &inv0, tfrm->col3.x );
-    vmathSoaV3ScalarMul( &tmpV3_1, &inv1, tfrm->col3.y );
-    vmathSoaV3ScalarMul( &tmpV3_2, &inv2, tfrm->col3.z );
-    vmathSoaV3Add( &tmpV3_3, &tmpV3_1, &tmpV3_2 );
-    vmathSoaV3Add( &tmpV3_4, &tmpV3_0, &tmpV3_3 );
-    vmathSoaV3Neg( &tmpV3_5, &tmpV3_4 );
-    vmathSoaV3Copy( &result->col3, &tmpV3_5 );
-}
-
-static inline void vmathSoaT3AbsPerElem( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm )
-{
-    vmathSoaV3AbsPerElem( &result->col0, &tfrm->col0 );
-    vmathSoaV3AbsPerElem( &result->col1, &tfrm->col1 );
-    vmathSoaV3AbsPerElem( &result->col2, &tfrm->col2 );
-    vmathSoaV3AbsPerElem( &result->col3, &tfrm->col3 );
-}
-
-static inline void vmathSoaT3MulV3( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm, const VmathSoaVector3 *vec )
-{
-    vec_float4 tmpX, tmpY, tmpZ;
-    tmpX = vec_add( vec_add( vec_madd( tfrm->col0.x, vec->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tfrm->col1.x, vec->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( tfrm->col2.x, vec->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmpY = vec_add( vec_add( vec_madd( tfrm->col0.y, vec->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tfrm->col1.y, vec->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( tfrm->col2.y, vec->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmpZ = vec_add( vec_add( vec_madd( tfrm->col0.z, vec->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tfrm->col1.z, vec->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( tfrm->col2.z, vec->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    vmathSoaV3MakeFromElems( result, tmpX, tmpY, tmpZ );
-}
-
-static inline void vmathSoaT3MulP3( VmathSoaPoint3 *result, const VmathSoaTransform3 *tfrm, const VmathSoaPoint3 *pnt )
-{
-    vec_float4 tmpX, tmpY, tmpZ;
-    tmpX = vec_add( vec_add( vec_add( vec_madd( tfrm->col0.x, pnt->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tfrm->col1.x, pnt->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( tfrm->col2.x, pnt->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), tfrm->col3.x );
-    tmpY = vec_add( vec_add( vec_add( vec_madd( tfrm->col0.y, pnt->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tfrm->col1.y, pnt->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( tfrm->col2.y, pnt->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), tfrm->col3.y );
-    tmpZ = vec_add( vec_add( vec_add( vec_madd( tfrm->col0.z, pnt->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tfrm->col1.z, pnt->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( tfrm->col2.z, pnt->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), tfrm->col3.z );
-    vmathSoaP3MakeFromElems( result, tmpX, tmpY, tmpZ );
-}
-
-static inline void vmathSoaT3Mul( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm0, const VmathSoaTransform3 *tfrm1 )
-{
-    VmathSoaTransform3 tmpResult;
-    VmathSoaPoint3 tmpP3_0, tmpP3_1;
-    vmathSoaT3MulV3( &tmpResult.col0, tfrm0, &tfrm1->col0 );
-    vmathSoaT3MulV3( &tmpResult.col1, tfrm0, &tfrm1->col1 );
-    vmathSoaT3MulV3( &tmpResult.col2, tfrm0, &tfrm1->col2 );
-    vmathSoaP3MakeFromV3( &tmpP3_0, &tfrm1->col3 );
-    vmathSoaT3MulP3( &tmpP3_1, tfrm0, &tmpP3_0 );
-    vmathSoaV3MakeFromP3( &tmpResult.col3, &tmpP3_1 );
-    vmathSoaT3Copy( result, &tmpResult );
-}
-
-static inline void vmathSoaT3MulPerElem( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm0, const VmathSoaTransform3 *tfrm1 )
-{
-    vmathSoaV3MulPerElem( &result->col0, &tfrm0->col0, &tfrm1->col0 );
-    vmathSoaV3MulPerElem( &result->col1, &tfrm0->col1, &tfrm1->col1 );
-    vmathSoaV3MulPerElem( &result->col2, &tfrm0->col2, &tfrm1->col2 );
-    vmathSoaV3MulPerElem( &result->col3, &tfrm0->col3, &tfrm1->col3 );
-}
-
-static inline void vmathSoaT3MakeIdentity( VmathSoaTransform3 *result )
-{
-    vmathSoaV3MakeXAxis( &result->col0 );
-    vmathSoaV3MakeYAxis( &result->col1 );
-    vmathSoaV3MakeZAxis( &result->col2 );
-    vmathSoaV3MakeFromScalar( &result->col3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-static inline void vmathSoaT3SetUpper3x3( VmathSoaTransform3 *result, const VmathSoaMatrix3 *tfrm )
-{
-    vmathSoaV3Copy( &result->col0, &tfrm->col0 );
-    vmathSoaV3Copy( &result->col1, &tfrm->col1 );
-    vmathSoaV3Copy( &result->col2, &tfrm->col2 );
-}
-
-static inline void vmathSoaT3GetUpper3x3( VmathSoaMatrix3 *result, const VmathSoaTransform3 *tfrm )
-{
-    vmathSoaM3MakeFromCols( result, &tfrm->col0, &tfrm->col1, &tfrm->col2 );
-}
-
-static inline void vmathSoaT3SetTranslation( VmathSoaTransform3 *result, const VmathSoaVector3 *translateVec )
-{
-    vmathSoaV3Copy( &result->col3, translateVec );
-}
-
-static inline void vmathSoaT3GetTranslation( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm )
-{
-    vmathSoaV3Copy( result, &tfrm->col3 );
-}
-
-static inline void vmathSoaT3MakeRotationX( VmathSoaTransform3 *result, vec_float4 radians )
-{
-    vec_float4 s, c;
-    sincosf4( radians, &s, &c );
-    vmathSoaV3MakeXAxis( &result->col0 );
-    vmathSoaV3MakeFromElems( &result->col1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), c, s );
-    vmathSoaV3MakeFromElems( &result->col2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), negatef4( s ), c );
-    vmathSoaV3MakeFromScalar( &result->col3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-static inline void vmathSoaT3MakeRotationY( VmathSoaTransform3 *result, vec_float4 radians )
-{
-    vec_float4 s, c;
-    sincosf4( radians, &s, &c );
-    vmathSoaV3MakeFromElems( &result->col0, c, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), negatef4( s ) );
-    vmathSoaV3MakeYAxis( &result->col1 );
-    vmathSoaV3MakeFromElems( &result->col2, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), c );
-    vmathSoaV3MakeFromScalar( &result->col3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-static inline void vmathSoaT3MakeRotationZ( VmathSoaTransform3 *result, vec_float4 radians )
-{
-    vec_float4 s, c;
-    sincosf4( radians, &s, &c );
-    vmathSoaV3MakeFromElems( &result->col0, c, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV3MakeFromElems( &result->col1, negatef4( s ), c, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV3MakeZAxis( &result->col2 );
-    vmathSoaV3MakeFromScalar( &result->col3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-static inline void vmathSoaT3MakeRotationZYX( VmathSoaTransform3 *result, const VmathSoaVector3 *radiansXYZ )
-{
-    vec_float4 sX, cX, sY, cY, sZ, cZ, tmp0, tmp1;
-    sincosf4( radiansXYZ->x, &sX, &cX );
-    sincosf4( radiansXYZ->y, &sY, &cY );
-    sincosf4( radiansXYZ->z, &sZ, &cZ );
-    tmp0 = vec_madd( cZ, sY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    tmp1 = vec_madd( sZ, sY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV3MakeFromElems( &result->col0, vec_madd( cZ, cY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( sZ, cY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), negatef4( sY ) );
-    vmathSoaV3MakeFromElems( &result->col1, vec_sub( vec_madd( tmp0, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( sZ, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_add( vec_madd( tmp1, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( cZ, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( cY, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    vmathSoaV3MakeFromElems( &result->col2, vec_add( vec_madd( tmp0, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( sZ, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_sub( vec_madd( tmp1, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( cZ, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( cY, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    vmathSoaV3MakeFromScalar( &result->col3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-static inline void vmathSoaT3MakeRotationAxis( VmathSoaTransform3 *result, vec_float4 radians, const VmathSoaVector3 *unitVec )
-{
-    VmathSoaMatrix3 tmpM3_0;
-    VmathSoaVector3 tmpV3_0;
-    vmathSoaM3MakeRotationAxis( &tmpM3_0, radians, unitVec );
-    vmathSoaV3MakeFromScalar( &tmpV3_0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaT3MakeFromM3V3( result, &tmpM3_0, &tmpV3_0 );
-}
-
-static inline void vmathSoaT3MakeRotationQ( VmathSoaTransform3 *result, const VmathSoaQuat *unitQuat )
-{
-    VmathSoaMatrix3 tmpM3_0;
-    VmathSoaVector3 tmpV3_0;
-    vmathSoaM3MakeFromQ( &tmpM3_0, unitQuat );
-    vmathSoaV3MakeFromScalar( &tmpV3_0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaT3MakeFromM3V3( result, &tmpM3_0, &tmpV3_0 );
-}
-
-static inline void vmathSoaT3MakeScale( VmathSoaTransform3 *result, const VmathSoaVector3 *scaleVec )
-{
-    vmathSoaV3MakeFromElems( &result->col0, scaleVec->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV3MakeFromElems( &result->col1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), scaleVec->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathSoaV3MakeFromElems( &result->col2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), scaleVec->z );
-    vmathSoaV3MakeFromScalar( &result->col3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-static inline void vmathSoaT3AppendScale( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm, const VmathSoaVector3 *scaleVec )
-{
-    vmathSoaV3ScalarMul( &result->col0, &tfrm->col0, vmathSoaV3GetX( scaleVec ) );
-    vmathSoaV3ScalarMul( &result->col1, &tfrm->col1, vmathSoaV3GetY( scaleVec ) );
-    vmathSoaV3ScalarMul( &result->col2, &tfrm->col2, vmathSoaV3GetZ( scaleVec ) );
-    vmathSoaV3Copy( &result->col3, &tfrm->col3 );
-}
-
-static inline void vmathSoaT3PrependScale( VmathSoaTransform3 *result, const VmathSoaVector3 *scaleVec, const VmathSoaTransform3 *tfrm )
-{
-    vmathSoaV3MulPerElem( &result->col0, &tfrm->col0, scaleVec );
-    vmathSoaV3MulPerElem( &result->col1, &tfrm->col1, scaleVec );
-    vmathSoaV3MulPerElem( &result->col2, &tfrm->col2, scaleVec );
-    vmathSoaV3MulPerElem( &result->col3, &tfrm->col3, scaleVec );
-}
-
-static inline void vmathSoaT3MakeTranslation( VmathSoaTransform3 *result, const VmathSoaVector3 *translateVec )
-{
-    vmathSoaV3MakeXAxis( &result->col0 );
-    vmathSoaV3MakeYAxis( &result->col1 );
-    vmathSoaV3MakeZAxis( &result->col2 );
-    vmathSoaV3Copy( &result->col3, translateVec );
-}
-
-static inline void vmathSoaT3Select( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm0, const VmathSoaTransform3 *tfrm1, vec_uint4 select1 )
-{
-    vmathSoaV3Select( &result->col0, &tfrm0->col0, &tfrm1->col0, select1 );
-    vmathSoaV3Select( &result->col1, &tfrm0->col1, &tfrm1->col1, select1 );
-    vmathSoaV3Select( &result->col2, &tfrm0->col2, &tfrm1->col2, select1 );
-    vmathSoaV3Select( &result->col3, &tfrm0->col3, &tfrm1->col3, select1 );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathSoaT3Print( const VmathSoaTransform3 *tfrm )
-{
-    VmathTransform3 mat0, mat1, mat2, mat3;
-    vmathSoaT3Get4Aos( tfrm, &mat0, &mat1, &mat2, &mat3 );
-    printf("slot 0:\n");
-    vmathT3Print( &mat0 );
-    printf("slot 1:\n");
-    vmathT3Print( &mat1 );
-    printf("slot 2:\n");
-    vmathT3Print( &mat2 );
-    printf("slot 3:\n");
-    vmathT3Print( &mat3 );
-}
-
-static inline void vmathSoaT3Prints( const VmathSoaTransform3 *tfrm, const char *name )
-{
-    printf("%s:\n", name);
-    vmathSoaT3Print( tfrm );
-}
-
-#endif
-
-static inline void vmathSoaQMakeFromM3( VmathSoaQuat *result, const VmathSoaMatrix3 *tfrm )
-{
-    vec_float4 trace, radicand, scale, xx, yx, zx, xy, yy, zy, xz, yz, zz, tmpx, tmpy, tmpz, tmpw, qx, qy, qz, qw;
-    vec_uint4 negTrace, ZgtX, ZgtY, YgtX;
-    vec_uint4 largestXorY, largestYorZ, largestZorX;
-
-    xx = tfrm->col0.x;
-    yx = tfrm->col0.y;
-    zx = tfrm->col0.z;
-    xy = tfrm->col1.x;
-    yy = tfrm->col1.y;
-    zy = tfrm->col1.z;
-    xz = tfrm->col2.x;
-    yz = tfrm->col2.y;
-    zz = tfrm->col2.z;
-
-    trace = vec_add( vec_add( xx, yy ), zz );
-
-    negTrace = (vec_uint4)vec_cmpgt( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), trace );
-    ZgtX = (vec_uint4)vec_cmpgt( zz, xx );
-    ZgtY = (vec_uint4)vec_cmpgt( zz, yy );
-    YgtX = (vec_uint4)vec_cmpgt( yy, xx );
-    largestXorY = vec_andc( negTrace, vec_and( ZgtX, ZgtY ) );
-    largestYorZ = vec_and( negTrace, vec_or( YgtX, ZgtX ) );
-    largestZorX = vec_andc( negTrace, vec_andc( YgtX, ZgtY ) );
-    
-    zz = vec_sel( zz, negatef4(zz), largestXorY );
-    xy = vec_sel( xy, negatef4(xy), largestXorY );
-    xx = vec_sel( xx, negatef4(xx), largestYorZ );
-    yz = vec_sel( yz, negatef4(yz), largestYorZ );
-    yy = vec_sel( yy, negatef4(yy), largestZorX );
-    zx = vec_sel( zx, negatef4(zx), largestZorX );
-
-    radicand = vec_add( vec_add( vec_add( xx, yy ), zz ), ((vec_float4){1.0f,1.0f,1.0f,1.0f}) );
-    scale = vec_madd( ((vec_float4){0.5f,0.5f,0.5f,0.5f}), divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( radicand ) ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-
-    tmpx = vec_madd( vec_sub( zy, yz ), scale, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    tmpy = vec_madd( vec_sub( xz, zx ), scale, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    tmpz = vec_madd( vec_sub( yx, xy ), scale, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    tmpw = vec_madd( radicand, scale, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    qx = tmpx;
-    qy = tmpy;
-    qz = tmpz;
-    qw = tmpw;
-
-    qx = vec_sel( qx, tmpw, largestXorY );
-    qy = vec_sel( qy, tmpz, largestXorY );
-    qz = vec_sel( qz, tmpy, largestXorY );
-    qw = vec_sel( qw, tmpx, largestXorY );
-    tmpx = qx;
-    tmpz = qz;
-    qx = vec_sel( qx, qy, largestYorZ );
-    qy = vec_sel( qy, tmpx, largestYorZ );
-    qz = vec_sel( qz, qw, largestYorZ );
-    qw = vec_sel( qw, tmpz, largestYorZ );
-
-    result->x = qx;
-    result->y = qy;
-    result->z = qz;
-    result->w = qw;
-}
-
-static inline void vmathSoaV3Outer( VmathSoaMatrix3 *result, const VmathSoaVector3 *tfrm0, const VmathSoaVector3 *tfrm1 )
-{
-    vmathSoaV3ScalarMul( &result->col0, tfrm0, vmathSoaV3GetX( tfrm1 ) );
-    vmathSoaV3ScalarMul( &result->col1, tfrm0, vmathSoaV3GetY( tfrm1 ) );
-    vmathSoaV3ScalarMul( &result->col2, tfrm0, vmathSoaV3GetZ( tfrm1 ) );
-}
-
-static inline void vmathSoaV4Outer( VmathSoaMatrix4 *result, const VmathSoaVector4 *tfrm0, const VmathSoaVector4 *tfrm1 )
-{
-    vmathSoaV4ScalarMul( &result->col0, tfrm0, vmathSoaV4GetX( tfrm1 ) );
-    vmathSoaV4ScalarMul( &result->col1, tfrm0, vmathSoaV4GetY( tfrm1 ) );
-    vmathSoaV4ScalarMul( &result->col2, tfrm0, vmathSoaV4GetZ( tfrm1 ) );
-    vmathSoaV4ScalarMul( &result->col3, tfrm0, vmathSoaV4GetW( tfrm1 ) );
-}
-
-static inline void vmathSoaV3RowMul( VmathSoaVector3 *result, const VmathSoaVector3 *vec, const VmathSoaMatrix3 *mat )
-{
-    vec_float4 tmpX, tmpY, tmpZ;
-    tmpX = vec_add( vec_add( vec_madd( vec->x, mat->col0.x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( vec->y, mat->col0.y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( vec->z, mat->col0.z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmpY = vec_add( vec_add( vec_madd( vec->x, mat->col1.x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( vec->y, mat->col1.y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( vec->z, mat->col1.z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmpZ = vec_add( vec_add( vec_madd( vec->x, mat->col2.x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( vec->y, mat->col2.y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( vec->z, mat->col2.z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    vmathSoaV3MakeFromElems( result, tmpX, tmpY, tmpZ );
-}
-
-static inline void vmathSoaV3CrossMatrix( VmathSoaMatrix3 *result, const VmathSoaVector3 *vec )
-{
-    vmathSoaV3MakeFromElems( &result->col0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), vec->z, negatef4( vec->y ) );
-    vmathSoaV3MakeFromElems( &result->col1, negatef4( vec->z ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), vec->x );
-    vmathSoaV3MakeFromElems( &result->col2, vec->y, negatef4( vec->x ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-static inline void vmathSoaV3CrossMatrixMul( VmathSoaMatrix3 *result, const VmathSoaVector3 *vec, const VmathSoaMatrix3 *mat )
-{
-    VmathSoaVector3 tmpV3_0, tmpV3_1, tmpV3_2;
-    vmathSoaV3Cross( &tmpV3_0, vec, &mat->col0 );
-    vmathSoaV3Cross( &tmpV3_1, vec, &mat->col1 );
-    vmathSoaV3Cross( &tmpV3_2, vec, &mat->col2 );
-    vmathSoaM3MakeFromCols( result, &tmpV3_0, &tmpV3_1, &tmpV3_2 );
-}
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_MAT_SOA_C_H
+#define _VECTORMATH_MAT_SOA_C_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*-----------------------------------------------------------------------------
+ * Constants
+ */
+#define _VECTORMATH_PI_OVER_2 1.570796327f
+
+/*-----------------------------------------------------------------------------
+ * Definitions
+ */
+/* Copies mat into result column by column (col0, col1, col2) using vmathSoaV3Copy. */
+static inline void vmathSoaM3Copy(VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat) {
+    vmathSoaV3Copy(&result->col0, &mat->col0);
+    vmathSoaV3Copy(&result->col1, &mat->col1);
+    vmathSoaV3Copy(&result->col2, &mat->col2);
+}
+
+/* Initialises each of the three columns of result from the same scalar argument. */
+static inline void vmathSoaM3MakeFromScalar(VmathSoaMatrix3 *result, vec_float4 scalar) {
+    vmathSoaV3MakeFromScalar(&result->col0, scalar);
+    vmathSoaV3MakeFromScalar(&result->col1, scalar);
+    vmathSoaV3MakeFromScalar(&result->col2, scalar);
+}
+
+/* Builds the three columns of result from the x, y, z, w components of unitQuat. */
+static inline void vmathSoaM3MakeFromQ(VmathSoaMatrix3 *result, const VmathSoaQuat *unitQuat) {
+    /* Vectors with all four lanes set to 0.0f and 1.0f. */
+    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vec_float4 one = ((vec_float4){1.0f,1.0f,1.0f,1.0f});
+    vec_float4 x = unitQuat->x, y = unitQuat->y, z = unitQuat->z, w = unitQuat->w;
+    /* Doubled components, so every product below already carries the factor of two. */
+    vec_float4 x2 = vec_add(x, x), y2 = vec_add(y, y), z2 = vec_add(z, z);
+    /* twoAB = 2*a*b, formed as a multiply-add with a zero addend. */
+    vec_float4 twoXX = vec_madd(x, x2, zero);
+    vec_float4 twoXY = vec_madd(x, y2, zero);
+    vec_float4 twoXZ = vec_madd(x, z2, zero);
+    vec_float4 twoXW = vec_madd(w, x2, zero);
+    vec_float4 twoYY = vec_madd(y, y2, zero);
+    vec_float4 twoYZ = vec_madd(y, z2, zero);
+    vec_float4 twoYW = vec_madd(w, y2, zero);
+    vec_float4 twoZZ = vec_madd(z, z2, zero);
+    vec_float4 twoZW = vec_madd(w, z2, zero);
+    /* Each column is (x, y, z); diagonal entries are 1 minus two of the squared terms. */
+    vmathSoaV3MakeFromElems(&result->col0, vec_sub(vec_sub(one, twoYY), twoZZ), vec_add(twoXY, twoZW), vec_sub(twoXZ, twoYW));
+    vmathSoaV3MakeFromElems(&result->col1, vec_sub(twoXY, twoZW), vec_sub(vec_sub(one, twoXX), twoZZ), vec_add(twoYZ, twoXW));
+    vmathSoaV3MakeFromElems(&result->col2, vec_add(twoXZ, twoYW), vec_sub(twoYZ, twoXW), vec_sub(vec_sub(one, twoXX), twoYY));
+}
+
+/* Assembles result from three caller-supplied column vectors, copied in with vmathSoaV3Copy. */
+static inline void vmathSoaM3MakeFromCols(VmathSoaMatrix3 *result, const VmathSoaVector3 *_col0, const VmathSoaVector3 *_col1, const VmathSoaVector3 *_col2) {
+    vmathSoaV3Copy(&result->col0, _col0);
+    vmathSoaV3Copy(&result->col1, _col1);
+    vmathSoaV3Copy(&result->col2, _col2);
+}
+
+/* Fills each column of result from the same-numbered column of the AoS matrix mat via vmathSoaV3MakeFromAos. */
+static inline void vmathSoaM3MakeFromAos(VmathSoaMatrix3 *result, const VmathMatrix3 *mat) {
+    vmathSoaV3MakeFromAos(&result->col0, &mat->col0);
+    vmathSoaV3MakeFromAos(&result->col1, &mat->col1);
+    vmathSoaV3MakeFromAos(&result->col2, &mat->col2);
+}
+
+/* Fills each column of result from the same-numbered columns of four AoS matrices via vmathSoaV3MakeFrom4Aos. */
+static inline void vmathSoaM3MakeFrom4Aos(VmathSoaMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1, const VmathMatrix3 *mat2, const VmathMatrix3 *mat3) {
+    vmathSoaV3MakeFrom4Aos(&result->col0, &mat0->col0, &mat1->col0, &mat2->col0, &mat3->col0);
+    vmathSoaV3MakeFrom4Aos(&result->col1, &mat0->col1, &mat1->col1, &mat2->col1, &mat3->col1);
+    vmathSoaV3MakeFrom4Aos(&result->col2, &mat0->col2, &mat1->col2, &mat2->col2, &mat3->col2);
+}
+
+/* Writes each column of the SoA matrix mat to the same-numbered column of four AoS results via vmathSoaV3Get4Aos. */
+static inline void vmathSoaM3Get4Aos(const VmathSoaMatrix3 *mat, VmathMatrix3 *result0, VmathMatrix3 *result1, VmathMatrix3 *result2, VmathMatrix3 *result3) {
+    vmathSoaV3Get4Aos(&mat->col0, &result0->col0, &result1->col0, &result2->col0, &result3->col0);
+    vmathSoaV3Get4Aos(&mat->col1, &result0->col1, &result1->col1, &result2->col1, &result3->col1);
+    vmathSoaV3Get4Aos(&mat->col2, &result0->col2, &result1->col2, &result2->col2, &result3->col2);
+}
+
+/* Overwrites column 0 of result with _col0; the other columns are not touched. */
+static inline void vmathSoaM3SetCol0(VmathSoaMatrix3 *result, const VmathSoaVector3 *_col0) {
+    vmathSoaV3Copy(&result->col0, _col0);
+}
+
+/* Overwrites column 1 of result with _col1; the other columns are not touched. */
+static inline void vmathSoaM3SetCol1(VmathSoaMatrix3 *result, const VmathSoaVector3 *_col1) {
+    vmathSoaV3Copy(&result->col1, _col1);
+}
+
+/* Overwrites column 2 of result with _col2; the other columns are not touched. */
+static inline void vmathSoaM3SetCol2(VmathSoaMatrix3 *result, const VmathSoaVector3 *_col2) {
+    vmathSoaV3Copy(&result->col2, _col2);
+}
+
+/* Overwrites column `col` of result, addressed as an offset from col0; col is not range-checked here. */
+static inline void vmathSoaM3SetCol(VmathSoaMatrix3 *result, int col, const VmathSoaVector3 *vec) {
+    vmathSoaV3Copy(&(&result->col0)[col], vec);
+}
+
+/* Stores elements 0, 1 and 2 of vec into position `row` of col0, col1 and col2 respectively. */
+static inline void vmathSoaM3SetRow(VmathSoaMatrix3 *result, int row, const VmathSoaVector3 *vec) {
+    vmathSoaV3SetElem(&result->col0, row, vmathSoaV3GetElem(vec, 0));
+    vmathSoaV3SetElem(&result->col1, row, vmathSoaV3GetElem(vec, 1));
+    vmathSoaV3SetElem(&result->col2, row, vmathSoaV3GetElem(vec, 2));
+}
+
+/* Replaces the element at (col, row): fetch the column, update one element, store the column back. */
+static inline void vmathSoaM3SetElem(VmathSoaMatrix3 *result, int col, int row, vec_float4 val) {
+    VmathSoaVector3 column;
+    vmathSoaM3GetCol(&column, result, col);
+    vmathSoaV3SetElem(&column, row, val);
+    vmathSoaM3SetCol(result, col, &column);
+}
+
+/* Returns the element at (col, row) by copying out the column and reading element `row` from the copy. */
+static inline vec_float4 vmathSoaM3GetElem(const VmathSoaMatrix3 *mat, int col, int row) {
+    VmathSoaVector3 column;
+    vmathSoaM3GetCol(&column, mat, col);
+    return vmathSoaV3GetElem(&column, row);
+}
+
+/* Copies column 0 of mat into result. */
+static inline void vmathSoaM3GetCol0(VmathSoaVector3 *result, const VmathSoaMatrix3 *mat) {
+    vmathSoaV3Copy(result, &mat->col0);
+}
+
+/* Copies column 1 of mat into result. */
+static inline void vmathSoaM3GetCol1(VmathSoaVector3 *result, const VmathSoaMatrix3 *mat) {
+    vmathSoaV3Copy(result, &mat->col1);
+}
+
+/* Copies column 2 of mat into result. */
+static inline void vmathSoaM3GetCol2(VmathSoaVector3 *result, const VmathSoaMatrix3 *mat) {
+    vmathSoaV3Copy(result, &mat->col2);
+}
+
+/* Copies column `col` of mat, addressed as an offset from col0, into result; col is not range-checked here. */
+static inline void vmathSoaM3GetCol(VmathSoaVector3 *result, const VmathSoaMatrix3 *mat, int col) {
+    vmathSoaV3Copy(result, &(&mat->col0)[col]);
+}
+
+/* Gathers element `row` of col0, col1 and col2 (in that order) into result. */
+static inline void vmathSoaM3GetRow(VmathSoaVector3 *result, const VmathSoaMatrix3 *mat, int row) {
+    vmathSoaV3MakeFromElems(result, vmathSoaV3GetElem(&mat->col0, row), vmathSoaV3GetElem(&mat->col1, row), vmathSoaV3GetElem(&mat->col2, row));
+}
+
+/* Writes the transpose of mat into result: new column i collects component i (x, y or z) of every source column. */
+static inline void vmathSoaM3Transpose(VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat) {
+    VmathSoaMatrix3 transposed;   /* filled first; result is written only after all reads of mat */
+    vmathSoaV3MakeFromElems(&transposed.col0, mat->col0.x, mat->col1.x, mat->col2.x);
+    vmathSoaV3MakeFromElems(&transposed.col1, mat->col0.y, mat->col1.y, mat->col2.y);
+    vmathSoaV3MakeFromElems(&transposed.col2, mat->col0.z, mat->col1.z, mat->col2.z);
+    vmathSoaM3Copy(result, &transposed);
+}
+
+/* Inverse from column cross products scaled by 1/det, det = dot(col2, cross(col0, col1)); det == 0 is not checked. */
+static inline void vmathSoaM3Inverse(VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat) {
+    VmathSoaVector3 cross12, cross20, cross01;
+    vec_float4 invDet, zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vmathSoaV3Cross(&cross12, &mat->col1, &mat->col2);
+    vmathSoaV3Cross(&cross20, &mat->col2, &mat->col0);
+    vmathSoaV3Cross(&cross01, &mat->col0, &mat->col1);
+    invDet = divf4(((vec_float4){1.0f,1.0f,1.0f,1.0f}), vmathSoaV3Dot(&mat->col2, &cross01));
+    vmathSoaV3MakeFromElems(&result->col0, vec_madd(cross12.x, invDet, zero), vec_madd(cross20.x, invDet, zero), vec_madd(cross01.x, invDet, zero));
+    vmathSoaV3MakeFromElems(&result->col1, vec_madd(cross12.y, invDet, zero), vec_madd(cross20.y, invDet, zero), vec_madd(cross01.y, invDet, zero));
+    vmathSoaV3MakeFromElems(&result->col2, vec_madd(cross12.z, invDet, zero), vec_madd(cross20.z, invDet, zero), vec_madd(cross01.z, invDet, zero));
+}
+
+/* Returns dot(col2, cross(col0, col1)) of mat. */
+static inline vec_float4 vmathSoaM3Determinant(const VmathSoaMatrix3 *mat) {
+    VmathSoaVector3 cross01;
+    vmathSoaV3Cross(&cross01, &mat->col0, &mat->col1);
+    return vmathSoaV3Dot(&mat->col2, &cross01);
+}
+
+/* Applies vmathSoaV3Add to each pair of matching columns of mat0 and mat1, storing into result. */
+static inline void vmathSoaM3Add(VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1) {
+    vmathSoaV3Add(&result->col0, &mat0->col0, &mat1->col0);
+    vmathSoaV3Add(&result->col1, &mat0->col1, &mat1->col1);
+    vmathSoaV3Add(&result->col2, &mat0->col2, &mat1->col2);
+}
+
+/* Applies vmathSoaV3Sub to each pair of matching columns of mat0 and mat1, storing into result. */
+static inline void vmathSoaM3Sub(VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1) {
+    vmathSoaV3Sub(&result->col0, &mat0->col0, &mat1->col0);
+    vmathSoaV3Sub(&result->col1, &mat0->col1, &mat1->col1);
+    vmathSoaV3Sub(&result->col2, &mat0->col2, &mat1->col2);
+}
+
+/* Applies vmathSoaV3Neg to each column of mat, storing into the matching column of result. */
+static inline void vmathSoaM3Neg(VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat) {
+    vmathSoaV3Neg(&result->col0, &mat->col0);
+    vmathSoaV3Neg(&result->col1, &mat->col1);
+    vmathSoaV3Neg(&result->col2, &mat->col2);
+}
+
+/* Applies vmathSoaV3AbsPerElem to each column of mat, storing into the matching column of result. */
+static inline void vmathSoaM3AbsPerElem(VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat) {
+    vmathSoaV3AbsPerElem(&result->col0, &mat->col0);
+    vmathSoaV3AbsPerElem(&result->col1, &mat->col1);
+    vmathSoaV3AbsPerElem(&result->col2, &mat->col2);
+}
+
+/* Applies vmathSoaV3ScalarMul with the same scalar to each column of mat, storing into result. */
+static inline void vmathSoaM3ScalarMul(VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat, vec_float4 scalar) {
+    vmathSoaV3ScalarMul(&result->col0, &mat->col0, scalar);
+    vmathSoaV3ScalarMul(&result->col1, &mat->col1, scalar);
+    vmathSoaV3ScalarMul(&result->col2, &mat->col2, scalar);
+}
+
+/* Matrix * vector: each output component sums the products of one row of mat with (vec->x, vec->y, vec->z). */
+static inline void vmathSoaM3MulV3(VmathSoaVector3 *result, const VmathSoaMatrix3 *mat, const VmathSoaVector3 *vec) {
+    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vec_float4 outX = vec_add(vec_add(vec_madd(mat->col0.x, vec->x, zero), vec_madd(mat->col1.x, vec->y, zero)), vec_madd(mat->col2.x, vec->z, zero));
+    vec_float4 outY = vec_add(vec_add(vec_madd(mat->col0.y, vec->x, zero), vec_madd(mat->col1.y, vec->y, zero)), vec_madd(mat->col2.y, vec->z, zero));
+    vec_float4 outZ = vec_add(vec_add(vec_madd(mat->col0.z, vec->x, zero), vec_madd(mat->col1.z, vec->y, zero)), vec_madd(mat->col2.z, vec->z, zero));
+    vmathSoaV3MakeFromElems(result, outX, outY, outZ);
+}
+
+/*
+ * Matrix * matrix product: column k of the product is vmathSoaM3MulV3 of
+ * *mat0 with column k of *mat1.  The product is assembled in a local and
+ * copied out at the end, so *result is not written while inputs are being read.
+ */
+static inline void vmathSoaM3Mul( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1 )
+{
+    VmathSoaMatrix3 product;
+
+    vmathSoaM3MulV3( &product.col0, mat0, &mat1->col0 );
+    vmathSoaM3MulV3( &product.col1, mat0, &mat1->col1 );
+    vmathSoaM3MulV3( &product.col2, mat0, &mat1->col2 );
+
+    vmathSoaM3Copy( result, &product );
+}
+
+/*
+ * Fills *result by running vmathSoaV3MulPerElem on each pair of matching
+ * columns of *mat0 and *mat1.
+ */
+static inline void vmathSoaM3MulPerElem( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1 )
+{
+    VmathSoaVector3 *const prod0 = &result->col0;
+    VmathSoaVector3 *const prod1 = &result->col1;
+    VmathSoaVector3 *const prod2 = &result->col2;
+
+    vmathSoaV3MulPerElem( prod0, &mat0->col0, &mat1->col0 );
+    vmathSoaV3MulPerElem( prod1, &mat0->col1, &mat1->col1 );
+    vmathSoaV3MulPerElem( prod2, &mat0->col2, &mat1->col2 );
+}
+
+/*
+ * Sets col0, col1 and col2 of *result through vmathSoaV3MakeXAxis,
+ * vmathSoaV3MakeYAxis and vmathSoaV3MakeZAxis respectively.
+ */
+static inline void vmathSoaM3MakeIdentity( VmathSoaMatrix3 *result )
+{
+    VmathSoaVector3 *const xCol = &result->col0;
+    VmathSoaVector3 *const yCol = &result->col1;
+    VmathSoaVector3 *const zCol = &result->col2;
+
+    vmathSoaV3MakeXAxis( xCol );
+    vmathSoaV3MakeYAxis( yCol );
+    vmathSoaV3MakeZAxis( zCol );
+}
+
+/*
+ * Builds a rotation about the X axis from per-slot angles.
+ * With s, c obtained from sincosf4( radians ):
+ *   col0 = X axis, col1 = ( 0, c, s ), col2 = ( 0, -s, c ).
+ */
+static inline void vmathSoaM3MakeRotationX( VmathSoaMatrix3 *result, vec_float4 radians )
+{
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vec_float4 sinA, cosA;
+
+    sincosf4( radians, &sinA, &cosA );
+
+    vmathSoaV3MakeXAxis( &result->col0 );
+    vmathSoaV3MakeFromElems( &result->col1, zero, cosA, sinA );
+    vmathSoaV3MakeFromElems( &result->col2, zero, negatef4( sinA ), cosA );
+}
+
+/*
+ * Builds a rotation about the Y axis from per-slot angles.
+ * With s, c obtained from sincosf4( radians ):
+ *   col0 = ( c, 0, -s ), col1 = Y axis, col2 = ( s, 0, c ).
+ */
+static inline void vmathSoaM3MakeRotationY( VmathSoaMatrix3 *result, vec_float4 radians )
+{
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vec_float4 sinA, cosA;
+
+    sincosf4( radians, &sinA, &cosA );
+
+    vmathSoaV3MakeFromElems( &result->col0, cosA, zero, negatef4( sinA ) );
+    vmathSoaV3MakeYAxis( &result->col1 );
+    vmathSoaV3MakeFromElems( &result->col2, sinA, zero, cosA );
+}
+
+/*
+ * Builds a rotation about the Z axis from per-slot angles.
+ * With s, c obtained from sincosf4( radians ):
+ *   col0 = ( c, s, 0 ), col1 = ( -s, c, 0 ), col2 = Z axis.
+ */
+static inline void vmathSoaM3MakeRotationZ( VmathSoaMatrix3 *result, vec_float4 radians )
+{
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vec_float4 sinA, cosA;
+
+    sincosf4( radians, &sinA, &cosA );
+
+    vmathSoaV3MakeFromElems( &result->col0, cosA, sinA, zero );
+    vmathSoaV3MakeFromElems( &result->col1, negatef4( sinA ), cosA, zero );
+    vmathSoaV3MakeZAxis( &result->col2 );
+}
+
+/*
+ * Builds a rotation matrix from the three angles in *radiansXYZ.
+ * sin/cos of each angle come from sincosf4; with those named sX,cX,sY,cY,sZ,cZ:
+ *   col0 = ( cZ*cY,            sZ*cY,            -sY   )
+ *   col1 = ( cZ*sY*sX - sZ*cX, sZ*sY*sX + cZ*cX, cY*sX )
+ *   col2 = ( cZ*sY*cX + sZ*sX, sZ*sY*cX - cZ*sX, cY*cX )
+ * Every product is a vec_madd against a zero addend.
+ */
+static inline void vmathSoaM3MakeRotationZYX( VmathSoaMatrix3 *result, const VmathSoaVector3 *radiansXYZ )
+{
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vec_float4 sX, cX, sY, cY, sZ, cZ;
+    vec_float4 cZsY, sZsY;
+    vec_float4 c1x, c1y, c1z;
+    vec_float4 c2x, c2y, c2z;
+
+    sincosf4( radiansXYZ->x, &sX, &cX );
+    sincosf4( radiansXYZ->y, &sY, &cY );
+    sincosf4( radiansXYZ->z, &sZ, &cZ );
+
+    /* Products shared by the second and third columns. */
+    cZsY = vec_madd( cZ, sY, zero );
+    sZsY = vec_madd( sZ, sY, zero );
+
+    vmathSoaV3MakeFromElems( &result->col0, vec_madd( cZ, cY, zero ), vec_madd( sZ, cY, zero ), negatef4( sY ) );
+
+    c1x = vec_sub( vec_madd( cZsY, sX, zero ), vec_madd( sZ, cX, zero ) );
+    c1y = vec_add( vec_madd( sZsY, sX, zero ), vec_madd( cZ, cX, zero ) );
+    c1z = vec_madd( cY, sX, zero );
+    vmathSoaV3MakeFromElems( &result->col1, c1x, c1y, c1z );
+
+    c2x = vec_add( vec_madd( cZsY, cX, zero ), vec_madd( sZ, sX, zero ) );
+    c2y = vec_sub( vec_madd( sZsY, cX, zero ), vec_madd( cZ, sX, zero ) );
+    c2z = vec_madd( cY, cX, zero );
+    vmathSoaV3MakeFromElems( &result->col2, c2x, c2y, c2z );
+}
+
+/*
+ * Builds a rotation matrix from per-slot angles and an axis *unitVec.
+ * With s, c from sincosf4( radians ), t = 1 - c and (x, y, z) the axis:
+ *   col0 = ( x*x*t + c,   x*y*t + z*s, z*x*t - y*s )
+ *   col1 = ( x*y*t - z*s, y*y*t + c,   y*z*t + x*s )
+ *   col2 = ( z*x*t + y*s, y*z*t - x*s, z*z*t + c   )
+ * The parameter name suggests the axis is expected to be unit length; this
+ * function does not normalize it.
+ */
+static inline void vmathSoaM3MakeRotationAxis( VmathSoaMatrix3 *result, vec_float4 radians, const VmathSoaVector3 *unitVec )
+{
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    const vec_float4 one = ((vec_float4){1.0f,1.0f,1.0f,1.0f});
+    vec_float4 ax, ay, az, sinA, cosA, t;
+    vec_float4 xyT, yzT, zxT;
+    vec_float4 xS, yS, zS;
+    vec_float4 diagX, diagY, diagZ;
+
+    sincosf4( radians, &sinA, &cosA );
+    ax = unitVec->x;
+    ay = unitVec->y;
+    az = unitVec->z;
+    t = vec_sub( one, cosA );
+
+    /* Off-diagonal building blocks: (a*b)*t and a*s. */
+    xyT = vec_madd( vec_madd( ax, ay, zero ), t, zero );
+    yzT = vec_madd( vec_madd( ay, az, zero ), t, zero );
+    zxT = vec_madd( vec_madd( az, ax, zero ), t, zero );
+    xS = vec_madd( ax, sinA, zero );
+    yS = vec_madd( ay, sinA, zero );
+    zS = vec_madd( az, sinA, zero );
+
+    /* Diagonal entries: (a*a)*t + c. */
+    diagX = vec_add( vec_madd( vec_madd( ax, ax, zero ), t, zero ), cosA );
+    diagY = vec_add( vec_madd( vec_madd( ay, ay, zero ), t, zero ), cosA );
+    diagZ = vec_add( vec_madd( vec_madd( az, az, zero ), t, zero ), cosA );
+
+    vmathSoaV3MakeFromElems( &result->col0, diagX, vec_add( xyT, zS ), vec_sub( zxT, yS ) );
+    vmathSoaV3MakeFromElems( &result->col1, vec_sub( xyT, zS ), diagY, vec_add( yzT, xS ) );
+    vmathSoaV3MakeFromElems( &result->col2, vec_add( zxT, yS ), vec_sub( yzT, xS ), diagZ );
+}
+
+/*
+ * Builds a 3x3 matrix from a quaternion by forwarding both arguments
+ * unchanged to vmathSoaM3MakeFromQ.
+ */
+static inline void vmathSoaM3MakeRotationQ( VmathSoaMatrix3 *result, const VmathSoaQuat *unitQuat )
+{
+    /* Pure forwarding: nothing is done here beyond this call. */
+    vmathSoaM3MakeFromQ( result, unitQuat );
+}
+
+/*
+ * Builds a matrix whose columns are ( sx, 0, 0 ), ( 0, sy, 0 ) and ( 0, 0, sz ),
+ * where sx, sy, sz are the x, y, z members of *scaleVec.
+ */
+static inline void vmathSoaM3MakeScale( VmathSoaMatrix3 *result, const VmathSoaVector3 *scaleVec )
+{
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+
+    vmathSoaV3MakeFromElems( &result->col0, scaleVec->x, zero, zero );
+    vmathSoaV3MakeFromElems( &result->col1, zero, scaleVec->y, zero );
+    vmathSoaV3MakeFromElems( &result->col2, zero, zero, scaleVec->z );
+}
+
+/*
+ * Scales the columns of *mat: col0 by the X of *scaleVec, col1 by its Y and
+ * col2 by its Z, each through vmathSoaV3ScalarMul.
+ * Each scale component is fetched immediately before the column that uses it.
+ */
+static inline void vmathSoaM3AppendScale( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat, const VmathSoaVector3 *scaleVec )
+{
+    VmathSoaVector3 *const out0 = &result->col0;
+    VmathSoaVector3 *const out1 = &result->col1;
+    VmathSoaVector3 *const out2 = &result->col2;
+
+    vmathSoaV3ScalarMul( out0, &mat->col0, vmathSoaV3GetX( scaleVec ) );
+    vmathSoaV3ScalarMul( out1, &mat->col1, vmathSoaV3GetY( scaleVec ) );
+    vmathSoaV3ScalarMul( out2, &mat->col2, vmathSoaV3GetZ( scaleVec ) );
+}
+
+/*
+ * Combines every column of *mat with *scaleVec through vmathSoaV3MulPerElem,
+ * storing each outcome in the matching column of *result.
+ */
+static inline void vmathSoaM3PrependScale( VmathSoaMatrix3 *result, const VmathSoaVector3 *scaleVec, const VmathSoaMatrix3 *mat )
+{
+    VmathSoaVector3 *const out0 = &result->col0;
+    VmathSoaVector3 *const out1 = &result->col1;
+    VmathSoaVector3 *const out2 = &result->col2;
+
+    vmathSoaV3MulPerElem( out0, &mat->col0, scaleVec );
+    vmathSoaV3MulPerElem( out1, &mat->col1, scaleVec );
+    vmathSoaV3MulPerElem( out2, &mat->col2, scaleVec );
+}
+
+/*
+ * Runs vmathSoaV3Select on each pair of matching columns of *mat0 and *mat1,
+ * using the same select1 mask for all three columns.
+ */
+static inline void vmathSoaM3Select( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1, vec_uint4 select1 )
+{
+    VmathSoaVector3 *const pick0 = &result->col0;
+    VmathSoaVector3 *const pick1 = &result->col1;
+    VmathSoaVector3 *const pick2 = &result->col2;
+
+    vmathSoaV3Select( pick0, &mat0->col0, &mat1->col0, select1 );
+    vmathSoaV3Select( pick1, &mat0->col1, &mat1->col1, select1 );
+    vmathSoaV3Select( pick2, &mat0->col2, &mat1->col2, select1 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Debug helper: converts the SoA matrix into four AoS matrices with
+ * vmathSoaM3Get4Aos and prints each one with vmathM3Print under a
+ * "slot N:" heading.
+ */
+static inline void vmathSoaM3Print( const VmathSoaMatrix3 *mat )
+{
+    VmathMatrix3 slot0, slot1, slot2, slot3;
+
+    vmathSoaM3Get4Aos( mat, &slot0, &slot1, &slot2, &slot3 );
+
+    printf("slot 0:\n");
+    vmathM3Print( &slot0 );
+
+    printf("slot 1:\n");
+    vmathM3Print( &slot1 );
+
+    printf("slot 2:\n");
+    vmathM3Print( &slot2 );
+
+    printf("slot 3:\n");
+    vmathM3Print( &slot3 );
+}
+
+/*
+ * Debug helper: prints name followed by a colon and newline, then prints the
+ * matrix with vmathSoaM3Print.
+ */
+static inline void vmathSoaM3Prints( const VmathSoaMatrix3 *mat, const char *name )
+{
+    /* Label line first, matrix contents after. */
+    printf("%s:\n", name);
+
+    vmathSoaM3Print( mat );
+}
+
+#endif
+
+/*
+ * Copies col0..col3 of *mat into *result, one vmathSoaV4Copy call per column.
+ */
+static inline void vmathSoaM4Copy( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat )
+{
+    const VmathSoaVector4 *const src[4] = { &mat->col0, &mat->col1, &mat->col2, &mat->col3 };
+    VmathSoaVector4 *const dst[4] = { &result->col0, &result->col1, &result->col2, &result->col3 };
+    int i;
+
+    for ( i = 0; i < 4; ++i ) {
+        vmathSoaV4Copy( dst[i], src[i] );
+    }
+}
+
+/*
+ * Initializes each of the four columns of *result with
+ * vmathSoaV4MakeFromScalar, passing the same scalar every time.
+ */
+static inline void vmathSoaM4MakeFromScalar( VmathSoaMatrix4 *result, vec_float4 scalar )
+{
+    VmathSoaVector4 *const dst[4] = { &result->col0, &result->col1, &result->col2, &result->col3 };
+    int i;
+
+    for ( i = 0; i < 4; ++i ) {
+        vmathSoaV4MakeFromScalar( dst[i], scalar );
+    }
+}
+
+/*
+ * Builds a 4x4 matrix from a 3x4 transform.  Each transform column is
+ * extended through vmathSoaV4MakeFromV3Scalar: col0..col2 are paired with an
+ * all-zero scalar and col3 with an all-one scalar.
+ */
+static inline void vmathSoaM4MakeFromT3( VmathSoaMatrix4 *result, const VmathSoaTransform3 *mat )
+{
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    const vec_float4 one = ((vec_float4){1.0f,1.0f,1.0f,1.0f});
+
+    vmathSoaV4MakeFromV3Scalar( &result->col0, &mat->col0, zero );
+    vmathSoaV4MakeFromV3Scalar( &result->col1, &mat->col1, zero );
+    vmathSoaV4MakeFromV3Scalar( &result->col2, &mat->col2, zero );
+    vmathSoaV4MakeFromV3Scalar( &result->col3, &mat->col3, one );
+}
+
+/*
+ * Assembles *result from four column vectors: _colN is copied into colN
+ * with vmathSoaV4Copy, for N = 0..3 in that order.
+ */
+static inline void vmathSoaM4MakeFromCols( VmathSoaMatrix4 *result, const VmathSoaVector4 *_col0, const VmathSoaVector4 *_col1, const VmathSoaVector4 *_col2, const VmathSoaVector4 *_col3 )
+{
+    const VmathSoaVector4 *const src[4] = { _col0, _col1, _col2, _col3 };
+    VmathSoaVector4 *const dst[4] = { &result->col0, &result->col1, &result->col2, &result->col3 };
+    int i;
+
+    for ( i = 0; i < 4; ++i ) {
+        vmathSoaV4Copy( dst[i], src[i] );
+    }
+}
+
+/*
+ * Builds a 4x4 matrix from a 3x3 matrix and a translation vector.
+ * The three columns of *mat are extended with an all-zero scalar and
+ * *translateVec with an all-one scalar, each via vmathSoaV4MakeFromV3Scalar.
+ */
+static inline void vmathSoaM4MakeFromM3V3( VmathSoaMatrix4 *result, const VmathSoaMatrix3 *mat, const VmathSoaVector3 *translateVec )
+{
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    const vec_float4 one = ((vec_float4){1.0f,1.0f,1.0f,1.0f});
+
+    vmathSoaV4MakeFromV3Scalar( &result->col0, &mat->col0, zero );
+    vmathSoaV4MakeFromV3Scalar( &result->col1, &mat->col1, zero );
+    vmathSoaV4MakeFromV3Scalar( &result->col2, &mat->col2, zero );
+    vmathSoaV4MakeFromV3Scalar( &result->col3, translateVec, one );
+}
+
+/*
+ * Builds a 4x4 matrix from a quaternion and a translation vector.
+ * The quaternion is first turned into a 3x3 matrix with vmathSoaM3MakeFromQ;
+ * its columns are then extended with an all-zero scalar and *translateVec
+ * with an all-one scalar, each via vmathSoaV4MakeFromV3Scalar.
+ */
+static inline void vmathSoaM4MakeFromQV3( VmathSoaMatrix4 *result, const VmathSoaQuat *unitQuat, const VmathSoaVector3 *translateVec )
+{
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    const vec_float4 one = ((vec_float4){1.0f,1.0f,1.0f,1.0f});
+    VmathSoaMatrix3 rot;
+
+    vmathSoaM3MakeFromQ( &rot, unitQuat );
+
+    vmathSoaV4MakeFromV3Scalar( &result->col0, &rot.col0, zero );
+    vmathSoaV4MakeFromV3Scalar( &result->col1, &rot.col1, zero );
+    vmathSoaV4MakeFromV3Scalar( &result->col2, &rot.col2, zero );
+    vmathSoaV4MakeFromV3Scalar( &result->col3, translateVec, one );
+}
+
+/*
+ * Builds a SoA 4x4 matrix from a single AoS matrix by passing each AoS
+ * column to vmathSoaV4MakeFromAos.
+ */
+static inline void vmathSoaM4MakeFromAos( VmathSoaMatrix4 *result, const VmathMatrix4 *mat )
+{
+    VmathSoaVector4 *const soa0 = &result->col0;
+    VmathSoaVector4 *const soa1 = &result->col1;
+    VmathSoaVector4 *const soa2 = &result->col2;
+    VmathSoaVector4 *const soa3 = &result->col3;
+
+    vmathSoaV4MakeFromAos( soa0, &mat->col0 );
+    vmathSoaV4MakeFromAos( soa1, &mat->col1 );
+    vmathSoaV4MakeFromAos( soa2, &mat->col2 );
+    vmathSoaV4MakeFromAos( soa3, &mat->col3 );
+}
+
+/*
+ * Builds a SoA 4x4 matrix from four AoS matrices.  For each column index k,
+ * column k of mat0..mat3 is handed to vmathSoaV4MakeFrom4Aos to form
+ * column k of *result.
+ */
+static inline void vmathSoaM4MakeFrom4Aos( VmathSoaMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1, const VmathMatrix4 *mat2, const VmathMatrix4 *mat3 )
+{
+    VmathSoaVector4 *const soa0 = &result->col0;
+    VmathSoaVector4 *const soa1 = &result->col1;
+    VmathSoaVector4 *const soa2 = &result->col2;
+    VmathSoaVector4 *const soa3 = &result->col3;
+
+    vmathSoaV4MakeFrom4Aos( soa0, &mat0->col0, &mat1->col0, &mat2->col0, &mat3->col0 );
+    vmathSoaV4MakeFrom4Aos( soa1, &mat0->col1, &mat1->col1, &mat2->col1, &mat3->col1 );
+    vmathSoaV4MakeFrom4Aos( soa2, &mat0->col2, &mat1->col2, &mat2->col2, &mat3->col2 );
+    vmathSoaV4MakeFrom4Aos( soa3, &mat0->col3, &mat1->col3, &mat2->col3, &mat3->col3 );
+}
+
+/*
+ * Splits a SoA 4x4 matrix into four AoS matrices.  For each column index k,
+ * vmathSoaV4Get4Aos distributes column k of *mat into column k of
+ * result0..result3.
+ */
+static inline void vmathSoaM4Get4Aos( const VmathSoaMatrix4 *mat, VmathMatrix4 *result0, VmathMatrix4 *result1, VmathMatrix4 *result2, VmathMatrix4 *result3 )
+{
+    const VmathSoaVector4 *const soa0 = &mat->col0;
+    const VmathSoaVector4 *const soa1 = &mat->col1;
+    const VmathSoaVector4 *const soa2 = &mat->col2;
+    const VmathSoaVector4 *const soa3 = &mat->col3;
+
+    vmathSoaV4Get4Aos( soa0, &result0->col0, &result1->col0, &result2->col0, &result3->col0 );
+    vmathSoaV4Get4Aos( soa1, &result0->col1, &result1->col1, &result2->col1, &result3->col1 );
+    vmathSoaV4Get4Aos( soa2, &result0->col2, &result1->col2, &result2->col2, &result3->col2 );
+    vmathSoaV4Get4Aos( soa3, &result0->col3, &result1->col3, &result2->col3, &result3->col3 );
+}
+
+/*
+ * Overwrites col0 of *result with *_col0 (via vmathSoaV4Copy).
+ */
+static inline void vmathSoaM4SetCol0( VmathSoaMatrix4 *result, const VmathSoaVector4 *_col0 )
+{
+    VmathSoaVector4 *const target = &result->col0;
+
+    vmathSoaV4Copy( target, _col0 );
+}
+
+/*
+ * Overwrites col1 of *result with *_col1 (via vmathSoaV4Copy).
+ */
+static inline void vmathSoaM4SetCol1( VmathSoaMatrix4 *result, const VmathSoaVector4 *_col1 )
+{
+    VmathSoaVector4 *const target = &result->col1;
+
+    vmathSoaV4Copy( target, _col1 );
+}
+
+/*
+ * Overwrites col2 of *result with *_col2 (via vmathSoaV4Copy).
+ */
+static inline void vmathSoaM4SetCol2( VmathSoaMatrix4 *result, const VmathSoaVector4 *_col2 )
+{
+    VmathSoaVector4 *const target = &result->col2;
+
+    vmathSoaV4Copy( target, _col2 );
+}
+
+/*
+ * Overwrites col3 of *result with *_col3 (via vmathSoaV4Copy).
+ */
+static inline void vmathSoaM4SetCol3( VmathSoaMatrix4 *result, const VmathSoaVector4 *_col3 )
+{
+    VmathSoaVector4 *const target = &result->col3;
+
+    vmathSoaV4Copy( target, _col3 );
+}
+
+/*
+ * Overwrites the column selected by col with *vec.
+ * The column is located by offsetting from the address of col0, so the
+ * columns are treated as consecutive VmathSoaVector4 objects.  col is not
+ * range-checked here.
+ */
+static inline void vmathSoaM4SetCol( VmathSoaMatrix4 *result, int col, const VmathSoaVector4 *vec )
+{
+    VmathSoaVector4 *const firstCol = &result->col0;
+
+    vmathSoaV4Copy( firstCol + col, vec );
+}
+
+/*
+ * Writes a row of the matrix: for k = 0..3, element k of *vec (read with
+ * vmathSoaV4GetElem) is stored at index row of column k (with
+ * vmathSoaV4SetElem).
+ */
+static inline void vmathSoaM4SetRow( VmathSoaMatrix4 *result, int row, const VmathSoaVector4 *vec )
+{
+    VmathSoaVector4 *const cols[4] = { &result->col0, &result->col1, &result->col2, &result->col3 };
+    int k;
+
+    for ( k = 0; k < 4; ++k ) {
+        vmathSoaV4SetElem( cols[k], row, vmathSoaV4GetElem( vec, k ) );
+    }
+}
+
+/*
+ * Stores val at (col, row) using a read-modify-write of the whole column:
+ * fetch the column with vmathSoaM4GetCol, change one element with
+ * vmathSoaV4SetElem, then write the column back with vmathSoaM4SetCol.
+ */
+static inline void vmathSoaM4SetElem( VmathSoaMatrix4 *result, int col, int row, vec_float4 val )
+{
+    VmathSoaVector4 column;
+
+    vmathSoaM4GetCol( &column, result, col );
+    vmathSoaV4SetElem( &column, row, val );
+    vmathSoaM4SetCol( result, col, &column );
+}
+
+/*
+ * Returns the element at (col, row): the column is fetched into a local with
+ * vmathSoaM4GetCol and the element is then read with vmathSoaV4GetElem.
+ */
+static inline vec_float4 vmathSoaM4GetElem( const VmathSoaMatrix4 *mat, int col, int row )
+{
+    VmathSoaVector4 column;
+    vec_float4 elem;
+
+    vmathSoaM4GetCol( &column, mat, col );
+    elem = vmathSoaV4GetElem( &column, row );
+    return elem;
+}
+
+/*
+ * Copies col0 of *mat into *result (via vmathSoaV4Copy).
+ */
+static inline void vmathSoaM4GetCol0( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat )
+{
+    const VmathSoaVector4 *const source = &mat->col0;
+
+    vmathSoaV4Copy( result, source );
+}
+
+/*
+ * Copies col1 of *mat into *result (via vmathSoaV4Copy).
+ */
+static inline void vmathSoaM4GetCol1( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat )
+{
+    const VmathSoaVector4 *const source = &mat->col1;
+
+    vmathSoaV4Copy( result, source );
+}
+
+/*
+ * Copies col2 of *mat into *result (via vmathSoaV4Copy).
+ */
+static inline void vmathSoaM4GetCol2( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat )
+{
+    const VmathSoaVector4 *const source = &mat->col2;
+
+    vmathSoaV4Copy( result, source );
+}
+
+/*
+ * Copies col3 of *mat into *result (via vmathSoaV4Copy).
+ */
+static inline void vmathSoaM4GetCol3( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat )
+{
+    const VmathSoaVector4 *const source = &mat->col3;
+
+    vmathSoaV4Copy( result, source );
+}
+
+/*
+ * Copies the column selected by col into *result.
+ * The column is located by offsetting from the address of col0, so the
+ * columns are treated as consecutive VmathSoaVector4 objects.  col is not
+ * range-checked here.
+ */
+static inline void vmathSoaM4GetCol( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, int col )
+{
+    const VmathSoaVector4 *const firstCol = &mat->col0;
+
+    vmathSoaV4Copy( result, firstCol + col );
+}
+
+/*
+ * Extracts a row: the element at index row is read from each of col0..col3
+ * with vmathSoaV4GetElem, and the four values become x, y, z, w of *result
+ * through vmathSoaV4MakeFromElems.
+ */
+static inline void vmathSoaM4GetRow( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, int row )
+{
+    const vec_float4 fromCol0 = vmathSoaV4GetElem( &mat->col0, row );
+    const vec_float4 fromCol1 = vmathSoaV4GetElem( &mat->col1, row );
+    const vec_float4 fromCol2 = vmathSoaV4GetElem( &mat->col2, row );
+    const vec_float4 fromCol3 = vmathSoaV4GetElem( &mat->col3, row );
+
+    vmathSoaV4MakeFromElems( result, fromCol0, fromCol1, fromCol2, fromCol3 );
+}
+
+/*
+ * Transposes a SoA 4x4 matrix.  Output column k gathers component k
+ * (x, y, z, w for k = 0..3) from each input column.  The transpose is built
+ * in a local and copied out at the end, so *result is not written while
+ * *mat is still being read.
+ */
+static inline void vmathSoaM4Transpose( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat )
+{
+    const VmathSoaVector4 *const c0 = &mat->col0;
+    const VmathSoaVector4 *const c1 = &mat->col1;
+    const VmathSoaVector4 *const c2 = &mat->col2;
+    const VmathSoaVector4 *const c3 = &mat->col3;
+    VmathSoaMatrix4 transposed;
+
+    vmathSoaV4MakeFromElems( &transposed.col0, c0->x, c1->x, c2->x, c3->x );
+    vmathSoaV4MakeFromElems( &transposed.col1, c0->y, c1->y, c2->y, c3->y );
+    vmathSoaV4MakeFromElems( &transposed.col2, c0->z, c1->z, c2->z, c3->z );
+    vmathSoaV4MakeFromElems( &transposed.col3, c0->w, c1->w, c2->w, c3->w );
+
+    vmathSoaM4Copy( result, &transposed );
+}
+
+/*
+ * General inverse of a SoA 4x4 matrix.
+ *
+ * Elements are named mA..mP in column order (col0 = A,B,C,D ... col3 = M,N,O,P).
+ * Four unscaled result columns res0..res3 are accumulated from 2x2
+ * sub-expressions (t0..t5), the determinant is formed from res0 and the first
+ * row (mA, mE, mI, mM), and every column is finally scaled by 1/determinant.
+ *
+ * All inputs are copied to locals before *result is written.  There is no
+ * check for a zero determinant.
+ */
+static inline void vmathSoaM4Inverse( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat )
+{
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    const vec_float4 one = ((vec_float4){1.0f,1.0f,1.0f,1.0f});
+    const vec_float4 mA = mat->col0.x, mB = mat->col0.y, mC = mat->col0.z, mD = mat->col0.w;
+    const vec_float4 mE = mat->col1.x, mF = mat->col1.y, mG = mat->col1.z, mH = mat->col1.w;
+    const vec_float4 mI = mat->col2.x, mJ = mat->col2.y, mK = mat->col2.z, mL = mat->col2.w;
+    const vec_float4 mM = mat->col3.x, mN = mat->col3.y, mO = mat->col3.z, mP = mat->col3.w;
+    VmathSoaVector4 res0, res1, res2, res3;
+    vec_float4 t0, t1, t2, t3, t4, t5, det, detInv;
+
+    /* First set of 2x2 sub-expressions. */
+    t0 = vec_sub( vec_madd( mK, mD, zero ), vec_madd( mC, mL, zero ) );
+    t1 = vec_sub( vec_madd( mO, mH, zero ), vec_madd( mG, mP, zero ) );
+    t2 = vec_sub( vec_madd( mB, mK, zero ), vec_madd( mJ, mC, zero ) );
+    t3 = vec_sub( vec_madd( mF, mO, zero ), vec_madd( mN, mG, zero ) );
+    t4 = vec_sub( vec_madd( mJ, mD, zero ), vec_madd( mB, mL, zero ) );
+    t5 = vec_sub( vec_madd( mN, mH, zero ), vec_madd( mF, mP, zero ) );
+
+    /* Unscaled first column. */
+    vmathSoaV4SetX( &res0, vec_sub( vec_sub( vec_madd( mJ, t1, zero ), vec_madd( mL, t3, zero ) ), vec_madd( mK, t5, zero ) ) );
+    vmathSoaV4SetY( &res0, vec_sub( vec_sub( vec_madd( mN, t0, zero ), vec_madd( mP, t2, zero ) ), vec_madd( mO, t4, zero ) ) );
+    vmathSoaV4SetZ( &res0, vec_sub( vec_add( vec_madd( mD, t3, zero ), vec_madd( mC, t5, zero ) ), vec_madd( mB, t1, zero ) ) );
+    vmathSoaV4SetW( &res0, vec_sub( vec_add( vec_madd( mH, t2, zero ), vec_madd( mG, t4, zero ) ), vec_madd( mF, t0, zero ) ) );
+
+    /* Determinant from the first row and res0, then its reciprocal. */
+    det = vec_add( vec_add( vec_add( vec_madd( mA, res0.x, zero ), vec_madd( mE, res0.y, zero ) ), vec_madd( mI, res0.z, zero ) ), vec_madd( mM, res0.w, zero ) );
+    detInv = divf4( one, det );
+
+    /* Partial terms of the remaining columns that still use the first t0..t5. */
+    vmathSoaV4SetX( &res1, vec_madd( mI, t1, zero ) );
+    vmathSoaV4SetY( &res1, vec_madd( mM, t0, zero ) );
+    vmathSoaV4SetZ( &res1, vec_madd( mA, t1, zero ) );
+    vmathSoaV4SetW( &res1, vec_madd( mE, t0, zero ) );
+    vmathSoaV4SetX( &res3, vec_madd( mI, t3, zero ) );
+    vmathSoaV4SetY( &res3, vec_madd( mM, t2, zero ) );
+    vmathSoaV4SetZ( &res3, vec_madd( mA, t3, zero ) );
+    vmathSoaV4SetW( &res3, vec_madd( mE, t2, zero ) );
+    vmathSoaV4SetX( &res2, vec_madd( mI, t5, zero ) );
+    vmathSoaV4SetY( &res2, vec_madd( mM, t4, zero ) );
+    vmathSoaV4SetZ( &res2, vec_madd( mA, t5, zero ) );
+    vmathSoaV4SetW( &res2, vec_madd( mE, t4, zero ) );
+
+    /* Second set of 2x2 sub-expressions (t0..t5 are reused). */
+    t0 = vec_sub( vec_madd( mI, mB, zero ), vec_madd( mA, mJ, zero ) );
+    t1 = vec_sub( vec_madd( mM, mF, zero ), vec_madd( mE, mN, zero ) );
+    t2 = vec_sub( vec_madd( mI, mD, zero ), vec_madd( mA, mL, zero ) );
+    t3 = vec_sub( vec_madd( mM, mH, zero ), vec_madd( mE, mP, zero ) );
+    t4 = vec_sub( vec_madd( mI, mC, zero ), vec_madd( mA, mK, zero ) );
+    t5 = vec_sub( vec_madd( mM, mG, zero ), vec_madd( mE, mO, zero ) );
+
+    /* Finish res2, res3 and res1 by combining the new terms with the partial ones. */
+    vmathSoaV4SetX( &res2, vec_add( vec_sub( vec_madd( mL, t1, zero ), vec_madd( mJ, t3, zero ) ), res2.x ) );
+    vmathSoaV4SetY( &res2, vec_add( vec_sub( vec_madd( mP, t0, zero ), vec_madd( mN, t2, zero ) ), res2.y ) );
+    vmathSoaV4SetZ( &res2, vec_sub( vec_sub( vec_madd( mB, t3, zero ), vec_madd( mD, t1, zero ) ), res2.z ) );
+    vmathSoaV4SetW( &res2, vec_sub( vec_sub( vec_madd( mF, t2, zero ), vec_madd( mH, t0, zero ) ), res2.w ) );
+    vmathSoaV4SetX( &res3, vec_add( vec_sub( vec_madd( mJ, t5, zero ), vec_madd( mK, t1, zero ) ), res3.x ) );
+    vmathSoaV4SetY( &res3, vec_add( vec_sub( vec_madd( mN, t4, zero ), vec_madd( mO, t0, zero ) ), res3.y ) );
+    vmathSoaV4SetZ( &res3, vec_sub( vec_sub( vec_madd( mC, t1, zero ), vec_madd( mB, t5, zero ) ), res3.z ) );
+    vmathSoaV4SetW( &res3, vec_sub( vec_sub( vec_madd( mG, t0, zero ), vec_madd( mF, t4, zero ) ), res3.w ) );
+    vmathSoaV4SetX( &res1, vec_sub( vec_sub( vec_madd( mK, t3, zero ), vec_madd( mL, t5, zero ) ), res1.x ) );
+    vmathSoaV4SetY( &res1, vec_sub( vec_sub( vec_madd( mO, t2, zero ), vec_madd( mP, t4, zero ) ), res1.y ) );
+    vmathSoaV4SetZ( &res1, vec_add( vec_sub( vec_madd( mD, t5, zero ), vec_madd( mC, t3, zero ) ), res1.z ) );
+    vmathSoaV4SetW( &res1, vec_add( vec_sub( vec_madd( mH, t4, zero ), vec_madd( mG, t2, zero ) ), res1.w ) );
+
+    /* Scale every column by the reciprocal determinant. */
+    vmathSoaV4ScalarMul( &result->col0, &res0, detInv );
+    vmathSoaV4ScalarMul( &result->col1, &res1, detInv );
+    vmathSoaV4ScalarMul( &result->col2, &res2, detInv );
+    vmathSoaV4ScalarMul( &result->col3, &res3, detInv );
+}
+
+/*
+ * Inverse computed through a 3x4 transform: the XYZ part of each column of
+ * *mat is placed into a VmathSoaTransform3, that transform is inverted with
+ * vmathSoaT3Inverse, and the outcome is expanded back to 4x4 with
+ * vmathSoaM4MakeFromT3.  The w components of *mat are not read.
+ */
+static inline void vmathSoaM4AffineInverse( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat )
+{
+    VmathSoaTransform3 affine, inverted;
+    VmathSoaVector3 xyz;
+
+    /* One scratch vector is reused for each column in turn. */
+    vmathSoaV4GetXYZ( &xyz, &mat->col0 );
+    vmathSoaT3SetCol0( &affine, &xyz );
+
+    vmathSoaV4GetXYZ( &xyz, &mat->col1 );
+    vmathSoaT3SetCol1( &affine, &xyz );
+
+    vmathSoaV4GetXYZ( &xyz, &mat->col2 );
+    vmathSoaT3SetCol2( &affine, &xyz );
+
+    vmathSoaV4GetXYZ( &xyz, &mat->col3 );
+    vmathSoaT3SetCol3( &affine, &xyz );
+
+    vmathSoaT3Inverse( &inverted, &affine );
+    vmathSoaM4MakeFromT3( result, &inverted );
+}
+
+/*
+ * Inverse computed through a 3x4 transform: the XYZ part of each column of
+ * *mat is placed into a VmathSoaTransform3, that transform is inverted with
+ * vmathSoaT3OrthoInverse, and the outcome is expanded back to 4x4 with
+ * vmathSoaM4MakeFromT3.  The w components of *mat are not read.
+ */
+static inline void vmathSoaM4OrthoInverse( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat )
+{
+    VmathSoaTransform3 affine, inverted;
+    VmathSoaVector3 xyz;
+
+    /* One scratch vector is reused for each column in turn. */
+    vmathSoaV4GetXYZ( &xyz, &mat->col0 );
+    vmathSoaT3SetCol0( &affine, &xyz );
+
+    vmathSoaV4GetXYZ( &xyz, &mat->col1 );
+    vmathSoaT3SetCol1( &affine, &xyz );
+
+    vmathSoaV4GetXYZ( &xyz, &mat->col2 );
+    vmathSoaT3SetCol2( &affine, &xyz );
+
+    vmathSoaV4GetXYZ( &xyz, &mat->col3 );
+    vmathSoaT3SetCol3( &affine, &xyz );
+
+    vmathSoaT3OrthoInverse( &inverted, &affine );
+    vmathSoaM4MakeFromT3( result, &inverted );
+}
+
+/*
+ * Determinant of a SoA 4x4 matrix, one value per SIMD slot.
+ *
+ * Elements are named mA..mP in column order (col0 = A,B,C,D ... col3 = M,N,O,P).
+ * Six 2x2 sub-expressions (t0..t5) feed four terms dx..dw, and the value
+ * returned is ((mA*dx + mE*dy) + mI*dz) + mM*dw.
+ */
+static inline vec_float4 vmathSoaM4Determinant( const VmathSoaMatrix4 *mat )
+{
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    const vec_float4 mA = mat->col0.x, mB = mat->col0.y, mC = mat->col0.z, mD = mat->col0.w;
+    const vec_float4 mE = mat->col1.x, mF = mat->col1.y, mG = mat->col1.z, mH = mat->col1.w;
+    const vec_float4 mI = mat->col2.x, mJ = mat->col2.y, mK = mat->col2.z, mL = mat->col2.w;
+    const vec_float4 mM = mat->col3.x, mN = mat->col3.y, mO = mat->col3.z, mP = mat->col3.w;
+    vec_float4 t0, t1, t2, t3, t4, t5;
+    vec_float4 dx, dy, dz, dw;
+    vec_float4 det;
+
+    t0 = vec_sub( vec_madd( mK, mD, zero ), vec_madd( mC, mL, zero ) );
+    t1 = vec_sub( vec_madd( mO, mH, zero ), vec_madd( mG, mP, zero ) );
+    t2 = vec_sub( vec_madd( mB, mK, zero ), vec_madd( mJ, mC, zero ) );
+    t3 = vec_sub( vec_madd( mF, mO, zero ), vec_madd( mN, mG, zero ) );
+    t4 = vec_sub( vec_madd( mJ, mD, zero ), vec_madd( mB, mL, zero ) );
+    t5 = vec_sub( vec_madd( mN, mH, zero ), vec_madd( mF, mP, zero ) );
+
+    dx = vec_sub( vec_sub( vec_madd( mJ, t1, zero ), vec_madd( mL, t3, zero ) ), vec_madd( mK, t5, zero ) );
+    dy = vec_sub( vec_sub( vec_madd( mN, t0, zero ), vec_madd( mP, t2, zero ) ), vec_madd( mO, t4, zero ) );
+    dz = vec_sub( vec_add( vec_madd( mD, t3, zero ), vec_madd( mC, t5, zero ) ), vec_madd( mB, t1, zero ) );
+    dw = vec_sub( vec_add( vec_madd( mH, t2, zero ), vec_madd( mG, t4, zero ) ), vec_madd( mF, t0, zero ) );
+
+    det = vec_add( vec_add( vec_add( vec_madd( mA, dx, zero ), vec_madd( mE, dy, zero ) ), vec_madd( mI, dz, zero ) ), vec_madd( mM, dw, zero ) );
+    return det;
+}
+
+/*
+ * Fills *result by running vmathSoaV4Add on each pair of matching columns
+ * (col0..col3) of *mat0 and *mat1.
+ */
+static inline void vmathSoaM4Add( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1 )
+{
+    const VmathSoaVector4 *const lhs[4] = { &mat0->col0, &mat0->col1, &mat0->col2, &mat0->col3 };
+    const VmathSoaVector4 *const rhs[4] = { &mat1->col0, &mat1->col1, &mat1->col2, &mat1->col3 };
+    VmathSoaVector4 *const dst[4] = { &result->col0, &result->col1, &result->col2, &result->col3 };
+    int i;
+
+    for ( i = 0; i < 4; ++i ) {
+        vmathSoaV4Add( dst[i], lhs[i], rhs[i] );
+    }
+}
+
+/*
+ * Fills *result by running vmathSoaV4Sub on each pair of matching columns,
+ * with the column of *mat0 as the first operand and that of *mat1 as the second.
+ */
+static inline void vmathSoaM4Sub( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1 )
+{
+    const VmathSoaVector4 *const lhs[4] = { &mat0->col0, &mat0->col1, &mat0->col2, &mat0->col3 };
+    const VmathSoaVector4 *const rhs[4] = { &mat1->col0, &mat1->col1, &mat1->col2, &mat1->col3 };
+    VmathSoaVector4 *const dst[4] = { &result->col0, &result->col1, &result->col2, &result->col3 };
+    int i;
+
+    for ( i = 0; i < 4; ++i ) {
+        vmathSoaV4Sub( dst[i], lhs[i], rhs[i] );
+    }
+}
+
+/*
+ * Applies vmathSoaV4Neg to col0..col3 of *mat in order, storing each outcome
+ * in the matching column of *result.
+ */
+static inline void vmathSoaM4Neg( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat )
+{
+    const VmathSoaVector4 *const src[4] = { &mat->col0, &mat->col1, &mat->col2, &mat->col3 };
+    VmathSoaVector4 *const dst[4] = { &result->col0, &result->col1, &result->col2, &result->col3 };
+    int i;
+
+    for ( i = 0; i < 4; ++i ) {
+        vmathSoaV4Neg( dst[i], src[i] );
+    }
+}
+
+/*
+ * Applies vmathSoaV4AbsPerElem to col0..col3 of *mat in order, storing each
+ * outcome in the matching column of *result.
+ */
+static inline void vmathSoaM4AbsPerElem( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat )
+{
+    const VmathSoaVector4 *const src[4] = { &mat->col0, &mat->col1, &mat->col2, &mat->col3 };
+    VmathSoaVector4 *const dst[4] = { &result->col0, &result->col1, &result->col2, &result->col3 };
+    int i;
+
+    for ( i = 0; i < 4; ++i ) {
+        vmathSoaV4AbsPerElem( dst[i], src[i] );
+    }
+}
+
+/*
+ * Passes each column of *mat, together with the same scalar, to
+ * vmathSoaV4ScalarMul and stores the outcome in the matching column of *result.
+ */
+static inline void vmathSoaM4ScalarMul( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat, vec_float4 scalar )
+{
+    const VmathSoaVector4 *const src[4] = { &mat->col0, &mat->col1, &mat->col2, &mat->col3 };
+    VmathSoaVector4 *const dst[4] = { &result->col0, &result->col1, &result->col2, &result->col3 };
+    int i;
+
+    for ( i = 0; i < 4; ++i ) {
+        vmathSoaV4ScalarMul( dst[i], src[i], scalar );
+    }
+}
+
+/*
+ * Matrix * 4-vector product for SoA data.
+ * Each output component is
+ *   ((col0.k * vec.x + col1.k * vec.y) + col2.k * vec.z) + col3.k * vec.w,
+ * with every product formed by vec_madd against a zero addend.
+ * All four components are computed before *result is written.
+ */
+static inline void vmathSoaM4MulV4( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, const VmathSoaVector4 *vec )
+{
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    const vec_float4 vx = vec->x;
+    const vec_float4 vy = vec->y;
+    const vec_float4 vz = vec->z;
+    const vec_float4 vw = vec->w;
+    vec_float4 outX, outY, outZ, outW;
+
+    outX = vec_add( vec_add( vec_add( vec_madd( mat->col0.x, vx, zero ), vec_madd( mat->col1.x, vy, zero ) ), vec_madd( mat->col2.x, vz, zero ) ), vec_madd( mat->col3.x, vw, zero ) );
+    outY = vec_add( vec_add( vec_add( vec_madd( mat->col0.y, vx, zero ), vec_madd( mat->col1.y, vy, zero ) ), vec_madd( mat->col2.y, vz, zero ) ), vec_madd( mat->col3.y, vw, zero ) );
+    outZ = vec_add( vec_add( vec_add( vec_madd( mat->col0.z, vx, zero ), vec_madd( mat->col1.z, vy, zero ) ), vec_madd( mat->col2.z, vz, zero ) ), vec_madd( mat->col3.z, vw, zero ) );
+    outW = vec_add( vec_add( vec_add( vec_madd( mat->col0.w, vx, zero ), vec_madd( mat->col1.w, vy, zero ) ), vec_madd( mat->col2.w, vz, zero ) ), vec_madd( mat->col3.w, vw, zero ) );
+    vmathSoaV4MakeFromElems( result, outX, outY, outZ, outW );
+}
+
+/*
+ * Matrix * 3-vector product producing a 4-vector.  Only col0..col2 of *mat
+ * take part; col3 is not read.  Each output component is
+ *   (col0.k * vec.x + col1.k * vec.y) + col2.k * vec.z.
+ * The fields of *result are stored one by one in x, y, z, w order, and the
+ * inputs are re-read through their pointers for every component.
+ */
+static inline void vmathSoaM4MulV3( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, const VmathSoaVector3 *vec )
+{
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    const VmathSoaVector4 *const c0 = &mat->col0;
+    const VmathSoaVector4 *const c1 = &mat->col1;
+    const VmathSoaVector4 *const c2 = &mat->col2;
+
+    result->x = vec_add( vec_add( vec_madd( c0->x, vec->x, zero ), vec_madd( c1->x, vec->y, zero ) ), vec_madd( c2->x, vec->z, zero ) );
+    result->y = vec_add( vec_add( vec_madd( c0->y, vec->x, zero ), vec_madd( c1->y, vec->y, zero ) ), vec_madd( c2->y, vec->z, zero ) );
+    result->z = vec_add( vec_add( vec_madd( c0->z, vec->x, zero ), vec_madd( c1->z, vec->y, zero ) ), vec_madd( c2->z, vec->z, zero ) );
+    result->w = vec_add( vec_add( vec_madd( c0->w, vec->x, zero ), vec_madd( c1->w, vec->y, zero ) ), vec_madd( c2->w, vec->z, zero ) );
+}
+
+/*
+ * Matrix * 3-point product producing a 4-vector.  Each output component is
+ *   ((col0.k * pnt.x + col1.k * pnt.y) + col2.k * pnt.z) + col3.k,
+ * i.e. col3 is added as-is rather than multiplied by a fourth coordinate.
+ * The fields of *result are stored one by one in x, y, z, w order, and the
+ * inputs are re-read through their pointers for every component.
+ */
+static inline void vmathSoaM4MulP3( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, const VmathSoaPoint3 *pnt )
+{
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    const VmathSoaVector4 *const c0 = &mat->col0;
+    const VmathSoaVector4 *const c1 = &mat->col1;
+    const VmathSoaVector4 *const c2 = &mat->col2;
+    const VmathSoaVector4 *const c3 = &mat->col3;
+
+    result->x = vec_add( vec_add( vec_add( vec_madd( c0->x, pnt->x, zero ), vec_madd( c1->x, pnt->y, zero ) ), vec_madd( c2->x, pnt->z, zero ) ), c3->x );
+    result->y = vec_add( vec_add( vec_add( vec_madd( c0->y, pnt->x, zero ), vec_madd( c1->y, pnt->y, zero ) ), vec_madd( c2->y, pnt->z, zero ) ), c3->y );
+    result->z = vec_add( vec_add( vec_add( vec_madd( c0->z, pnt->x, zero ), vec_madd( c1->z, pnt->y, zero ) ), vec_madd( c2->z, pnt->z, zero ) ), c3->z );
+    result->w = vec_add( vec_add( vec_add( vec_madd( c0->w, pnt->x, zero ), vec_madd( c1->w, pnt->y, zero ) ), vec_madd( c2->w, pnt->z, zero ) ), c3->w );
+}
+
+/*
+ * Multiply two 4x4 matrices: *result = mat0 * mat1.
+ * Every column of mat1 is multiplied by mat0 (vmathSoaM4MulV4) into a local
+ * matrix, which is copied into *result only after all four columns are done.
+ */
+static inline void vmathSoaM4Mul( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1 )
+{
+    VmathSoaMatrix4 product;
+    const VmathSoaMatrix4 *rhs = mat1;
+    vmathSoaM4MulV4( &product.col0, mat0, &rhs->col0 );
+    vmathSoaM4MulV4( &product.col1, mat0, &rhs->col1 );
+    vmathSoaM4MulV4( &product.col2, mat0, &rhs->col2 );
+    vmathSoaM4MulV4( &product.col3, mat0, &rhs->col3 );
+    vmathSoaM4Copy( result, &product );
+}
+
+/*
+ * Multiply a 4x4 matrix by a 3-D transform: *result = mat * tfrm1.
+ * Columns 0-2 of tfrm1 go through vmathSoaM4MulV3; column 3 is converted to
+ * a point and goes through vmathSoaM4MulP3.  The product is assembled in a
+ * local matrix and copied into *result at the end.
+ */
+static inline void vmathSoaM4MulT3( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat, const VmathSoaTransform3 *tfrm1 )
+{
+    VmathSoaMatrix4 product;
+    VmathSoaPoint3 translationPnt;
+    vmathSoaM4MulV3( &product.col0, mat, &tfrm1->col0 );
+    vmathSoaM4MulV3( &product.col1, mat, &tfrm1->col1 );
+    vmathSoaM4MulV3( &product.col2, mat, &tfrm1->col2 );
+    /* the last column is handled as a point */
+    vmathSoaP3MakeFromV3( &translationPnt, &tfrm1->col3 );
+    vmathSoaM4MulP3( &product.col3, mat, &translationPnt );
+    vmathSoaM4Copy( result, &product );
+}
+
+/*
+ * Per-element product of two 4x4 matrices: each column of *result is
+ * vmathSoaV4MulPerElem of the matching columns of mat0 and mat1.
+ * Columns are processed in order 0, 1, 2, 3.
+ */
+static inline void vmathSoaM4MulPerElem( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1 )
+{
+    const VmathSoaMatrix4 *lhs = mat0;
+    const VmathSoaMatrix4 *rhs = mat1;
+    vmathSoaV4MulPerElem( &result->col0, &lhs->col0, &rhs->col0 );
+    vmathSoaV4MulPerElem( &result->col1, &lhs->col1, &rhs->col1 );
+    vmathSoaV4MulPerElem( &result->col2, &lhs->col2, &rhs->col2 );
+    vmathSoaV4MulPerElem( &result->col3, &lhs->col3, &rhs->col3 );
+}
+
+/*
+ * Build a 4x4 identity matrix: columns 0..3 of *result are set with
+ * vmathSoaV4MakeXAxis, ...YAxis, ...ZAxis and ...WAxis respectively.
+ */
+static inline void vmathSoaM4MakeIdentity( VmathSoaMatrix4 *result )
+{
+    VmathSoaMatrix4 *out = result;
+    vmathSoaV4MakeXAxis( &out->col0 );
+    vmathSoaV4MakeYAxis( &out->col1 );
+    vmathSoaV4MakeZAxis( &out->col2 );
+    vmathSoaV4MakeWAxis( &out->col3 );
+}
+
+/*
+ * Set the upper-left 3x3 part of *result from mat3: the columns of mat3 are
+ * passed to vmathSoaV4SetXYZ for columns 0, 1 and 2 of *result.
+ * Column 3 of *result is not touched by this function.
+ */
+static inline void vmathSoaM4SetUpper3x3( VmathSoaMatrix4 *result, const VmathSoaMatrix3 *mat3 )
+{
+    const VmathSoaMatrix3 *src = mat3;
+    vmathSoaV4SetXYZ( &result->col0, &src->col0 );
+    vmathSoaV4SetXYZ( &result->col1, &src->col1 );
+    vmathSoaV4SetXYZ( &result->col2, &src->col2 );
+}
+
+/*
+ * Extract the upper-left 3x3 part of mat into *result: each of the three
+ * result columns is filled by vmathSoaV4GetXYZ from columns 0, 1 and 2 of mat.
+ */
+static inline void vmathSoaM4GetUpper3x3( VmathSoaMatrix3 *result, const VmathSoaMatrix4 *mat )
+{
+    const VmathSoaMatrix4 *src = mat;
+    vmathSoaV4GetXYZ( &result->col0, &src->col0 );
+    vmathSoaV4GetXYZ( &result->col1, &src->col1 );
+    vmathSoaV4GetXYZ( &result->col2, &src->col2 );
+}
+
+/*
+ * Set the translation of *result: translateVec is applied to column 3 with
+ * vmathSoaV4SetXYZ.  Columns 0-2 are not touched.
+ */
+static inline void vmathSoaM4SetTranslation( VmathSoaMatrix4 *result, const VmathSoaVector3 *translateVec )
+{
+    VmathSoaVector4 *translationCol = &result->col3;
+    vmathSoaV4SetXYZ( translationCol, translateVec );
+}
+
+/*
+ * Get the translation of mat: *result receives vmathSoaV4GetXYZ of column 3.
+ */
+static inline void vmathSoaM4GetTranslation( VmathSoaVector3 *result, const VmathSoaMatrix4 *mat )
+{
+    const VmathSoaVector4 *translationCol = &mat->col3;
+    vmathSoaV4GetXYZ( result, translationCol );
+}
+
+/*
+ * Build a 4x4 matrix rotating about the x axis by `radians`.
+ * With s, c obtained from sincosf4( radians ):
+ *   col0 = X axis, col1 = (0, c, s, 0), col2 = (0, -s, c, 0), col3 = W axis.
+ */
+static inline void vmathSoaM4MakeRotationX( VmathSoaMatrix4 *result, vec_float4 radians )
+{
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vec_float4 sinA, cosA;
+    sincosf4( radians, &sinA, &cosA );
+    vmathSoaV4MakeXAxis( &result->col0 );
+    vmathSoaV4MakeFromElems( &result->col1, zero, cosA, sinA, zero );
+    vmathSoaV4MakeFromElems( &result->col2, zero, negatef4( sinA ), cosA, zero );
+    vmathSoaV4MakeWAxis( &result->col3 );
+}
+
+/*
+ * Build a 4x4 matrix rotating about the y axis by `radians`.
+ * With s, c obtained from sincosf4( radians ):
+ *   col0 = (c, 0, -s, 0), col1 = Y axis, col2 = (s, 0, c, 0), col3 = W axis.
+ */
+static inline void vmathSoaM4MakeRotationY( VmathSoaMatrix4 *result, vec_float4 radians )
+{
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vec_float4 sinA, cosA;
+    sincosf4( radians, &sinA, &cosA );
+    vmathSoaV4MakeFromElems( &result->col0, cosA, zero, negatef4( sinA ), zero );
+    vmathSoaV4MakeYAxis( &result->col1 );
+    vmathSoaV4MakeFromElems( &result->col2, sinA, zero, cosA, zero );
+    vmathSoaV4MakeWAxis( &result->col3 );
+}
+
+/*
+ * Build a 4x4 matrix rotating about the z axis by `radians`.
+ * With s, c obtained from sincosf4( radians ):
+ *   col0 = (c, s, 0, 0), col1 = (-s, c, 0, 0), col2 = Z axis, col3 = W axis.
+ */
+static inline void vmathSoaM4MakeRotationZ( VmathSoaMatrix4 *result, vec_float4 radians )
+{
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vec_float4 sinA, cosA;
+    sincosf4( radians, &sinA, &cosA );
+    vmathSoaV4MakeFromElems( &result->col0, cosA, sinA, zero, zero );
+    vmathSoaV4MakeFromElems( &result->col1, negatef4( sinA ), cosA, zero, zero );
+    vmathSoaV4MakeZAxis( &result->col2 );
+    vmathSoaV4MakeWAxis( &result->col3 );
+}
+
+/*
+ * Build a 4x4 rotation matrix from the three angles in radiansXYZ
+ * (x, y, z components are the angles about the x, y and z axes).
+ * With sX/cX, sY/cY, sZ/cZ the sines/cosines of those angles:
+ *   col0 = ( cZ*cY,               sZ*cY,               -sY,   0 )
+ *   col1 = ( cZ*sY*sX - sZ*cX,    sZ*sY*sX + cZ*cX,    cY*sX, 0 )
+ *   col2 = ( cZ*sY*cX + sZ*sX,    sZ*sY*cX - cZ*sX,    cY*cX, 0 )
+ *   col3 = W axis
+ */
+static inline void vmathSoaM4MakeRotationZYX( VmathSoaMatrix4 *result, const VmathSoaVector3 *radiansXYZ )
+{
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vec_float4 sX, cX, sY, cY, sZ, cZ, cZsY, sZsY;
+    sincosf4( radiansXYZ->x, &sX, &cX );
+    sincosf4( radiansXYZ->y, &sY, &cY );
+    sincosf4( radiansXYZ->z, &sZ, &cZ );
+    /* products shared by col1 and col2 */
+    cZsY = vec_madd( cZ, sY, zero );
+    sZsY = vec_madd( sZ, sY, zero );
+    vmathSoaV4MakeFromElems( &result->col0,
+        vec_madd( cZ, cY, zero ),
+        vec_madd( sZ, cY, zero ),
+        negatef4( sY ),
+        zero );
+    vmathSoaV4MakeFromElems( &result->col1,
+        vec_sub( vec_madd( cZsY, sX, zero ), vec_madd( sZ, cX, zero ) ),
+        vec_add( vec_madd( sZsY, sX, zero ), vec_madd( cZ, cX, zero ) ),
+        vec_madd( cY, sX, zero ),
+        zero );
+    vmathSoaV4MakeFromElems( &result->col2,
+        vec_add( vec_madd( cZsY, cX, zero ), vec_madd( sZ, sX, zero ) ),
+        vec_sub( vec_madd( sZsY, cX, zero ), vec_madd( cZ, sX, zero ) ),
+        vec_madd( cY, cX, zero ),
+        zero );
+    vmathSoaV4MakeWAxis( &result->col3 );
+}
+
+/*
+ * Build a 4x4 matrix rotating by `radians` about the axis unitVec.
+ * With (x, y, z) = unitVec, s/c = sin/cos of the angle and t = 1 - c:
+ *   col0 = ( x*x*t + c,  x*y*t + z*s,  z*x*t - y*s,  0 )
+ *   col1 = ( x*y*t - z*s,  y*y*t + c,  y*z*t + x*s,  0 )
+ *   col2 = ( z*x*t + y*s,  y*z*t - x*s,  z*z*t + c,  0 )
+ *   col3 = W axis
+ * unitVec is used as given; it is not normalized here.
+ */
+static inline void vmathSoaM4MakeRotationAxis( VmathSoaMatrix4 *result, vec_float4 radians, const VmathSoaVector3 *unitVec )
+{
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    const vec_float4 one = ((vec_float4){1.0f,1.0f,1.0f,1.0f});
+    vec_float4 x, y, z, sinA, cosA, oneLessCos;
+    vec_float4 xyT, yzT, zxT, xS, yS, zS, diagX, diagY, diagZ;
+    sincosf4( radians, &sinA, &cosA );
+    x = unitVec->x;
+    y = unitVec->y;
+    z = unitVec->z;
+    oneLessCos = vec_sub( one, cosA );
+    /* off-diagonal symmetric terms: (a*b) * (1 - c) */
+    xyT = vec_madd( vec_madd( x, y, zero ), oneLessCos, zero );
+    yzT = vec_madd( vec_madd( y, z, zero ), oneLessCos, zero );
+    zxT = vec_madd( vec_madd( z, x, zero ), oneLessCos, zero );
+    /* off-diagonal antisymmetric terms: a * s */
+    xS = vec_madd( x, sinA, zero );
+    yS = vec_madd( y, sinA, zero );
+    zS = vec_madd( z, sinA, zero );
+    /* diagonal terms: (a*a) * (1 - c) + c */
+    diagX = vec_add( vec_madd( vec_madd( x, x, zero ), oneLessCos, zero ), cosA );
+    diagY = vec_add( vec_madd( vec_madd( y, y, zero ), oneLessCos, zero ), cosA );
+    diagZ = vec_add( vec_madd( vec_madd( z, z, zero ), oneLessCos, zero ), cosA );
+    vmathSoaV4MakeFromElems( &result->col0, diagX, vec_add( xyT, zS ), vec_sub( zxT, yS ), zero );
+    vmathSoaV4MakeFromElems( &result->col1, vec_sub( xyT, zS ), diagY, vec_add( yzT, xS ), zero );
+    vmathSoaV4MakeFromElems( &result->col2, vec_add( zxT, yS ), vec_sub( yzT, xS ), diagZ, zero );
+    vmathSoaV4MakeWAxis( &result->col3 );
+}
+
+/*
+ * Build a 4x4 rotation matrix from unitQuat: a 3-D transform is built with
+ * vmathSoaT3MakeRotationQ and then converted with vmathSoaM4MakeFromT3.
+ */
+static inline void vmathSoaM4MakeRotationQ( VmathSoaMatrix4 *result, const VmathSoaQuat *unitQuat )
+{
+    VmathSoaTransform3 rotation;
+    vmathSoaT3MakeRotationQ( &rotation, unitQuat );
+    vmathSoaM4MakeFromT3( result, &rotation );
+}
+
+/*
+ * Build a 4x4 scale matrix: the x, y, z components of scaleVec are placed
+ * on the diagonal of columns 0-2, all other elements of those columns are
+ * zero, and column 3 is set with vmathSoaV4MakeWAxis.
+ */
+static inline void vmathSoaM4MakeScale( VmathSoaMatrix4 *result, const VmathSoaVector3 *scaleVec )
+{
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vmathSoaV4MakeFromElems( &result->col0, scaleVec->x, zero, zero, zero );
+    vmathSoaV4MakeFromElems( &result->col1, zero, scaleVec->y, zero, zero );
+    vmathSoaV4MakeFromElems( &result->col2, zero, zero, scaleVec->z, zero );
+    vmathSoaV4MakeWAxis( &result->col3 );
+}
+
+/*
+ * Append a scale to mat: columns 0, 1, 2 of mat are multiplied by the x, y, z
+ * components of scaleVec respectively (vmathSoaV4ScalarMul) and column 3 is
+ * copied unchanged.  Each scale component is fetched just before its column
+ * is written.
+ */
+static inline void vmathSoaM4AppendScale( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat, const VmathSoaVector3 *scaleVec )
+{
+    vec_float4 factor;
+    factor = vmathSoaV3GetX( scaleVec );
+    vmathSoaV4ScalarMul( &result->col0, &mat->col0, factor );
+    factor = vmathSoaV3GetY( scaleVec );
+    vmathSoaV4ScalarMul( &result->col1, &mat->col1, factor );
+    factor = vmathSoaV3GetZ( scaleVec );
+    vmathSoaV4ScalarMul( &result->col2, &mat->col2, factor );
+    vmathSoaV4Copy( &result->col3, &mat->col3 );
+}
+
+/*
+ * Prepend a scale to mat: a 4-D vector is built from scaleVec with 1.0 as
+ * the fourth element (vmathSoaV4MakeFromV3Scalar), and every column of mat
+ * is multiplied per element by it.
+ */
+static inline void vmathSoaM4PrependScale( VmathSoaMatrix4 *result, const VmathSoaVector3 *scaleVec, const VmathSoaMatrix4 *mat )
+{
+    const vec_float4 one = ((vec_float4){1.0f,1.0f,1.0f,1.0f});
+    VmathSoaVector4 rowScale;
+    vmathSoaV4MakeFromV3Scalar( &rowScale, scaleVec, one );
+    vmathSoaV4MulPerElem( &result->col0, &mat->col0, &rowScale );
+    vmathSoaV4MulPerElem( &result->col1, &mat->col1, &rowScale );
+    vmathSoaV4MulPerElem( &result->col2, &mat->col2, &rowScale );
+    vmathSoaV4MulPerElem( &result->col3, &mat->col3, &rowScale );
+}
+
+/*
+ * Build a 4x4 translation matrix: columns 0-2 are the X, Y, Z axes and
+ * column 3 is built from translateVec with 1.0 as the fourth element
+ * (vmathSoaV4MakeFromV3Scalar).
+ */
+static inline void vmathSoaM4MakeTranslation( VmathSoaMatrix4 *result, const VmathSoaVector3 *translateVec )
+{
+    const vec_float4 one = ((vec_float4){1.0f,1.0f,1.0f,1.0f});
+    vmathSoaV4MakeXAxis( &result->col0 );
+    vmathSoaV4MakeYAxis( &result->col1 );
+    vmathSoaV4MakeZAxis( &result->col2 );
+    vmathSoaV4MakeFromV3Scalar( &result->col3, translateVec, one );
+}
+
+/*
+ * Build a look-at matrix from an eye position, a target position and an up
+ * vector.  An eye frame is assembled with columns
+ *   Z = normalize( eyePos - lookAtPos )
+ *   X = normalize( cross( normalize( upVec ), Z ) )
+ *   Y = cross( Z, X )
+ *   eyePos
+ * and *result is set to vmathSoaM4OrthoInverse of that frame.
+ */
+static inline void vmathSoaM4MakeLookAt( VmathSoaMatrix4 *result, const VmathSoaPoint3 *eyePos, const VmathSoaPoint3 *lookAtPos, const VmathSoaVector3 *upVec )
+{
+    VmathSoaMatrix4 eyeFrame;
+    VmathSoaVector3 axisX, axisY, axisZ, targetToEye, sideRaw;
+    VmathSoaVector4 colX, colY, colZ, colEye;
+    /* axisY first holds the normalized up hint; it is recomputed below */
+    vmathSoaV3Normalize( &axisY, upVec );
+    vmathSoaP3Sub( &targetToEye, eyePos, lookAtPos );
+    vmathSoaV3Normalize( &axisZ, &targetToEye );
+    vmathSoaV3Cross( &sideRaw, &axisY, &axisZ );
+    vmathSoaV3Normalize( &axisX, &sideRaw );
+    vmathSoaV3Cross( &axisY, &axisZ, &axisX );
+    vmathSoaV4MakeFromV3( &colX, &axisX );
+    vmathSoaV4MakeFromV3( &colY, &axisY );
+    vmathSoaV4MakeFromV3( &colZ, &axisZ );
+    vmathSoaV4MakeFromP3( &colEye, eyePos );
+    vmathSoaM4MakeFromCols( &eyeFrame, &colX, &colY, &colZ, &colEye );
+    vmathSoaM4OrthoInverse( result, &eyeFrame );
+}
+
+/*
+ * Build a perspective projection matrix.
+ * With f = tan( pi/2 - fovyRadians/2 ) and r = 1 / (zNear - zFar):
+ *   col0 = ( f / aspect, 0, 0, 0 )
+ *   col1 = ( 0, f, 0, 0 )
+ *   col2 = ( 0, 0, (zNear + zFar) * r, -1 )
+ *   col3 = ( 0, 0, zNear * zFar * r * 2, 0 )
+ * No check is made for zNear == zFar or aspect == 0.
+ */
+static inline void vmathSoaM4MakePerspective( VmathSoaMatrix4 *result, vec_float4 fovyRadians, vec_float4 aspect, vec_float4 zNear, vec_float4 zFar )
+{
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    const vec_float4 one = ((vec_float4){1.0f,1.0f,1.0f,1.0f});
+    const vec_float4 negOne = ((vec_float4){-1.0f,-1.0f,-1.0f,-1.0f});
+    const vec_float4 half = ((vec_float4){0.5f,0.5f,0.5f,0.5f});
+    const vec_float4 two = ((vec_float4){2.0f,2.0f,2.0f,2.0f});
+    const vec_float4 piOver2 = ((vec_float4){_VECTORMATH_PI_OVER_2,_VECTORMATH_PI_OVER_2,_VECTORMATH_PI_OVER_2,_VECTORMATH_PI_OVER_2});
+    vec_float4 focal, depthInv, halfFovy, nearFar;
+    halfFovy = vec_madd( half, fovyRadians, zero );
+    focal = tanf4( vec_sub( piOver2, halfFovy ) );
+    depthInv = divf4( one, vec_sub( zNear, zFar ) );
+    vmathSoaV4MakeFromElems( &result->col0, divf4( focal, aspect ), zero, zero, zero );
+    vmathSoaV4MakeFromElems( &result->col1, zero, focal, zero, zero );
+    vmathSoaV4MakeFromElems( &result->col2, zero, zero, vec_madd( vec_add( zNear, zFar ), depthInv, zero ), negOne );
+    nearFar = vec_madd( zNear, zFar, zero );
+    vmathSoaV4MakeFromElems( &result->col3, zero, zero, vec_madd( vec_madd( nearFar, depthInv, zero ), two, zero ), zero );
+}
+
+/*
+ * Build a frustum projection matrix from the six clip-plane values.
+ * With n2 = 2 * zNear:
+ *   col0 = ( n2 / (right - left), 0, 0, 0 )
+ *   col1 = ( 0, n2 / (top - bottom), 0, 0 )
+ *   col2 = ( (right + left) / (right - left),
+ *            (top + bottom) / (top - bottom),
+ *            (zNear + zFar) / (zNear - zFar), -1 )
+ *   col3 = ( 0, 0, n2 * zFar / (zNear - zFar), 0 )
+ * The divisions are done as multiplications by reciprocals (divf4 of 1.0).
+ */
+static inline void vmathSoaM4MakeFrustum( VmathSoaMatrix4 *result, vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar )
+{
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    const vec_float4 one = ((vec_float4){1.0f,1.0f,1.0f,1.0f});
+    const vec_float4 negOne = ((vec_float4){-1.0f,-1.0f,-1.0f,-1.0f});
+    vec_float4 widthSum, heightSum, depthSum, widthInv, heightInv, depthInv, twoNear;
+    widthSum = vec_add( right, left );
+    heightSum = vec_add( top, bottom );
+    depthSum = vec_add( zNear, zFar );
+    widthInv = divf4( one, vec_sub( right, left ) );
+    heightInv = divf4( one, vec_sub( top, bottom ) );
+    depthInv = divf4( one, vec_sub( zNear, zFar ) );
+    twoNear = vec_add( zNear, zNear );
+    vmathSoaV4MakeFromElems( &result->col0, vec_madd( twoNear, widthInv, zero ), zero, zero, zero );
+    vmathSoaV4MakeFromElems( &result->col1, zero, vec_madd( twoNear, heightInv, zero ), zero, zero );
+    vmathSoaV4MakeFromElems( &result->col2,
+        vec_madd( widthSum, widthInv, zero ),
+        vec_madd( heightSum, heightInv, zero ),
+        vec_madd( depthSum, depthInv, zero ),
+        negOne );
+    vmathSoaV4MakeFromElems( &result->col3, zero, zero, vec_madd( vec_madd( twoNear, depthInv, zero ), zFar, zero ), zero );
+}
+
+/*
+ * Build an orthographic projection matrix from the six clip-plane values.
+ *   col0 = ( 2 / (right - left), 0, 0, 0 )
+ *   col1 = ( 0, 2 / (top - bottom), 0, 0 )
+ *   col2 = ( 0, 0, 2 / (zNear - zFar), 0 )
+ *   col3 = ( -(right + left) / (right - left),
+ *            -(top + bottom) / (top - bottom),
+ *             (zNear + zFar) / (zNear - zFar), 1 )
+ * The factor 2 is obtained by adding each reciprocal to itself.
+ */
+static inline void vmathSoaM4MakeOrthographic( VmathSoaMatrix4 *result, vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar )
+{
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    const vec_float4 one = ((vec_float4){1.0f,1.0f,1.0f,1.0f});
+    vec_float4 widthSum, heightSum, depthSum, widthInv, heightInv, depthInv;
+    widthSum = vec_add( right, left );
+    heightSum = vec_add( top, bottom );
+    depthSum = vec_add( zNear, zFar );
+    widthInv = divf4( one, vec_sub( right, left ) );
+    heightInv = divf4( one, vec_sub( top, bottom ) );
+    depthInv = divf4( one, vec_sub( zNear, zFar ) );
+    vmathSoaV4MakeFromElems( &result->col0, vec_add( widthInv, widthInv ), zero, zero, zero );
+    vmathSoaV4MakeFromElems( &result->col1, zero, vec_add( heightInv, heightInv ), zero, zero );
+    vmathSoaV4MakeFromElems( &result->col2, zero, zero, vec_add( depthInv, depthInv ), zero );
+    vmathSoaV4MakeFromElems( &result->col3,
+        vec_madd( negatef4( widthSum ), widthInv, zero ),
+        vec_madd( negatef4( heightSum ), heightInv, zero ),
+        vec_madd( depthSum, depthInv, zero ),
+        one );
+}
+
+/*
+ * Column-wise select between two 4x4 matrices: each column of *result is
+ * vmathSoaV4Select of the matching columns of mat0 and mat1, with the same
+ * select1 mask forwarded to every call.
+ */
+static inline void vmathSoaM4Select( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1, vec_uint4 select1 )
+{
+    const VmathSoaMatrix4 *choice0 = mat0;
+    const VmathSoaMatrix4 *choice1 = mat1;
+    vmathSoaV4Select( &result->col0, &choice0->col0, &choice1->col0, select1 );
+    vmathSoaV4Select( &result->col1, &choice0->col1, &choice1->col1, select1 );
+    vmathSoaV4Select( &result->col2, &choice0->col2, &choice1->col2, select1 );
+    vmathSoaV4Select( &result->col3, &choice0->col3, &choice1->col3, select1 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print an SoA 4x4 matrix (debug builds only): the matrix is split into four
+ * AoS matrices with vmathSoaM4Get4Aos and each is printed with vmathM4Print
+ * under a "slot N:" heading.
+ */
+static inline void vmathSoaM4Print( const VmathSoaMatrix4 *mat )
+{
+    VmathMatrix4 slot0, slot1, slot2, slot3;
+    vmathSoaM4Get4Aos( mat, &slot0, &slot1, &slot2, &slot3 );
+    printf("slot 0:\n");
+    vmathM4Print( &slot0 );
+    printf("slot 1:\n");
+    vmathM4Print( &slot1 );
+    printf("slot 2:\n");
+    vmathM4Print( &slot2 );
+    printf("slot 3:\n");
+    vmathM4Print( &slot3 );
+}
+
+/*
+ * Print `name` followed by a colon and newline, then the matrix itself via
+ * vmathSoaM4Print (debug builds only).
+ */
+static inline void vmathSoaM4Prints( const VmathSoaMatrix4 *mat, const char *name )
+{
+    const char *label = name;
+    printf("%s:\n", label);
+    vmathSoaM4Print( mat );
+}
+
+#endif
+
+/*
+ * Copy a 3-D transform: all four columns of tfrm are copied into *result
+ * with vmathSoaV3Copy, in column order.
+ */
+static inline void vmathSoaT3Copy( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm )
+{
+    const VmathSoaTransform3 *src = tfrm;
+    vmathSoaV3Copy( &result->col0, &src->col0 );
+    vmathSoaV3Copy( &result->col1, &src->col1 );
+    vmathSoaV3Copy( &result->col2, &src->col2 );
+    vmathSoaV3Copy( &result->col3, &src->col3 );
+}
+
+/*
+ * Build a 3-D transform whose four columns are each made from the same
+ * scalar with vmathSoaV3MakeFromScalar.
+ */
+static inline void vmathSoaT3MakeFromScalar( VmathSoaTransform3 *result, vec_float4 scalar )
+{
+    const vec_float4 fill = scalar;
+    vmathSoaV3MakeFromScalar( &result->col0, fill );
+    vmathSoaV3MakeFromScalar( &result->col1, fill );
+    vmathSoaV3MakeFromScalar( &result->col2, fill );
+    vmathSoaV3MakeFromScalar( &result->col3, fill );
+}
+
+/*
+ * Build a 3-D transform from four column vectors: _col0.._col3 are copied
+ * into columns 0..3 of *result, in that order.
+ */
+static inline void vmathSoaT3MakeFromCols( VmathSoaTransform3 *result, const VmathSoaVector3 *_col0, const VmathSoaVector3 *_col1, const VmathSoaVector3 *_col2, const VmathSoaVector3 *_col3 )
+{
+    VmathSoaTransform3 *out = result;
+    vmathSoaV3Copy( &out->col0, _col0 );
+    vmathSoaV3Copy( &out->col1, _col1 );
+    vmathSoaV3Copy( &out->col2, _col2 );
+    vmathSoaV3Copy( &out->col3, _col3 );
+}
+
+/*
+ * Build a 3-D transform from a 3x3 matrix and a translation vector: tfrm
+ * becomes the upper 3x3 (columns 0-2) and translateVec becomes column 3.
+ */
+static inline void vmathSoaT3MakeFromM3V3( VmathSoaTransform3 *result, const VmathSoaMatrix3 *tfrm, const VmathSoaVector3 *translateVec )
+{
+    const VmathSoaMatrix3 *upper3x3 = tfrm;
+    vmathSoaT3SetUpper3x3( result, upper3x3 );
+    vmathSoaT3SetTranslation( result, translateVec );
+}
+
+/*
+ * Build a 3-D transform from a quaternion and a translation vector: the
+ * quaternion is converted to a 3x3 matrix with vmathSoaM3MakeFromQ, which
+ * becomes the upper 3x3, and translateVec becomes column 3.
+ */
+static inline void vmathSoaT3MakeFromQV3( VmathSoaTransform3 *result, const VmathSoaQuat *unitQuat, const VmathSoaVector3 *translateVec )
+{
+    VmathSoaMatrix3 rotation;
+    vmathSoaM3MakeFromQ( &rotation, unitQuat );
+    vmathSoaT3SetUpper3x3( result, &rotation );
+    vmathSoaT3SetTranslation( result, translateVec );
+}
+
+/*
+ * Build an SoA 3-D transform from a single AoS transform: each column is
+ * converted with vmathSoaV3MakeFromAos.
+ */
+static inline void vmathSoaT3MakeFromAos( VmathSoaTransform3 *result, const VmathTransform3 *tfrm )
+{
+    const VmathTransform3 *aos = tfrm;
+    vmathSoaV3MakeFromAos( &result->col0, &aos->col0 );
+    vmathSoaV3MakeFromAos( &result->col1, &aos->col1 );
+    vmathSoaV3MakeFromAos( &result->col2, &aos->col2 );
+    vmathSoaV3MakeFromAos( &result->col3, &aos->col3 );
+}
+
+/*
+ * Build an SoA 3-D transform from four AoS transforms: for each column, the
+ * matching columns of tfrm0..tfrm3 are combined with vmathSoaV3MakeFrom4Aos.
+ */
+static inline void vmathSoaT3MakeFrom4Aos( VmathSoaTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1, const VmathTransform3 *tfrm2, const VmathTransform3 *tfrm3 )
+{
+    const VmathTransform3 *a = tfrm0;
+    const VmathTransform3 *b = tfrm1;
+    const VmathTransform3 *c = tfrm2;
+    const VmathTransform3 *d = tfrm3;
+    vmathSoaV3MakeFrom4Aos( &result->col0, &a->col0, &b->col0, &c->col0, &d->col0 );
+    vmathSoaV3MakeFrom4Aos( &result->col1, &a->col1, &b->col1, &c->col1, &d->col1 );
+    vmathSoaV3MakeFrom4Aos( &result->col2, &a->col2, &b->col2, &c->col2, &d->col2 );
+    vmathSoaV3MakeFrom4Aos( &result->col3, &a->col3, &b->col3, &c->col3, &d->col3 );
+}
+
+/*
+ * Extract four AoS transforms from an SoA transform: each column of tfrm is
+ * split with vmathSoaV3Get4Aos into the matching column of result0..result3.
+ */
+static inline void vmathSoaT3Get4Aos( const VmathSoaTransform3 *tfrm, VmathTransform3 *result0, VmathTransform3 *result1, VmathTransform3 *result2, VmathTransform3 *result3 )
+{
+    const VmathSoaTransform3 *soa = tfrm;
+    vmathSoaV3Get4Aos( &soa->col0, &result0->col0, &result1->col0, &result2->col0, &result3->col0 );
+    vmathSoaV3Get4Aos( &soa->col1, &result0->col1, &result1->col1, &result2->col1, &result3->col1 );
+    vmathSoaV3Get4Aos( &soa->col2, &result0->col2, &result1->col2, &result2->col2, &result3->col2 );
+    vmathSoaV3Get4Aos( &soa->col3, &result0->col3, &result1->col3, &result2->col3, &result3->col3 );
+}
+
+/* Set column 0 of *result to a copy of *_col0. */
+static inline void vmathSoaT3SetCol0( VmathSoaTransform3 *result, const VmathSoaVector3 *_col0 )
+{
+    VmathSoaVector3 *dst = &result->col0;
+    vmathSoaV3Copy( dst, _col0 );
+}
+
+/* Set column 1 of *result to a copy of *_col1. */
+static inline void vmathSoaT3SetCol1( VmathSoaTransform3 *result, const VmathSoaVector3 *_col1 )
+{
+    VmathSoaVector3 *dst = &result->col1;
+    vmathSoaV3Copy( dst, _col1 );
+}
+
+/* Set column 2 of *result to a copy of *_col2. */
+static inline void vmathSoaT3SetCol2( VmathSoaTransform3 *result, const VmathSoaVector3 *_col2 )
+{
+    VmathSoaVector3 *dst = &result->col2;
+    vmathSoaV3Copy( dst, _col2 );
+}
+
+/* Set column 3 of *result to a copy of *_col3. */
+static inline void vmathSoaT3SetCol3( VmathSoaTransform3 *result, const VmathSoaVector3 *_col3 )
+{
+    VmathSoaVector3 *dst = &result->col3;
+    vmathSoaV3Copy( dst, _col3 );
+}
+
+/*
+ * Set column `col` of *result to a copy of *vec.
+ * The column is addressed as an offset from &result->col0; `col` is not
+ * range-checked here.
+ */
+static inline void vmathSoaT3SetCol( VmathSoaTransform3 *result, int col, const VmathSoaVector3 *vec )
+{
+    VmathSoaVector3 *dst = &result->col0 + col;
+    vmathSoaV3Copy( dst, vec );
+}
+
+/*
+ * Set row `row` of *result from a 4-D vector: element i of vec is stored as
+ * element `row` of column i, for i = 0..3.
+ */
+static inline void vmathSoaT3SetRow( VmathSoaTransform3 *result, int row, const VmathSoaVector4 *vec )
+{
+    vec_float4 elem;
+    elem = vmathSoaV4GetElem( vec, 0 );
+    vmathSoaV3SetElem( &result->col0, row, elem );
+    elem = vmathSoaV4GetElem( vec, 1 );
+    vmathSoaV3SetElem( &result->col1, row, elem );
+    elem = vmathSoaV4GetElem( vec, 2 );
+    vmathSoaV3SetElem( &result->col2, row, elem );
+    elem = vmathSoaV4GetElem( vec, 3 );
+    vmathSoaV3SetElem( &result->col3, row, elem );
+}
+
+/*
+ * Set the element at (col, row) of *result to val: the column is fetched
+ * into a local, modified there, and written back.
+ */
+static inline void vmathSoaT3SetElem( VmathSoaTransform3 *result, int col, int row, vec_float4 val )
+{
+    VmathSoaVector3 column;
+    vmathSoaT3GetCol( &column, result, col );
+    vmathSoaV3SetElem( &column, row, val );
+    vmathSoaT3SetCol( result, col, &column );
+}
+
+/*
+ * Return the element at (col, row) of tfrm: the column is fetched into a
+ * local and element `row` of it is returned.
+ */
+static inline vec_float4 vmathSoaT3GetElem( const VmathSoaTransform3 *tfrm, int col, int row )
+{
+    VmathSoaVector3 column;
+    vec_float4 elem;
+    vmathSoaT3GetCol( &column, tfrm, col );
+    elem = vmathSoaV3GetElem( &column, row );
+    return elem;
+}
+
+/* Copy column 0 of tfrm into *result. */
+static inline void vmathSoaT3GetCol0( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm )
+{
+    const VmathSoaVector3 *src = &tfrm->col0;
+    vmathSoaV3Copy( result, src );
+}
+
+/* Copy column 1 of tfrm into *result. */
+static inline void vmathSoaT3GetCol1( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm )
+{
+    const VmathSoaVector3 *src = &tfrm->col1;
+    vmathSoaV3Copy( result, src );
+}
+
+/* Copy column 2 of tfrm into *result. */
+static inline void vmathSoaT3GetCol2( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm )
+{
+    const VmathSoaVector3 *src = &tfrm->col2;
+    vmathSoaV3Copy( result, src );
+}
+
+/* Copy column 3 of tfrm into *result. */
+static inline void vmathSoaT3GetCol3( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm )
+{
+    const VmathSoaVector3 *src = &tfrm->col3;
+    vmathSoaV3Copy( result, src );
+}
+
+/*
+ * Copy column `col` of tfrm into *result.
+ * The column is addressed as an offset from &tfrm->col0; `col` is not
+ * range-checked here.
+ */
+static inline void vmathSoaT3GetCol( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm, int col )
+{
+    const VmathSoaVector3 *src = &tfrm->col0 + col;
+    vmathSoaV3Copy( result, src );
+}
+
+/*
+ * Get row `row` of tfrm as a 4-D vector: element `row` of columns 0..3
+ * become the x, y, z, w elements of *result.
+ */
+static inline void vmathSoaT3GetRow( VmathSoaVector4 *result, const VmathSoaTransform3 *tfrm, int row )
+{
+    vec_float4 fromCol0 = vmathSoaV3GetElem( &tfrm->col0, row );
+    vec_float4 fromCol1 = vmathSoaV3GetElem( &tfrm->col1, row );
+    vec_float4 fromCol2 = vmathSoaV3GetElem( &tfrm->col2, row );
+    vec_float4 fromCol3 = vmathSoaV3GetElem( &tfrm->col3, row );
+    vmathSoaV4MakeFromElems( result, fromCol0, fromCol1, fromCol2, fromCol3 );
+}
+
+/*
+ * Compute the inverse of a 3-D transform.
+ * The upper 3x3 is inverted from the cross products of its columns, scaled
+ * by 1 / dot( col2, cross( col0, col1 ) ).  The new column 3 is
+ * -( inv0 * col3.x + inv1 * col3.y + inv2 * col3.z ).
+ * Columns 0-2 of *result are written before tfrm->col3 is read, and column 3
+ * of *result is written last.  No check is made for a zero determinant.
+ */
+static inline void vmathSoaT3Inverse( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm )
+{
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    const vec_float4 one = ((vec_float4){1.0f,1.0f,1.0f,1.0f});
+    VmathSoaVector3 cross12, cross20, cross01, inv0, inv1, inv2;
+    VmathSoaVector3 scaled0, scaled1, scaled2, sum12, sumAll, negated;
+    vec_float4 invDet;
+    vmathSoaV3Cross( &cross12, &tfrm->col1, &tfrm->col2 );
+    vmathSoaV3Cross( &cross20, &tfrm->col2, &tfrm->col0 );
+    vmathSoaV3Cross( &cross01, &tfrm->col0, &tfrm->col1 );
+    invDet = divf4( one, vmathSoaV3Dot( &tfrm->col2, &cross01 ) );
+    /* inverse columns are the scaled cross products, transposed */
+    vmathSoaV3MakeFromElems( &inv0, vec_madd( cross12.x, invDet, zero ), vec_madd( cross20.x, invDet, zero ), vec_madd( cross01.x, invDet, zero ) );
+    vmathSoaV3MakeFromElems( &inv1, vec_madd( cross12.y, invDet, zero ), vec_madd( cross20.y, invDet, zero ), vec_madd( cross01.y, invDet, zero ) );
+    vmathSoaV3MakeFromElems( &inv2, vec_madd( cross12.z, invDet, zero ), vec_madd( cross20.z, invDet, zero ), vec_madd( cross01.z, invDet, zero ) );
+    vmathSoaV3Copy( &result->col0, &inv0 );
+    vmathSoaV3Copy( &result->col1, &inv1 );
+    vmathSoaV3Copy( &result->col2, &inv2 );
+    /* translation: negate the inverse 3x3 applied to the old column 3 */
+    vmathSoaV3ScalarMul( &scaled0, &inv0, tfrm->col3.x );
+    vmathSoaV3ScalarMul( &scaled1, &inv1, tfrm->col3.y );
+    vmathSoaV3ScalarMul( &scaled2, &inv2, tfrm->col3.z );
+    vmathSoaV3Add( &sum12, &scaled1, &scaled2 );
+    vmathSoaV3Add( &sumAll, &scaled0, &sum12 );
+    vmathSoaV3Neg( &negated, &sumAll );
+    vmathSoaV3Copy( &result->col3, &negated );
+}
+
+/*
+ * Compute the inverse of a 3-D transform using the transpose of its upper
+ * 3x3 in place of a general inverse (presumably intended for transforms
+ * whose upper 3x3 is orthonormal; this is not checked here).
+ * The new column 3 is -( inv0 * col3.x + inv1 * col3.y + inv2 * col3.z ).
+ * Columns 0-2 of *result are written before tfrm->col3 is read, and column 3
+ * of *result is written last.
+ */
+static inline void vmathSoaT3OrthoInverse( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm )
+{
+    VmathSoaVector3 inv0, inv1, inv2;
+    VmathSoaVector3 scaled0, scaled1, scaled2, sum12, sumAll, negated;
+    /* rows of the upper 3x3 become the inverse columns */
+    vmathSoaV3MakeFromElems( &inv0, tfrm->col0.x, tfrm->col1.x, tfrm->col2.x );
+    vmathSoaV3MakeFromElems( &inv1, tfrm->col0.y, tfrm->col1.y, tfrm->col2.y );
+    vmathSoaV3MakeFromElems( &inv2, tfrm->col0.z, tfrm->col1.z, tfrm->col2.z );
+    vmathSoaV3Copy( &result->col0, &inv0 );
+    vmathSoaV3Copy( &result->col1, &inv1 );
+    vmathSoaV3Copy( &result->col2, &inv2 );
+    vmathSoaV3ScalarMul( &scaled0, &inv0, tfrm->col3.x );
+    vmathSoaV3ScalarMul( &scaled1, &inv1, tfrm->col3.y );
+    vmathSoaV3ScalarMul( &scaled2, &inv2, tfrm->col3.z );
+    vmathSoaV3Add( &sum12, &scaled1, &scaled2 );
+    vmathSoaV3Add( &sumAll, &scaled0, &sum12 );
+    vmathSoaV3Neg( &negated, &sumAll );
+    vmathSoaV3Copy( &result->col3, &negated );
+}
+
+/*
+ * Per-element absolute value of a 3-D transform: vmathSoaV3AbsPerElem is
+ * applied to each of the four columns in order.
+ */
+static inline void vmathSoaT3AbsPerElem( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm )
+{
+    const VmathSoaTransform3 *src = tfrm;
+    vmathSoaV3AbsPerElem( &result->col0, &src->col0 );
+    vmathSoaV3AbsPerElem( &result->col1, &src->col1 );
+    vmathSoaV3AbsPerElem( &result->col2, &src->col2 );
+    vmathSoaV3AbsPerElem( &result->col3, &src->col3 );
+}
+
+/*
+ * Multiply a 3-D transform by a 3-D vector.  Only columns 0-2 are used
+ * (column 3 does not contribute): each component is
+ * (col0 * vec.x + col1 * vec.y) + col2 * vec.z.
+ * All three components are computed before *result is written.
+ */
+static inline void vmathSoaT3MulV3( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm, const VmathSoaVector3 *vec )
+{
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vec_float4 outX, outY, outZ;
+    outX = vec_add( vec_madd( tfrm->col0.x, vec->x, zero ), vec_madd( tfrm->col1.x, vec->y, zero ) );
+    outX = vec_add( outX, vec_madd( tfrm->col2.x, vec->z, zero ) );
+    outY = vec_add( vec_madd( tfrm->col0.y, vec->x, zero ), vec_madd( tfrm->col1.y, vec->y, zero ) );
+    outY = vec_add( outY, vec_madd( tfrm->col2.y, vec->z, zero ) );
+    outZ = vec_add( vec_madd( tfrm->col0.z, vec->x, zero ), vec_madd( tfrm->col1.z, vec->y, zero ) );
+    outZ = vec_add( outZ, vec_madd( tfrm->col2.z, vec->z, zero ) );
+    vmathSoaV3MakeFromElems( result, outX, outY, outZ );
+}
+
+/*
+ * Multiply a 3-D transform by a 3-D point.  Each component is
+ * ((col0 * pnt.x + col1 * pnt.y) + col2 * pnt.z) + col3, i.e. column 3 is
+ * added unscaled.  All three components are computed before *result is
+ * written.
+ */
+static inline void vmathSoaT3MulP3( VmathSoaPoint3 *result, const VmathSoaTransform3 *tfrm, const VmathSoaPoint3 *pnt )
+{
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vec_float4 outX, outY, outZ;
+    outX = vec_add( vec_madd( tfrm->col0.x, pnt->x, zero ), vec_madd( tfrm->col1.x, pnt->y, zero ) );
+    outX = vec_add( outX, vec_madd( tfrm->col2.x, pnt->z, zero ) );
+    outX = vec_add( outX, tfrm->col3.x );
+    outY = vec_add( vec_madd( tfrm->col0.y, pnt->x, zero ), vec_madd( tfrm->col1.y, pnt->y, zero ) );
+    outY = vec_add( outY, vec_madd( tfrm->col2.y, pnt->z, zero ) );
+    outY = vec_add( outY, tfrm->col3.y );
+    outZ = vec_add( vec_madd( tfrm->col0.z, pnt->x, zero ), vec_madd( tfrm->col1.z, pnt->y, zero ) );
+    outZ = vec_add( outZ, vec_madd( tfrm->col2.z, pnt->z, zero ) );
+    outZ = vec_add( outZ, tfrm->col3.z );
+    vmathSoaP3MakeFromElems( result, outX, outY, outZ );
+}
+
+/*
+ * Multiply two 3-D transforms: *result = tfrm0 * tfrm1.
+ * Columns 0-2 of tfrm1 are multiplied as vectors (vmathSoaT3MulV3); column 3
+ * is converted to a point, multiplied with vmathSoaT3MulP3 and converted
+ * back.  The product is built in a local and copied to *result at the end.
+ */
+static inline void vmathSoaT3Mul( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm0, const VmathSoaTransform3 *tfrm1 )
+{
+    VmathSoaTransform3 product;
+    VmathSoaPoint3 originIn, originOut;
+    vmathSoaT3MulV3( &product.col0, tfrm0, &tfrm1->col0 );
+    vmathSoaT3MulV3( &product.col1, tfrm0, &tfrm1->col1 );
+    vmathSoaT3MulV3( &product.col2, tfrm0, &tfrm1->col2 );
+    /* the last column is handled as a point */
+    vmathSoaP3MakeFromV3( &originIn, &tfrm1->col3 );
+    vmathSoaT3MulP3( &originOut, tfrm0, &originIn );
+    vmathSoaV3MakeFromP3( &product.col3, &originOut );
+    vmathSoaT3Copy( result, &product );
+}
+
+/*
+ * Per-element product of two 3-D transforms: each column of *result is
+ * vmathSoaV3MulPerElem of the matching columns of tfrm0 and tfrm1.
+ * Columns are processed in order 0, 1, 2, 3.
+ */
+static inline void vmathSoaT3MulPerElem( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm0, const VmathSoaTransform3 *tfrm1 )
+{
+    const VmathSoaTransform3 *lhs = tfrm0;
+    const VmathSoaTransform3 *rhs = tfrm1;
+    vmathSoaV3MulPerElem( &result->col0, &lhs->col0, &rhs->col0 );
+    vmathSoaV3MulPerElem( &result->col1, &lhs->col1, &rhs->col1 );
+    vmathSoaV3MulPerElem( &result->col2, &lhs->col2, &rhs->col2 );
+    vmathSoaV3MulPerElem( &result->col3, &lhs->col3, &rhs->col3 );
+}
+
+/*
+ * Build an identity 3-D transform: columns 0-2 are the X, Y, Z axes and
+ * column 3 is made from the scalar 0.0.
+ */
+static inline void vmathSoaT3MakeIdentity( VmathSoaTransform3 *result )
+{
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vmathSoaV3MakeXAxis( &result->col0 );
+    vmathSoaV3MakeYAxis( &result->col1 );
+    vmathSoaV3MakeZAxis( &result->col2 );
+    vmathSoaV3MakeFromScalar( &result->col3, zero );
+}
+
+/*
+ * Set the upper 3x3 of *result: the three columns of the 3x3 matrix `tfrm`
+ * are copied into columns 0-2.  Column 3 of *result is not touched.
+ */
+static inline void vmathSoaT3SetUpper3x3( VmathSoaTransform3 *result, const VmathSoaMatrix3 *tfrm )
+{
+    const VmathSoaMatrix3 *src = tfrm;
+    vmathSoaV3Copy( &result->col0, &src->col0 );
+    vmathSoaV3Copy( &result->col1, &src->col1 );
+    vmathSoaV3Copy( &result->col2, &src->col2 );
+}
+
+/*
+ * Get the upper 3x3 of tfrm: *result is built from columns 0-2 with
+ * vmathSoaM3MakeFromCols.
+ */
+static inline void vmathSoaT3GetUpper3x3( VmathSoaMatrix3 *result, const VmathSoaTransform3 *tfrm )
+{
+    const VmathSoaVector3 *first = &tfrm->col0;
+    const VmathSoaVector3 *second = &tfrm->col1;
+    const VmathSoaVector3 *third = &tfrm->col2;
+    vmathSoaM3MakeFromCols( result, first, second, third );
+}
+
+/* Set the translation of *result: translateVec is copied into column 3. */
+static inline void vmathSoaT3SetTranslation( VmathSoaTransform3 *result, const VmathSoaVector3 *translateVec )
+{
+    VmathSoaVector3 *translationCol = &result->col3;
+    vmathSoaV3Copy( translationCol, translateVec );
+}
+
+/* Get the translation of tfrm: column 3 is copied into *result. */
+static inline void vmathSoaT3GetTranslation( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm )
+{
+    const VmathSoaVector3 *translationCol = &tfrm->col3;
+    vmathSoaV3Copy( result, translationCol );
+}
+
+/*
+ * Build a 3-D transform rotating about the x axis by `radians`.
+ * With s, c obtained from sincosf4( radians ):
+ *   col0 = X axis, col1 = (0, c, s), col2 = (0, -s, c), col3 = 0.
+ */
+static inline void vmathSoaT3MakeRotationX( VmathSoaTransform3 *result, vec_float4 radians )
+{
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vec_float4 sinA, cosA;
+    sincosf4( radians, &sinA, &cosA );
+    vmathSoaV3MakeXAxis( &result->col0 );
+    vmathSoaV3MakeFromElems( &result->col1, zero, cosA, sinA );
+    vmathSoaV3MakeFromElems( &result->col2, zero, negatef4( sinA ), cosA );
+    vmathSoaV3MakeFromScalar( &result->col3, zero );
+}
+
+/*
+ * Build a 3-D transform rotating about the y axis by `radians`.
+ * With s, c obtained from sincosf4( radians ):
+ *   col0 = (c, 0, -s), col1 = Y axis, col2 = (s, 0, c), col3 = 0.
+ */
+static inline void vmathSoaT3MakeRotationY( VmathSoaTransform3 *result, vec_float4 radians )
+{
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vec_float4 sinA, cosA;
+    sincosf4( radians, &sinA, &cosA );
+    vmathSoaV3MakeFromElems( &result->col0, cosA, zero, negatef4( sinA ) );
+    vmathSoaV3MakeYAxis( &result->col1 );
+    vmathSoaV3MakeFromElems( &result->col2, sinA, zero, cosA );
+    vmathSoaV3MakeFromScalar( &result->col3, zero );
+}
+
+/*
+ * Build a 3-D transform rotating about the z axis by `radians`.
+ * With s, c obtained from sincosf4( radians ):
+ *   col0 = (c, s, 0), col1 = (-s, c, 0), col2 = Z axis, col3 = 0.
+ */
+static inline void vmathSoaT3MakeRotationZ( VmathSoaTransform3 *result, vec_float4 radians )
+{
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vec_float4 sinA, cosA;
+    sincosf4( radians, &sinA, &cosA );
+    vmathSoaV3MakeFromElems( &result->col0, cosA, sinA, zero );
+    vmathSoaV3MakeFromElems( &result->col1, negatef4( sinA ), cosA, zero );
+    vmathSoaV3MakeZAxis( &result->col2 );
+    vmathSoaV3MakeFromScalar( &result->col3, zero );
+}
+
+/*
+ * Build a 3-D rotation transform from the three angles in radiansXYZ
+ * (x, y, z components are the angles about the x, y and z axes).
+ * With sX/cX, sY/cY, sZ/cZ the sines/cosines of those angles:
+ *   col0 = ( cZ*cY,               sZ*cY,               -sY   )
+ *   col1 = ( cZ*sY*sX - sZ*cX,    sZ*sY*sX + cZ*cX,    cY*sX )
+ *   col2 = ( cZ*sY*cX + sZ*sX,    sZ*sY*cX - cZ*sX,    cY*cX )
+ *   col3 = 0
+ */
+static inline void vmathSoaT3MakeRotationZYX( VmathSoaTransform3 *result, const VmathSoaVector3 *radiansXYZ )
+{
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vec_float4 sX, cX, sY, cY, sZ, cZ, cZsY, sZsY;
+    sincosf4( radiansXYZ->x, &sX, &cX );
+    sincosf4( radiansXYZ->y, &sY, &cY );
+    sincosf4( radiansXYZ->z, &sZ, &cZ );
+    /* products shared by col1 and col2 */
+    cZsY = vec_madd( cZ, sY, zero );
+    sZsY = vec_madd( sZ, sY, zero );
+    vmathSoaV3MakeFromElems( &result->col0,
+        vec_madd( cZ, cY, zero ),
+        vec_madd( sZ, cY, zero ),
+        negatef4( sY ) );
+    vmathSoaV3MakeFromElems( &result->col1,
+        vec_sub( vec_madd( cZsY, sX, zero ), vec_madd( sZ, cX, zero ) ),
+        vec_add( vec_madd( sZsY, sX, zero ), vec_madd( cZ, cX, zero ) ),
+        vec_madd( cY, sX, zero ) );
+    vmathSoaV3MakeFromElems( &result->col2,
+        vec_add( vec_madd( cZsY, cX, zero ), vec_madd( sZ, sX, zero ) ),
+        vec_sub( vec_madd( sZsY, cX, zero ), vec_madd( cZ, sX, zero ) ),
+        vec_madd( cY, cX, zero ) );
+    vmathSoaV3MakeFromScalar( &result->col3, zero );
+}
+
+static inline void vmathSoaT3MakeRotationAxis( VmathSoaTransform3 *result, vec_float4 radians, const VmathSoaVector3 *unitVec ) /* Build *result from the 3x3 matrix produced by vmathSoaM3MakeRotationAxis( radians, unitVec ) plus a vector made from the scalar 0. */
+{
+    VmathSoaMatrix3 tmpM3_0;
+    VmathSoaVector3 tmpV3_0;
+    vmathSoaM3MakeRotationAxis( &tmpM3_0, radians, unitVec ); /* NOTE(review): the parameter name suggests unitVec must be normalized - confirm against vmathSoaM3MakeRotationAxis */
+    vmathSoaV3MakeFromScalar( &tmpV3_0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    vmathSoaT3MakeFromM3V3( result, &tmpM3_0, &tmpV3_0 );
+}
+
+static inline void vmathSoaT3MakeRotationQ( VmathSoaTransform3 *result, const VmathSoaQuat *unitQuat ) /* Build *result from the 3x3 matrix produced by vmathSoaM3MakeFromQ( unitQuat ) plus a vector made from the scalar 0. */
+{
+    VmathSoaMatrix3 tmpM3_0;
+    VmathSoaVector3 tmpV3_0;
+    vmathSoaM3MakeFromQ( &tmpM3_0, unitQuat ); /* NOTE(review): the parameter name suggests unitQuat must be normalized - confirm against vmathSoaM3MakeFromQ */
+    vmathSoaV3MakeFromScalar( &tmpV3_0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    vmathSoaT3MakeFromM3V3( result, &tmpM3_0, &tmpV3_0 );
+}
+
+static inline void vmathSoaT3MakeScale( VmathSoaTransform3 *result, const VmathSoaVector3 *scaleVec ) /* Fill *result with a diagonal scale: scaleVec->x/y/z on the diagonal of col0..col2, zeros elsewhere, col3 built from the scalar 0. */
+{
+    vmathSoaV3MakeFromElems( &result->col0, scaleVec->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); /* col0 = ( sx, 0, 0 ) */
+    vmathSoaV3MakeFromElems( &result->col1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), scaleVec->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); /* col1 = ( 0, sy, 0 ) */
+    vmathSoaV3MakeFromElems( &result->col2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), scaleVec->z ); /* col2 = ( 0, 0, sz ) */
+    vmathSoaV3MakeFromScalar( &result->col3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+}
+
+static inline void vmathSoaT3AppendScale( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm, const VmathSoaVector3 *scaleVec ) /* Scale each of the first three columns of *tfrm by the matching component of scaleVec (col0 by x, col1 by y, col2 by z); col3 is copied unscaled. */
+{
+    vmathSoaV3ScalarMul( &result->col0, &tfrm->col0, vmathSoaV3GetX( scaleVec ) );
+    vmathSoaV3ScalarMul( &result->col1, &tfrm->col1, vmathSoaV3GetY( scaleVec ) );
+    vmathSoaV3ScalarMul( &result->col2, &tfrm->col2, vmathSoaV3GetZ( scaleVec ) );
+    vmathSoaV3Copy( &result->col3, &tfrm->col3 ); /* the scale is applied before tfrm, so tfrm's col3 passes through */
+}
+
+static inline void vmathSoaT3PrependScale( VmathSoaTransform3 *result, const VmathSoaVector3 *scaleVec, const VmathSoaTransform3 *tfrm ) /* Multiply every column of *tfrm, including col3, element-wise by scaleVec and store the result in *result. */
+{
+    vmathSoaV3MulPerElem( &result->col0, &tfrm->col0, scaleVec );
+    vmathSoaV3MulPerElem( &result->col1, &tfrm->col1, scaleVec );
+    vmathSoaV3MulPerElem( &result->col2, &tfrm->col2, scaleVec );
+    vmathSoaV3MulPerElem( &result->col3, &tfrm->col3, scaleVec ); /* unlike AppendScale, col3 is scaled here as well */
+}
+
+static inline void vmathSoaT3MakeTranslation( VmathSoaTransform3 *result, const VmathSoaVector3 *translateVec ) /* Fill *result with the x/y/z axis vectors in col0..col2 and a copy of *translateVec in col3. */
+{
+    vmathSoaV3MakeXAxis( &result->col0 );
+    vmathSoaV3MakeYAxis( &result->col1 );
+    vmathSoaV3MakeZAxis( &result->col2 );
+    vmathSoaV3Copy( &result->col3, translateVec );
+}
+
+static inline void vmathSoaT3Select( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm0, const VmathSoaTransform3 *tfrm1, vec_uint4 select1 ) /* Apply vmathSoaV3Select to each of the four columns of tfrm0/tfrm1 with the same mask select1. */
+{
+    vmathSoaV3Select( &result->col0, &tfrm0->col0, &tfrm1->col0, select1 ); /* NOTE(review): presumably a set mask picks tfrm1 and a clear mask picks tfrm0 - confirm in vmathSoaV3Select */
+    vmathSoaV3Select( &result->col1, &tfrm0->col1, &tfrm1->col1, select1 );
+    vmathSoaV3Select( &result->col2, &tfrm0->col2, &tfrm1->col2, select1 );
+    vmathSoaV3Select( &result->col3, &tfrm0->col3, &tfrm1->col3, select1 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathSoaT3Print( const VmathSoaTransform3 *tfrm ) /* Debug helper (only compiled under _VECTORMATH_DEBUG): print the four transforms held in *tfrm, one per "slot N:" heading. */
+{
+    VmathTransform3 mat0, mat1, mat2, mat3;
+    vmathSoaT3Get4Aos( tfrm, &mat0, &mat1, &mat2, &mat3 ); /* split the SoA value into four separate VmathTransform3 values */
+    printf("slot 0:\n");
+    vmathT3Print( &mat0 );
+    printf("slot 1:\n");
+    vmathT3Print( &mat1 );
+    printf("slot 2:\n");
+    vmathT3Print( &mat2 );
+    printf("slot 3:\n");
+    vmathT3Print( &mat3 );
+}
+
+static inline void vmathSoaT3Prints( const VmathSoaTransform3 *tfrm, const char *name ) /* Debug helper: print `name` followed by a colon and newline, then the same output as vmathSoaT3Print( tfrm ). */
+{
+    printf("%s:\n", name);
+    vmathSoaT3Print( tfrm );
+}
+
+#endif
+
+static inline void vmathSoaQMakeFromM3( VmathSoaQuat *result, const VmathSoaMatrix3 *tfrm ) /* Convert the 3x3 matrix *tfrm to a quaternion without branches: the per-case handling is done with masks and vec_sel. NOTE(review): presumably *tfrm must be a pure rotation - confirm with callers. */
+{
+    vec_float4 trace, radicand, scale, xx, yx, zx, xy, yy, zy, xz, yz, zz, tmpx, tmpy, tmpz, tmpw, qx, qy, qz, qw;
+    vec_uint4 negTrace, ZgtX, ZgtY, YgtX;
+    vec_uint4 largestXorY, largestYorZ, largestZorX;
+
+    xx = tfrm->col0.x; /* naming is <element><column>: e.g. yx is the y element of col0, xz is the x element of col2 */
+    yx = tfrm->col0.y;
+    zx = tfrm->col0.z;
+    xy = tfrm->col1.x;
+    yy = tfrm->col1.y;
+    zy = tfrm->col1.z;
+    xz = tfrm->col2.x;
+    yz = tfrm->col2.y;
+    zz = tfrm->col2.z;
+
+    trace = vec_add( vec_add( xx, yy ), zz ); /* sum of the diagonal */
+
+    negTrace = (vec_uint4)vec_cmpgt( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), trace ); /* mask set where 0 > trace */
+    ZgtX = (vec_uint4)vec_cmpgt( zz, xx );
+    ZgtY = (vec_uint4)vec_cmpgt( zz, yy );
+    YgtX = (vec_uint4)vec_cmpgt( yy, xx );
+    largestXorY = vec_andc( negTrace, vec_and( ZgtX, ZgtY ) ); /* negTrace AND NOT ( ZgtX AND ZgtY ) */
+    largestYorZ = vec_and( negTrace, vec_or( YgtX, ZgtX ) ); /* negTrace AND ( YgtX OR ZgtX ) */
+    largestZorX = vec_andc( negTrace, vec_andc( YgtX, ZgtY ) ); /* negTrace AND NOT ( YgtX AND NOT ZgtY ) */
+    
+    zz = vec_sel( zz, negatef4(zz), largestXorY ); /* where a mask is set, the selected elements are negated before the formulas below are evaluated */
+    xy = vec_sel( xy, negatef4(xy), largestXorY );
+    xx = vec_sel( xx, negatef4(xx), largestYorZ );
+    yz = vec_sel( yz, negatef4(yz), largestYorZ );
+    yy = vec_sel( yy, negatef4(yy), largestZorX );
+    zx = vec_sel( zx, negatef4(zx), largestZorX );
+
+    radicand = vec_add( vec_add( vec_add( xx, yy ), zz ), ((vec_float4){1.0f,1.0f,1.0f,1.0f}) ); /* xx + yy + zz + 1, using the possibly sign-flipped diagonal */
+    scale = vec_madd( ((vec_float4){0.5f,0.5f,0.5f,0.5f}), divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( radicand ) ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); /* scale = 0.5 * ( 1 / sqrt( radicand ) ) */
+
+    tmpx = vec_madd( vec_sub( zy, yz ), scale, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); /* ( zy - yz ) * scale */
+    tmpy = vec_madd( vec_sub( xz, zx ), scale, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); /* ( xz - zx ) * scale */
+    tmpz = vec_madd( vec_sub( yx, xy ), scale, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); /* ( yx - xy ) * scale */
+    tmpw = vec_madd( radicand, scale, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); /* radicand * scale */
+    qx = tmpx;
+    qy = tmpy;
+    qz = tmpz;
+    qw = tmpw;
+
+    qx = vec_sel( qx, tmpw, largestXorY ); /* where largestXorY is set the four values are reversed: ( x, y, z, w ) <- ( tmpw, tmpz, tmpy, tmpx ) */
+    qy = vec_sel( qy, tmpz, largestXorY );
+    qz = vec_sel( qz, tmpy, largestXorY );
+    qw = vec_sel( qw, tmpx, largestXorY );
+    tmpx = qx; /* save qx and qz: the selects below overwrite them before their old values are consumed */
+    tmpz = qz;
+    qx = vec_sel( qx, qy, largestYorZ ); /* where largestYorZ is set, swap x with y and z with w */
+    qy = vec_sel( qy, tmpx, largestYorZ );
+    qz = vec_sel( qz, qw, largestYorZ );
+    qw = vec_sel( qw, tmpz, largestYorZ );
+
+    result->x = qx;
+    result->y = qy;
+    result->z = qz;
+    result->w = qw;
+}
+
+static inline void vmathSoaV3Outer( VmathSoaMatrix3 *result, const VmathSoaVector3 *tfrm0, const VmathSoaVector3 *tfrm1 ) /* Outer product of two 3-vectors (both parameters are vectors despite the tfrm names): column N of *result is tfrm0 scaled by element N of tfrm1. */
+{
+    vmathSoaV3ScalarMul( &result->col0, tfrm0, vmathSoaV3GetX( tfrm1 ) );
+    vmathSoaV3ScalarMul( &result->col1, tfrm0, vmathSoaV3GetY( tfrm1 ) );
+    vmathSoaV3ScalarMul( &result->col2, tfrm0, vmathSoaV3GetZ( tfrm1 ) );
+}
+
+static inline void vmathSoaV4Outer( VmathSoaMatrix4 *result, const VmathSoaVector4 *tfrm0, const VmathSoaVector4 *tfrm1 ) /* Outer product of two 4-vectors (both parameters are vectors despite the tfrm names): column N of *result is tfrm0 scaled by element N of tfrm1. */
+{
+    vmathSoaV4ScalarMul( &result->col0, tfrm0, vmathSoaV4GetX( tfrm1 ) );
+    vmathSoaV4ScalarMul( &result->col1, tfrm0, vmathSoaV4GetY( tfrm1 ) );
+    vmathSoaV4ScalarMul( &result->col2, tfrm0, vmathSoaV4GetZ( tfrm1 ) );
+    vmathSoaV4ScalarMul( &result->col3, tfrm0, vmathSoaV4GetW( tfrm1 ) );
+}
+
+static inline void vmathSoaV3RowMul( VmathSoaVector3 *result, const VmathSoaVector3 *vec, const VmathSoaMatrix3 *mat ) /* Multiply vec as a row vector by mat: element N of *result is the dot product of vec with column N of mat. */
+{
+    vec_float4 tmpX, tmpY, tmpZ;
+    tmpX = vec_add( vec_add( vec_madd( vec->x, mat->col0.x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( vec->y, mat->col0.y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( vec->z, mat->col0.z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ); /* dot( vec, col0 ) */
+    tmpY = vec_add( vec_add( vec_madd( vec->x, mat->col1.x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( vec->y, mat->col1.y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( vec->z, mat->col1.z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ); /* dot( vec, col1 ) */
+    tmpZ = vec_add( vec_add( vec_madd( vec->x, mat->col2.x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( vec->y, mat->col2.y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( vec->z, mat->col2.z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ); /* dot( vec, col2 ) */
+    vmathSoaV3MakeFromElems( result, tmpX, tmpY, tmpZ ); /* results are held in temporaries until all reads of vec are done, then written in one call */
+}
+
+static inline void vmathSoaV3CrossMatrix( VmathSoaMatrix3 *result, const VmathSoaVector3 *vec ) /* Build the skew-symmetric matrix of vec, i.e. the matrix M for which M * v equals cross( vec, v ). */
+{
+    vmathSoaV3MakeFromElems( &result->col0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), vec->z, negatef4( vec->y ) ); /* col0 = ( 0, z, -y ) */
+    vmathSoaV3MakeFromElems( &result->col1, negatef4( vec->z ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), vec->x ); /* col1 = ( -z, 0, x ) */
+    vmathSoaV3MakeFromElems( &result->col2, vec->y, negatef4( vec->x ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); /* col2 = ( y, -x, 0 ) */
+}
+
+static inline void vmathSoaV3CrossMatrixMul( VmathSoaMatrix3 *result, const VmathSoaVector3 *vec, const VmathSoaMatrix3 *mat ) /* Set column N of *result to cross( vec, column N of mat ). */
+{
+    VmathSoaVector3 tmpV3_0, tmpV3_1, tmpV3_2;
+    vmathSoaV3Cross( &tmpV3_0, vec, &mat->col0 );
+    vmathSoaV3Cross( &tmpV3_1, vec, &mat->col1 );
+    vmathSoaV3Cross( &tmpV3_2, vec, &mat->col2 );
+    vmathSoaM3MakeFromCols( result, &tmpV3_0, &tmpV3_1, &tmpV3_2 ); /* columns are staged in temporaries and *result is written only after all three crosses are computed */
+}
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/ppu/c/mat_soa_v.h b/Extras/vectormathlibrary/include/vectormath/ppu/c/mat_soa_v.h
index c8401e3d0..0b16a9553 100644
--- a/Extras/vectormathlibrary/include/vectormath/ppu/c/mat_soa_v.h
+++ b/Extras/vectormathlibrary/include/vectormath/ppu/c/mat_soa_v.h
@@ -1,1063 +1,1063 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_MAT_SOA_V_C_H
-#define _VECTORMATH_MAT_SOA_V_C_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/*-----------------------------------------------------------------------------
- * Constants
- */
-#define _VECTORMATH_PI_OVER_2 1.570796327f
-
-/*-----------------------------------------------------------------------------
- * Definitions
- */
-static inline VmathSoaMatrix3 vmathSoaM3MakeFromScalar_V( vec_float4 scalar )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3MakeFromScalar(&result, scalar);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3MakeFromQ_V( VmathSoaQuat unitQuat )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3MakeFromQ(&result, &unitQuat);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3MakeFromCols_V( VmathSoaVector3 _col0, VmathSoaVector3 _col1, VmathSoaVector3 _col2 )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3MakeFromCols(&result, &_col0, &_col1, &_col2);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3MakeFromAos_V( VmathMatrix3 mat )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3MakeFromAos(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3MakeFrom4Aos_V( VmathMatrix3 mat0, VmathMatrix3 mat1, VmathMatrix3 mat2, VmathMatrix3 mat3 )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3MakeFrom4Aos(&result, &mat0, &mat1, &mat2, &mat3);
-    return result;
-}
-
-static inline void vmathSoaM3Get4Aos_V( VmathSoaMatrix3 mat, VmathMatrix3 *result0, VmathMatrix3 *result1, VmathMatrix3 *result2, VmathMatrix3 *result3 )
-{
-    vmathSoaM3Get4Aos(&mat, result0, result1, result2, result3);
-}
-
-static inline void vmathSoaM3SetCol0_V( VmathSoaMatrix3 *result, VmathSoaVector3 _col0 )
-{
-    vmathSoaM3SetCol0(result, &_col0);
-}
-
-static inline void vmathSoaM3SetCol1_V( VmathSoaMatrix3 *result, VmathSoaVector3 _col1 )
-{
-    vmathSoaM3SetCol1(result, &_col1);
-}
-
-static inline void vmathSoaM3SetCol2_V( VmathSoaMatrix3 *result, VmathSoaVector3 _col2 )
-{
-    vmathSoaM3SetCol2(result, &_col2);
-}
-
-static inline void vmathSoaM3SetCol_V( VmathSoaMatrix3 *result, int col, VmathSoaVector3 vec )
-{
-    vmathSoaM3SetCol(result, col, &vec);
-}
-
-static inline void vmathSoaM3SetRow_V( VmathSoaMatrix3 *result, int row, VmathSoaVector3 vec )
-{
-    vmathSoaM3SetRow(result, row, &vec);
-}
-
-static inline void vmathSoaM3SetElem_V( VmathSoaMatrix3 *result, int col, int row, vec_float4 val )
-{
-    vmathSoaM3SetElem(result, col, row, val);
-}
-
-static inline vec_float4 vmathSoaM3GetElem_V( VmathSoaMatrix3 mat, int col, int row )
-{
-    return vmathSoaM3GetElem(&mat, col, row);
-}
-
-static inline VmathSoaVector3 vmathSoaM3GetCol0_V( VmathSoaMatrix3 mat )
-{
-    VmathSoaVector3 result;
-    vmathSoaM3GetCol0(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaM3GetCol1_V( VmathSoaMatrix3 mat )
-{
-    VmathSoaVector3 result;
-    vmathSoaM3GetCol1(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaM3GetCol2_V( VmathSoaMatrix3 mat )
-{
-    VmathSoaVector3 result;
-    vmathSoaM3GetCol2(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaM3GetCol_V( VmathSoaMatrix3 mat, int col )
-{
-    VmathSoaVector3 result;
-    vmathSoaM3GetCol(&result, &mat, col);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaM3GetRow_V( VmathSoaMatrix3 mat, int row )
-{
-    VmathSoaVector3 result;
-    vmathSoaM3GetRow(&result, &mat, row);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3Transpose_V( VmathSoaMatrix3 mat )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3Transpose(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3Inverse_V( VmathSoaMatrix3 mat )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3Inverse(&result, &mat);
-    return result;
-}
-
-static inline vec_float4 vmathSoaM3Determinant_V( VmathSoaMatrix3 mat )
-{
-    return vmathSoaM3Determinant(&mat);
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3Add_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1 )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3Add(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3Sub_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1 )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3Sub(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3Neg_V( VmathSoaMatrix3 mat )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3Neg(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3AbsPerElem_V( VmathSoaMatrix3 mat )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3AbsPerElem(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3ScalarMul_V( VmathSoaMatrix3 mat, vec_float4 scalar )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3ScalarMul(&result, &mat, scalar);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaM3MulV3_V( VmathSoaMatrix3 mat, VmathSoaVector3 vec )
-{
-    VmathSoaVector3 result;
-    vmathSoaM3MulV3(&result, &mat, &vec);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3Mul_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1 )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3Mul(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3MulPerElem_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1 )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3MulPerElem(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3MakeIdentity_V( )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3MakeIdentity(&result);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3MakeRotationX_V( vec_float4 radians )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3MakeRotationX(&result, radians);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3MakeRotationY_V( vec_float4 radians )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3MakeRotationY(&result, radians);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3MakeRotationZ_V( vec_float4 radians )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3MakeRotationZ(&result, radians);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3MakeRotationZYX_V( VmathSoaVector3 radiansXYZ )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3MakeRotationZYX(&result, &radiansXYZ);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3MakeRotationAxis_V( vec_float4 radians, VmathSoaVector3 unitVec )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3MakeRotationAxis(&result, radians, &unitVec);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3MakeRotationQ_V( VmathSoaQuat unitQuat )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3MakeRotationQ(&result, &unitQuat);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3MakeScale_V( VmathSoaVector3 scaleVec )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3MakeScale(&result, &scaleVec);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3AppendScale_V( VmathSoaMatrix3 mat, VmathSoaVector3 scaleVec )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3AppendScale(&result, &mat, &scaleVec);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3PrependScale_V( VmathSoaVector3 scaleVec, VmathSoaMatrix3 mat )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3PrependScale(&result, &scaleVec, &mat);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3Select_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1, vec_uint4 select1 )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3Select(&result, &mat0, &mat1, select1);
-    return result;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathSoaM3Print_V( VmathSoaMatrix3 mat )
-{
-    vmathSoaM3Print(&mat);
-}
-
-static inline void vmathSoaM3Prints_V( VmathSoaMatrix3 mat, const char *name )
-{
-    vmathSoaM3Prints(&mat, name);
-}
-
-#endif
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeFromScalar_V( vec_float4 scalar )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeFromScalar(&result, scalar);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeFromT3_V( VmathSoaTransform3 mat )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeFromT3(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeFromCols_V( VmathSoaVector4 _col0, VmathSoaVector4 _col1, VmathSoaVector4 _col2, VmathSoaVector4 _col3 )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeFromCols(&result, &_col0, &_col1, &_col2, &_col3);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeFromM3V3_V( VmathSoaMatrix3 mat, VmathSoaVector3 translateVec )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeFromM3V3(&result, &mat, &translateVec);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeFromQV3_V( VmathSoaQuat unitQuat, VmathSoaVector3 translateVec )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeFromQV3(&result, &unitQuat, &translateVec);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeFromAos_V( VmathMatrix4 mat )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeFromAos(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeFrom4Aos_V( VmathMatrix4 mat0, VmathMatrix4 mat1, VmathMatrix4 mat2, VmathMatrix4 mat3 )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeFrom4Aos(&result, &mat0, &mat1, &mat2, &mat3);
-    return result;
-}
-
-static inline void vmathSoaM4Get4Aos_V( VmathSoaMatrix4 mat, VmathMatrix4 *result0, VmathMatrix4 *result1, VmathMatrix4 *result2, VmathMatrix4 *result3 )
-{
-    vmathSoaM4Get4Aos(&mat, result0, result1, result2, result3);
-}
-
-static inline void vmathSoaM4SetCol0_V( VmathSoaMatrix4 *result, VmathSoaVector4 _col0 )
-{
-    vmathSoaM4SetCol0(result, &_col0);
-}
-
-static inline void vmathSoaM4SetCol1_V( VmathSoaMatrix4 *result, VmathSoaVector4 _col1 )
-{
-    vmathSoaM4SetCol1(result, &_col1);
-}
-
-static inline void vmathSoaM4SetCol2_V( VmathSoaMatrix4 *result, VmathSoaVector4 _col2 )
-{
-    vmathSoaM4SetCol2(result, &_col2);
-}
-
-static inline void vmathSoaM4SetCol3_V( VmathSoaMatrix4 *result, VmathSoaVector4 _col3 )
-{
-    vmathSoaM4SetCol3(result, &_col3);
-}
-
-static inline void vmathSoaM4SetCol_V( VmathSoaMatrix4 *result, int col, VmathSoaVector4 vec )
-{
-    vmathSoaM4SetCol(result, col, &vec);
-}
-
-static inline void vmathSoaM4SetRow_V( VmathSoaMatrix4 *result, int row, VmathSoaVector4 vec )
-{
-    vmathSoaM4SetRow(result, row, &vec);
-}
-
-static inline void vmathSoaM4SetElem_V( VmathSoaMatrix4 *result, int col, int row, vec_float4 val )
-{
-    vmathSoaM4SetElem(result, col, row, val);
-}
-
-static inline vec_float4 vmathSoaM4GetElem_V( VmathSoaMatrix4 mat, int col, int row )
-{
-    return vmathSoaM4GetElem(&mat, col, row);
-}
-
-static inline VmathSoaVector4 vmathSoaM4GetCol0_V( VmathSoaMatrix4 mat )
-{
-    VmathSoaVector4 result;
-    vmathSoaM4GetCol0(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaM4GetCol1_V( VmathSoaMatrix4 mat )
-{
-    VmathSoaVector4 result;
-    vmathSoaM4GetCol1(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaM4GetCol2_V( VmathSoaMatrix4 mat )
-{
-    VmathSoaVector4 result;
-    vmathSoaM4GetCol2(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaM4GetCol3_V( VmathSoaMatrix4 mat )
-{
-    VmathSoaVector4 result;
-    vmathSoaM4GetCol3(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaM4GetCol_V( VmathSoaMatrix4 mat, int col )
-{
-    VmathSoaVector4 result;
-    vmathSoaM4GetCol(&result, &mat, col);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaM4GetRow_V( VmathSoaMatrix4 mat, int row )
-{
-    VmathSoaVector4 result;
-    vmathSoaM4GetRow(&result, &mat, row);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4Transpose_V( VmathSoaMatrix4 mat )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4Transpose(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4Inverse_V( VmathSoaMatrix4 mat )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4Inverse(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4AffineInverse_V( VmathSoaMatrix4 mat )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4AffineInverse(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4OrthoInverse_V( VmathSoaMatrix4 mat )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4OrthoInverse(&result, &mat);
-    return result;
-}
-
-static inline vec_float4 vmathSoaM4Determinant_V( VmathSoaMatrix4 mat )
-{
-    return vmathSoaM4Determinant(&mat);
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4Add_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1 )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4Add(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4Sub_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1 )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4Sub(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4Neg_V( VmathSoaMatrix4 mat )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4Neg(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4AbsPerElem_V( VmathSoaMatrix4 mat )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4AbsPerElem(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4ScalarMul_V( VmathSoaMatrix4 mat, vec_float4 scalar )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4ScalarMul(&result, &mat, scalar);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaM4MulV4_V( VmathSoaMatrix4 mat, VmathSoaVector4 vec )
-{
-    VmathSoaVector4 result;
-    vmathSoaM4MulV4(&result, &mat, &vec);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaM4MulV3_V( VmathSoaMatrix4 mat, VmathSoaVector3 vec )
-{
-    VmathSoaVector4 result;
-    vmathSoaM4MulV3(&result, &mat, &vec);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaM4MulP3_V( VmathSoaMatrix4 mat, VmathSoaPoint3 pnt )
-{
-    VmathSoaVector4 result;
-    vmathSoaM4MulP3(&result, &mat, &pnt);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4Mul_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1 )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4Mul(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MulT3_V( VmathSoaMatrix4 mat, VmathSoaTransform3 tfrm1 )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MulT3(&result, &mat, &tfrm1);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MulPerElem_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1 )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MulPerElem(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeIdentity_V( )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeIdentity(&result);
-    return result;
-}
-
-static inline void vmathSoaM4SetUpper3x3_V( VmathSoaMatrix4 *result, VmathSoaMatrix3 mat3 )
-{
-    vmathSoaM4SetUpper3x3(result, &mat3);
-}
-
-static inline VmathSoaMatrix3 vmathSoaM4GetUpper3x3_V( VmathSoaMatrix4 mat )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM4GetUpper3x3(&result, &mat);
-    return result;
-}
-
-static inline void vmathSoaM4SetTranslation_V( VmathSoaMatrix4 *result, VmathSoaVector3 translateVec )
-{
-    vmathSoaM4SetTranslation(result, &translateVec);
-}
-
-static inline VmathSoaVector3 vmathSoaM4GetTranslation_V( VmathSoaMatrix4 mat )
-{
-    VmathSoaVector3 result;
-    vmathSoaM4GetTranslation(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeRotationX_V( vec_float4 radians )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeRotationX(&result, radians);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeRotationY_V( vec_float4 radians )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeRotationY(&result, radians);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeRotationZ_V( vec_float4 radians )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeRotationZ(&result, radians);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeRotationZYX_V( VmathSoaVector3 radiansXYZ )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeRotationZYX(&result, &radiansXYZ);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeRotationAxis_V( vec_float4 radians, VmathSoaVector3 unitVec )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeRotationAxis(&result, radians, &unitVec);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeRotationQ_V( VmathSoaQuat unitQuat )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeRotationQ(&result, &unitQuat);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeScale_V( VmathSoaVector3 scaleVec )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeScale(&result, &scaleVec);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4AppendScale_V( VmathSoaMatrix4 mat, VmathSoaVector3 scaleVec )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4AppendScale(&result, &mat, &scaleVec);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4PrependScale_V( VmathSoaVector3 scaleVec, VmathSoaMatrix4 mat )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4PrependScale(&result, &scaleVec, &mat);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeTranslation_V( VmathSoaVector3 translateVec )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeTranslation(&result, &translateVec);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeLookAt_V( VmathSoaPoint3 eyePos, VmathSoaPoint3 lookAtPos, VmathSoaVector3 upVec )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeLookAt(&result, &eyePos, &lookAtPos, &upVec);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakePerspective_V( vec_float4 fovyRadians, vec_float4 aspect, vec_float4 zNear, vec_float4 zFar )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakePerspective(&result, fovyRadians, aspect, zNear, zFar);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeFrustum_V( vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeFrustum(&result, left, right, bottom, top, zNear, zFar);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeOrthographic_V( vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeOrthographic(&result, left, right, bottom, top, zNear, zFar);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4Select_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1, vec_uint4 select1 )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4Select(&result, &mat0, &mat1, select1);
-    return result;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathSoaM4Print_V( VmathSoaMatrix4 mat )
-{
-    vmathSoaM4Print(&mat);
-}
-
-static inline void vmathSoaM4Prints_V( VmathSoaMatrix4 mat, const char *name )
-{
-    vmathSoaM4Prints(&mat, name);
-}
-
-#endif
-
-static inline VmathSoaTransform3 vmathSoaT3MakeFromScalar_V( vec_float4 scalar )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3MakeFromScalar(&result, scalar);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3MakeFromCols_V( VmathSoaVector3 _col0, VmathSoaVector3 _col1, VmathSoaVector3 _col2, VmathSoaVector3 _col3 )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3MakeFromCols(&result, &_col0, &_col1, &_col2, &_col3);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3MakeFromM3V3_V( VmathSoaMatrix3 tfrm, VmathSoaVector3 translateVec )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3MakeFromM3V3(&result, &tfrm, &translateVec);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3MakeFromQV3_V( VmathSoaQuat unitQuat, VmathSoaVector3 translateVec )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3MakeFromQV3(&result, &unitQuat, &translateVec);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3MakeFromAos_V( VmathTransform3 tfrm )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3MakeFromAos(&result, &tfrm);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3MakeFrom4Aos_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1, VmathTransform3 tfrm2, VmathTransform3 tfrm3 )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3MakeFrom4Aos(&result, &tfrm0, &tfrm1, &tfrm2, &tfrm3);
-    return result;
-}
-
-static inline void vmathSoaT3Get4Aos_V( VmathSoaTransform3 tfrm, VmathTransform3 *result0, VmathTransform3 *result1, VmathTransform3 *result2, VmathTransform3 *result3 )
-{
-    vmathSoaT3Get4Aos(&tfrm, result0, result1, result2, result3);
-}
-
-static inline void vmathSoaT3SetCol0_V( VmathSoaTransform3 *result, VmathSoaVector3 _col0 )
-{
-    vmathSoaT3SetCol0(result, &_col0);
-}
-
-static inline void vmathSoaT3SetCol1_V( VmathSoaTransform3 *result, VmathSoaVector3 _col1 )
-{
-    vmathSoaT3SetCol1(result, &_col1);
-}
-
-static inline void vmathSoaT3SetCol2_V( VmathSoaTransform3 *result, VmathSoaVector3 _col2 )
-{
-    vmathSoaT3SetCol2(result, &_col2);
-}
-
-static inline void vmathSoaT3SetCol3_V( VmathSoaTransform3 *result, VmathSoaVector3 _col3 )
-{
-    vmathSoaT3SetCol3(result, &_col3);
-}
-
-static inline void vmathSoaT3SetCol_V( VmathSoaTransform3 *result, int col, VmathSoaVector3 vec )
-{
-    vmathSoaT3SetCol(result, col, &vec);
-}
-
-static inline void vmathSoaT3SetRow_V( VmathSoaTransform3 *result, int row, VmathSoaVector4 vec )
-{
-    vmathSoaT3SetRow(result, row, &vec);
-}
-
-static inline void vmathSoaT3SetElem_V( VmathSoaTransform3 *result, int col, int row, vec_float4 val )
-{
-    vmathSoaT3SetElem(result, col, row, val);
-}
-
-static inline vec_float4 vmathSoaT3GetElem_V( VmathSoaTransform3 tfrm, int col, int row )
-{
-    return vmathSoaT3GetElem(&tfrm, col, row);
-}
-
-static inline VmathSoaVector3 vmathSoaT3GetCol0_V( VmathSoaTransform3 tfrm )
-{
-    VmathSoaVector3 result;
-    vmathSoaT3GetCol0(&result, &tfrm);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaT3GetCol1_V( VmathSoaTransform3 tfrm )
-{
-    VmathSoaVector3 result;
-    vmathSoaT3GetCol1(&result, &tfrm);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaT3GetCol2_V( VmathSoaTransform3 tfrm )
-{
-    VmathSoaVector3 result;
-    vmathSoaT3GetCol2(&result, &tfrm);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaT3GetCol3_V( VmathSoaTransform3 tfrm )
-{
-    VmathSoaVector3 result;
-    vmathSoaT3GetCol3(&result, &tfrm);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaT3GetCol_V( VmathSoaTransform3 tfrm, int col )
-{
-    VmathSoaVector3 result;
-    vmathSoaT3GetCol(&result, &tfrm, col);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaT3GetRow_V( VmathSoaTransform3 tfrm, int row )
-{
-    VmathSoaVector4 result;
-    vmathSoaT3GetRow(&result, &tfrm, row);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3Inverse_V( VmathSoaTransform3 tfrm )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3Inverse(&result, &tfrm);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3OrthoInverse_V( VmathSoaTransform3 tfrm )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3OrthoInverse(&result, &tfrm);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3AbsPerElem_V( VmathSoaTransform3 tfrm )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3AbsPerElem(&result, &tfrm);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaT3MulV3_V( VmathSoaTransform3 tfrm, VmathSoaVector3 vec )
-{
-    VmathSoaVector3 result;
-    vmathSoaT3MulV3(&result, &tfrm, &vec);
-    return result;
-}
-
-static inline VmathSoaPoint3 vmathSoaT3MulP3_V( VmathSoaTransform3 tfrm, VmathSoaPoint3 pnt )
-{
-    VmathSoaPoint3 result;
-    vmathSoaT3MulP3(&result, &tfrm, &pnt);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3Mul_V( VmathSoaTransform3 tfrm0, VmathSoaTransform3 tfrm1 )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3Mul(&result, &tfrm0, &tfrm1);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3MulPerElem_V( VmathSoaTransform3 tfrm0, VmathSoaTransform3 tfrm1 )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3MulPerElem(&result, &tfrm0, &tfrm1);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3MakeIdentity_V( )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3MakeIdentity(&result);
-    return result;
-}
-
-static inline void vmathSoaT3SetUpper3x3_V( VmathSoaTransform3 *result, VmathSoaMatrix3 tfrm )
-{
-    vmathSoaT3SetUpper3x3(result, &tfrm);
-}
-
-static inline VmathSoaMatrix3 vmathSoaT3GetUpper3x3_V( VmathSoaTransform3 tfrm )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaT3GetUpper3x3(&result, &tfrm);
-    return result;
-}
-
-static inline void vmathSoaT3SetTranslation_V( VmathSoaTransform3 *result, VmathSoaVector3 translateVec )
-{
-    vmathSoaT3SetTranslation(result, &translateVec);
-}
-
-static inline VmathSoaVector3 vmathSoaT3GetTranslation_V( VmathSoaTransform3 tfrm )
-{
-    VmathSoaVector3 result;
-    vmathSoaT3GetTranslation(&result, &tfrm);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3MakeRotationX_V( vec_float4 radians )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3MakeRotationX(&result, radians);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3MakeRotationY_V( vec_float4 radians )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3MakeRotationY(&result, radians);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3MakeRotationZ_V( vec_float4 radians )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3MakeRotationZ(&result, radians);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3MakeRotationZYX_V( VmathSoaVector3 radiansXYZ )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3MakeRotationZYX(&result, &radiansXYZ);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3MakeRotationAxis_V( vec_float4 radians, VmathSoaVector3 unitVec )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3MakeRotationAxis(&result, radians, &unitVec);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3MakeRotationQ_V( VmathSoaQuat unitQuat )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3MakeRotationQ(&result, &unitQuat);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3MakeScale_V( VmathSoaVector3 scaleVec )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3MakeScale(&result, &scaleVec);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3AppendScale_V( VmathSoaTransform3 tfrm, VmathSoaVector3 scaleVec )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3AppendScale(&result, &tfrm, &scaleVec);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3PrependScale_V( VmathSoaVector3 scaleVec, VmathSoaTransform3 tfrm )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3PrependScale(&result, &scaleVec, &tfrm);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3MakeTranslation_V( VmathSoaVector3 translateVec )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3MakeTranslation(&result, &translateVec);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3Select_V( VmathSoaTransform3 tfrm0, VmathSoaTransform3 tfrm1, vec_uint4 select1 )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3Select(&result, &tfrm0, &tfrm1, select1);
-    return result;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathSoaT3Print_V( VmathSoaTransform3 tfrm )
-{
-    vmathSoaT3Print(&tfrm);
-}
-
-static inline void vmathSoaT3Prints_V( VmathSoaTransform3 tfrm, const char *name )
-{
-    vmathSoaT3Prints(&tfrm, name);
-}
-
-#endif
-
-static inline VmathSoaQuat vmathSoaQMakeFromM3_V( VmathSoaMatrix3 tfrm )
-{
-    VmathSoaQuat result;
-    vmathSoaQMakeFromM3(&result, &tfrm);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaV3Outer_V( VmathSoaVector3 tfrm0, VmathSoaVector3 tfrm1 )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaV3Outer(&result, &tfrm0, &tfrm1);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaV4Outer_V( VmathSoaVector4 tfrm0, VmathSoaVector4 tfrm1 )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaV4Outer(&result, &tfrm0, &tfrm1);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3RowMul_V( VmathSoaVector3 vec, VmathSoaMatrix3 mat )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3RowMul(&result, &vec, &mat);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaV3CrossMatrix_V( VmathSoaVector3 vec )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaV3CrossMatrix(&result, &vec);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaV3CrossMatrixMul_V( VmathSoaVector3 vec, VmathSoaMatrix3 mat )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaV3CrossMatrixMul(&result, &vec, &mat);
-    return result;
-}
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_MAT_SOA_V_C_H
+#define _VECTORMATH_MAT_SOA_V_C_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*-----------------------------------------------------------------------------
+ * Constants
+ */
+#define _VECTORMATH_PI_OVER_2 1.570796327f /* pi/2 written as a single-precision literal */
+
+/*-----------------------------------------------------------------------------
+ * Definitions
+ */
+static inline VmathSoaMatrix3 vmathSoaM3MakeFromScalar_V( vec_float4 scalar ) /* by-value form of vmathSoaM3MakeFromScalar */
+{
+    VmathSoaMatrix3 result; /* not initialised here; its address is handed to the callee, which is expected to fill it */
+    vmathSoaM3MakeFromScalar(&result, scalar);
+    return result; /* returned to the caller by value */
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3MakeFromQ_V( VmathSoaQuat unitQuat )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3MakeFromQ(&result, &unitQuat);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3MakeFromCols_V( VmathSoaVector3 _col0, VmathSoaVector3 _col1, VmathSoaVector3 _col2 )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3MakeFromCols(&result, &_col0, &_col1, &_col2);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3MakeFromAos_V( VmathMatrix3 mat )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3MakeFromAos(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3MakeFrom4Aos_V( VmathMatrix3 mat0, VmathMatrix3 mat1, VmathMatrix3 mat2, VmathMatrix3 mat3 )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3MakeFrom4Aos(&result, &mat0, &mat1, &mat2, &mat3);
+    return result;
+}
+
+static inline void vmathSoaM3Get4Aos_V( VmathSoaMatrix3 mat, VmathMatrix3 *result0, VmathMatrix3 *result1, VmathMatrix3 *result2, VmathMatrix3 *result3 ) /* by-value input; returns nothing, the four result pointers are forwarded unchanged */
+{
+    vmathSoaM3Get4Aos(&mat, result0, result1, result2, result3); /* &mat is the address of this wrapper's own copy of the argument */
+}
+
+static inline void vmathSoaM3SetCol0_V( VmathSoaMatrix3 *result, VmathSoaVector3 _col0 )
+{
+    vmathSoaM3SetCol0(result, &_col0);
+}
+
+static inline void vmathSoaM3SetCol1_V( VmathSoaMatrix3 *result, VmathSoaVector3 _col1 )
+{
+    vmathSoaM3SetCol1(result, &_col1);
+}
+
+static inline void vmathSoaM3SetCol2_V( VmathSoaMatrix3 *result, VmathSoaVector3 _col2 )
+{
+    vmathSoaM3SetCol2(result, &_col2);
+}
+
+static inline void vmathSoaM3SetCol_V( VmathSoaMatrix3 *result, int col, VmathSoaVector3 vec )
+{
+    vmathSoaM3SetCol(result, col, &vec);
+}
+
+static inline void vmathSoaM3SetRow_V( VmathSoaMatrix3 *result, int row, VmathSoaVector3 vec )
+{
+    vmathSoaM3SetRow(result, row, &vec);
+}
+
+static inline void vmathSoaM3SetElem_V( VmathSoaMatrix3 *result, int col, int row, vec_float4 val )
+{
+    vmathSoaM3SetElem(result, col, row, val);
+}
+
+static inline vec_float4 vmathSoaM3GetElem_V( VmathSoaMatrix3 mat, int col, int row )
+{
+    return vmathSoaM3GetElem(&mat, col, row);
+}
+
+static inline VmathSoaVector3 vmathSoaM3GetCol0_V( VmathSoaMatrix3 mat )
+{
+    VmathSoaVector3 result;
+    vmathSoaM3GetCol0(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaM3GetCol1_V( VmathSoaMatrix3 mat )
+{
+    VmathSoaVector3 result;
+    vmathSoaM3GetCol1(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaM3GetCol2_V( VmathSoaMatrix3 mat )
+{
+    VmathSoaVector3 result;
+    vmathSoaM3GetCol2(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaM3GetCol_V( VmathSoaMatrix3 mat, int col )
+{
+    VmathSoaVector3 result;
+    vmathSoaM3GetCol(&result, &mat, col);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaM3GetRow_V( VmathSoaMatrix3 mat, int row )
+{
+    VmathSoaVector3 result;
+    vmathSoaM3GetRow(&result, &mat, row);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3Transpose_V( VmathSoaMatrix3 mat )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3Transpose(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3Inverse_V( VmathSoaMatrix3 mat )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3Inverse(&result, &mat);
+    return result;
+}
+
+static inline vec_float4 vmathSoaM3Determinant_V( VmathSoaMatrix3 mat ) /* by-value form of vmathSoaM3Determinant */
+{
+    return vmathSoaM3Determinant(&mat); /* the callee's vec_float4 return value is passed straight through, so no local temporary is needed */
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3Add_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1 )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3Add(&result, &mat0, &mat1);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3Sub_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1 )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3Sub(&result, &mat0, &mat1);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3Neg_V( VmathSoaMatrix3 mat )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3Neg(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3AbsPerElem_V( VmathSoaMatrix3 mat )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3AbsPerElem(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3ScalarMul_V( VmathSoaMatrix3 mat, vec_float4 scalar )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3ScalarMul(&result, &mat, scalar);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaM3MulV3_V( VmathSoaMatrix3 mat, VmathSoaVector3 vec )
+{
+    VmathSoaVector3 result;
+    vmathSoaM3MulV3(&result, &mat, &vec);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3Mul_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1 )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3Mul(&result, &mat0, &mat1);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3MulPerElem_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1 )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3MulPerElem(&result, &mat0, &mat1);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3MakeIdentity_V( void ) /* by-value form of vmathSoaM3MakeIdentity; "( void )" gives C a true zero-argument prototype */
+{
+    VmathSoaMatrix3 result; /* not initialised here; its address is handed to the callee, which is expected to fill it */
+    vmathSoaM3MakeIdentity(&result);
+    return result; /* returned to the caller by value */
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3MakeRotationX_V( vec_float4 radians )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3MakeRotationX(&result, radians);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3MakeRotationY_V( vec_float4 radians )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3MakeRotationY(&result, radians);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3MakeRotationZ_V( vec_float4 radians )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3MakeRotationZ(&result, radians);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3MakeRotationZYX_V( VmathSoaVector3 radiansXYZ )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3MakeRotationZYX(&result, &radiansXYZ);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3MakeRotationAxis_V( vec_float4 radians, VmathSoaVector3 unitVec )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3MakeRotationAxis(&result, radians, &unitVec);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3MakeRotationQ_V( VmathSoaQuat unitQuat )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3MakeRotationQ(&result, &unitQuat);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3MakeScale_V( VmathSoaVector3 scaleVec )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3MakeScale(&result, &scaleVec);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3AppendScale_V( VmathSoaMatrix3 mat, VmathSoaVector3 scaleVec )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3AppendScale(&result, &mat, &scaleVec);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3PrependScale_V( VmathSoaVector3 scaleVec, VmathSoaMatrix3 mat )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3PrependScale(&result, &scaleVec, &mat);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3Select_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1, vec_uint4 select1 )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3Select(&result, &mat0, &mat1, select1);
+    return result;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathSoaM3Print_V( VmathSoaMatrix3 mat ) /* by-value form of vmathSoaM3Print; compiled only when _VECTORMATH_DEBUG is defined */
+{
+    vmathSoaM3Print(&mat);
+}
+
+static inline void vmathSoaM3Prints_V( VmathSoaMatrix3 mat, const char *name ) /* by-value form of vmathSoaM3Prints; name is forwarded unchanged */
+{
+    vmathSoaM3Prints(&mat, name);
+}
+
+#endif /* _VECTORMATH_DEBUG */
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeFromScalar_V( vec_float4 scalar )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MakeFromScalar(&result, scalar);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeFromT3_V( VmathSoaTransform3 mat ) /* note: despite its name, mat is a VmathSoaTransform3, not a matrix type */
+{
+    VmathSoaMatrix4 result; /* not initialised here; its address is handed to the callee, which is expected to fill it */
+    vmathSoaM4MakeFromT3(&result, &mat);
+    return result; /* returned to the caller by value */
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeFromCols_V( VmathSoaVector4 _col0, VmathSoaVector4 _col1, VmathSoaVector4 _col2, VmathSoaVector4 _col3 )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MakeFromCols(&result, &_col0, &_col1, &_col2, &_col3);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeFromM3V3_V( VmathSoaMatrix3 mat, VmathSoaVector3 translateVec )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MakeFromM3V3(&result, &mat, &translateVec);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeFromQV3_V( VmathSoaQuat unitQuat, VmathSoaVector3 translateVec )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MakeFromQV3(&result, &unitQuat, &translateVec);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeFromAos_V( VmathMatrix4 mat )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MakeFromAos(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeFrom4Aos_V( VmathMatrix4 mat0, VmathMatrix4 mat1, VmathMatrix4 mat2, VmathMatrix4 mat3 )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MakeFrom4Aos(&result, &mat0, &mat1, &mat2, &mat3);
+    return result;
+}
+
+static inline void vmathSoaM4Get4Aos_V( VmathSoaMatrix4 mat, VmathMatrix4 *result0, VmathMatrix4 *result1, VmathMatrix4 *result2, VmathMatrix4 *result3 ) /* by-value input; returns nothing, the four result pointers are forwarded unchanged */
+{
+    vmathSoaM4Get4Aos(&mat, result0, result1, result2, result3); /* &mat is the address of this wrapper's own copy of the argument */
+}
+
+static inline void vmathSoaM4SetCol0_V( VmathSoaMatrix4 *result, VmathSoaVector4 _col0 )
+{
+    vmathSoaM4SetCol0(result, &_col0);
+}
+
+static inline void vmathSoaM4SetCol1_V( VmathSoaMatrix4 *result, VmathSoaVector4 _col1 )
+{
+    vmathSoaM4SetCol1(result, &_col1);
+}
+
+static inline void vmathSoaM4SetCol2_V( VmathSoaMatrix4 *result, VmathSoaVector4 _col2 )
+{
+    vmathSoaM4SetCol2(result, &_col2);
+}
+
+static inline void vmathSoaM4SetCol3_V( VmathSoaMatrix4 *result, VmathSoaVector4 _col3 )
+{
+    vmathSoaM4SetCol3(result, &_col3);
+}
+
+static inline void vmathSoaM4SetCol_V( VmathSoaMatrix4 *result, int col, VmathSoaVector4 vec )
+{
+    vmathSoaM4SetCol(result, col, &vec);
+}
+
+static inline void vmathSoaM4SetRow_V( VmathSoaMatrix4 *result, int row, VmathSoaVector4 vec )
+{
+    vmathSoaM4SetRow(result, row, &vec);
+}
+
+static inline void vmathSoaM4SetElem_V( VmathSoaMatrix4 *result, int col, int row, vec_float4 val )
+{
+    vmathSoaM4SetElem(result, col, row, val);
+}
+
+static inline vec_float4 vmathSoaM4GetElem_V( VmathSoaMatrix4 mat, int col, int row )
+{
+    return vmathSoaM4GetElem(&mat, col, row);
+}
+
+static inline VmathSoaVector4 vmathSoaM4GetCol0_V( VmathSoaMatrix4 mat )
+{
+    VmathSoaVector4 result;
+    vmathSoaM4GetCol0(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaM4GetCol1_V( VmathSoaMatrix4 mat )
+{
+    VmathSoaVector4 result;
+    vmathSoaM4GetCol1(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaM4GetCol2_V( VmathSoaMatrix4 mat )
+{
+    VmathSoaVector4 result;
+    vmathSoaM4GetCol2(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaM4GetCol3_V( VmathSoaMatrix4 mat )
+{
+    VmathSoaVector4 result;
+    vmathSoaM4GetCol3(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaM4GetCol_V( VmathSoaMatrix4 mat, int col )
+{
+    VmathSoaVector4 result;
+    vmathSoaM4GetCol(&result, &mat, col);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaM4GetRow_V( VmathSoaMatrix4 mat, int row )
+{
+    VmathSoaVector4 result;
+    vmathSoaM4GetRow(&result, &mat, row);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4Transpose_V( VmathSoaMatrix4 mat )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4Transpose(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4Inverse_V( VmathSoaMatrix4 mat )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4Inverse(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4AffineInverse_V( VmathSoaMatrix4 mat )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4AffineInverse(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4OrthoInverse_V( VmathSoaMatrix4 mat )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4OrthoInverse(&result, &mat);
+    return result;
+}
+
+static inline vec_float4 vmathSoaM4Determinant_V( VmathSoaMatrix4 mat )
+{
+    return vmathSoaM4Determinant(&mat);
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4Add_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1 )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4Add(&result, &mat0, &mat1);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4Sub_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1 )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4Sub(&result, &mat0, &mat1);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4Neg_V( VmathSoaMatrix4 mat )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4Neg(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4AbsPerElem_V( VmathSoaMatrix4 mat )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4AbsPerElem(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4ScalarMul_V( VmathSoaMatrix4 mat, vec_float4 scalar )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4ScalarMul(&result, &mat, scalar);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaM4MulV4_V( VmathSoaMatrix4 mat, VmathSoaVector4 vec )
+{
+    VmathSoaVector4 result;
+    vmathSoaM4MulV4(&result, &mat, &vec);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaM4MulV3_V( VmathSoaMatrix4 mat, VmathSoaVector3 vec ) /* operand is a 3-component vector, but the result type is VmathSoaVector4 */
+{
+    VmathSoaVector4 result; /* not initialised here; its address is handed to the callee, which is expected to fill it */
+    vmathSoaM4MulV3(&result, &mat, &vec);
+    return result; /* returned to the caller by value */
+}
+
+static inline VmathSoaVector4 vmathSoaM4MulP3_V( VmathSoaMatrix4 mat, VmathSoaPoint3 pnt ) /* operand is a VmathSoaPoint3, but the result type is VmathSoaVector4 */
+{
+    VmathSoaVector4 result; /* not initialised here; its address is handed to the callee, which is expected to fill it */
+    vmathSoaM4MulP3(&result, &mat, &pnt);
+    return result; /* returned to the caller by value */
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4Mul_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1 )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4Mul(&result, &mat0, &mat1);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MulT3_V( VmathSoaMatrix4 mat, VmathSoaTransform3 tfrm1 ) /* mixed operands: a VmathSoaMatrix4 first, a VmathSoaTransform3 second; result is a VmathSoaMatrix4 */
+{
+    VmathSoaMatrix4 result; /* not initialised here; its address is handed to the callee, which is expected to fill it */
+    vmathSoaM4MulT3(&result, &mat, &tfrm1); /* operand order is preserved from the wrapper's parameter list */
+    return result; /* returned to the caller by value */
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MulPerElem_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1 )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MulPerElem(&result, &mat0, &mat1);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeIdentity_V( void ) /* by-value form of vmathSoaM4MakeIdentity; "( void )" gives C a true zero-argument prototype */
+{
+    VmathSoaMatrix4 result; /* not initialised here; its address is handed to the callee, which is expected to fill it */
+    vmathSoaM4MakeIdentity(&result);
+    return result; /* returned to the caller by value */
+}
+
+static inline void vmathSoaM4SetUpper3x3_V( VmathSoaMatrix4 *result, VmathSoaMatrix3 mat3 ) /* result is the caller's matrix, still passed by pointer; only mat3 is taken by value */
+{
+    vmathSoaM4SetUpper3x3(result, &mat3); /* result is forwarded as-is; &mat3 is the address of this wrapper's own copy */
+}
+
+static inline VmathSoaMatrix3 vmathSoaM4GetUpper3x3_V( VmathSoaMatrix4 mat )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM4GetUpper3x3(&result, &mat);
+    return result;
+}
+
+static inline void vmathSoaM4SetTranslation_V( VmathSoaMatrix4 *result, VmathSoaVector3 translateVec )
+{
+    vmathSoaM4SetTranslation(result, &translateVec);
+}
+
+static inline VmathSoaVector3 vmathSoaM4GetTranslation_V( VmathSoaMatrix4 mat )
+{
+    VmathSoaVector3 result;
+    vmathSoaM4GetTranslation(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeRotationX_V( vec_float4 radians )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MakeRotationX(&result, radians);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeRotationY_V( vec_float4 radians )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MakeRotationY(&result, radians);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeRotationZ_V( vec_float4 radians )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MakeRotationZ(&result, radians);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeRotationZYX_V( VmathSoaVector3 radiansXYZ )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MakeRotationZYX(&result, &radiansXYZ);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeRotationAxis_V( vec_float4 radians, VmathSoaVector3 unitVec )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MakeRotationAxis(&result, radians, &unitVec);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeRotationQ_V( VmathSoaQuat unitQuat )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MakeRotationQ(&result, &unitQuat);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeScale_V( VmathSoaVector3 scaleVec )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MakeScale(&result, &scaleVec);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4AppendScale_V( VmathSoaMatrix4 mat, VmathSoaVector3 scaleVec )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4AppendScale(&result, &mat, &scaleVec);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4PrependScale_V( VmathSoaVector3 scaleVec, VmathSoaMatrix4 mat )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4PrependScale(&result, &scaleVec, &mat);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeTranslation_V( VmathSoaVector3 translateVec )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MakeTranslation(&result, &translateVec);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeLookAt_V( VmathSoaPoint3 eyePos, VmathSoaPoint3 lookAtPos, VmathSoaVector3 upVec ) /* two VmathSoaPoint3 arguments followed by one VmathSoaVector3 */
+{
+    VmathSoaMatrix4 result; /* not initialised here; its address is handed to the callee, which is expected to fill it */
+    vmathSoaM4MakeLookAt(&result, &eyePos, &lookAtPos, &upVec); /* argument order is preserved from the wrapper's parameter list */
+    return result; /* returned to the caller by value */
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakePerspective_V( vec_float4 fovyRadians, vec_float4 aspect, vec_float4 zNear, vec_float4 zFar )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MakePerspective(&result, fovyRadians, aspect, zNear, zFar);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeFrustum_V( vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MakeFrustum(&result, left, right, bottom, top, zNear, zFar);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeOrthographic_V( vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MakeOrthographic(&result, left, right, bottom, top, zNear, zFar);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4Select_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1, vec_uint4 select1 )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4Select(&result, &mat0, &mat1, select1);
+    return result;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathSoaM4Print_V( VmathSoaMatrix4 mat ) /* by-value form of vmathSoaM4Print; compiled only when _VECTORMATH_DEBUG is defined */
+{
+    vmathSoaM4Print(&mat);
+}
+
+static inline void vmathSoaM4Prints_V( VmathSoaMatrix4 mat, const char *name ) /* by-value form of vmathSoaM4Prints; name is forwarded unchanged */
+{
+    vmathSoaM4Prints(&mat, name);
+}
+
+#endif /* _VECTORMATH_DEBUG */
+
+static inline VmathSoaTransform3 vmathSoaT3MakeFromScalar_V( vec_float4 scalar )
+{
+    VmathSoaTransform3 result;
+    vmathSoaT3MakeFromScalar(&result, scalar);
+    return result;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3MakeFromCols_V( VmathSoaVector3 _col0, VmathSoaVector3 _col1, VmathSoaVector3 _col2, VmathSoaVector3 _col3 )
+{
+    VmathSoaTransform3 result;
+    vmathSoaT3MakeFromCols(&result, &_col0, &_col1, &_col2, &_col3);
+    return result;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3MakeFromM3V3_V( VmathSoaMatrix3 tfrm, VmathSoaVector3 translateVec )
+{
+    VmathSoaTransform3 result;
+    vmathSoaT3MakeFromM3V3(&result, &tfrm, &translateVec);
+    return result;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3MakeFromQV3_V( VmathSoaQuat unitQuat, VmathSoaVector3 translateVec )
+{
+    VmathSoaTransform3 result;
+    vmathSoaT3MakeFromQV3(&result, &unitQuat, &translateVec);
+    return result;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3MakeFromAos_V( VmathTransform3 tfrm )
+{
+    VmathSoaTransform3 result;
+    vmathSoaT3MakeFromAos(&result, &tfrm);
+    return result;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3MakeFrom4Aos_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1, VmathTransform3 tfrm2, VmathTransform3 tfrm3 )
+{ /* By-value form of vmathSoaT3MakeFrom4Aos: passes the four AoS transforms by address and returns the SoA output. */
+    VmathSoaTransform3 out;
+    vmathSoaT3MakeFrom4Aos( &out, &tfrm0, &tfrm1, &tfrm2, &tfrm3 );
+    return out;
+}
+
+static inline void vmathSoaT3Get4Aos_V( VmathSoaTransform3 tfrm, VmathTransform3 *result0, VmathTransform3 *result1, VmathTransform3 *result2, VmathTransform3 *result3 )
+{ /* By-value form of vmathSoaT3Get4Aos: tfrm is passed by address; the four output pointers are forwarded as-is. */
+    vmathSoaT3Get4Aos(&tfrm, result0, result1, result2, result3);
+}
+
+static inline void vmathSoaT3SetCol0_V( VmathSoaTransform3 *result, VmathSoaVector3 _col0 )
+{ /* By-value form of vmathSoaT3SetCol0: result is updated in place; the column is passed by address. */
+    vmathSoaT3SetCol0(result, &_col0);
+}
+
+static inline void vmathSoaT3SetCol1_V( VmathSoaTransform3 *result, VmathSoaVector3 _col1 )
+{ /* By-value form of vmathSoaT3SetCol1: result is updated in place; the column is passed by address. */
+    vmathSoaT3SetCol1(result, &_col1);
+}
+
+static inline void vmathSoaT3SetCol2_V( VmathSoaTransform3 *result, VmathSoaVector3 _col2 )
+{ /* By-value form of vmathSoaT3SetCol2: result is updated in place; the column is passed by address. */
+    vmathSoaT3SetCol2(result, &_col2);
+}
+
+static inline void vmathSoaT3SetCol3_V( VmathSoaTransform3 *result, VmathSoaVector3 _col3 )
+{ /* By-value form of vmathSoaT3SetCol3: result is updated in place; the column is passed by address. */
+    vmathSoaT3SetCol3(result, &_col3);
+}
+
+static inline void vmathSoaT3SetCol_V( VmathSoaTransform3 *result, int col, VmathSoaVector3 vec )
+{ /* By-value form of vmathSoaT3SetCol: col is forwarded unchanged; the vector is passed by address. */
+    vmathSoaT3SetCol(result, col, &vec);
+}
+
+static inline void vmathSoaT3SetRow_V( VmathSoaTransform3 *result, int row, VmathSoaVector4 vec )
+{ /* By-value form of vmathSoaT3SetRow: row is forwarded unchanged; the 4-component vector is passed by address. */
+    vmathSoaT3SetRow(result, row, &vec);
+}
+
+static inline void vmathSoaT3SetElem_V( VmathSoaTransform3 *result, int col, int row, vec_float4 val )
+{ /* Same arguments as vmathSoaT3SetElem, forwarded unchanged; provided so the _V API has a matching name. */
+    vmathSoaT3SetElem(result, col, row, val);
+}
+
+static inline vec_float4 vmathSoaT3GetElem_V( VmathSoaTransform3 tfrm, int col, int row )
+{ /* By-value form of vmathSoaT3GetElem: tfrm is passed by address and the callee's return value is returned. */
+    return vmathSoaT3GetElem(&tfrm, col, row);
+}
+
+static inline VmathSoaVector3 vmathSoaT3GetCol0_V( VmathSoaTransform3 tfrm )
+{ /* By-value form of vmathSoaT3GetCol0: passes tfrm by address and returns the output vector. */
+    VmathSoaVector3 column;
+    vmathSoaT3GetCol0( &column, &tfrm );
+    return column;
+}
+
+static inline VmathSoaVector3 vmathSoaT3GetCol1_V( VmathSoaTransform3 tfrm )
+{ /* By-value form of vmathSoaT3GetCol1: passes tfrm by address and returns the output vector. */
+    VmathSoaVector3 column;
+    vmathSoaT3GetCol1( &column, &tfrm );
+    return column;
+}
+
+static inline VmathSoaVector3 vmathSoaT3GetCol2_V( VmathSoaTransform3 tfrm )
+{ /* By-value form of vmathSoaT3GetCol2: passes tfrm by address and returns the output vector. */
+    VmathSoaVector3 column;
+    vmathSoaT3GetCol2( &column, &tfrm );
+    return column;
+}
+
+static inline VmathSoaVector3 vmathSoaT3GetCol3_V( VmathSoaTransform3 tfrm )
+{ /* By-value form of vmathSoaT3GetCol3: passes tfrm by address and returns the output vector. */
+    VmathSoaVector3 column;
+    vmathSoaT3GetCol3( &column, &tfrm );
+    return column;
+}
+
+static inline VmathSoaVector3 vmathSoaT3GetCol_V( VmathSoaTransform3 tfrm, int col )
+{ /* By-value form of vmathSoaT3GetCol: passes tfrm by address, col unchanged, and returns the output vector. */
+    VmathSoaVector3 column;
+    vmathSoaT3GetCol( &column, &tfrm, col );
+    return column;
+}
+
+static inline VmathSoaVector4 vmathSoaT3GetRow_V( VmathSoaTransform3 tfrm, int row )
+{ /* By-value form of vmathSoaT3GetRow: passes tfrm by address, row unchanged, and returns the 4-component output. */
+    VmathSoaVector4 rowVec;
+    vmathSoaT3GetRow( &rowVec, &tfrm, row );
+    return rowVec;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3Inverse_V( VmathSoaTransform3 tfrm )
+{ /* By-value form of vmathSoaT3Inverse: passes tfrm by address and returns the output transform. */
+    VmathSoaTransform3 out;
+    vmathSoaT3Inverse( &out, &tfrm );
+    return out;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3OrthoInverse_V( VmathSoaTransform3 tfrm )
+{ /* By-value form of vmathSoaT3OrthoInverse: passes tfrm by address and returns the output transform. */
+    VmathSoaTransform3 out;
+    vmathSoaT3OrthoInverse( &out, &tfrm );
+    return out;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3AbsPerElem_V( VmathSoaTransform3 tfrm )
+{ /* By-value form of vmathSoaT3AbsPerElem: passes tfrm by address and returns the output transform. */
+    VmathSoaTransform3 out;
+    vmathSoaT3AbsPerElem( &out, &tfrm );
+    return out;
+}
+
+static inline VmathSoaVector3 vmathSoaT3MulV3_V( VmathSoaTransform3 tfrm, VmathSoaVector3 vec )
+{ /* By-value form of vmathSoaT3MulV3: passes the transform and the vector by address and returns the output vector. */
+    VmathSoaVector3 product;
+    vmathSoaT3MulV3( &product, &tfrm, &vec );
+    return product;
+}
+
+static inline VmathSoaPoint3 vmathSoaT3MulP3_V( VmathSoaTransform3 tfrm, VmathSoaPoint3 pnt )
+{ /* By-value form of vmathSoaT3MulP3: passes the transform and the point by address and returns the output point. */
+    VmathSoaPoint3 product;
+    vmathSoaT3MulP3( &product, &tfrm, &pnt );
+    return product;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3Mul_V( VmathSoaTransform3 tfrm0, VmathSoaTransform3 tfrm1 )
+{ /* By-value form of vmathSoaT3Mul: passes both transforms by address (same order) and returns the output. */
+    VmathSoaTransform3 product;
+    vmathSoaT3Mul( &product, &tfrm0, &tfrm1 );
+    return product;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3MulPerElem_V( VmathSoaTransform3 tfrm0, VmathSoaTransform3 tfrm1 )
+{ /* By-value form of vmathSoaT3MulPerElem: passes both transforms by address and returns the output. */
+    VmathSoaTransform3 product;
+    vmathSoaT3MulPerElem( &product, &tfrm0, &tfrm1 );
+    return product;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3MakeIdentity_V( void )
+{ /* By-value form of vmathSoaT3MakeIdentity. Declared with (void) so C compilers reject calls that pass arguments. */
+    VmathSoaTransform3 identity;
+    vmathSoaT3MakeIdentity( &identity );
+    return identity;
+}
+
+static inline void vmathSoaT3SetUpper3x3_V( VmathSoaTransform3 *result, VmathSoaMatrix3 tfrm )
+{ /* By-value form of vmathSoaT3SetUpper3x3: result is updated in place; the 3x3 matrix is passed by address. */
+    vmathSoaT3SetUpper3x3(result, &tfrm);
+}
+
+static inline VmathSoaMatrix3 vmathSoaT3GetUpper3x3_V( VmathSoaTransform3 tfrm )
+{ /* By-value form of vmathSoaT3GetUpper3x3: passes tfrm by address and returns the output 3x3 matrix. */
+    VmathSoaMatrix3 upper;
+    vmathSoaT3GetUpper3x3( &upper, &tfrm );
+    return upper;
+}
+
+static inline void vmathSoaT3SetTranslation_V( VmathSoaTransform3 *result, VmathSoaVector3 translateVec )
+{ /* By-value form of vmathSoaT3SetTranslation: result is updated in place; the vector is passed by address. */
+    vmathSoaT3SetTranslation(result, &translateVec);
+}
+
+static inline VmathSoaVector3 vmathSoaT3GetTranslation_V( VmathSoaTransform3 tfrm )
+{ /* By-value form of vmathSoaT3GetTranslation: passes tfrm by address and returns the output vector. */
+    VmathSoaVector3 translation;
+    vmathSoaT3GetTranslation( &translation, &tfrm );
+    return translation;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3MakeRotationX_V( vec_float4 radians )
+{ /* By-value form of vmathSoaT3MakeRotationX: forwards radians unchanged and returns the output transform. */
+    VmathSoaTransform3 rotation;
+    vmathSoaT3MakeRotationX( &rotation, radians );
+    return rotation;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3MakeRotationY_V( vec_float4 radians )
+{ /* By-value form of vmathSoaT3MakeRotationY: forwards radians unchanged and returns the output transform. */
+    VmathSoaTransform3 rotation;
+    vmathSoaT3MakeRotationY( &rotation, radians );
+    return rotation;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3MakeRotationZ_V( vec_float4 radians )
+{ /* By-value form of vmathSoaT3MakeRotationZ: forwards radians unchanged and returns the output transform. */
+    VmathSoaTransform3 rotation;
+    vmathSoaT3MakeRotationZ( &rotation, radians );
+    return rotation;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3MakeRotationZYX_V( VmathSoaVector3 radiansXYZ )
+{ /* By-value form of vmathSoaT3MakeRotationZYX: passes the angle vector by address and returns the output transform. */
+    VmathSoaTransform3 rotation;
+    vmathSoaT3MakeRotationZYX( &rotation, &radiansXYZ );
+    return rotation;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3MakeRotationAxis_V( vec_float4 radians, VmathSoaVector3 unitVec )
+{ /* By-value form of vmathSoaT3MakeRotationAxis: radians is forwarded unchanged, unitVec by address. */
+    VmathSoaTransform3 rotation;
+    vmathSoaT3MakeRotationAxis( &rotation, radians, &unitVec );
+    return rotation;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3MakeRotationQ_V( VmathSoaQuat unitQuat )
+{ /* By-value form of vmathSoaT3MakeRotationQ: passes the quaternion by address and returns the output transform. */
+    VmathSoaTransform3 rotation;
+    vmathSoaT3MakeRotationQ( &rotation, &unitQuat );
+    return rotation;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3MakeScale_V( VmathSoaVector3 scaleVec )
+{ /* By-value form of vmathSoaT3MakeScale: passes scaleVec by address and returns the output transform. */
+    VmathSoaTransform3 scaling;
+    vmathSoaT3MakeScale( &scaling, &scaleVec );
+    return scaling;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3AppendScale_V( VmathSoaTransform3 tfrm, VmathSoaVector3 scaleVec )
+{ /* By-value form of vmathSoaT3AppendScale: passes tfrm then scaleVec by address and returns the output. */
+    VmathSoaTransform3 scaled;
+    vmathSoaT3AppendScale( &scaled, &tfrm, &scaleVec );
+    return scaled;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3PrependScale_V( VmathSoaVector3 scaleVec, VmathSoaTransform3 tfrm )
+{ /* By-value form of vmathSoaT3PrependScale: passes scaleVec then tfrm by address and returns the output. */
+    VmathSoaTransform3 scaled;
+    vmathSoaT3PrependScale( &scaled, &scaleVec, &tfrm );
+    return scaled;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3MakeTranslation_V( VmathSoaVector3 translateVec )
+{ /* By-value form of vmathSoaT3MakeTranslation: passes translateVec by address and returns the output transform. */
+    VmathSoaTransform3 translation;
+    vmathSoaT3MakeTranslation( &translation, &translateVec );
+    return translation;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3Select_V( VmathSoaTransform3 tfrm0, VmathSoaTransform3 tfrm1, vec_uint4 select1 )
+{ /* By-value form of vmathSoaT3Select: passes both transforms by address, select1 unchanged, and returns the output. */
+    VmathSoaTransform3 chosen;
+    vmathSoaT3Select( &chosen, &tfrm0, &tfrm1, select1 );
+    return chosen;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathSoaT3Print_V( VmathSoaTransform3 tfrm )
+{ /* By-value form of vmathSoaT3Print; compiled only when _VECTORMATH_DEBUG is defined. */
+    vmathSoaT3Print(&tfrm);
+}
+
+static inline void vmathSoaT3Prints_V( VmathSoaTransform3 tfrm, const char *name )
+{ /* By-value form of vmathSoaT3Prints (name is forwarded as-is); compiled only when _VECTORMATH_DEBUG is defined. */
+    vmathSoaT3Prints(&tfrm, name);
+}
+
+#endif
+
+static inline VmathSoaQuat vmathSoaQMakeFromM3_V( VmathSoaMatrix3 tfrm )
+{ /* By-value form of vmathSoaQMakeFromM3: passes the 3x3 matrix by address and returns the output quaternion. */
+    VmathSoaQuat quat;
+    vmathSoaQMakeFromM3( &quat, &tfrm );
+    return quat;
+}
+
+static inline VmathSoaMatrix3 vmathSoaV3Outer_V( VmathSoaVector3 tfrm0, VmathSoaVector3 tfrm1 )
+{ /* By-value form of vmathSoaV3Outer: passes both vectors by address (same order) and returns the output matrix. */
+    VmathSoaMatrix3 outer;
+    vmathSoaV3Outer( &outer, &tfrm0, &tfrm1 );
+    return outer;
+}
+
+static inline VmathSoaMatrix4 vmathSoaV4Outer_V( VmathSoaVector4 tfrm0, VmathSoaVector4 tfrm1 )
+{ /* By-value form of vmathSoaV4Outer: passes both vectors by address (same order) and returns the output matrix. */
+    VmathSoaMatrix4 outer;
+    vmathSoaV4Outer( &outer, &tfrm0, &tfrm1 );
+    return outer;
+}
+
+static inline VmathSoaVector3 vmathSoaV3RowMul_V( VmathSoaVector3 vec, VmathSoaMatrix3 mat )
+{ /* By-value form of vmathSoaV3RowMul: passes the vector then the matrix by address and returns the output vector. */
+    VmathSoaVector3 product;
+    vmathSoaV3RowMul( &product, &vec, &mat );
+    return product;
+}
+
+static inline VmathSoaMatrix3 vmathSoaV3CrossMatrix_V( VmathSoaVector3 vec )
+{ /* By-value form of vmathSoaV3CrossMatrix: passes vec by address and returns the output matrix. */
+    VmathSoaMatrix3 crossMat;
+    vmathSoaV3CrossMatrix( &crossMat, &vec );
+    return crossMat;
+}
+
+static inline VmathSoaMatrix3 vmathSoaV3CrossMatrixMul_V( VmathSoaVector3 vec, VmathSoaMatrix3 mat )
+{ /* By-value form of vmathSoaV3CrossMatrixMul: passes the vector then the matrix by address and returns the output. */
+    VmathSoaMatrix3 product;
+    vmathSoaV3CrossMatrixMul( &product, &vec, &mat );
+    return product;
+}
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/ppu/c/quat_aos.h b/Extras/vectormathlibrary/include/vectormath/ppu/c/quat_aos.h
index 2915f4f07..a9e2ab729 100644
--- a/Extras/vectormathlibrary/include/vectormath/ppu/c/quat_aos.h
+++ b/Extras/vectormathlibrary/include/vectormath/ppu/c/quat_aos.h
@@ -1,379 +1,379 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_QUAT_AOS_C_H
-#define _VECTORMATH_QUAT_AOS_C_H
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/*-----------------------------------------------------------------------------
- * Definitions
- */
-#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
-#define _VECTORMATH_INTERNAL_FUNCTIONS
-
-#endif
-
-static inline void vmathQCopy( VmathQuat *result, const VmathQuat *quat )
-{
-    result->vec128 = quat->vec128;
-}
-
-static inline void vmathQMakeFromElems( VmathQuat *result, float _x, float _y, float _z, float _w )
-{
-    if (__builtin_constant_p(_x) & __builtin_constant_p(_y) &
-        __builtin_constant_p(_z) & __builtin_constant_p(_w)) {
-        result->vec128 = (vec_float4){_x, _y, _z, _w};
-    } else {
-        float *pf = (float *)&result->vec128;
-        pf[0] = _x;
-        pf[1] = _y;
-        pf[2] = _z;
-        pf[3] = _w;
-    }
-}
-
-static inline void vmathQMakeFromV3Scalar( VmathQuat *result, const VmathVector3 *xyz, float _w )
-{
-    result->vec128 = xyz->vec128;
-    _vmathVfSetElement(result->vec128, _w, 3);
-}
-
-static inline void vmathQMakeFromV4( VmathQuat *result, const VmathVector4 *vec )
-{
-    result->vec128 = vec->vec128;
-}
-
-static inline void vmathQMakeFromScalar( VmathQuat *result, float scalar )
-{
-    result->vec128 = _vmathVfSplatScalar(scalar);
-}
-
-static inline void vmathQMakeFrom128( VmathQuat *result, vec_float4 vf4 )
-{
-    result->vec128 = vf4;
-}
-
-static inline void vmathQMakeIdentity( VmathQuat *result )
-{
-    result->vec128 = _VECTORMATH_UNIT_0001;
-}
-
-static inline void vmathQLerp( VmathQuat *result, float t, const VmathQuat *quat0, const VmathQuat *quat1 )
-{
-    VmathQuat tmpQ_0, tmpQ_1;
-    vmathQSub( &tmpQ_0, quat1, quat0 );
-    vmathQScalarMul( &tmpQ_1, &tmpQ_0, t );
-    vmathQAdd( result, quat0, &tmpQ_1 );
-}
-
-static inline void vmathQSlerp( VmathQuat *result, float t, const VmathQuat *unitQuat0, const VmathQuat *unitQuat1 )
-{
-    VmathQuat start;
-    vec_float4 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
-    vec_uint4 selectMask;
-    cosAngle = _vmathVfDot4( unitQuat0->vec128, unitQuat1->vec128 );
-    cosAngle = vec_splat( cosAngle, 0 );
-    selectMask = (vec_uint4)vec_cmpgt( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), cosAngle );
-    cosAngle = vec_sel( cosAngle, negatef4( cosAngle ), selectMask );
-    start.vec128 = vec_sel( unitQuat0->vec128, negatef4( unitQuat0->vec128 ), selectMask );
-    selectMask = (vec_uint4)vec_cmpgt( ((vec_float4){_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL}), cosAngle );
-    angle = acosf4( cosAngle );
-    tttt = _vmathVfSplatScalar(t);
-    oneMinusT = vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), tttt );
-    angles = vec_mergeh( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), tttt );
-    angles = vec_mergeh( angles, oneMinusT );
-    angles = vec_madd( angles, angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    sines = sinf4( angles );
-    scales = divf4( sines, vec_splat( sines, 0 ) );
-    scale0 = vec_sel( oneMinusT, vec_splat( scales, 1 ), selectMask );
-    scale1 = vec_sel( tttt, vec_splat( scales, 2 ), selectMask );
-    result->vec128 = vec_madd( start.vec128, scale0, vec_madd( unitQuat1->vec128, scale1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-}
-
-static inline void vmathQSquad( VmathQuat *result, float t, const VmathQuat *unitQuat0, const VmathQuat *unitQuat1, const VmathQuat *unitQuat2, const VmathQuat *unitQuat3 )
-{
-    VmathQuat tmp0, tmp1;
-    vmathQSlerp( &tmp0, t, unitQuat0, unitQuat3 );
-    vmathQSlerp( &tmp1, t, unitQuat1, unitQuat2 );
-    vmathQSlerp( result, ( ( 2.0f * t ) * ( 1.0f - t ) ), &tmp0, &tmp1 );
-}
-
-static inline vec_float4 vmathQGet128( const VmathQuat *quat )
-{
-    return quat->vec128;
-}
-
-static inline void vmathQSetXYZ( VmathQuat *result, const VmathVector3 *vec )
-{
-    result->vec128 = vec_sel( vec->vec128, result->vec128, _VECTORMATH_MASK_0x000F );
-}
-
-static inline void vmathQGetXYZ( VmathVector3 *result, const VmathQuat *quat )
-{
-    result->vec128 = quat->vec128;
-}
-
-static inline void vmathQSetX( VmathQuat *result, float _x )
-{
-    _vmathVfSetElement(result->vec128, _x, 0);
-}
-
-static inline float vmathQGetX( const VmathQuat *quat )
-{
-    return _vmathVfGetElement(quat->vec128, 0);
-}
-
-static inline void vmathQSetY( VmathQuat *result, float _y )
-{
-    _vmathVfSetElement(result->vec128, _y, 1);
-}
-
-static inline float vmathQGetY( const VmathQuat *quat )
-{
-    return _vmathVfGetElement(quat->vec128, 1);
-}
-
-static inline void vmathQSetZ( VmathQuat *result, float _z )
-{
-    _vmathVfSetElement(result->vec128, _z, 2);
-}
-
-static inline float vmathQGetZ( const VmathQuat *quat )
-{
-    return _vmathVfGetElement(quat->vec128, 2);
-}
-
-static inline void vmathQSetW( VmathQuat *result, float _w )
-{
-    _vmathVfSetElement(result->vec128, _w, 3);
-}
-
-static inline float vmathQGetW( const VmathQuat *quat )
-{
-    return _vmathVfGetElement(quat->vec128, 3);
-}
-
-static inline void vmathQSetElem( VmathQuat *result, int idx, float value )
-{
-    _vmathVfSetElement(result->vec128, value, idx);
-}
-
-static inline float vmathQGetElem( const VmathQuat *quat, int idx )
-{
-    return _vmathVfGetElement(quat->vec128, idx);
-}
-
-static inline void vmathQAdd( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1 )
-{
-    result->vec128 = vec_add( quat0->vec128, quat1->vec128 );
-}
-
-static inline void vmathQSub( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1 )
-{
-    result->vec128 = vec_sub( quat0->vec128, quat1->vec128 );
-}
-
-static inline void vmathQScalarMul( VmathQuat *result, const VmathQuat *quat, float scalar )
-{
-    result->vec128 = vec_madd( quat->vec128, _vmathVfSplatScalar(scalar), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-static inline void vmathQScalarDiv( VmathQuat *result, const VmathQuat *quat, float scalar )
-{
-    result->vec128 = divf4( quat->vec128, _vmathVfSplatScalar(scalar) );
-}
-
-static inline void vmathQNeg( VmathQuat *result, const VmathQuat *quat )
-{
-    result->vec128 = negatef4( quat->vec128 );
-}
-
-static inline float vmathQDot( const VmathQuat *quat0, const VmathQuat *quat1 )
-{
-    vec_float4 result = _vmathVfDot4( quat0->vec128, quat1->vec128 );
-    return _vmathVfGetElement(result, 0);
-}
-
-static inline float vmathQNorm( const VmathQuat *quat )
-{
-    vec_float4 result = _vmathVfDot4( quat->vec128, quat->vec128 );
-    return _vmathVfGetElement(result, 0);
-}
-
-static inline float vmathQLength( const VmathQuat *quat )
-{
-    return sqrtf( vmathQNorm( quat ) );
-}
-
-static inline void vmathQNormalize( VmathQuat *result, const VmathQuat *quat )
-{
-    vec_float4 dot = _vmathVfDot4( quat->vec128, quat->vec128 );
-    result->vec128 = vec_madd( quat->vec128, rsqrtf4( dot ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-static inline void vmathQMakeRotationArc( VmathQuat *result, const VmathVector3 *unitVec0, const VmathVector3 *unitVec1 )
-{
-    VmathVector3 crossVec, tmpV3_0;
-    vec_float4 cosAngle, cosAngleX2Plus2, recipCosHalfAngleX2, cosHalfAngleX2, res;
-    cosAngle = _vmathVfDot3( unitVec0->vec128, unitVec1->vec128 );
-    cosAngle = vec_splat( cosAngle, 0 );
-    cosAngleX2Plus2 = vec_madd( cosAngle, ((vec_float4){2.0f,2.0f,2.0f,2.0f}), ((vec_float4){2.0f,2.0f,2.0f,2.0f}) );
-    recipCosHalfAngleX2 = rsqrtf4( cosAngleX2Plus2 );
-    cosHalfAngleX2 = vec_madd( recipCosHalfAngleX2, cosAngleX2Plus2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    vmathV3Cross( &tmpV3_0, unitVec0, unitVec1 );
-    crossVec = tmpV3_0;
-    res = vec_madd( crossVec.vec128, recipCosHalfAngleX2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    res = vec_sel( res, vec_madd( cosHalfAngleX2, ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), _VECTORMATH_MASK_0x000F );
-    result->vec128 = res;
-}
-
-static inline void vmathQMakeRotationAxis( VmathQuat *result, float radians, const VmathVector3 *unitVec )
-{
-    vec_float4 s, c, angle, res;
-    angle = vec_madd( _vmathVfSplatScalar(radians), ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    sincosf4( angle, &s, &c );
-    res = vec_sel( vec_madd( unitVec->vec128, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), c, _VECTORMATH_MASK_0x000F );
-    result->vec128 = res;
-}
-
-static inline void vmathQMakeRotationX( VmathQuat *result, float radians )
-{
-    vec_float4 s, c, angle, res;
-    angle = vec_madd( _vmathVfSplatScalar(radians), ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    sincosf4( angle, &s, &c );
-    res = vec_sel( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), s, _VECTORMATH_MASK_0xF000 );
-    res = vec_sel( res, c, _VECTORMATH_MASK_0x000F );
-    result->vec128 = res;
-}
-
-static inline void vmathQMakeRotationY( VmathQuat *result, float radians )
-{
-    vec_float4 s, c, angle, res;
-    angle = vec_madd( _vmathVfSplatScalar(radians), ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    sincosf4( angle, &s, &c );
-    res = vec_sel( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), s, _VECTORMATH_MASK_0x0F00 );
-    res = vec_sel( res, c, _VECTORMATH_MASK_0x000F );
-    result->vec128 = res;
-}
-
-static inline void vmathQMakeRotationZ( VmathQuat *result, float radians )
-{
-    vec_float4 s, c, angle, res;
-    angle = vec_madd( _vmathVfSplatScalar(radians), ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    sincosf4( angle, &s, &c );
-    res = vec_sel( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), s, _VECTORMATH_MASK_0x00F0 );
-    res = vec_sel( res, c, _VECTORMATH_MASK_0x000F );
-    result->vec128 = res;
-}
-
-static inline void vmathQMul( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1 )
-{
-    vec_float4 ldata, rdata, qv, tmp0, tmp1, tmp2, tmp3;
-    vec_float4 product, l_wxyz, r_wxyz, xy, qw;
-    ldata = quat0->vec128;
-    rdata = quat1->vec128;
-    tmp0 = vec_perm( ldata, ldata, _VECTORMATH_PERM_YZXW );
-    tmp1 = vec_perm( rdata, rdata, _VECTORMATH_PERM_ZXYW );
-    tmp2 = vec_perm( ldata, ldata, _VECTORMATH_PERM_ZXYW );
-    tmp3 = vec_perm( rdata, rdata, _VECTORMATH_PERM_YZXW );
-    qv = vec_madd( vec_splat( ldata, 3 ), rdata, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    qv = vec_madd( vec_splat( rdata, 3 ), ldata, qv );
-    qv = vec_madd( tmp0, tmp1, qv );
-    qv = vec_nmsub( tmp2, tmp3, qv );
-    product = vec_madd( ldata, rdata, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    l_wxyz = vec_sld( ldata, ldata, 12 );
-    r_wxyz = vec_sld( rdata, rdata, 12 );
-    qw = vec_nmsub( l_wxyz, r_wxyz, product );
-    xy = vec_madd( l_wxyz, r_wxyz, product );
-    qw = vec_sub( qw, vec_sld( xy, xy, 8 ) );
-    result->vec128 = vec_sel( qv, qw, _VECTORMATH_MASK_0x000F );
-}
-
-static inline void vmathQRotate( VmathVector3 *result, const VmathQuat *quat, const VmathVector3 *vec )
-{
-    vec_float4 qdata, vdata, product, tmp0, tmp1, tmp2, tmp3, wwww, qv, qw, res;
-    qdata = quat->vec128;
-    vdata = vec->vec128;
-    tmp0 = vec_perm( qdata, qdata, _VECTORMATH_PERM_YZXW );
-    tmp1 = vec_perm( vdata, vdata, _VECTORMATH_PERM_ZXYW );
-    tmp2 = vec_perm( qdata, qdata, _VECTORMATH_PERM_ZXYW );
-    tmp3 = vec_perm( vdata, vdata, _VECTORMATH_PERM_YZXW );
-    wwww = vec_splat( qdata, 3 );
-    qv = vec_madd( wwww, vdata, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    qv = vec_madd( tmp0, tmp1, qv );
-    qv = vec_nmsub( tmp2, tmp3, qv );
-    product = vec_madd( qdata, vdata, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    qw = vec_madd( vec_sld( qdata, qdata, 4 ), vec_sld( vdata, vdata, 4 ), product );
-    qw = vec_add( vec_sld( product, product, 8 ), qw );
-    tmp1 = vec_perm( qv, qv, _VECTORMATH_PERM_ZXYW );
-    tmp3 = vec_perm( qv, qv, _VECTORMATH_PERM_YZXW );
-    res = vec_madd( vec_splat( qw, 0 ), qdata, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    res = vec_madd( wwww, qv, res );
-    res = vec_madd( tmp0, tmp1, res );
-    res = vec_nmsub( tmp2, tmp3, res );
-    result->vec128 = res;
-}
-
-static inline void vmathQConj( VmathQuat *result, const VmathQuat *quat )
-{
-    result->vec128 = vec_xor( quat->vec128, ((vec_float4)(vec_int4){0x80000000,0x80000000,0x80000000,0}) );
-}
-
-static inline void vmathQSelect( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1, unsigned int select1 )
-{
-    unsigned int tmp;
-    tmp = (unsigned int)-(select1 > 0);
-    result->vec128 = vec_sel( quat0->vec128, quat1->vec128, _vmathVuiSplatScalar(tmp) );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathQPrint( const VmathQuat *quat )
-{
-    union { vec_float4 v; float s[4]; } tmp;
-    tmp.v = quat->vec128;
-    printf( "( %f %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] );
-}
-
-static inline void vmathQPrints( const VmathQuat *quat, const char *name )
-{
-    union { vec_float4 v; float s[4]; } tmp;
-    tmp.v = quat->vec128;
-    printf( "%s: ( %f %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] );
-}
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_QUAT_AOS_C_H
+#define _VECTORMATH_QUAT_AOS_C_H
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*-----------------------------------------------------------------------------
+ * Definitions
+ */
+#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
+#define _VECTORMATH_INTERNAL_FUNCTIONS
+
+#endif
+
+static inline void vmathQCopy( VmathQuat *result, const VmathQuat *quat )
+{ /* Copies the 128-bit vector member of quat into result. */
+    result->vec128 = quat->vec128;
+}
+
+static inline void vmathQMakeFromElems( VmathQuat *result, float _x, float _y, float _z, float _w )
+{ /* Builds the quaternion from four scalars; a vector literal is used when all four are compile-time constants. */
+    if (__builtin_constant_p(_x) & __builtin_constant_p(_y) &
+        __builtin_constant_p(_z) & __builtin_constant_p(_w)) {
+        result->vec128 = (vec_float4){_x, _y, _z, _w};
+    } else {
+        float *lanes = (float *)&result->vec128; /* otherwise store each lane through a float view of the vector */
+        lanes[0] = _x;
+        lanes[1] = _y;
+        lanes[2] = _z;
+        lanes[3] = _w;
+    }
+}
+
+static inline void vmathQMakeFromV3Scalar( VmathQuat *result, const VmathVector3 *xyz, float _w )
+{ /* Copies the whole vector from xyz, then overwrites element 3 with _w. */
+    result->vec128 = xyz->vec128;
+    _vmathVfSetElement(result->vec128, _w, 3); /* presumably a macro that writes the lane in place - TODO confirm */
+}
+
+static inline void vmathQMakeFromV4( VmathQuat *result, const VmathVector4 *vec )
+{ /* Copies the 128-bit vector member of the 4-component vector into the quaternion. */
+    result->vec128 = vec->vec128;
+}
+
+static inline void vmathQMakeFromScalar( VmathQuat *result, float scalar )
+{ /* Sets the quaternion to _vmathVfSplatScalar(scalar); presumably scalar replicated into all four lanes - TODO confirm. */
+    result->vec128 = _vmathVfSplatScalar(scalar);
+}
+
+static inline void vmathQMakeFrom128( VmathQuat *result, vec_float4 vf4 )
+{ /* Stores the given 128-bit vector directly as the quaternion's data. */
+    result->vec128 = vf4;
+}
+
+static inline void vmathQMakeIdentity( VmathQuat *result )
+{ /* Assigns _VECTORMATH_UNIT_0001; presumably the vector (0, 0, 0, 1) - TODO confirm against its definition. */
+    result->vec128 = _VECTORMATH_UNIT_0001;
+}
+
+static inline void vmathQLerp( VmathQuat *result, float t, const VmathQuat *quat0, const VmathQuat *quat1 )
+{ /* Linear blend: result = quat0 + t * (quat1 - quat0); the result is not renormalized here. */
+    VmathQuat delta, scaledDelta;
+    vmathQSub( &delta, quat1, quat0 );
+    vmathQScalarMul( &scaledDelta, &delta, t );
+    vmathQAdd( result, quat0, &scaledDelta );
+}
+
+static inline void vmathQSlerp( VmathQuat *result, float t, const VmathQuat *unitQuat0, const VmathQuat *unitQuat1 )
+{ /* Spherical linear interpolation between two unit quaternions with parameter t. */
+    VmathQuat start;
+    vec_float4 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
+    vec_uint4 selectMask;
+    cosAngle = _vmathVfDot4( unitQuat0->vec128, unitQuat1->vec128 );
+    cosAngle = vec_splat( cosAngle, 0 );
+    selectMask = (vec_uint4)vec_cmpgt( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), cosAngle ); /* set where the dot product is negative */
+    cosAngle = vec_sel( cosAngle, negatef4( cosAngle ), selectMask ); /* negative dot: continue with -cosAngle ... */
+    start.vec128 = vec_sel( unitQuat0->vec128, negatef4( unitQuat0->vec128 ), selectMask ); /* ... and with -unitQuat0 as the start */
+    selectMask = (vec_uint4)vec_cmpgt( ((vec_float4){_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL}), cosAngle ); /* set where cosAngle < tolerance: only then are the sine-based weights chosen below */
+    angle = acosf4( cosAngle );
+    tttt = _vmathVfSplatScalar(t);
+    oneMinusT = vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), tttt );
+    angles = vec_mergeh( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), tttt ); /* lanes: { 1, t, 1, t } */
+    angles = vec_mergeh( angles, oneMinusT ); /* lanes: { 1, 1-t, t, 1-t } */
+    angles = vec_madd( angles, angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); /* multiply each lane by the angle */
+    sines = sinf4( angles );
+    scales = divf4( sines, vec_splat( sines, 0 ) ); /* divide by lane 0, i.e. sin(angle) */
+    scale0 = vec_sel( oneMinusT, vec_splat( scales, 1 ), selectMask ); /* sin((1-t)*angle)/sin(angle), else plain 1-t */
+    scale1 = vec_sel( tttt, vec_splat( scales, 2 ), selectMask ); /* sin(t*angle)/sin(angle), else plain t */
+    result->vec128 = vec_madd( start.vec128, scale0, vec_madd( unitQuat1->vec128, scale1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ); /* start*scale0 + unitQuat1*scale1 */
+}
+
+static inline void vmathQSquad( VmathQuat *result, float t, const VmathQuat *unitQuat0, const VmathQuat *unitQuat1, const VmathQuat *unitQuat2, const VmathQuat *unitQuat3 )
+{ /* Three slerps: (unitQuat0, unitQuat3) and (unitQuat1, unitQuat2) at t, then between those results at 2*t*(1-t). */
+    VmathQuat outerArc, innerArc;
+    vmathQSlerp( &outerArc, t, unitQuat0, unitQuat3 );
+    vmathQSlerp( &innerArc, t, unitQuat1, unitQuat2 );
+    vmathQSlerp( result, ( ( 2.0f * t ) * ( 1.0f - t ) ), &outerArc, &innerArc );
+}
+
+static inline vec_float4 vmathQGet128( const VmathQuat *quat )
+{ /* Returns the quaternion's underlying 128-bit vector. */
+    return quat->vec128;
+}
+
+static inline void vmathQSetXYZ( VmathQuat *result, const VmathVector3 *vec )
+{ /* vec_sel keeps result's bits where the mask is set and takes vec's elsewhere; presumably the mask covers only w - TODO confirm. */
+    result->vec128 = vec_sel( vec->vec128, result->vec128, _VECTORMATH_MASK_0x000F );
+}
+
+static inline void vmathQGetXYZ( VmathVector3 *result, const VmathQuat *quat )
+{ /* Copies the whole 128-bit vector (the fourth lane is not cleared) into the 3-component vector. */
+    result->vec128 = quat->vec128;
+}
+
+static inline void vmathQSetX( VmathQuat *result, float _x )
+{ /* Writes _x to element 0 via _vmathVfSetElement (presumably a macro that modifies the vector in place - TODO confirm). */
+    _vmathVfSetElement(result->vec128, _x, 0);
+}
+
+static inline float vmathQGetX( const VmathQuat *quat )
+{ /* Returns element 0 of the quaternion's vector as a float. */
+    return _vmathVfGetElement(quat->vec128, 0);
+}
+
+static inline void vmathQSetY( VmathQuat *result, float _y )
+{ /* Writes _y to element 1 via _vmathVfSetElement (presumably a macro that modifies the vector in place - TODO confirm). */
+    _vmathVfSetElement(result->vec128, _y, 1);
+}
+
+static inline float vmathQGetY( const VmathQuat *quat )
+{ /* Returns element 1 of the quaternion's vector as a float. */
+    return _vmathVfGetElement(quat->vec128, 1);
+}
+
+static inline void vmathQSetZ( VmathQuat *result, float _z )
+{ /* Writes _z to element 2 via _vmathVfSetElement (presumably a macro that modifies the vector in place - TODO confirm). */
+    _vmathVfSetElement(result->vec128, _z, 2);
+}
+
+static inline float vmathQGetZ( const VmathQuat *quat )
+{ /* Returns element 2 of the quaternion's vector as a float. */
+    return _vmathVfGetElement(quat->vec128, 2);
+}
+
+static inline void vmathQSetW( VmathQuat *result, float _w )
+{ /* Writes _w to element 3 via _vmathVfSetElement (presumably a macro that modifies the vector in place - TODO confirm). */
+    _vmathVfSetElement(result->vec128, _w, 3);
+}
+
+static inline float vmathQGetW( const VmathQuat *quat )
+{ /* Returns element 3 of the quaternion's vector as a float. */
+    return _vmathVfGetElement(quat->vec128, 3);
+}
+
+static inline void vmathQSetElem( VmathQuat *result, int idx, float value )
+{ /* Writes value to element idx; idx is not range-checked here (NOTE(review): valid range presumably 0..3). */
+    _vmathVfSetElement(result->vec128, value, idx);
+}
+
+static inline float vmathQGetElem( const VmathQuat *quat, int idx )
+{ /* Returns element idx as a float; idx is not range-checked here (NOTE(review): valid range presumably 0..3). */
+    return _vmathVfGetElement(quat->vec128, idx);
+}
+
+static inline void vmathQAdd( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1 )
+{ /* Component-wise sum quat0 + quat1 using vec_add. */
+    result->vec128 = vec_add( quat0->vec128, quat1->vec128 );
+}
+
+static inline void vmathQSub( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1 )
+{ /* Component-wise difference quat0 - quat1 using vec_sub. */
+    result->vec128 = vec_sub( quat0->vec128, quat1->vec128 );
+}
+
+static inline void vmathQScalarMul( VmathQuat *result, const VmathQuat *quat, float scalar )
+{ /* Multiplies the quaternion by the splatted scalar; done as a vec_madd (multiply-add) with a zero addend. */
+    result->vec128 = vec_madd( quat->vec128, _vmathVfSplatScalar(scalar), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+}
+
+static inline void vmathQScalarDiv( VmathQuat *result, const VmathQuat *quat, float scalar )
+{ /* Divides the quaternion by the splatted scalar via divf4; a zero scalar is not checked for here. */
+    result->vec128 = divf4( quat->vec128, _vmathVfSplatScalar(scalar) );
+}
+
+static inline void vmathQNeg( VmathQuat *result, const VmathQuat *quat )
+{ /* Stores negatef4 of the quaternion's vector, i.e. every component negated. */
+    result->vec128 = negatef4( quat->vec128 );
+}
+
+static inline float vmathQDot( const VmathQuat *quat0, const VmathQuat *quat1 )
+{ /* 4-component dot product of the two quaternions, read back from element 0 as a float. */
+    vec_float4 dot4 = _vmathVfDot4( quat0->vec128, quat1->vec128 );
+    return _vmathVfGetElement(dot4, 0);
+}
+
+static inline float vmathQNorm( const VmathQuat *quat )
+{ /* Return dot(quat, quat), i.e. the squared length; no square root is taken here (vmathQLength does that). */
+    vec_float4 result = _vmathVfDot4( quat->vec128, quat->vec128 );
+    return _vmathVfGetElement(result, 0); /* the scalar is read from element 0 of the _vmathVfDot4 result */
+}
+
+static inline float vmathQLength( const VmathQuat *quat )
+{ /* Return the length of quat: sqrtf of the squared length computed by vmathQNorm. */
+    return sqrtf( vmathQNorm( quat ) );
+}
+
+static inline void vmathQNormalize( VmathQuat *result, const VmathQuat *quat )
+{ /* result = quat * rsqrtf4(dot(quat, quat)). NOTE(review): an all-zero quat is not special-cased here. */
+    vec_float4 dot = _vmathVfDot4( quat->vec128, quat->vec128 );
+    result->vec128 = vec_madd( quat->vec128, rsqrtf4( dot ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); /* multiply by the reciprocal square root; the addend is zero */
+}
+
+static inline void vmathQMakeRotationArc( VmathQuat *result, const VmathVector3 *unitVec0, const VmathVector3 *unitVec1 )
+{ /* Build xyz = cross(unitVec0, unitVec1) / (2*cos(a/2)), w = cos(a/2), where cos(a) = dot(unitVec0, unitVec1); inputs are expected to be unit length, per their names. */
+    VmathVector3 crossVec, tmpV3_0;
+    vec_float4 cosAngle, cosAngleX2Plus2, recipCosHalfAngleX2, cosHalfAngleX2, res;
+    cosAngle = _vmathVfDot3( unitVec0->vec128, unitVec1->vec128 );
+    cosAngle = vec_splat( cosAngle, 0 ); /* broadcast element 0 of the dot product to every lane */
+    cosAngleX2Plus2 = vec_madd( cosAngle, ((vec_float4){2.0f,2.0f,2.0f,2.0f}), ((vec_float4){2.0f,2.0f,2.0f,2.0f}) ); /* 2*cos(a) + 2, which equals (2*cos(a/2))^2 */
+    recipCosHalfAngleX2 = rsqrtf4( cosAngleX2Plus2 ); /* 1 / (2*cos(a/2)). NOTE(review): the input is 0 when cos(a) == -1 (opposite vectors); no guard here */
+    cosHalfAngleX2 = vec_madd( recipCosHalfAngleX2, cosAngleX2Plus2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); /* x * (1/sqrt(x)) = sqrt(x) = 2*cos(a/2) */
+    vmathV3Cross( &tmpV3_0, unitVec0, unitVec1 );
+    crossVec = tmpV3_0;
+    res = vec_madd( crossVec.vec128, recipCosHalfAngleX2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); /* cross product scaled by 1 / (2*cos(a/2)) */
+    res = vec_sel( res, vec_madd( cosHalfAngleX2, ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), _VECTORMATH_MASK_0x000F ); /* cos(a/2) where the mask is set (presumably the w lane, going by the mask name - confirm) */
+    result->vec128 = res;
+}
+
+static inline void vmathQMakeRotationAxis( VmathQuat *result, float radians, const VmathVector3 *unitVec )
+{ /* Build a rotation quaternion from an angle and an axis: unitVec * sin(radians/2), with cos(radians/2) merged in through _VECTORMATH_MASK_0x000F. */
+    vec_float4 s, c, angle, res;
+    angle = vec_madd( _vmathVfSplatScalar(radians), ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); /* radians * 0.5 in every lane */
+    sincosf4( angle, &s, &c );
+    res = vec_sel( vec_madd( unitVec->vec128, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), c, _VECTORMATH_MASK_0x000F ); /* c where the mask is set (presumably the w lane, going by the mask name - confirm), unitVec * s elsewhere */
+    result->vec128 = res;
+}
+
+static inline void vmathQMakeRotationX( VmathQuat *result, float radians )
+{ /* Build the x-axis rotation quaternion: sin(radians/2) in one lane, cos(radians/2) in another, remaining lanes 0. */
+    vec_float4 s, c, angle, res;
+    angle = vec_madd( _vmathVfSplatScalar(radians), ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); /* radians * 0.5 in every lane */
+    sincosf4( angle, &s, &c );
+    res = vec_sel( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), s, _VECTORMATH_MASK_0xF000 ); /* s where the mask is set (presumably the x lane, going by the mask name - confirm), 0 elsewhere */
+    res = vec_sel( res, c, _VECTORMATH_MASK_0x000F ); /* c where the mask is set (presumably the w lane - confirm) */
+    result->vec128 = res;
+}
+
+static inline void vmathQMakeRotationY( VmathQuat *result, float radians )
+{ /* Build the y-axis rotation quaternion: sin(radians/2) in one lane, cos(radians/2) in another, remaining lanes 0. */
+    vec_float4 s, c, angle, res;
+    angle = vec_madd( _vmathVfSplatScalar(radians), ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); /* radians * 0.5 in every lane */
+    sincosf4( angle, &s, &c );
+    res = vec_sel( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), s, _VECTORMATH_MASK_0x0F00 ); /* s where the mask is set (presumably the y lane, going by the mask name - confirm), 0 elsewhere */
+    res = vec_sel( res, c, _VECTORMATH_MASK_0x000F ); /* c where the mask is set (presumably the w lane - confirm) */
+    result->vec128 = res;
+}
+
+static inline void vmathQMakeRotationZ( VmathQuat *result, float radians )
+{ /* Build the z-axis rotation quaternion: sin(radians/2) in one lane, cos(radians/2) in another, remaining lanes 0. */
+    vec_float4 s, c, angle, res;
+    angle = vec_madd( _vmathVfSplatScalar(radians), ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); /* radians * 0.5 in every lane */
+    sincosf4( angle, &s, &c );
+    res = vec_sel( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), s, _VECTORMATH_MASK_0x00F0 ); /* s where the mask is set (presumably the z lane, going by the mask name - confirm), 0 elsewhere */
+    res = vec_sel( res, c, _VECTORMATH_MASK_0x000F ); /* c where the mask is set (presumably the w lane - confirm) */
+    result->vec128 = res;
+}
+
+static inline void vmathQMul( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1 )
+{ /* Quaternion product result = quat0 * quat1: xyz = w0*v1 + w1*v0 + cross(v0, v1), w = w0*w1 - dot(v0, v1), with v = xyz part and w = element 3. */
+    vec_float4 ldata, rdata, qv, tmp0, tmp1, tmp2, tmp3;
+    vec_float4 product, l_wxyz, r_wxyz, xy, qw;
+    ldata = quat0->vec128;
+    rdata = quat1->vec128;
+    tmp0 = vec_perm( ldata, ldata, _VECTORMATH_PERM_YZXW );
+    tmp1 = vec_perm( rdata, rdata, _VECTORMATH_PERM_ZXYW );
+    tmp2 = vec_perm( ldata, ldata, _VECTORMATH_PERM_ZXYW );
+    tmp3 = vec_perm( rdata, rdata, _VECTORMATH_PERM_YZXW );
+    qv = vec_madd( vec_splat( ldata, 3 ), rdata, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); /* w0 * quat1 */
+    qv = vec_madd( vec_splat( rdata, 3 ), ldata, qv ); /* + w1 * quat0 */
+    qv = vec_madd( tmp0, tmp1, qv ); /* + first cross-product term (yzx of quat0 times zxy of quat1, per the permute names) */
+    qv = vec_nmsub( tmp2, tmp3, qv ); /* - second cross-product term; vec_nmsub(a,b,c) yields c - a*b */
+    product = vec_madd( ldata, rdata, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); /* element-wise quat0 * quat1 */
+    l_wxyz = vec_sld( ldata, ldata, 12 ); /* rotated by 12 bytes: (w, x, y, z) */
+    r_wxyz = vec_sld( rdata, rdata, 12 );
+    qw = vec_nmsub( l_wxyz, r_wxyz, product ); /* element 3: w0*w1 - z0*z1 */
+    xy = vec_madd( l_wxyz, r_wxyz, product ); /* element 1: x0*x1 + y0*y1 */
+    qw = vec_sub( qw, vec_sld( xy, xy, 8 ) ); /* element 3: w0*w1 - z0*z1 - x0*x1 - y0*y1 */
+    result->vec128 = vec_sel( qv, qw, _VECTORMATH_MASK_0x000F ); /* qw where the mask is set (presumably the w lane, going by the mask name - confirm), qv elsewhere */
+}
+
+static inline void vmathQRotate( VmathVector3 *result, const VmathQuat *quat, const VmathVector3 *vec )
+{ /* Rotate vec by quat. With q = quat xyz, w = quat element 3, v = vec: t = w*v + cross(q, v), result = dot(q, v)*q + w*t + cross(q, t). NOTE(review): presumably expects a unit quat - confirm. */
+    vec_float4 qdata, vdata, product, tmp0, tmp1, tmp2, tmp3, wwww, qv, qw, res;
+    qdata = quat->vec128;
+    vdata = vec->vec128;
+    tmp0 = vec_perm( qdata, qdata, _VECTORMATH_PERM_YZXW );
+    tmp1 = vec_perm( vdata, vdata, _VECTORMATH_PERM_ZXYW );
+    tmp2 = vec_perm( qdata, qdata, _VECTORMATH_PERM_ZXYW );
+    tmp3 = vec_perm( vdata, vdata, _VECTORMATH_PERM_YZXW );
+    wwww = vec_splat( qdata, 3 ); /* w in every lane */
+    qv = vec_madd( wwww, vdata, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); /* t starts as w * v */
+    qv = vec_madd( tmp0, tmp1, qv );
+    qv = vec_nmsub( tmp2, tmp3, qv ); /* t += cross(q, v); vec_nmsub(a,b,c) yields c - a*b */
+    product = vec_madd( qdata, vdata, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); /* element-wise quat * vec */
+    qw = vec_madd( vec_sld( qdata, qdata, 4 ), vec_sld( vdata, vdata, 4 ), product ); /* element 0: qy*vy + qx*vx */
+    qw = vec_add( vec_sld( product, product, 8 ), qw ); /* element 0: + qz*vz, giving dot(q, v) */
+    tmp1 = vec_perm( qv, qv, _VECTORMATH_PERM_ZXYW ); /* tmp1/tmp3 are reused for the permutes of t; tmp0/tmp2 still hold the permutes of quat */
+    tmp3 = vec_perm( qv, qv, _VECTORMATH_PERM_YZXW );
+    res = vec_madd( vec_splat( qw, 0 ), qdata, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); /* dot(q, v) * quat */
+    res = vec_madd( wwww, qv, res ); /* + w * t */
+    res = vec_madd( tmp0, tmp1, res );
+    res = vec_nmsub( tmp2, tmp3, res ); /* + cross(q, t) */
+    result->vec128 = res;
+}
+
+static inline void vmathQConj( VmathQuat *result, const VmathQuat *quat )
+{ /* result = conjugate of quat: the XOR flips the sign bit (0x80000000) of the first three elements; the fourth is XORed with 0 and stays unchanged. */
+    result->vec128 = vec_xor( quat->vec128, ((vec_float4)(vec_int4){0x80000000,0x80000000,0x80000000,0}) );
+}
+
+static inline void vmathQSelect( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1, unsigned int select1 )
+{ /* result = quat1 when select1 is non-zero, otherwise quat0; done with a bit mask instead of a branch. */
+    unsigned int tmp;
+    tmp = (unsigned int)-(select1 > 0); /* all bits set when select1 != 0, otherwise 0 */
+    result->vec128 = vec_sel( quat0->vec128, quat1->vec128, _vmathVuiSplatScalar(tmp) ); /* the same mask word is used for every lane (splat, per the helper's name) */
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathQPrint( const VmathQuat *quat )
+{ /* Debug helper (compiled only when _VECTORMATH_DEBUG is defined): prints the four elements of quat on one line with printf. */
+    union { vec_float4 v; float s[4]; } tmp; /* the union lets the vector be read back as four scalar floats */
+    tmp.v = quat->vec128;
+    printf( "( %f %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] );
+}
+
+static inline void vmathQPrints( const VmathQuat *quat, const char *name )
+{ /* Debug helper (compiled only when _VECTORMATH_DEBUG is defined): like vmathQPrint, but the line is prefixed with name and a colon. */
+    union { vec_float4 v; float s[4]; } tmp; /* the union lets the vector be read back as four scalar floats */
+    tmp.v = quat->vec128;
+    printf( "%s: ( %f %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] );
+}
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/ppu/c/quat_aos_v.h b/Extras/vectormathlibrary/include/vectormath/ppu/c/quat_aos_v.h
index 04cf6ccf9..cc519d805 100644
--- a/Extras/vectormathlibrary/include/vectormath/ppu/c/quat_aos_v.h
+++ b/Extras/vectormathlibrary/include/vectormath/ppu/c/quat_aos_v.h
@@ -1,312 +1,312 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_QUAT_AOS_V_C_H
-#define _VECTORMATH_QUAT_AOS_V_C_H
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/*-----------------------------------------------------------------------------
- * Definitions
- */
-#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
-#define _VECTORMATH_INTERNAL_FUNCTIONS
-
-#endif
-
-static inline VmathQuat vmathQMakeFromElems_V( float _x, float _y, float _z, float _w )
-{
-    VmathQuat result;
-    vmathQMakeFromElems(&result, _x, _y, _z, _w);
-    return result;
-}
-
-static inline VmathQuat vmathQMakeFromV3Scalar_V( VmathVector3 xyz, float _w )
-{
-    VmathQuat result;
-    vmathQMakeFromV3Scalar(&result, &xyz, _w);
-    return result;
-}
-
-static inline VmathQuat vmathQMakeFromV4_V( VmathVector4 vec )
-{
-    VmathQuat result;
-    vmathQMakeFromV4(&result, &vec);
-    return result;
-}
-
-static inline VmathQuat vmathQMakeFromScalar_V( float scalar )
-{
-    VmathQuat result;
-    vmathQMakeFromScalar(&result, scalar);
-    return result;
-}
-
-static inline VmathQuat vmathQMakeFrom128_V( vec_float4 vf4 )
-{
-    VmathQuat result;
-    vmathQMakeFrom128(&result, vf4);
-    return result;
-}
-
-static inline VmathQuat vmathQMakeIdentity_V( )
-{
-    VmathQuat result;
-    vmathQMakeIdentity(&result);
-    return result;
-}
-
-static inline VmathQuat vmathQLerp_V( float t, VmathQuat quat0, VmathQuat quat1 )
-{
-    VmathQuat result;
-    vmathQLerp(&result, t, &quat0, &quat1);
-    return result;
-}
-
-static inline VmathQuat vmathQSlerp_V( float t, VmathQuat unitQuat0, VmathQuat unitQuat1 )
-{
-    VmathQuat result;
-    vmathQSlerp(&result, t, &unitQuat0, &unitQuat1);
-    return result;
-}
-
-static inline VmathQuat vmathQSquad_V( float t, VmathQuat unitQuat0, VmathQuat unitQuat1, VmathQuat unitQuat2, VmathQuat unitQuat3 )
-{
-    VmathQuat result;
-    vmathQSquad(&result, t, &unitQuat0, &unitQuat1, &unitQuat2, &unitQuat3);
-    return result;
-}
-
-static inline vec_float4 vmathQGet128_V( VmathQuat quat )
-{
-    return vmathQGet128(&quat);
-}
-
-static inline void vmathQSetXYZ_V( VmathQuat *result, VmathVector3 vec )
-{
-    vmathQSetXYZ(result, &vec);
-}
-
-static inline VmathVector3 vmathQGetXYZ_V( VmathQuat quat )
-{
-    VmathVector3 result;
-    vmathQGetXYZ(&result, &quat);
-    return result;
-}
-
-static inline void vmathQSetX_V( VmathQuat *result, float _x )
-{
-    vmathQSetX(result, _x);
-}
-
-static inline float vmathQGetX_V( VmathQuat quat )
-{
-    return vmathQGetX(&quat);
-}
-
-static inline void vmathQSetY_V( VmathQuat *result, float _y )
-{
-    vmathQSetY(result, _y);
-}
-
-static inline float vmathQGetY_V( VmathQuat quat )
-{
-    return vmathQGetY(&quat);
-}
-
-static inline void vmathQSetZ_V( VmathQuat *result, float _z )
-{
-    vmathQSetZ(result, _z);
-}
-
-static inline float vmathQGetZ_V( VmathQuat quat )
-{
-    return vmathQGetZ(&quat);
-}
-
-static inline void vmathQSetW_V( VmathQuat *result, float _w )
-{
-    vmathQSetW(result, _w);
-}
-
-static inline float vmathQGetW_V( VmathQuat quat )
-{
-    return vmathQGetW(&quat);
-}
-
-static inline void vmathQSetElem_V( VmathQuat *result, int idx, float value )
-{
-    vmathQSetElem(result, idx, value);
-}
-
-static inline float vmathQGetElem_V( VmathQuat quat, int idx )
-{
-    return vmathQGetElem(&quat, idx);
-}
-
-static inline VmathQuat vmathQAdd_V( VmathQuat quat0, VmathQuat quat1 )
-{
-    VmathQuat result;
-    vmathQAdd(&result, &quat0, &quat1);
-    return result;
-}
-
-static inline VmathQuat vmathQSub_V( VmathQuat quat0, VmathQuat quat1 )
-{
-    VmathQuat result;
-    vmathQSub(&result, &quat0, &quat1);
-    return result;
-}
-
-static inline VmathQuat vmathQScalarMul_V( VmathQuat quat, float scalar )
-{
-    VmathQuat result;
-    vmathQScalarMul(&result, &quat, scalar);
-    return result;
-}
-
-static inline VmathQuat vmathQScalarDiv_V( VmathQuat quat, float scalar )
-{
-    VmathQuat result;
-    vmathQScalarDiv(&result, &quat, scalar);
-    return result;
-}
-
-static inline VmathQuat vmathQNeg_V( VmathQuat quat )
-{
-    VmathQuat result;
-    vmathQNeg(&result, &quat);
-    return result;
-}
-
-static inline float vmathQDot_V( VmathQuat quat0, VmathQuat quat1 )
-{
-    return vmathQDot(&quat0, &quat1);
-}
-
-static inline float vmathQNorm_V( VmathQuat quat )
-{
-    return vmathQNorm(&quat);
-}
-
-static inline float vmathQLength_V( VmathQuat quat )
-{
-    return vmathQLength(&quat);
-}
-
-static inline VmathQuat vmathQNormalize_V( VmathQuat quat )
-{
-    VmathQuat result;
-    vmathQNormalize(&result, &quat);
-    return result;
-}
-
-static inline VmathQuat vmathQMakeRotationArc_V( VmathVector3 unitVec0, VmathVector3 unitVec1 )
-{
-    VmathQuat result;
-    vmathQMakeRotationArc(&result, &unitVec0, &unitVec1);
-    return result;
-}
-
-static inline VmathQuat vmathQMakeRotationAxis_V( float radians, VmathVector3 unitVec )
-{
-    VmathQuat result;
-    vmathQMakeRotationAxis(&result, radians, &unitVec);
-    return result;
-}
-
-static inline VmathQuat vmathQMakeRotationX_V( float radians )
-{
-    VmathQuat result;
-    vmathQMakeRotationX(&result, radians);
-    return result;
-}
-
-static inline VmathQuat vmathQMakeRotationY_V( float radians )
-{
-    VmathQuat result;
-    vmathQMakeRotationY(&result, radians);
-    return result;
-}
-
-static inline VmathQuat vmathQMakeRotationZ_V( float radians )
-{
-    VmathQuat result;
-    vmathQMakeRotationZ(&result, radians);
-    return result;
-}
-
-static inline VmathQuat vmathQMul_V( VmathQuat quat0, VmathQuat quat1 )
-{
-    VmathQuat result;
-    vmathQMul(&result, &quat0, &quat1);
-    return result;
-}
-
-static inline VmathVector3 vmathQRotate_V( VmathQuat quat, VmathVector3 vec )
-{
-    VmathVector3 result;
-    vmathQRotate(&result, &quat, &vec);
-    return result;
-}
-
-static inline VmathQuat vmathQConj_V( VmathQuat quat )
-{
-    VmathQuat result;
-    vmathQConj(&result, &quat);
-    return result;
-}
-
-static inline VmathQuat vmathQSelect_V( VmathQuat quat0, VmathQuat quat1, unsigned int select1 )
-{
-    VmathQuat result;
-    vmathQSelect(&result, &quat0, &quat1, select1);
-    return result;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathQPrint_V( VmathQuat quat )
-{
-    vmathQPrint(&quat);
-}
-
-static inline void vmathQPrints_V( VmathQuat quat, const char *name )
-{
-    vmathQPrints(&quat, name);
-}
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_QUAT_AOS_V_C_H
+#define _VECTORMATH_QUAT_AOS_V_C_H
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*-----------------------------------------------------------------------------
+ * Definitions
+ */
+#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
+#define _VECTORMATH_INTERNAL_FUNCTIONS
+
+#endif
+
+static inline VmathQuat vmathQMakeFromElems_V( float _x, float _y, float _z, float _w )
+{ /* By-value form of vmathQMakeFromElems: forwards _x, _y, _z, _w and returns the filled-in quaternion by value. */
+    VmathQuat result;
+    vmathQMakeFromElems(&result, _x, _y, _z, _w);
+    return result;
+}
+
+static inline VmathQuat vmathQMakeFromV3Scalar_V( VmathVector3 xyz, float _w )
+{ /* By-value form of vmathQMakeFromV3Scalar: passes the address of the xyz copy plus _w and returns the result by value. */
+    VmathQuat result;
+    vmathQMakeFromV3Scalar(&result, &xyz, _w);
+    return result;
+}
+
+static inline VmathQuat vmathQMakeFromV4_V( VmathVector4 vec )
+{ /* By-value form of vmathQMakeFromV4: passes the address of the vec copy and returns the result by value. */
+    VmathQuat result;
+    vmathQMakeFromV4(&result, &vec);
+    return result;
+}
+
+static inline VmathQuat vmathQMakeFromScalar_V( float scalar )
+{ /* By-value form of vmathQMakeFromScalar: forwards scalar and returns the result by value. */
+    VmathQuat result;
+    vmathQMakeFromScalar(&result, scalar);
+    return result;
+}
+
+static inline VmathQuat vmathQMakeFrom128_V( vec_float4 vf4 )
+{ /* By-value form of vmathQMakeFrom128: forwards the raw vec_float4 and returns the result by value. */
+    VmathQuat result;
+    vmathQMakeFrom128(&result, vf4);
+    return result;
+}
+
+static inline VmathQuat vmathQMakeIdentity_V( )
+{ /* By-value form of vmathQMakeIdentity: returns the quaternion that vmathQMakeIdentity writes into a local. */
+    VmathQuat result;
+    vmathQMakeIdentity(&result);
+    return result;
+}
+
+static inline VmathQuat vmathQLerp_V( float t, VmathQuat quat0, VmathQuat quat1 )
+{ /* By-value form of vmathQLerp: forwards t and the addresses of the two argument copies, returns the result by value. */
+    VmathQuat result;
+    vmathQLerp(&result, t, &quat0, &quat1);
+    return result;
+}
+
+static inline VmathQuat vmathQSlerp_V( float t, VmathQuat unitQuat0, VmathQuat unitQuat1 )
+{ /* By-value form of vmathQSlerp: forwards t and the addresses of the two argument copies, returns the result by value. */
+    VmathQuat result;
+    vmathQSlerp(&result, t, &unitQuat0, &unitQuat1);
+    return result;
+}
+
+static inline VmathQuat vmathQSquad_V( float t, VmathQuat unitQuat0, VmathQuat unitQuat1, VmathQuat unitQuat2, VmathQuat unitQuat3 )
+{ /* By-value form of vmathQSquad: forwards t and the addresses of the four argument copies, returns the result by value. */
+    VmathQuat result;
+    vmathQSquad(&result, t, &unitQuat0, &unitQuat1, &unitQuat2, &unitQuat3);
+    return result;
+}
+
+static inline vec_float4 vmathQGet128_V( VmathQuat quat )
+{ /* By-value form of vmathQGet128: returns what vmathQGet128 yields for the argument copy. */
+    return vmathQGet128(&quat);
+}
+
+static inline void vmathQSetXYZ_V( VmathQuat *result, VmathVector3 vec )
+{ /* By-value form of vmathQSetXYZ: result is still updated through the pointer; only vec is taken by value. */
+    vmathQSetXYZ(result, &vec);
+}
+
+static inline VmathVector3 vmathQGetXYZ_V( VmathQuat quat )
+{ /* By-value form of vmathQGetXYZ: returns the VmathVector3 that vmathQGetXYZ writes for the argument copy. */
+    VmathVector3 result;
+    vmathQGetXYZ(&result, &quat);
+    return result;
+}
+
+static inline void vmathQSetX_V( VmathQuat *result, float _x )
+{ /* Same signature as vmathQSetX; simply forwards to it (result stays a pointer because it is modified in place). */
+    vmathQSetX(result, _x);
+}
+
+static inline float vmathQGetX_V( VmathQuat quat )
+{ /* By-value form of vmathQGetX: returns its result for the argument copy. */
+    return vmathQGetX(&quat);
+}
+
+static inline void vmathQSetY_V( VmathQuat *result, float _y )
+{ /* Same signature as vmathQSetY; simply forwards to it (result stays a pointer because it is modified in place). */
+    vmathQSetY(result, _y);
+}
+
+static inline float vmathQGetY_V( VmathQuat quat )
+{ /* By-value form of vmathQGetY: returns its result for the argument copy. */
+    return vmathQGetY(&quat);
+}
+
+static inline void vmathQSetZ_V( VmathQuat *result, float _z )
+{ /* Same signature as vmathQSetZ; simply forwards to it (result stays a pointer because it is modified in place). */
+    vmathQSetZ(result, _z);
+}
+
+static inline float vmathQGetZ_V( VmathQuat quat )
+{ /* By-value form of vmathQGetZ: returns its result for the argument copy. */
+    return vmathQGetZ(&quat);
+}
+
+static inline void vmathQSetW_V( VmathQuat *result, float _w )
+{ /* Same signature as vmathQSetW; simply forwards to it (result stays a pointer because it is modified in place). */
+    vmathQSetW(result, _w);
+}
+
+static inline float vmathQGetW_V( VmathQuat quat )
+{ /* By-value form of vmathQGetW: returns its result for the argument copy. */
+    return vmathQGetW(&quat);
+}
+
+static inline void vmathQSetElem_V( VmathQuat *result, int idx, float value )
+{ /* Same signature as vmathQSetElem; forwards idx and value unchanged (no range check is added here). */
+    vmathQSetElem(result, idx, value);
+}
+
+static inline float vmathQGetElem_V( VmathQuat quat, int idx )
+{ /* By-value form of vmathQGetElem: forwards idx unchanged and returns the result for the argument copy. */
+    return vmathQGetElem(&quat, idx);
+}
+
+static inline VmathQuat vmathQAdd_V( VmathQuat quat0, VmathQuat quat1 )
+{ /* By-value form of vmathQAdd: passes the addresses of the argument copies and returns the result by value. */
+    VmathQuat result;
+    vmathQAdd(&result, &quat0, &quat1);
+    return result;
+}
+
+static inline VmathQuat vmathQSub_V( VmathQuat quat0, VmathQuat quat1 )
+{ /* By-value form of vmathQSub: passes the addresses of the argument copies and returns the result by value. */
+    VmathQuat result;
+    vmathQSub(&result, &quat0, &quat1);
+    return result;
+}
+
+static inline VmathQuat vmathQScalarMul_V( VmathQuat quat, float scalar )
+{ /* By-value form of vmathQScalarMul: forwards the address of the quat copy plus scalar, returns the result by value. */
+    VmathQuat result;
+    vmathQScalarMul(&result, &quat, scalar);
+    return result;
+}
+
+static inline VmathQuat vmathQScalarDiv_V( VmathQuat quat, float scalar )
+{ /* By-value form of vmathQScalarDiv: forwards the address of the quat copy plus scalar, returns the result by value. */
+    VmathQuat result;
+    vmathQScalarDiv(&result, &quat, scalar);
+    return result;
+}
+
+static inline VmathQuat vmathQNeg_V( VmathQuat quat )
+{ /* By-value form of vmathQNeg: passes the address of the quat copy and returns the result by value. */
+    VmathQuat result;
+    vmathQNeg(&result, &quat);
+    return result;
+}
+
+static inline float vmathQDot_V( VmathQuat quat0, VmathQuat quat1 )
+{ /* By-value form of vmathQDot: returns its scalar result for the two argument copies. */
+    return vmathQDot(&quat0, &quat1);
+}
+
+static inline float vmathQNorm_V( VmathQuat quat )
+{ /* By-value form of vmathQNorm: returns its scalar result for the argument copy. */
+    return vmathQNorm(&quat);
+}
+
+static inline float vmathQLength_V( VmathQuat quat )
+{ /* By-value form of vmathQLength: returns its scalar result for the argument copy. */
+    return vmathQLength(&quat);
+}
+
+static inline VmathQuat vmathQNormalize_V( VmathQuat quat )
+{ /* By-value form of vmathQNormalize: passes the address of the quat copy and returns the result by value. */
+    VmathQuat result;
+    vmathQNormalize(&result, &quat);
+    return result;
+}
+
+static inline VmathQuat vmathQMakeRotationArc_V( VmathVector3 unitVec0, VmathVector3 unitVec1 )
+{ /* By-value form of vmathQMakeRotationArc: passes the addresses of the two vector copies and returns the result by value. */
+    VmathQuat result;
+    vmathQMakeRotationArc(&result, &unitVec0, &unitVec1);
+    return result;
+}
+
+static inline VmathQuat vmathQMakeRotationAxis_V( float radians, VmathVector3 unitVec )
+{ /* By-value form of vmathQMakeRotationAxis: forwards radians and the address of the unitVec copy, returns the result by value. */
+    VmathQuat result;
+    vmathQMakeRotationAxis(&result, radians, &unitVec);
+    return result;
+}
+
+static inline VmathQuat vmathQMakeRotationX_V( float radians )
+{ /* By-value form of vmathQMakeRotationX: forwards radians and returns the result by value. */
+    VmathQuat result;
+    vmathQMakeRotationX(&result, radians);
+    return result;
+}
+
+static inline VmathQuat vmathQMakeRotationY_V( float radians )
+{ /* By-value form of vmathQMakeRotationY: forwards radians and returns the result by value. */
+    VmathQuat result;
+    vmathQMakeRotationY(&result, radians);
+    return result;
+}
+
+static inline VmathQuat vmathQMakeRotationZ_V( float radians )
+{ /* By-value form of vmathQMakeRotationZ: forwards radians and returns the result by value. */
+    VmathQuat result;
+    vmathQMakeRotationZ(&result, radians);
+    return result;
+}
+
+static inline VmathQuat vmathQMul_V( VmathQuat quat0, VmathQuat quat1 )
+{ /* By-value form of vmathQMul: passes the addresses of the argument copies (quat0 first, quat1 second) and returns the result by value. */
+    VmathQuat result;
+    vmathQMul(&result, &quat0, &quat1);
+    return result;
+}
+
+static inline VmathVector3 vmathQRotate_V( VmathQuat quat, VmathVector3 vec )
+{ /* By-value form of vmathQRotate: passes the addresses of the quat and vec copies and returns the VmathVector3 result by value. */
+    VmathVector3 result;
+    vmathQRotate(&result, &quat, &vec);
+    return result;
+}
+
+static inline VmathQuat vmathQConj_V( VmathQuat quat )
+{ /* By-value form of vmathQConj: passes the address of the quat copy and returns the result by value. */
+    VmathQuat result;
+    vmathQConj(&result, &quat);
+    return result;
+}
+
+static inline VmathQuat vmathQSelect_V( VmathQuat quat0, VmathQuat quat1, unsigned int select1 )
+{ /* By-value form of vmathQSelect: forwards the addresses of both copies plus select1, returns the chosen quaternion by value. */
+    VmathQuat result;
+    vmathQSelect(&result, &quat0, &quat1, select1);
+    return result;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathQPrint_V( VmathQuat quat )
+{ /* By-value form of vmathQPrint (only compiled when _VECTORMATH_DEBUG is defined): prints the argument copy. */
+    vmathQPrint(&quat);
+}
+
+static inline void vmathQPrints_V( VmathQuat quat, const char *name )
+{ /* By-value form of vmathQPrints (only compiled when _VECTORMATH_DEBUG is defined): forwards the argument copy and name. */
+    vmathQPrints(&quat, name);
+}
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/ppu/c/quat_soa.h b/Extras/vectormathlibrary/include/vectormath/ppu/c/quat_soa.h
index 334c17b01..497a262fe 100644
--- a/Extras/vectormathlibrary/include/vectormath/ppu/c/quat_soa.h
+++ b/Extras/vectormathlibrary/include/vectormath/ppu/c/quat_soa.h
@@ -1,415 +1,415 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_QUAT_SOA_C_H
-#define _VECTORMATH_QUAT_SOA_C_H
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/*-----------------------------------------------------------------------------
- * Definitions
- */
-#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
-#define _VECTORMATH_INTERNAL_FUNCTIONS
-
-#endif
-
-static inline void vmathSoaQCopy( VmathSoaQuat *result, const VmathSoaQuat *quat )
-{
-    result->x = quat->x;
-    result->y = quat->y;
-    result->z = quat->z;
-    result->w = quat->w;
-}
-
-static inline void vmathSoaQMakeFromElems( VmathSoaQuat *result, vec_float4 _x, vec_float4 _y, vec_float4 _z, vec_float4 _w )
-{
-    result->x = _x;
-    result->y = _y;
-    result->z = _z;
-    result->w = _w;
-}
-
-static inline void vmathSoaQMakeFromV3Scalar( VmathSoaQuat *result, const VmathSoaVector3 *xyz, vec_float4 _w )
-{
-    vmathSoaQSetXYZ( result, xyz );
-    vmathSoaQSetW( result, _w );
-}
-
-static inline void vmathSoaQMakeFromV4( VmathSoaQuat *result, const VmathSoaVector4 *vec )
-{
-    result->x = vec->x;
-    result->y = vec->y;
-    result->z = vec->z;
-    result->w = vec->w;
-}
-
-static inline void vmathSoaQMakeFromScalar( VmathSoaQuat *result, vec_float4 scalar )
-{
-    result->x = scalar;
-    result->y = scalar;
-    result->z = scalar;
-    result->w = scalar;
-}
-
-static inline void vmathSoaQMakeFromAos( VmathSoaQuat *result, const VmathQuat *quat )
-{
-    vec_float4 vec128 = quat->vec128;
-    result->x = vec_splat( vec128, 0 );
-    result->y = vec_splat( vec128, 1 );
-    result->z = vec_splat( vec128, 2 );
-    result->w = vec_splat( vec128, 3 );
-}
-
-static inline void vmathSoaQMakeFrom4Aos( VmathSoaQuat *result, const VmathQuat *quat0, const VmathQuat *quat1, const VmathQuat *quat2, const VmathQuat *quat3 )
-{
-    vec_float4 tmp0, tmp1, tmp2, tmp3;
-    tmp0 = vec_mergeh( quat0->vec128, quat2->vec128 );
-    tmp1 = vec_mergeh( quat1->vec128, quat3->vec128 );
-    tmp2 = vec_mergel( quat0->vec128, quat2->vec128 );
-    tmp3 = vec_mergel( quat1->vec128, quat3->vec128 );
-    result->x = vec_mergeh( tmp0, tmp1 );
-    result->y = vec_mergel( tmp0, tmp1 );
-    result->z = vec_mergeh( tmp2, tmp3 );
-    result->w = vec_mergel( tmp2, tmp3 );
-}
-
-static inline void vmathSoaQMakeIdentity( VmathSoaQuat *result )
-{
-    vmathSoaQMakeFromElems( result, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){1.0f,1.0f,1.0f,1.0f}) );
-}
-
-static inline void vmathSoaQLerp( VmathSoaQuat *result, vec_float4 t, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 )
-{
-    VmathSoaQuat tmpQ_0, tmpQ_1;
-    vmathSoaQSub( &tmpQ_0, quat1, quat0 );
-    vmathSoaQScalarMul( &tmpQ_1, &tmpQ_0, t );
-    vmathSoaQAdd( result, quat0, &tmpQ_1 );
-}
-
-static inline void vmathSoaQSlerp( VmathSoaQuat *result, vec_float4 t, const VmathSoaQuat *unitQuat0, const VmathSoaQuat *unitQuat1 )
-{
-    VmathSoaQuat start, tmpQ_0, tmpQ_1;
-    vec_float4 recipSinAngle, scale0, scale1, cosAngle, angle;
-    vec_uint4 selectMask;
-    cosAngle = vmathSoaQDot( unitQuat0, unitQuat1 );
-    selectMask = (vec_uint4)vec_cmpgt( (vec_float4){0.0f,0.0f,0.0f,0.0f}, cosAngle );
-    cosAngle = vec_sel( cosAngle, negatef4( cosAngle ), selectMask );
-    vmathSoaQSetX( &start, vec_sel( unitQuat0->x, negatef4( unitQuat0->x ), selectMask ) );
-    vmathSoaQSetY( &start, vec_sel( unitQuat0->y, negatef4( unitQuat0->y ), selectMask ) );
-    vmathSoaQSetZ( &start, vec_sel( unitQuat0->z, negatef4( unitQuat0->z ), selectMask ) );
-    vmathSoaQSetW( &start, vec_sel( unitQuat0->w, negatef4( unitQuat0->w ), selectMask ) );
-    selectMask = (vec_uint4)vec_cmpgt( (vec_float4){_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL}, cosAngle );
-    angle = acosf4( cosAngle );
-    recipSinAngle = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sinf4( angle ) );
-    scale0 = vec_sel( vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), t ), vec_madd( sinf4( vec_madd( vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), t ), angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), recipSinAngle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), selectMask );
-    scale1 = vec_sel( t, vec_madd( sinf4( vec_madd( t, angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), recipSinAngle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), selectMask );
-    vmathSoaQScalarMul( &tmpQ_0, &start, scale0 );
-    vmathSoaQScalarMul( &tmpQ_1, unitQuat1, scale1 );
-    vmathSoaQAdd( result, &tmpQ_0, &tmpQ_1 );
-}
-
-static inline void vmathSoaQSquad( VmathSoaQuat *result, vec_float4 t, const VmathSoaQuat *unitQuat0, const VmathSoaQuat *unitQuat1, const VmathSoaQuat *unitQuat2, const VmathSoaQuat *unitQuat3 )
-{
-    VmathSoaQuat tmp0, tmp1;
-    vmathSoaQSlerp( &tmp0, t, unitQuat0, unitQuat3 );
-    vmathSoaQSlerp( &tmp1, t, unitQuat1, unitQuat2 );
-    vmathSoaQSlerp( result, vec_madd( vec_madd( ((vec_float4){2.0f,2.0f,2.0f,2.0f}), t, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), t ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), &tmp0, &tmp1 );
-}
-
-static inline void vmathSoaQGet4Aos( const VmathSoaQuat *quat, VmathQuat *result0, VmathQuat *result1, VmathQuat *result2, VmathQuat *result3 )
-{
-    vec_float4 tmp0, tmp1, tmp2, tmp3;
-    tmp0 = vec_mergeh( quat->x, quat->z );
-    tmp1 = vec_mergeh( quat->y, quat->w );
-    tmp2 = vec_mergel( quat->x, quat->z );
-    tmp3 = vec_mergel( quat->y, quat->w );
-    vmathQMakeFrom128( result0, vec_mergeh( tmp0, tmp1 ) );
-    vmathQMakeFrom128( result1, vec_mergel( tmp0, tmp1 ) );
-    vmathQMakeFrom128( result2, vec_mergeh( tmp2, tmp3 ) );
-    vmathQMakeFrom128( result3, vec_mergel( tmp2, tmp3 ) );
-}
-
-static inline void vmathSoaQSetXYZ( VmathSoaQuat *result, const VmathSoaVector3 *vec )
-{
-    result->x = vec->x;
-    result->y = vec->y;
-    result->z = vec->z;
-}
-
-static inline void vmathSoaQGetXYZ( VmathSoaVector3 *result, const VmathSoaQuat *quat )
-{
-    vmathSoaV3MakeFromElems( result, quat->x, quat->y, quat->z );
-}
-
-static inline void vmathSoaQSetX( VmathSoaQuat *result, vec_float4 _x )
-{
-    result->x = _x;
-}
-
-static inline vec_float4 vmathSoaQGetX( const VmathSoaQuat *quat )
-{
-    return quat->x;
-}
-
-static inline void vmathSoaQSetY( VmathSoaQuat *result, vec_float4 _y )
-{
-    result->y = _y;
-}
-
-static inline vec_float4 vmathSoaQGetY( const VmathSoaQuat *quat )
-{
-    return quat->y;
-}
-
-static inline void vmathSoaQSetZ( VmathSoaQuat *result, vec_float4 _z )
-{
-    result->z = _z;
-}
-
-static inline vec_float4 vmathSoaQGetZ( const VmathSoaQuat *quat )
-{
-    return quat->z;
-}
-
-static inline void vmathSoaQSetW( VmathSoaQuat *result, vec_float4 _w )
-{
-    result->w = _w;
-}
-
-static inline vec_float4 vmathSoaQGetW( const VmathSoaQuat *quat )
-{
-    return quat->w;
-}
-
-static inline void vmathSoaQSetElem( VmathSoaQuat *result, int idx, vec_float4 value )
-{
-    *(&result->x + idx) = value;
-}
-
-static inline vec_float4 vmathSoaQGetElem( const VmathSoaQuat *quat, int idx )
-{
-    return *(&quat->x + idx);
-}
-
-static inline void vmathSoaQAdd( VmathSoaQuat *result, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 )
-{
-    result->x = vec_add( quat0->x, quat1->x );
-    result->y = vec_add( quat0->y, quat1->y );
-    result->z = vec_add( quat0->z, quat1->z );
-    result->w = vec_add( quat0->w, quat1->w );
-}
-
-static inline void vmathSoaQSub( VmathSoaQuat *result, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 )
-{
-    result->x = vec_sub( quat0->x, quat1->x );
-    result->y = vec_sub( quat0->y, quat1->y );
-    result->z = vec_sub( quat0->z, quat1->z );
-    result->w = vec_sub( quat0->w, quat1->w );
-}
-
-static inline void vmathSoaQScalarMul( VmathSoaQuat *result, const VmathSoaQuat *quat, vec_float4 scalar )
-{
-    result->x = vec_madd( quat->x, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result->y = vec_madd( quat->y, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result->z = vec_madd( quat->z, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result->w = vec_madd( quat->w, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-static inline void vmathSoaQScalarDiv( VmathSoaQuat *result, const VmathSoaQuat *quat, vec_float4 scalar )
-{
-    result->x = divf4( quat->x, scalar );
-    result->y = divf4( quat->y, scalar );
-    result->z = divf4( quat->z, scalar );
-    result->w = divf4( quat->w, scalar );
-}
-
-static inline void vmathSoaQNeg( VmathSoaQuat *result, const VmathSoaQuat *quat )
-{
-    result->x = negatef4( quat->x );
-    result->y = negatef4( quat->y );
-    result->z = negatef4( quat->z );
-    result->w = negatef4( quat->w );
-}
-
-static inline vec_float4 vmathSoaQDot( const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 )
-{
-    vec_float4 result;
-    result = vec_madd( quat0->x, quat1->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result = vec_add( result, vec_madd( quat0->y, quat1->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    result = vec_add( result, vec_madd( quat0->z, quat1->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    result = vec_add( result, vec_madd( quat0->w, quat1->w, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    return result;
-}
-
-static inline vec_float4 vmathSoaQNorm( const VmathSoaQuat *quat )
-{
-    vec_float4 result;
-    result = vec_madd( quat->x, quat->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result = vec_add( result, vec_madd( quat->y, quat->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    result = vec_add( result, vec_madd( quat->z, quat->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    result = vec_add( result, vec_madd( quat->w, quat->w, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    return result;
-}
-
-static inline vec_float4 vmathSoaQLength( const VmathSoaQuat *quat )
-{
-    return sqrtf4( vmathSoaQNorm( quat ) );
-}
-
-static inline void vmathSoaQNormalize( VmathSoaQuat *result, const VmathSoaQuat *quat )
-{
-    vec_float4 lenSqr, lenInv;
-    lenSqr = vmathSoaQNorm( quat );
-    lenInv = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( lenSqr ) );
-    result->x = vec_madd( quat->x, lenInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result->y = vec_madd( quat->y, lenInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result->z = vec_madd( quat->z, lenInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result->w = vec_madd( quat->w, lenInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-static inline void vmathSoaQMakeRotationArc( VmathSoaQuat *result, const VmathSoaVector3 *unitVec0, const VmathSoaVector3 *unitVec1 )
-{
-    VmathSoaVector3 tmpV3_0, tmpV3_1;
-    vec_float4 cosHalfAngleX2, recipCosHalfAngleX2;
-    cosHalfAngleX2 = sqrtf4( vec_madd( ((vec_float4){2.0f,2.0f,2.0f,2.0f}), vec_add( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vmathSoaV3Dot( unitVec0, unitVec1 ) ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    recipCosHalfAngleX2 = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), cosHalfAngleX2 );
-    vmathSoaV3Cross( &tmpV3_0, unitVec0, unitVec1 );
-    vmathSoaV3ScalarMul( &tmpV3_1, &tmpV3_0, recipCosHalfAngleX2 );
-    vmathSoaQMakeFromV3Scalar( result, &tmpV3_1, vec_madd( cosHalfAngleX2, ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-}
-
-static inline void vmathSoaQMakeRotationAxis( VmathSoaQuat *result, vec_float4 radians, const VmathSoaVector3 *unitVec )
-{
-    VmathSoaVector3 tmpV3_0;
-    vec_float4 s, c, angle;
-    angle = vec_madd( radians, ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    sincosf4( angle, &s, &c );
-    vmathSoaV3ScalarMul( &tmpV3_0, unitVec, s );
-    vmathSoaQMakeFromV3Scalar( result, &tmpV3_0, c );
-}
-
-static inline void vmathSoaQMakeRotationX( VmathSoaQuat *result, vec_float4 radians )
-{
-    vec_float4 s, c, angle;
-    angle = vec_madd( radians, ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    sincosf4( angle, &s, &c );
-    vmathSoaQMakeFromElems( result, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), c );
-}
-
-static inline void vmathSoaQMakeRotationY( VmathSoaQuat *result, vec_float4 radians )
-{
-    vec_float4 s, c, angle;
-    angle = vec_madd( radians, ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    sincosf4( angle, &s, &c );
-    vmathSoaQMakeFromElems( result, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), c );
-}
-
-static inline void vmathSoaQMakeRotationZ( VmathSoaQuat *result, vec_float4 radians )
-{
-    vec_float4 s, c, angle;
-    angle = vec_madd( radians, ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    sincosf4( angle, &s, &c );
-    vmathSoaQMakeFromElems( result, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), s, c );
-}
-
-static inline void vmathSoaQMul( VmathSoaQuat *result, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 )
-{
-    vec_float4 tmpX, tmpY, tmpZ, tmpW;
-    tmpX = vec_sub( vec_add( vec_add( vec_madd( quat0->w, quat1->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( quat0->x, quat1->w, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( quat0->y, quat1->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( quat0->z, quat1->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmpY = vec_sub( vec_add( vec_add( vec_madd( quat0->w, quat1->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( quat0->y, quat1->w, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( quat0->z, quat1->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( quat0->x, quat1->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmpZ = vec_sub( vec_add( vec_add( vec_madd( quat0->w, quat1->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( quat0->z, quat1->w, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( quat0->x, quat1->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( quat0->y, quat1->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmpW = vec_sub( vec_sub( vec_sub( vec_madd( quat0->w, quat1->w, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( quat0->x, quat1->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( quat0->y, quat1->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( quat0->z, quat1->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    vmathSoaQMakeFromElems( result, tmpX, tmpY, tmpZ, tmpW );
-}
-
-static inline void vmathSoaQRotate( VmathSoaVector3 *result, const VmathSoaQuat *quat, const VmathSoaVector3 *vec )
-{
-    vec_float4 tmpX, tmpY, tmpZ, tmpW;
-    tmpX = vec_sub( vec_add( vec_madd( quat->w, vec->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( quat->y, vec->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( quat->z, vec->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmpY = vec_sub( vec_add( vec_madd( quat->w, vec->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( quat->z, vec->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( quat->x, vec->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmpZ = vec_sub( vec_add( vec_madd( quat->w, vec->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( quat->x, vec->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( quat->y, vec->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmpW = vec_add( vec_add( vec_madd( quat->x, vec->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( quat->y, vec->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( quat->z, vec->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    result->x = vec_add( vec_sub( vec_add( vec_madd( tmpW, quat->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmpX, quat->w, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( tmpY, quat->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( tmpZ, quat->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    result->y = vec_add( vec_sub( vec_add( vec_madd( tmpW, quat->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmpY, quat->w, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( tmpZ, quat->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( tmpX, quat->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    result->z = vec_add( vec_sub( vec_add( vec_madd( tmpW, quat->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmpZ, quat->w, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( tmpX, quat->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( tmpY, quat->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-}
-
-static inline void vmathSoaQConj( VmathSoaQuat *result, const VmathSoaQuat *quat )
-{
-    vmathSoaQMakeFromElems( result, negatef4( quat->x ), negatef4( quat->y ), negatef4( quat->z ), quat->w );
-}
-
-static inline void vmathSoaQSelect( VmathSoaQuat *result, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1, vec_uint4 select1 )
-{
-    result->x = vec_sel( quat0->x, quat1->x, select1 );
-    result->y = vec_sel( quat0->y, quat1->y, select1 );
-    result->z = vec_sel( quat0->z, quat1->z, select1 );
-    result->w = vec_sel( quat0->w, quat1->w, select1 );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathSoaQPrint( const VmathSoaQuat *quat )
-{
-    VmathQuat vec0, vec1, vec2, vec3;
-    vmathSoaQGet4Aos( quat, &vec0, &vec1, &vec2, &vec3 );
-    printf("slot 0:\n");
-    vmathQPrint( &vec0 );
-    printf("slot 1:\n");
-    vmathQPrint( &vec1 );
-    printf("slot 2:\n");
-    vmathQPrint( &vec2 );
-    printf("slot 3:\n");
-    vmathQPrint( &vec3 );
-}
-
-static inline void vmathSoaQPrints( const VmathSoaQuat *quat, const char *name )
-{
-    VmathQuat vec0, vec1, vec2, vec3;
-    printf( "%s:\n", name );
-    vmathSoaQGet4Aos( quat, &vec0, &vec1, &vec2, &vec3 );
-    printf("slot 0:\n");
-    vmathQPrint( &vec0 );
-    printf("slot 1:\n");
-    vmathQPrint( &vec1 );
-    printf("slot 2:\n");
-    vmathQPrint( &vec2 );
-    printf("slot 3:\n");
-    vmathQPrint( &vec3 );
-}
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_QUAT_SOA_C_H
+#define _VECTORMATH_QUAT_SOA_C_H
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*-----------------------------------------------------------------------------
+ * Definitions
+ */
+#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
+#define _VECTORMATH_INTERNAL_FUNCTIONS
+
+#endif
+
+static inline void vmathSoaQCopy( VmathSoaQuat *result, const VmathSoaQuat *quat )
+{
+    result->w = quat->w;   /* member-wise copy, one vec_float4 per component */
+    result->z = quat->z;
+    result->y = quat->y;
+    result->x = quat->x;   /* each store reads only its own member, so order is free */
+}
+
+static inline void vmathSoaQMakeFromElems( VmathSoaQuat *result, vec_float4 _x, vec_float4 _y, vec_float4 _z, vec_float4 _w )
+{
+    result->w = _w;   /* store the four component vectors straight into the struct */
+    result->z = _z;
+    result->y = _y;
+    result->x = _x;
+}
+
+static inline void vmathSoaQMakeFromV3Scalar( VmathSoaQuat *result, const VmathSoaVector3 *xyz, vec_float4 _w )
+{
+    vmathSoaQSetXYZ( result, xyz );   /* x, y, z come from the 3-D vector */
+    vmathSoaQSetW( result, _w );      /* w is supplied separately */
+}
+
+static inline void vmathSoaQMakeFromV4( VmathSoaQuat *result, const VmathSoaVector4 *vec )
+{
+    result->w = vec->w;   /* reinterpret the 4-D vector as a quaternion, member by member */
+    result->z = vec->z;
+    result->y = vec->y;
+    result->x = vec->x;
+}
+
+static inline void vmathSoaQMakeFromScalar( VmathSoaQuat *result, vec_float4 scalar )
+{
+    result->w = scalar;   /* the same vector is written to every component */
+    result->z = scalar;
+    result->y = scalar;
+    result->x = scalar;
+}
+
+static inline void vmathSoaQMakeFromAos( VmathSoaQuat *result, const VmathQuat *quat )
+{
+    vec_float4 aos = quat->vec128;     /* the single AoS quaternion, read once */
+    result->x = vec_splat( aos, 0 );   /* element 0 replicated into the x vector */
+    result->y = vec_splat( aos, 1 );   /* element 1 -> y */
+    result->z = vec_splat( aos, 2 );   /* element 2 -> z */
+    result->w = vec_splat( aos, 3 );   /* element 3 -> w */
+}
+
+static inline void vmathSoaQMakeFrom4Aos( VmathSoaQuat *result, const VmathQuat *quat0, const VmathQuat *quat1, const VmathQuat *quat2, const VmathQuat *quat3 )
+{
+    vec_float4 xy02, xy13, zw02, zw13;   /* first-stage interleaves of the 4x4 transpose */
+    xy02 = vec_mergeh( quat0->vec128, quat2->vec128 );   /* x and y of quats 0 and 2 */
+    xy13 = vec_mergeh( quat1->vec128, quat3->vec128 );   /* x and y of quats 1 and 3 */
+    zw02 = vec_mergel( quat0->vec128, quat2->vec128 );   /* z and w of quats 0 and 2 */
+    zw13 = vec_mergel( quat1->vec128, quat3->vec128 );   /* z and w of quats 1 and 3 */
+    result->x = vec_mergeh( xy02, xy13 );   /* second stage: gather one component per output */
+    result->y = vec_mergel( xy02, xy13 );
+    result->z = vec_mergeh( zw02, zw13 );
+    result->w = vec_mergel( zw02, zw13 );
+}
+
+static inline void vmathSoaQMakeIdentity( VmathSoaQuat *result )
+{   /* Sets x = y = z = 0 and w = 1 in all four slots. */
+    vmathSoaQMakeFromElems( result, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){1.0f,1.0f,1.0f,1.0f}) );
+}
+
+static inline void vmathSoaQLerp( VmathSoaQuat *result, vec_float4 t, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 )
+{
+    VmathSoaQuat delta, scaledDelta;
+    vmathSoaQSub( &delta, quat1, quat0 );            /* quat1 - quat0 */
+    vmathSoaQScalarMul( &scaledDelta, &delta, t );   /* t * (quat1 - quat0) */
+    vmathSoaQAdd( result, quat0, &scaledDelta );     /* quat0 + t * (quat1 - quat0) */
+}
+
+static inline void vmathSoaQSlerp( VmathSoaQuat *result, vec_float4 t, const VmathSoaQuat *unitQuat0, const VmathSoaQuat *unitQuat1 )
+{   /* Per-slot spherical interpolation: result = scale0 * start + scale1 * unitQuat1. */
+    VmathSoaQuat start, tmpQ_0, tmpQ_1;
+    vec_float4 recipSinAngle, scale0, scale1, cosAngle, angle;
+    vec_uint4 selectMask;
+    cosAngle = vmathSoaQDot( unitQuat0, unitQuat1 );
+    selectMask = (vec_uint4)vec_cmpgt( (vec_float4){0.0f,0.0f,0.0f,0.0f}, cosAngle );   /* slots where the dot product is negative */
+    cosAngle = vec_sel( cosAngle, negatef4( cosAngle ), selectMask );   /* flip the sign in those slots so cosAngle is non-negative */
+    vmathSoaQSetX( &start, vec_sel( unitQuat0->x, negatef4( unitQuat0->x ), selectMask ) );   /* start = unitQuat0, negated in the same slots */
+    vmathSoaQSetY( &start, vec_sel( unitQuat0->y, negatef4( unitQuat0->y ), selectMask ) );
+    vmathSoaQSetZ( &start, vec_sel( unitQuat0->z, negatef4( unitQuat0->z ), selectMask ) );
+    vmathSoaQSetW( &start, vec_sel( unitQuat0->w, negatef4( unitQuat0->w ), selectMask ) );
+    selectMask = (vec_uint4)vec_cmpgt( (vec_float4){_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL}, cosAngle );   /* mask reused: slots where cosAngle is below the tolerance */
+    angle = acosf4( cosAngle );
+    recipSinAngle = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sinf4( angle ) );   /* computed for every slot; only consumed where selectMask is set */
+    scale0 = vec_sel( vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), t ), vec_madd( sinf4( vec_madd( vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), t ), angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), recipSinAngle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), selectMask );   /* sin((1-t)*angle)/sin(angle) where masked, else the linear weight 1-t */
+    scale1 = vec_sel( t, vec_madd( sinf4( vec_madd( t, angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), recipSinAngle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), selectMask );   /* sin(t*angle)/sin(angle) where masked, else the linear weight t */
+    vmathSoaQScalarMul( &tmpQ_0, &start, scale0 );
+    vmathSoaQScalarMul( &tmpQ_1, unitQuat1, scale1 );
+    vmathSoaQAdd( result, &tmpQ_0, &tmpQ_1 );   /* result is written only here, after every input has been read */
+}
+
+static inline void vmathSoaQSquad( VmathSoaQuat *result, vec_float4 t, const VmathSoaQuat *unitQuat0, const VmathSoaQuat *unitQuat1, const VmathSoaQuat *unitQuat2, const VmathSoaQuat *unitQuat3 )
+{
+    VmathSoaQuat outer, inner;
+    vmathSoaQSlerp( &outer, t, unitQuat0, unitQuat3 );   /* slerp between the first and last inputs */
+    vmathSoaQSlerp( &inner, t, unitQuat1, unitQuat2 );   /* slerp between the two middle inputs */
+    vmathSoaQSlerp( result, vec_madd( vec_madd( ((vec_float4){2.0f,2.0f,2.0f,2.0f}), t, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), t ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), &outer, &inner );   /* final blend uses parameter 2*t*(1-t) */
+}
+
+static inline void vmathSoaQGet4Aos( const VmathSoaQuat *quat, VmathQuat *result0, VmathQuat *result1, VmathQuat *result2, VmathQuat *result3 )
+{
+    vec_float4 xz01, yw01, xz23, yw23;   /* first-stage interleaves of the 4x4 transpose */
+    xz01 = vec_mergeh( quat->x, quat->z );   /* x and z of slots 0 and 1 */
+    yw01 = vec_mergeh( quat->y, quat->w );   /* y and w of slots 0 and 1 */
+    xz23 = vec_mergel( quat->x, quat->z );   /* x and z of slots 2 and 3 */
+    yw23 = vec_mergel( quat->y, quat->w );   /* y and w of slots 2 and 3 */
+    vmathQMakeFrom128( result0, vec_mergeh( xz01, yw01 ) );   /* slot 0 as one AoS quaternion */
+    vmathQMakeFrom128( result1, vec_mergel( xz01, yw01 ) );   /* slot 1 */
+    vmathQMakeFrom128( result2, vec_mergeh( xz23, yw23 ) );   /* slot 2 */
+    vmathQMakeFrom128( result3, vec_mergel( xz23, yw23 ) );   /* slot 3 */
+}
+
+static inline void vmathSoaQSetXYZ( VmathSoaQuat *result, const VmathSoaVector3 *vec )
+{
+    result->z = vec->z;   /* only x, y and z are overwritten; w is left as it was */
+    result->y = vec->y;
+    result->x = vec->x;
+}
+
+static inline void vmathSoaQGetXYZ( VmathSoaVector3 *result, const VmathSoaQuat *quat )
+{
+    vmathSoaV3MakeFromElems( result, quat->x, quat->y, quat->z );   /* builds the vector from x, y, z; w is not read */
+}
+
+static inline void vmathSoaQSetX( VmathSoaQuat *result, vec_float4 _x )
+{
+    result->x = _x;   /* replaces the x vector only; y, z, w are untouched */
+}
+
+static inline vec_float4 vmathSoaQGetX( const VmathSoaQuat *quat )
+{
+    return quat->x;   /* the x vector, returned by value */
+}
+
+static inline void vmathSoaQSetY( VmathSoaQuat *result, vec_float4 _y )
+{
+    result->y = _y;   /* replaces the y vector only; x, z, w are untouched */
+}
+
+static inline vec_float4 vmathSoaQGetY( const VmathSoaQuat *quat )
+{
+    return quat->y;   /* the y vector, returned by value */
+}
+
+static inline void vmathSoaQSetZ( VmathSoaQuat *result, vec_float4 _z )
+{
+    result->z = _z;   /* replaces the z vector only; x, y, w are untouched */
+}
+
+static inline vec_float4 vmathSoaQGetZ( const VmathSoaQuat *quat )
+{
+    return quat->z;   /* the z vector, returned by value */
+}
+
+static inline void vmathSoaQSetW( VmathSoaQuat *result, vec_float4 _w )
+{
+    result->w = _w;   /* replaces the w vector only; x, y, z are untouched */
+}
+
+static inline vec_float4 vmathSoaQGetW( const VmathSoaQuat *quat )
+{
+    return quat->w;   /* the w vector, returned by value */
+}
+
+static inline void vmathSoaQSetElem( VmathSoaQuat *result, int idx, vec_float4 value )
+{
+    (&result->x)[idx] = value;   /* indexes from member x with no range check; presumably idx 0..3 maps to x, y, z, w - confirm against the struct layout */
+}
+
+static inline vec_float4 vmathSoaQGetElem( const VmathSoaQuat *quat, int idx )
+{
+    return (&quat->x)[idx];   /* indexes from member x with no range check; presumably idx 0..3 maps to x, y, z, w - confirm against the struct layout */
+}
+
+static inline void vmathSoaQAdd( VmathSoaQuat *result, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 )
+{
+    result->w = vec_add( quat0->w, quat1->w );   /* component-wise sum; each store depends only on its own member */
+    result->z = vec_add( quat0->z, quat1->z );
+    result->y = vec_add( quat0->y, quat1->y );
+    result->x = vec_add( quat0->x, quat1->x );
+}
+
+static inline void vmathSoaQSub( VmathSoaQuat *result, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 )
+{
+    result->w = vec_sub( quat0->w, quat1->w );   /* component-wise quat0 - quat1 */
+    result->z = vec_sub( quat0->z, quat1->z );
+    result->y = vec_sub( quat0->y, quat1->y );
+    result->x = vec_sub( quat0->x, quat1->x );
+}
+
+static inline void vmathSoaQScalarMul( VmathSoaQuat *result, const VmathSoaQuat *quat, vec_float4 scalar )
+{
+    result->w = vec_madd( quat->w, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );   /* multiply-add with a zero addend, i.e. a plain product */
+    result->z = vec_madd( quat->z, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    result->y = vec_madd( quat->y, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    result->x = vec_madd( quat->x, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+}
+
+static inline void vmathSoaQScalarDiv( VmathSoaQuat *result, const VmathSoaQuat *quat, vec_float4 scalar )
+{
+    result->w = divf4( quat->w, scalar );   /* every component is divided by the same scalar vector */
+    result->z = divf4( quat->z, scalar );
+    result->y = divf4( quat->y, scalar );
+    result->x = divf4( quat->x, scalar );
+}
+
+static inline void vmathSoaQNeg( VmathSoaQuat *result, const VmathSoaQuat *quat )
+{
+    result->w = negatef4( quat->w );   /* all four components are negated, w included */
+    result->z = negatef4( quat->z );
+    result->y = negatef4( quat->y );
+    result->x = negatef4( quat->x );
+}
+
+static inline vec_float4 vmathSoaQDot( const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 )
+{
+    vec_float4 sum;   /* running total, accumulated in x, y, z, w order */
+    sum = vec_madd( quat0->x, quat1->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    sum = vec_add( sum, vec_madd( quat0->y, quat1->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    sum = vec_add( sum, vec_madd( quat0->z, quat1->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    sum = vec_add( sum, vec_madd( quat0->w, quat1->w, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    return sum;       /* one dot product per slot */
+}
+
+static inline vec_float4 vmathSoaQNorm( const VmathSoaQuat *quat )
+{
+    vec_float4 sumSq;   /* sum of squared components; no square root is taken here */
+    sumSq = vec_madd( quat->x, quat->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    sumSq = vec_add( sumSq, vec_madd( quat->y, quat->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    sumSq = vec_add( sumSq, vec_madd( quat->z, quat->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    sumSq = vec_add( sumSq, vec_madd( quat->w, quat->w, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    return sumSq;
+}
+
+static inline vec_float4 vmathSoaQLength( const VmathSoaQuat *quat )
+{
+    return sqrtf4( vmathSoaQNorm( quat ) );   /* square root of whatever vmathSoaQNorm returns for each slot */
+}
+
+static inline void vmathSoaQNormalize( VmathSoaQuat *result, const VmathSoaQuat *quat )
+{
+    vec_float4 normSq, invLen;
+    normSq = vmathSoaQNorm( quat );
+    invLen = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( normSq ) );   /* 1 / sqrt(norm); no guard against a zero norm */
+    result->x = vec_madd( quat->x, invLen, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );   /* scale each component by the reciprocal */
+    result->y = vec_madd( quat->y, invLen, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    result->z = vec_madd( quat->z, invLen, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    result->w = vec_madd( quat->w, invLen, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+}
+
+static inline void vmathSoaQMakeRotationArc( VmathSoaQuat *result, const VmathSoaVector3 *unitVec0, const VmathSoaVector3 *unitVec1 )
+{
+    VmathSoaVector3 crossVec, scaledCross;
+    vec_float4 twoCosHalf, recipTwoCosHalf;
+    twoCosHalf = sqrtf4( vec_madd( ((vec_float4){2.0f,2.0f,2.0f,2.0f}), vec_add( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vmathSoaV3Dot( unitVec0, unitVec1 ) ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );   /* sqrt( 2 * (1 + dot) ) */
+    recipTwoCosHalf = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), twoCosHalf );   /* no guard for dot == -1, where the divisor is zero */
+    vmathSoaV3Cross( &crossVec, unitVec0, unitVec1 );
+    vmathSoaV3ScalarMul( &scaledCross, &crossVec, recipTwoCosHalf );   /* xyz = cross / sqrt( 2 * (1 + dot) ) */
+    vmathSoaQMakeFromV3Scalar( result, &scaledCross, vec_madd( twoCosHalf, ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );   /* w = half of that square root */
+}
+
+static inline void vmathSoaQMakeRotationAxis( VmathSoaQuat *result, vec_float4 radians, const VmathSoaVector3 *unitVec )
+{   /* Feeds unitVec * sin(radians/2) and cos(radians/2) to vmathSoaQMakeFromV3Scalar. */
+    VmathSoaVector3 scaledAxis;
+    vec_float4 sinHalf, cosHalf;
+    const vec_float4 halfAngle = vec_madd( radians, ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    sincosf4( halfAngle, &sinHalf, &cosHalf );
+    vmathSoaV3ScalarMul( &scaledAxis, unitVec, sinHalf );
+    vmathSoaQMakeFromV3Scalar( result, &scaledAxis, cosHalf );
+}
+
+static inline void vmathSoaQMakeRotationX( VmathSoaQuat *result, vec_float4 radians )
+{   /* Elements passed on: ( sin(radians/2), 0, 0, cos(radians/2) ). */
+    vec_float4 sinHalf, cosHalf;
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    sincosf4( vec_madd( radians, ((vec_float4){0.5f,0.5f,0.5f,0.5f}), zero ), &sinHalf, &cosHalf );
+    vmathSoaQMakeFromElems( result, sinHalf, zero, zero, cosHalf );
+}
+
+static inline void vmathSoaQMakeRotationY( VmathSoaQuat *result, vec_float4 radians )
+{   /* Elements passed on: ( 0, sin(radians/2), 0, cos(radians/2) ). */
+    vec_float4 sinHalf, cosHalf;
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    sincosf4( vec_madd( radians, ((vec_float4){0.5f,0.5f,0.5f,0.5f}), zero ), &sinHalf, &cosHalf );
+    vmathSoaQMakeFromElems( result, zero, sinHalf, zero, cosHalf );
+}
+
+static inline void vmathSoaQMakeRotationZ( VmathSoaQuat *result, vec_float4 radians )
+{   /* Elements passed on: ( 0, 0, sin(radians/2), cos(radians/2) ). */
+    vec_float4 sinHalf, cosHalf;
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    sincosf4( vec_madd( radians, ((vec_float4){0.5f,0.5f,0.5f,0.5f}), zero ), &sinHalf, &cosHalf );
+    vmathSoaQMakeFromElems( result, zero, zero, sinHalf, cosHalf );
+}
+
+static inline void vmathSoaQMul( VmathSoaQuat *result, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 )
+{   /* Quaternion product quat0 * quat1; all four components are computed before result is written. */
+    vec_float4 tmpX, tmpY, tmpZ, tmpW;
+    tmpX = vec_sub( vec_add( vec_add( vec_madd( quat0->w, quat1->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( quat0->x, quat1->w, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( quat0->y, quat1->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( quat0->z, quat1->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ); /* w0*x1 + x0*w1 + y0*z1 - z0*y1 */
+    tmpY = vec_sub( vec_add( vec_add( vec_madd( quat0->w, quat1->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( quat0->y, quat1->w, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( quat0->z, quat1->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( quat0->x, quat1->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ); /* w0*y1 + y0*w1 + z0*x1 - x0*z1 */
+    tmpZ = vec_sub( vec_add( vec_add( vec_madd( quat0->w, quat1->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( quat0->z, quat1->w, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( quat0->x, quat1->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( quat0->y, quat1->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ); /* w0*z1 + z0*w1 + x0*y1 - y0*x1 */
+    tmpW = vec_sub( vec_sub( vec_sub( vec_madd( quat0->w, quat1->w, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( quat0->x, quat1->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( quat0->y, quat1->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( quat0->z, quat1->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ); /* w0*w1 - x0*x1 - y0*y1 - z0*z1 */
+    vmathSoaQMakeFromElems( result, tmpX, tmpY, tmpZ, tmpW );
+}
+
+static inline void vmathSoaQRotate( VmathSoaVector3 *result, const VmathSoaQuat *quat, const VmathSoaVector3 *vec )
+{   /* Two-stage product: tmp* are built from quat and vec, then combined with quat again; vec is fully read before result is written. */
+    vec_float4 tmpX, tmpY, tmpZ, tmpW;
+    tmpX = vec_sub( vec_add( vec_madd( quat->w, vec->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( quat->y, vec->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( quat->z, vec->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ); /* w*vx + y*vz - z*vy */
+    tmpY = vec_sub( vec_add( vec_madd( quat->w, vec->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( quat->z, vec->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( quat->x, vec->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ); /* w*vy + z*vx - x*vz */
+    tmpZ = vec_sub( vec_add( vec_madd( quat->w, vec->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( quat->x, vec->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( quat->y, vec->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ); /* w*vz + x*vy - y*vx */
+    tmpW = vec_add( vec_add( vec_madd( quat->x, vec->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( quat->y, vec->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( quat->z, vec->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ); /* x*vx + y*vy + z*vz */
+    result->x = vec_add( vec_sub( vec_add( vec_madd( tmpW, quat->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmpX, quat->w, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( tmpY, quat->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( tmpZ, quat->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ); /* tmpW*x + tmpX*w - tmpY*z + tmpZ*y */
+    result->y = vec_add( vec_sub( vec_add( vec_madd( tmpW, quat->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmpY, quat->w, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( tmpZ, quat->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( tmpX, quat->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ); /* tmpW*y + tmpY*w - tmpZ*x + tmpX*z */
+    result->z = vec_add( vec_sub( vec_add( vec_madd( tmpW, quat->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmpZ, quat->w, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( tmpX, quat->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( tmpY, quat->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ); /* tmpW*z + tmpZ*w - tmpX*y + tmpY*x */
+}
+
+static inline void vmathSoaQConj( VmathSoaQuat *result, const VmathSoaQuat *quat )
+{   /* Conjugate: x, y and z are negated with negatef4, w is passed through unchanged. */
+    vmathSoaQMakeFromElems( result, negatef4( quat->x ), negatef4( quat->y ), negatef4( quat->z ), quat->w );
+}
+
+static inline void vmathSoaQSelect( VmathSoaQuat *result, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1, vec_uint4 select1 )
+{   /* Applies vec_sel with the same mask to each component: set bits of select1 take quat1, clear bits take quat0. */
+    result->x = vec_sel( quat0->x, quat1->x, select1 );
+    result->y = vec_sel( quat0->y, quat1->y, select1 );
+    result->z = vec_sel( quat0->z, quat1->z, select1 );
+    result->w = vec_sel( quat0->w, quat1->w, select1 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathSoaQPrint( const VmathSoaQuat *quat )
+{   /* Unpacks the SoA quaternion with vmathSoaQGet4Aos and prints each of the four results under a "slot N:" heading. */
+    VmathQuat slot0, slot1, slot2, slot3;
+    vmathSoaQGet4Aos( quat, &slot0, &slot1, &slot2, &slot3 );
+    printf( "slot 0:\n" );
+    vmathQPrint( &slot0 );
+    printf( "slot 1:\n" );
+    vmathQPrint( &slot1 );
+    printf( "slot 2:\n" );
+    vmathQPrint( &slot2 );
+    printf( "slot 3:\n" );
+    vmathQPrint( &slot3 );
+}
+
+static inline void vmathSoaQPrints( const VmathSoaQuat *quat, const char *name )
+{   /* Prints "name:" first, then the four unpacked quaternions under "slot N:" headings. */
+    VmathQuat slot0, slot1, slot2, slot3;
+    printf( "%s:\n", name );
+    vmathSoaQGet4Aos( quat, &slot0, &slot1, &slot2, &slot3 );
+    printf( "slot 0:\n" );
+    vmathQPrint( &slot0 );
+    printf( "slot 1:\n" );
+    vmathQPrint( &slot1 );
+    printf( "slot 2:\n" );
+    vmathQPrint( &slot2 );
+    printf( "slot 3:\n" );
+    vmathQPrint( &slot3 );
+}
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/ppu/c/quat_soa_v.h b/Extras/vectormathlibrary/include/vectormath/ppu/c/quat_soa_v.h
index 601d9da92..f51b43809 100644
--- a/Extras/vectormathlibrary/include/vectormath/ppu/c/quat_soa_v.h
+++ b/Extras/vectormathlibrary/include/vectormath/ppu/c/quat_soa_v.h
@@ -1,319 +1,319 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_QUAT_SOA_V_C_H
-#define _VECTORMATH_QUAT_SOA_V_C_H
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/*-----------------------------------------------------------------------------
- * Definitions
- */
-#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
-#define _VECTORMATH_INTERNAL_FUNCTIONS
-
-#endif
-
-static inline VmathSoaQuat vmathSoaQMakeFromElems_V( vec_float4 _x, vec_float4 _y, vec_float4 _z, vec_float4 _w )
-{
-    VmathSoaQuat result;
-    vmathSoaQMakeFromElems(&result, _x, _y, _z, _w);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQMakeFromV3Scalar_V( VmathSoaVector3 xyz, vec_float4 _w )
-{
-    VmathSoaQuat result;
-    vmathSoaQMakeFromV3Scalar(&result, &xyz, _w);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQMakeFromV4_V( VmathSoaVector4 vec )
-{
-    VmathSoaQuat result;
-    vmathSoaQMakeFromV4(&result, &vec);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQMakeFromScalar_V( vec_float4 scalar )
-{
-    VmathSoaQuat result;
-    vmathSoaQMakeFromScalar(&result, scalar);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQMakeFromAos_V( VmathQuat quat )
-{
-    VmathSoaQuat result;
-    vmathSoaQMakeFromAos(&result, &quat);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQMakeFrom4Aos_V( VmathQuat quat0, VmathQuat quat1, VmathQuat quat2, VmathQuat quat3 )
-{
-    VmathSoaQuat result;
-    vmathSoaQMakeFrom4Aos(&result, &quat0, &quat1, &quat2, &quat3);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQMakeIdentity_V( )
-{
-    VmathSoaQuat result;
-    vmathSoaQMakeIdentity(&result);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQLerp_V( vec_float4 t, VmathSoaQuat quat0, VmathSoaQuat quat1 )
-{
-    VmathSoaQuat result;
-    vmathSoaQLerp(&result, t, &quat0, &quat1);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQSlerp_V( vec_float4 t, VmathSoaQuat unitQuat0, VmathSoaQuat unitQuat1 )
-{
-    VmathSoaQuat result;
-    vmathSoaQSlerp(&result, t, &unitQuat0, &unitQuat1);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQSquad_V( vec_float4 t, VmathSoaQuat unitQuat0, VmathSoaQuat unitQuat1, VmathSoaQuat unitQuat2, VmathSoaQuat unitQuat3 )
-{
-    VmathSoaQuat result;
-    vmathSoaQSquad(&result, t, &unitQuat0, &unitQuat1, &unitQuat2, &unitQuat3);
-    return result;
-}
-
-static inline void vmathSoaQGet4Aos_V( VmathSoaQuat quat, VmathQuat *result0, VmathQuat *result1, VmathQuat *result2, VmathQuat *result3 )
-{
-    vmathSoaQGet4Aos(&quat, result0, result1, result2, result3);
-}
-
-static inline void vmathSoaQSetXYZ_V( VmathSoaQuat *result, VmathSoaVector3 vec )
-{
-    vmathSoaQSetXYZ(result, &vec);
-}
-
-static inline VmathSoaVector3 vmathSoaQGetXYZ_V( VmathSoaQuat quat )
-{
-    VmathSoaVector3 result;
-    vmathSoaQGetXYZ(&result, &quat);
-    return result;
-}
-
-static inline void vmathSoaQSetX_V( VmathSoaQuat *result, vec_float4 _x )
-{
-    vmathSoaQSetX(result, _x);
-}
-
-static inline vec_float4 vmathSoaQGetX_V( VmathSoaQuat quat )
-{
-    return vmathSoaQGetX(&quat);
-}
-
-static inline void vmathSoaQSetY_V( VmathSoaQuat *result, vec_float4 _y )
-{
-    vmathSoaQSetY(result, _y);
-}
-
-static inline vec_float4 vmathSoaQGetY_V( VmathSoaQuat quat )
-{
-    return vmathSoaQGetY(&quat);
-}
-
-static inline void vmathSoaQSetZ_V( VmathSoaQuat *result, vec_float4 _z )
-{
-    vmathSoaQSetZ(result, _z);
-}
-
-static inline vec_float4 vmathSoaQGetZ_V( VmathSoaQuat quat )
-{
-    return vmathSoaQGetZ(&quat);
-}
-
-static inline void vmathSoaQSetW_V( VmathSoaQuat *result, vec_float4 _w )
-{
-    vmathSoaQSetW(result, _w);
-}
-
-static inline vec_float4 vmathSoaQGetW_V( VmathSoaQuat quat )
-{
-    return vmathSoaQGetW(&quat);
-}
-
-static inline void vmathSoaQSetElem_V( VmathSoaQuat *result, int idx, vec_float4 value )
-{
-    vmathSoaQSetElem(result, idx, value);
-}
-
-static inline vec_float4 vmathSoaQGetElem_V( VmathSoaQuat quat, int idx )
-{
-    return vmathSoaQGetElem(&quat, idx);
-}
-
-static inline VmathSoaQuat vmathSoaQAdd_V( VmathSoaQuat quat0, VmathSoaQuat quat1 )
-{
-    VmathSoaQuat result;
-    vmathSoaQAdd(&result, &quat0, &quat1);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQSub_V( VmathSoaQuat quat0, VmathSoaQuat quat1 )
-{
-    VmathSoaQuat result;
-    vmathSoaQSub(&result, &quat0, &quat1);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQScalarMul_V( VmathSoaQuat quat, vec_float4 scalar )
-{
-    VmathSoaQuat result;
-    vmathSoaQScalarMul(&result, &quat, scalar);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQScalarDiv_V( VmathSoaQuat quat, vec_float4 scalar )
-{
-    VmathSoaQuat result;
-    vmathSoaQScalarDiv(&result, &quat, scalar);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQNeg_V( VmathSoaQuat quat )
-{
-    VmathSoaQuat result;
-    vmathSoaQNeg(&result, &quat);
-    return result;
-}
-
-static inline vec_float4 vmathSoaQDot_V( VmathSoaQuat quat0, VmathSoaQuat quat1 )
-{
-    return vmathSoaQDot(&quat0, &quat1);
-}
-
-static inline vec_float4 vmathSoaQNorm_V( VmathSoaQuat quat )
-{
-    return vmathSoaQNorm(&quat);
-}
-
-static inline vec_float4 vmathSoaQLength_V( VmathSoaQuat quat )
-{
-    return vmathSoaQLength(&quat);
-}
-
-static inline VmathSoaQuat vmathSoaQNormalize_V( VmathSoaQuat quat )
-{
-    VmathSoaQuat result;
-    vmathSoaQNormalize(&result, &quat);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQMakeRotationArc_V( VmathSoaVector3 unitVec0, VmathSoaVector3 unitVec1 )
-{
-    VmathSoaQuat result;
-    vmathSoaQMakeRotationArc(&result, &unitVec0, &unitVec1);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQMakeRotationAxis_V( vec_float4 radians, VmathSoaVector3 unitVec )
-{
-    VmathSoaQuat result;
-    vmathSoaQMakeRotationAxis(&result, radians, &unitVec);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQMakeRotationX_V( vec_float4 radians )
-{
-    VmathSoaQuat result;
-    vmathSoaQMakeRotationX(&result, radians);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQMakeRotationY_V( vec_float4 radians )
-{
-    VmathSoaQuat result;
-    vmathSoaQMakeRotationY(&result, radians);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQMakeRotationZ_V( vec_float4 radians )
-{
-    VmathSoaQuat result;
-    vmathSoaQMakeRotationZ(&result, radians);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQMul_V( VmathSoaQuat quat0, VmathSoaQuat quat1 )
-{
-    VmathSoaQuat result;
-    vmathSoaQMul(&result, &quat0, &quat1);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaQRotate_V( VmathSoaQuat quat, VmathSoaVector3 vec )
-{
-    VmathSoaVector3 result;
-    vmathSoaQRotate(&result, &quat, &vec);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQConj_V( VmathSoaQuat quat )
-{
-    VmathSoaQuat result;
-    vmathSoaQConj(&result, &quat);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQSelect_V( VmathSoaQuat quat0, VmathSoaQuat quat1, vec_uint4 select1 )
-{
-    VmathSoaQuat result;
-    vmathSoaQSelect(&result, &quat0, &quat1, select1);
-    return result;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathSoaQPrint_V( VmathSoaQuat quat )
-{
-    vmathSoaQPrint(&quat);
-}
-
-static inline void vmathSoaQPrints_V( VmathSoaQuat quat, const char *name )
-{
-    vmathSoaQPrints(&quat, name);
-}
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_QUAT_SOA_V_C_H
+#define _VECTORMATH_QUAT_SOA_V_C_H
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*-----------------------------------------------------------------------------
+ * Definitions
+ */
+#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
+#define _VECTORMATH_INTERNAL_FUNCTIONS
+
+#endif
+
+static inline VmathSoaQuat vmathSoaQMakeFromElems_V( vec_float4 _x, vec_float4 _y, vec_float4 _z, vec_float4 _w )
+{   /* By-value form of vmathSoaQMakeFromElems: the quaternion is returned instead of written through a pointer. */
+    VmathSoaQuat out;
+    vmathSoaQMakeFromElems( &out, _x, _y, _z, _w );
+    return out;
+}
+
+static inline VmathSoaQuat vmathSoaQMakeFromV3Scalar_V( VmathSoaVector3 xyz, vec_float4 _w )
+{   /* By-value form of vmathSoaQMakeFromV3Scalar: xyz is taken by value and the quaternion is returned. */
+    VmathSoaQuat out;
+    vmathSoaQMakeFromV3Scalar( &out, &xyz, _w );
+    return out;
+}
+
+static inline VmathSoaQuat vmathSoaQMakeFromV4_V( VmathSoaVector4 vec )
+{   /* By-value form of vmathSoaQMakeFromV4: vec is taken by value and the quaternion is returned. */
+    VmathSoaQuat out;
+    vmathSoaQMakeFromV4( &out, &vec );
+    return out;
+}
+
+static inline VmathSoaQuat vmathSoaQMakeFromScalar_V( vec_float4 scalar )
+{   /* By-value form of vmathSoaQMakeFromScalar: the quaternion is returned instead of written through a pointer. */
+    VmathSoaQuat out;
+    vmathSoaQMakeFromScalar( &out, scalar );
+    return out;
+}
+
+static inline VmathSoaQuat vmathSoaQMakeFromAos_V( VmathQuat quat )
+{   /* By-value form of vmathSoaQMakeFromAos: quat is taken by value and the SoA quaternion is returned. */
+    VmathSoaQuat out;
+    vmathSoaQMakeFromAos( &out, &quat );
+    return out;
+}
+
+static inline VmathSoaQuat vmathSoaQMakeFrom4Aos_V( VmathQuat quat0, VmathQuat quat1, VmathQuat quat2, VmathQuat quat3 )
+{   /* By-value form of vmathSoaQMakeFrom4Aos: the four inputs are taken by value and the SoA quaternion is returned. */
+    VmathSoaQuat out;
+    vmathSoaQMakeFrom4Aos( &out, &quat0, &quat1, &quat2, &quat3 );
+    return out;
+}
+
+static inline VmathSoaQuat vmathSoaQMakeIdentity_V( void )
+{   /* By-value form of vmathSoaQMakeIdentity.  The "( void )" list makes this a true prototype in C; "( )" left the arguments unspecified. */
+    VmathSoaQuat result;
+    vmathSoaQMakeIdentity(&result);
+    return result;
+}
+
+static inline VmathSoaQuat vmathSoaQLerp_V( vec_float4 t, VmathSoaQuat quat0, VmathSoaQuat quat1 )
+{   /* By-value form of vmathSoaQLerp: both quaternions are taken by value and the result is returned. */
+    VmathSoaQuat out;
+    vmathSoaQLerp( &out, t, &quat0, &quat1 );
+    return out;
+}
+
+static inline VmathSoaQuat vmathSoaQSlerp_V( vec_float4 t, VmathSoaQuat unitQuat0, VmathSoaQuat unitQuat1 )
+{   /* By-value form of vmathSoaQSlerp: both quaternions are taken by value and the result is returned. */
+    VmathSoaQuat out;
+    vmathSoaQSlerp( &out, t, &unitQuat0, &unitQuat1 );
+    return out;
+}
+
+static inline VmathSoaQuat vmathSoaQSquad_V( vec_float4 t, VmathSoaQuat unitQuat0, VmathSoaQuat unitQuat1, VmathSoaQuat unitQuat2, VmathSoaQuat unitQuat3 )
+{   /* By-value form of vmathSoaQSquad: the four quaternions are taken by value and the result is returned. */
+    VmathSoaQuat out;
+    vmathSoaQSquad( &out, t, &unitQuat0, &unitQuat1, &unitQuat2, &unitQuat3 );
+    return out;
+}
+
+static inline void vmathSoaQGet4Aos_V( VmathSoaQuat quat, VmathQuat *result0, VmathQuat *result1, VmathQuat *result2, VmathQuat *result3 )
+{   /* By-value form of vmathSoaQGet4Aos: quat is taken by value; the four output pointers are forwarded unchanged. */
+    vmathSoaQGet4Aos(&quat, result0, result1, result2, result3);
+}
+
+static inline void vmathSoaQSetXYZ_V( VmathSoaQuat *result, VmathSoaVector3 vec )
+{   /* By-value form of vmathSoaQSetXYZ: vec is taken by value; result is still modified through the pointer. */
+    vmathSoaQSetXYZ(result, &vec);
+}
+
+static inline VmathSoaVector3 vmathSoaQGetXYZ_V( VmathSoaQuat quat )
+{   /* By-value form of vmathSoaQGetXYZ: quat is taken by value and the 3-D vector is returned. */
+    VmathSoaVector3 xyz;
+    vmathSoaQGetXYZ( &xyz, &quat );
+    return xyz;
+}
+
+static inline void vmathSoaQSetX_V( VmathSoaQuat *result, vec_float4 _x )
+{   /* Forwards both arguments unchanged to vmathSoaQSetX. */
+    vmathSoaQSetX(result, _x);
+}
+
+static inline vec_float4 vmathSoaQGetX_V( VmathSoaQuat quat )
+{   /* By-value form of vmathSoaQGetX: passes the address of the local copy of quat. */
+    return vmathSoaQGetX(&quat);
+}
+
+static inline void vmathSoaQSetY_V( VmathSoaQuat *result, vec_float4 _y )
+{   /* Forwards both arguments unchanged to vmathSoaQSetY. */
+    vmathSoaQSetY(result, _y);
+}
+
+static inline vec_float4 vmathSoaQGetY_V( VmathSoaQuat quat )
+{   /* By-value form of vmathSoaQGetY: passes the address of the local copy of quat. */
+    return vmathSoaQGetY(&quat);
+}
+
+static inline void vmathSoaQSetZ_V( VmathSoaQuat *result, vec_float4 _z )
+{   /* Forwards both arguments unchanged to vmathSoaQSetZ. */
+    vmathSoaQSetZ(result, _z);
+}
+
+static inline vec_float4 vmathSoaQGetZ_V( VmathSoaQuat quat )
+{   /* By-value form of vmathSoaQGetZ: passes the address of the local copy of quat. */
+    return vmathSoaQGetZ(&quat);
+}
+
+static inline void vmathSoaQSetW_V( VmathSoaQuat *result, vec_float4 _w )
+{   /* Forwards both arguments unchanged to vmathSoaQSetW. */
+    vmathSoaQSetW(result, _w);
+}
+
+static inline vec_float4 vmathSoaQGetW_V( VmathSoaQuat quat )
+{   /* By-value form of vmathSoaQGetW: passes the address of the local copy of quat. */
+    return vmathSoaQGetW(&quat);
+}
+
+static inline void vmathSoaQSetElem_V( VmathSoaQuat *result, int idx, vec_float4 value )
+{   /* Forwards all three arguments unchanged to vmathSoaQSetElem; idx is not range-checked here. */
+    vmathSoaQSetElem(result, idx, value);
+}
+
+static inline vec_float4 vmathSoaQGetElem_V( VmathSoaQuat quat, int idx )
+{   /* By-value form of vmathSoaQGetElem: passes the address of the local copy of quat; idx is not range-checked here. */
+    return vmathSoaQGetElem(&quat, idx);
+}
+
+static inline VmathSoaQuat vmathSoaQAdd_V( VmathSoaQuat quat0, VmathSoaQuat quat1 )
+{   /* By-value form of vmathSoaQAdd: both operands are taken by value and the result is returned. */
+    VmathSoaQuat out;
+    vmathSoaQAdd( &out, &quat0, &quat1 );
+    return out;
+}
+
+static inline VmathSoaQuat vmathSoaQSub_V( VmathSoaQuat quat0, VmathSoaQuat quat1 )
+{   /* By-value form of vmathSoaQSub: both operands are taken by value and the result is returned. */
+    VmathSoaQuat out;
+    vmathSoaQSub( &out, &quat0, &quat1 );
+    return out;
+}
+
+static inline VmathSoaQuat vmathSoaQScalarMul_V( VmathSoaQuat quat, vec_float4 scalar )
+{   /* By-value form of vmathSoaQScalarMul: quat is taken by value and the result is returned. */
+    VmathSoaQuat out;
+    vmathSoaQScalarMul( &out, &quat, scalar );
+    return out;
+}
+
+static inline VmathSoaQuat vmathSoaQScalarDiv_V( VmathSoaQuat quat, vec_float4 scalar )
+{   /* By-value form of vmathSoaQScalarDiv: quat is taken by value and the result is returned. */
+    VmathSoaQuat out;
+    vmathSoaQScalarDiv( &out, &quat, scalar );
+    return out;
+}
+
+static inline VmathSoaQuat vmathSoaQNeg_V( VmathSoaQuat quat )
+{   /* By-value form of vmathSoaQNeg: quat is taken by value and the result is returned. */
+    VmathSoaQuat out;
+    vmathSoaQNeg( &out, &quat );
+    return out;
+}
+
+static inline vec_float4 vmathSoaQDot_V( VmathSoaQuat quat0, VmathSoaQuat quat1 )
+{   /* By-value form of vmathSoaQDot: passes the addresses of the local copies of both operands. */
+    return vmathSoaQDot(&quat0, &quat1);
+}
+
+static inline vec_float4 vmathSoaQNorm_V( VmathSoaQuat quat )
+{   /* By-value form of vmathSoaQNorm: passes the address of the local copy of quat. */
+    return vmathSoaQNorm(&quat);
+}
+
+static inline vec_float4 vmathSoaQLength_V( VmathSoaQuat quat )
+{   /* By-value form of vmathSoaQLength: passes the address of the local copy of quat. */
+    return vmathSoaQLength(&quat);
+}
+
+static inline VmathSoaQuat vmathSoaQNormalize_V( VmathSoaQuat quat )
+{   /* By-value form of vmathSoaQNormalize: quat is taken by value and the result is returned. */
+    VmathSoaQuat out;
+    vmathSoaQNormalize( &out, &quat );
+    return out;
+}
+
+static inline VmathSoaQuat vmathSoaQMakeRotationArc_V( VmathSoaVector3 unitVec0, VmathSoaVector3 unitVec1 )
+{   /* By-value form of vmathSoaQMakeRotationArc: both vectors are taken by value and the quaternion is returned. */
+    VmathSoaQuat out;
+    vmathSoaQMakeRotationArc( &out, &unitVec0, &unitVec1 );
+    return out;
+}
+
+static inline VmathSoaQuat vmathSoaQMakeRotationAxis_V( vec_float4 radians, VmathSoaVector3 unitVec )
+{   /* By-value form of vmathSoaQMakeRotationAxis: unitVec is taken by value and the quaternion is returned. */
+    VmathSoaQuat out;
+    vmathSoaQMakeRotationAxis( &out, radians, &unitVec );
+    return out;
+}
+
+static inline VmathSoaQuat vmathSoaQMakeRotationX_V( vec_float4 radians )
+{   /* By-value form of vmathSoaQMakeRotationX: the quaternion is returned instead of written through a pointer. */
+    VmathSoaQuat out;
+    vmathSoaQMakeRotationX( &out, radians );
+    return out;
+}
+
+static inline VmathSoaQuat vmathSoaQMakeRotationY_V( vec_float4 radians )
+{   /* By-value form of vmathSoaQMakeRotationY: the quaternion is returned instead of written through a pointer. */
+    VmathSoaQuat out;
+    vmathSoaQMakeRotationY( &out, radians );
+    return out;
+}
+
+static inline VmathSoaQuat vmathSoaQMakeRotationZ_V( vec_float4 radians )
+{   /* By-value form of vmathSoaQMakeRotationZ: the quaternion is returned instead of written through a pointer. */
+    VmathSoaQuat out;
+    vmathSoaQMakeRotationZ( &out, radians );
+    return out;
+}
+
+static inline VmathSoaQuat vmathSoaQMul_V( VmathSoaQuat quat0, VmathSoaQuat quat1 )
+{   /* By-value form of vmathSoaQMul: both operands are taken by value and the product is returned. */
+    VmathSoaQuat out;
+    vmathSoaQMul( &out, &quat0, &quat1 );
+    return out;
+}
+
+static inline VmathSoaVector3 vmathSoaQRotate_V( VmathSoaQuat quat, VmathSoaVector3 vec )
+{   /* By-value form of vmathSoaQRotate: quat and vec are taken by value and the resulting vector is returned. */
+    VmathSoaVector3 rotated;
+    vmathSoaQRotate( &rotated, &quat, &vec );
+    return rotated;
+}
+
+static inline VmathSoaQuat vmathSoaQConj_V( VmathSoaQuat quat )
+{   /* By-value form of vmathSoaQConj: quat is taken by value and the result is returned. */
+    VmathSoaQuat out;
+    vmathSoaQConj( &out, &quat );
+    return out;
+}
+
+static inline VmathSoaQuat vmathSoaQSelect_V( VmathSoaQuat quat0, VmathSoaQuat quat1, vec_uint4 select1 )
+{   /* By-value form of vmathSoaQSelect: both quaternions are taken by value and the selection is returned. */
+    VmathSoaQuat out;
+    vmathSoaQSelect( &out, &quat0, &quat1, select1 );
+    return out;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathSoaQPrint_V( VmathSoaQuat quat )
+{   /* By-value form of vmathSoaQPrint: passes the address of the local copy of quat. */
+    vmathSoaQPrint(&quat);
+}
+
+static inline void vmathSoaQPrints_V( VmathSoaQuat quat, const char *name )
+{   /* By-value form of vmathSoaQPrints: passes the address of the local copy of quat; name is forwarded unchanged. */
+    vmathSoaQPrints(&quat, name);
+}
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/ppu/c/vec_aos.h b/Extras/vectormathlibrary/include/vectormath/ppu/c/vec_aos.h
index c7858cfe5..256828410 100644
--- a/Extras/vectormathlibrary/include/vectormath/ppu/c/vec_aos.h
+++ b/Extras/vectormathlibrary/include/vectormath/ppu/c/vec_aos.h
@@ -1,1124 +1,1125 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_VEC_AOS_C_H
-#define _VECTORMATH_VEC_AOS_C_H
-#include <altivec.h>
-#include <vec_types.h>
-#include <simdmath.h>
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/*-----------------------------------------------------------------------------
- * Constants
- * for permutes words are labeled [x,y,z,w] [a,b,c,d]
- */
-#define _VECTORMATH_PERM_X 0x00010203
-#define _VECTORMATH_PERM_Y 0x04050607
-#define _VECTORMATH_PERM_Z 0x08090a0b
-#define _VECTORMATH_PERM_W 0x0c0d0e0f
-#define _VECTORMATH_PERM_A 0x10111213
-#define _VECTORMATH_PERM_B 0x14151617
-#define _VECTORMATH_PERM_C 0x18191a1b
-#define _VECTORMATH_PERM_D 0x1c1d1e1f
-#define _VECTORMATH_PERM_XYZA (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A }
-#define _VECTORMATH_PERM_ZXYW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_W }
-#define _VECTORMATH_PERM_YZXW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_W }
-#define _VECTORMATH_PERM_YZAB (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A, _VECTORMATH_PERM_B }
-#define _VECTORMATH_PERM_ZABC (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A, _VECTORMATH_PERM_B, _VECTORMATH_PERM_C }
-#define _VECTORMATH_PERM_XYAW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A, _VECTORMATH_PERM_W }
-#define _VECTORMATH_PERM_XAZW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_A, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_W }
-#define _VECTORMATH_MASK_0xF000 (vec_uint4){ 0xffffffff, 0, 0, 0 }
-#define _VECTORMATH_MASK_0x0F00 (vec_uint4){ 0, 0xffffffff, 0, 0 }
-#define _VECTORMATH_MASK_0x00F0 (vec_uint4){ 0, 0, 0xffffffff, 0 }
-#define _VECTORMATH_MASK_0x000F (vec_uint4){ 0, 0, 0, 0xffffffff }
-#define _VECTORMATH_UNIT_1000 (vec_float4){ 1.0f, 0.0f, 0.0f, 0.0f }
-#define _VECTORMATH_UNIT_0100 (vec_float4){ 0.0f, 1.0f, 0.0f, 0.0f }
-#define _VECTORMATH_UNIT_0010 (vec_float4){ 0.0f, 0.0f, 1.0f, 0.0f }
-#define _VECTORMATH_UNIT_0001 (vec_float4){ 0.0f, 0.0f, 0.0f, 1.0f }
-#define _VECTORMATH_SLERP_TOL 0.999f
-
-/*-----------------------------------------------------------------------------
- * Definitions
- */
-#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
-#define _VECTORMATH_INTERNAL_FUNCTIONS
-
-static inline vec_float4 _vmathVfDot3( vec_float4 vec0, vec_float4 vec1 )
-{
-    vec_float4 result;
-    result = vec_madd( vec0, vec1, (vec_float4){0.0f,0.0f,0.0f,0.0f} );
-    result = vec_madd( vec_sld( vec0, vec0, 4 ), vec_sld( vec1, vec1, 4 ), result );
-    return vec_madd( vec_sld( vec0, vec0, 8 ), vec_sld( vec1, vec1, 8 ), result );
-}
-
-static inline vec_float4 _vmathVfDot4( vec_float4 vec0, vec_float4 vec1 )
-{
-    vec_float4 result;
-    result = vec_madd( vec0, vec1, (vec_float4){0.0f,0.0f,0.0f,0.0f} );
-    result = vec_madd( vec_sld( vec0, vec0, 4 ), vec_sld( vec1, vec1, 4 ), result );
-    return vec_add( vec_sld( result, result, 8 ), result );
-}
-
-static inline vec_float4 _vmathVfCross( vec_float4 vec0, vec_float4 vec1 )
-{
-    vec_float4 tmp0, tmp1, tmp2, tmp3, result;
-    tmp0 = vec_perm( vec0, vec0, _VECTORMATH_PERM_YZXW );
-    tmp1 = vec_perm( vec1, vec1, _VECTORMATH_PERM_ZXYW );
-    tmp2 = vec_perm( vec0, vec0, _VECTORMATH_PERM_ZXYW );
-    tmp3 = vec_perm( vec1, vec1, _VECTORMATH_PERM_YZXW );
-    result = vec_madd( tmp0, tmp1, (vec_float4){0.0f,0.0f,0.0f,0.0f} );
-    result = vec_nmsub( tmp2, tmp3, result );
-    return result;
-}
-
-static inline vec_uint4 _vmathVfToHalfFloatsUnpacked(vec_float4 v)
-{
-    vec_int4 bexp;
-    vec_uint4 mant, sign, hfloat;
-    vec_uint4 notZero, isInf;
-    const vec_uint4 hfloatInf = (vec_uint4){0x00007c00u,0x00007c00u,0x00007c00u,0x00007c00u};
-    const vec_uint4 mergeMant = (vec_uint4){0x000003ffu,0x000003ffu,0x000003ffu,0x000003ffu};
-    const vec_uint4 mergeSign = (vec_uint4){0x00008000u,0x00008000u,0x00008000u,0x00008000u};
-
-    sign = vec_sr((vec_uint4)v, (vec_uint4){16,16,16,16});
-    mant = vec_sr((vec_uint4)v, (vec_uint4){13,13,13,13});
-    bexp = vec_and(vec_sr((vec_int4)v, (vec_uint4){23,23,23,23}), (vec_int4){0xff,0xff,0xff,0xff});
-
-    notZero = (vec_uint4)vec_cmpgt(bexp, (vec_int4){112,112,112,112});
-    isInf = (vec_uint4)vec_cmpgt(bexp, (vec_int4){142,142,142,142});
-
-    bexp = vec_add(bexp, (vec_int4){-112,-112,-112,-112});
-    bexp = vec_sl(bexp, (vec_uint4){10,10,10,10});
-
-    hfloat = vec_sel((vec_uint4)bexp, mant, mergeMant);
-    hfloat = vec_sel((vec_uint4){0,0,0,0}, hfloat, notZero);
-    hfloat = vec_sel(hfloat, hfloatInf, isInf);
-    hfloat = vec_sel(hfloat, sign, mergeSign);
-
-    return hfloat;
-}
-
-static inline vec_ushort8 _vmath2VfToHalfFloats(vec_float4 u, vec_float4 v)
-{
-    vec_uint4 hfloat_u, hfloat_v;
-    const vec_uchar16 pack = (vec_uchar16){2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31};
-    hfloat_u = _vmathVfToHalfFloatsUnpacked(u);
-    hfloat_v = _vmathVfToHalfFloatsUnpacked(v);
-    return (vec_ushort8)vec_perm(hfloat_u, hfloat_v, pack);
-}
-
-#ifndef __GNUC__
-#define __builtin_constant_p(x) 0
-#endif
-
-static inline vec_float4 _vmathVfInsert(vec_float4 dst, vec_float4 src, int slot)
-{
-#ifdef __GNUC__
-    if (__builtin_constant_p(slot)) {
-        dst = vec_sld(dst, dst, slot<<2);
-        dst = vec_sld(dst, src, 4);
-        if (slot != 3) dst = vec_sld(dst, dst, (3-slot)<<2);
-        return dst;
-    } else
-#endif
-    {
-        vec_uchar16 shiftpattern = vec_lvsr( 0, (float *)(size_t)(slot<<2) );
-        vec_uint4 selectmask = (vec_uint4)vec_perm( (vec_uint4){0,0,0,0}, _VECTORMATH_MASK_0xF000, shiftpattern );
-        return vec_sel( dst, src, selectmask );
-    }
-}
-
-#define _vmathVfGetElement(vec, slot) ((float *)&(vec))[slot]
-#ifdef _VECTORMATH_SET_CONSTS_IN_MEM
-#define _vmathVfSetElement(vec, scalar, slot) ((float *)&(vec))[slot] = scalar
-#else
-#define _vmathVfSetElement(vec, scalar, slot)                                            \
-{                                                                                        \
-    if (__builtin_constant_p(scalar)) {                                                  \
-        (vec) = _vmathVfInsert(vec, (vec_float4){scalar, scalar, scalar, scalar}, slot); \
-    } else {                                                                             \
-        ((float *)&(vec))[slot] = scalar;                                                \
-    }                                                                                    \
-}
-#endif
-
-static inline vec_float4 _vmathVfSplatScalar(float scalar)
-{
-    vec_float4 result;
-    if (__builtin_constant_p(scalar)) {
-        result = (vec_float4){scalar, scalar, scalar, scalar};
-    } else {
-        result = vec_ld(0, &scalar);
-        result = vec_splat(vec_perm(result, result, vec_lvsl(0, &scalar)), 0);
-    } 
-    return result;
-}
-
-static inline vec_uint4 _vmathVuiSplatScalar(unsigned int scalar)
-{
-    vec_uint4 result;
-    if (__builtin_constant_p(scalar)) {
-        result = (vec_uint4){scalar, scalar, scalar, scalar};
-    } else {
-        result = vec_ld(0, &scalar);
-        result = vec_splat(vec_perm(result, result, vec_lvsl(0, &scalar)), 0);
-    } 
-    return result;
-}
-
-#endif
-
-static inline void vmathV3Copy( VmathVector3 *result, const VmathVector3 *vec )
-{
-    result->vec128 = vec->vec128;
-}
-
-static inline void vmathV3MakeFromElems( VmathVector3 *result, float _x, float _y, float _z )
-{
-    if (__builtin_constant_p(_x) & __builtin_constant_p(_y) & __builtin_constant_p(_z)) {
-        result->vec128 = (vec_float4){_x, _y, _z, 0.0f};
-    } else {
-        float *pf = (float *)&result->vec128;
-        pf[0] = _x;
-        pf[1] = _y;
-        pf[2] = _z;
-    }
-}
-
-static inline void vmathV3MakeFromP3( VmathVector3 *result, const VmathPoint3 *pnt )
-{
-    result->vec128 = pnt->vec128;
-}
-
-static inline void vmathV3MakeFromScalar( VmathVector3 *result, float scalar )
-{
-    result->vec128 = _vmathVfSplatScalar(scalar);
-}
-
-static inline void vmathV3MakeFrom128( VmathVector3 *result, vec_float4 vf4 )
-{
-    result->vec128 = vf4;
-}
-
-static inline void vmathV3MakeXAxis( VmathVector3 *result )
-{
-    result->vec128 = _VECTORMATH_UNIT_1000;
-}
-
-static inline void vmathV3MakeYAxis( VmathVector3 *result )
-{
-    result->vec128 = _VECTORMATH_UNIT_0100;
-}
-
-static inline void vmathV3MakeZAxis( VmathVector3 *result )
-{
-    result->vec128 = _VECTORMATH_UNIT_0010;
-}
-
-static inline void vmathV3Lerp( VmathVector3 *result, float t, const VmathVector3 *vec0, const VmathVector3 *vec1 )
-{
-    VmathVector3 tmpV3_0, tmpV3_1;
-    vmathV3Sub( &tmpV3_0, vec1, vec0 );
-    vmathV3ScalarMul( &tmpV3_1, &tmpV3_0, t );
-    vmathV3Add( result, vec0, &tmpV3_1 );
-}
-
-static inline void vmathV3Slerp( VmathVector3 *result, float t, const VmathVector3 *unitVec0, const VmathVector3 *unitVec1 )
-{
-    vec_float4 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
-    vec_uint4 selectMask;
-    cosAngle = _vmathVfDot3( unitVec0->vec128, unitVec1->vec128 );
-    cosAngle = vec_splat( cosAngle, 0 );
-    selectMask = (vec_uint4)vec_cmpgt( ((vec_float4){_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL}), cosAngle );
-    angle = acosf4( cosAngle );
-    tttt = _vmathVfSplatScalar(t);
-    oneMinusT = vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), tttt );
-    angles = vec_mergeh( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), tttt );
-    angles = vec_mergeh( angles, oneMinusT );
-    angles = vec_madd( angles, angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    sines = sinf4( angles );
-    scales = divf4( sines, vec_splat( sines, 0 ) );
-    scale0 = vec_sel( oneMinusT, vec_splat( scales, 1 ), selectMask );
-    scale1 = vec_sel( tttt, vec_splat( scales, 2 ), selectMask );
-    result->vec128 = vec_madd( unitVec0->vec128, scale0, vec_madd( unitVec1->vec128, scale1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-}
-
-static inline vec_float4 vmathV3Get128( const VmathVector3 *vec )
-{
-    return vec->vec128;
-}
-
-static inline void vmathV3StoreXYZ( const VmathVector3 *vec, vec_float4 *quad )
-{
-    vec_float4 dstVec = *quad;
-    vec_uint4 mask = _VECTORMATH_MASK_0x000F;
-    dstVec = vec_sel(vec->vec128, dstVec, mask);
-    *quad = dstVec;
-}
-
-static inline void vmathV3LoadXYZArray( VmathVector3 *vec0, VmathVector3 *vec1, VmathVector3 *vec2, VmathVector3 *vec3, const vec_float4 *threeQuads )
-{
-    vec_float4 xyzx, yzxy, zxyz, xyz1, xyz2, xyz3;
-    xyzx = threeQuads[0];
-    yzxy = threeQuads[1];
-    zxyz = threeQuads[2];
-    xyz1 = vec_sld( xyzx, yzxy, 12 );
-    xyz2 = vec_sld( yzxy, zxyz, 8 );
-    xyz3 = vec_sld( zxyz, zxyz, 4 );
-    vec0->vec128 = xyzx;
-    vec1->vec128 = xyz1;
-    vec2->vec128 = xyz2;
-    vec3->vec128 = xyz3;
-}
-
-static inline void vmathV3StoreXYZArray( const VmathVector3 *vec0, const VmathVector3 *vec1, const VmathVector3 *vec2, const VmathVector3 *vec3, vec_float4 *threeQuads )
-{
-    vec_float4 xyzx, yzxy, zxyz;
-    xyzx = vec_perm( vec0->vec128, vec1->vec128, _VECTORMATH_PERM_XYZA );
-    yzxy = vec_perm( vec1->vec128, vec2->vec128, _VECTORMATH_PERM_YZAB );
-    zxyz = vec_perm( vec2->vec128, vec3->vec128, _VECTORMATH_PERM_ZABC );
-    threeQuads[0] = xyzx;
-    threeQuads[1] = yzxy;
-    threeQuads[2] = zxyz;
-}
-
-static inline void vmathV3StoreHalfFloats( const VmathVector3 *vec0, const VmathVector3 *vec1, const VmathVector3 *vec2, const VmathVector3 *vec3, const VmathVector3 *vec4, const VmathVector3 *vec5, const VmathVector3 *vec6, const VmathVector3 *vec7, vec_ushort8 *threeQuads )
-{
-    vec_float4 xyz0[3];
-    vec_float4 xyz1[3];
-    vmathV3StoreXYZArray( vec0, vec1, vec2, vec3, xyz0 );
-    vmathV3StoreXYZArray( vec4, vec5, vec6, vec7, xyz1 );
-    threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
-    threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
-    threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
-}
-
-static inline void vmathV3SetX( VmathVector3 *result, float _x )
-{
-    _vmathVfSetElement(result->vec128, _x, 0);
-}
-
-static inline float vmathV3GetX( const VmathVector3 *vec )
-{
-    return _vmathVfGetElement(vec->vec128, 0);
-}
-
-static inline void vmathV3SetY( VmathVector3 *result, float _y )
-{
-    _vmathVfSetElement(result->vec128, _y, 1);
-}
-
-static inline float vmathV3GetY( const VmathVector3 *vec )
-{
-    return _vmathVfGetElement(vec->vec128, 1);
-}
-
-static inline void vmathV3SetZ( VmathVector3 *result, float _z )
-{
-    _vmathVfSetElement(result->vec128, _z, 2);
-}
-
-static inline float vmathV3GetZ( const VmathVector3 *vec )
-{
-    return _vmathVfGetElement(vec->vec128, 2);
-}
-
-static inline void vmathV3SetElem( VmathVector3 *result, int idx, float value )
-{
-    _vmathVfSetElement(result->vec128, value, idx);
-}
-
-static inline float vmathV3GetElem( const VmathVector3 *vec, int idx )
-{
-    return _vmathVfGetElement(vec->vec128, idx);
-}
-
-static inline void vmathV3Add( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
-{
-    result->vec128 = vec_add( vec0->vec128, vec1->vec128 );
-}
-
-static inline void vmathV3Sub( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
-{
-    result->vec128 = vec_sub( vec0->vec128, vec1->vec128 );
-}
-
-static inline void vmathV3AddP3( VmathPoint3 *result, const VmathVector3 *vec, const VmathPoint3 *pnt1 )
-{
-    result->vec128 = vec_add( vec->vec128, pnt1->vec128 );
-}
-
-static inline void vmathV3ScalarMul( VmathVector3 *result, const VmathVector3 *vec, float scalar )
-{
-    result->vec128 = vec_madd( vec->vec128, _vmathVfSplatScalar(scalar), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-static inline void vmathV3ScalarDiv( VmathVector3 *result, const VmathVector3 *vec, float scalar )
-{
-    result->vec128 = divf4( vec->vec128, _vmathVfSplatScalar(scalar) );
-}
-
-static inline void vmathV3Neg( VmathVector3 *result, const VmathVector3 *vec )
-{
-    result->vec128 = negatef4( vec->vec128 );
-}
-
-static inline void vmathV3MulPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
-{
-    result->vec128 = vec_madd( vec0->vec128, vec1->vec128, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-static inline void vmathV3DivPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
-{
-    result->vec128 = divf4( vec0->vec128, vec1->vec128 );
-}
-
-static inline void vmathV3RecipPerElem( VmathVector3 *result, const VmathVector3 *vec )
-{
-    result->vec128 = recipf4( vec->vec128 );
-}
-
-static inline void vmathV3SqrtPerElem( VmathVector3 *result, const VmathVector3 *vec )
-{
-    result->vec128 = sqrtf4( vec->vec128 );
-}
-
-static inline void vmathV3RsqrtPerElem( VmathVector3 *result, const VmathVector3 *vec )
-{
-    result->vec128 = rsqrtf4( vec->vec128 );
-}
-
-static inline void vmathV3AbsPerElem( VmathVector3 *result, const VmathVector3 *vec )
-{
-    result->vec128 = fabsf4( vec->vec128 );
-}
-
-static inline void vmathV3CopySignPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
-{
-    result->vec128 = copysignf4( vec0->vec128, vec1->vec128 );
-}
-
-static inline void vmathV3MaxPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
-{
-    result->vec128 = fmaxf4( vec0->vec128, vec1->vec128 );
-}
-
-static inline float vmathV3MaxElem( const VmathVector3 *vec )
-{
-    vec_float4 result;
-    result = fmaxf4( vec_splat( vec->vec128, 1 ), vec->vec128 );
-    result = fmaxf4( vec_splat( vec->vec128, 2 ), result );
-    return _vmathVfGetElement(result, 0);
-}
-
-static inline void vmathV3MinPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
-{
-    result->vec128 = fminf4( vec0->vec128, vec1->vec128 );
-}
-
-static inline float vmathV3MinElem( const VmathVector3 *vec )
-{
-    vec_float4 result;
-    result = fminf4( vec_splat( vec->vec128, 1 ), vec->vec128 );
-    result = fminf4( vec_splat( vec->vec128, 2 ), result );
-    return _vmathVfGetElement(result, 0);
-}
-
-static inline float vmathV3Sum( const VmathVector3 *vec )
-{
-    vec_float4 result;
-    result = vec_add( vec_splat( vec->vec128, 1 ), vec->vec128 );
-    result = vec_add( vec_splat( vec->vec128, 2 ), result );
-    return _vmathVfGetElement(result, 0);
-}
-
-static inline float vmathV3Dot( const VmathVector3 *vec0, const VmathVector3 *vec1 )
-{
-    vec_float4 result = _vmathVfDot3( vec0->vec128, vec1->vec128 );
-    return _vmathVfGetElement(result, 0);
-}
-
-static inline float vmathV3LengthSqr( const VmathVector3 *vec )
-{
-    vec_float4 result = _vmathVfDot3( vec->vec128, vec->vec128 );
-    return _vmathVfGetElement(result, 0);
-}
-
-static inline float vmathV3Length( const VmathVector3 *vec )
-{
-    return sqrtf( vmathV3LengthSqr( vec ) );
-}
-
-static inline void vmathV3Normalize( VmathVector3 *result, const VmathVector3 *vec )
-{
-    vec_float4 dot = _vmathVfDot3( vec->vec128, vec->vec128 );
-    dot = vec_splat( dot, 0 );
-    result->vec128 = vec_madd( vec->vec128, rsqrtf4( dot ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-static inline void vmathV3Cross( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
-{
-    result->vec128 = _vmathVfCross( vec0->vec128, vec1->vec128 );
-}
-
-static inline void vmathV3Select( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1, unsigned int select1 )
-{
-    unsigned int tmp;
-    tmp = (unsigned int)-(select1 > 0);
-    result->vec128 = vec_sel( vec0->vec128, vec1->vec128, _vmathVuiSplatScalar(tmp) );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathV3Print( const VmathVector3 *vec )
-{
-    union { vec_float4 v; float s[4]; } tmp;
-    tmp.v = vec->vec128;
-    printf( "( %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2] );
-}
-
-static inline void vmathV3Prints( const VmathVector3 *vec, const char *name )
-{
-    union { vec_float4 v; float s[4]; } tmp;
-    tmp.v = vec->vec128;
-    printf( "%s: ( %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2] );
-}
-
-#endif
-
-static inline void vmathV4Copy( VmathVector4 *result, const VmathVector4 *vec )
-{
-    result->vec128 = vec->vec128;
-}
-
-static inline void vmathV4MakeFromElems( VmathVector4 *result, float _x, float _y, float _z, float _w )
-{
-    if (__builtin_constant_p(_x) & __builtin_constant_p(_y) &
-        __builtin_constant_p(_z) & __builtin_constant_p(_w)) {
-        result->vec128 = (vec_float4){_x, _y, _z, _w};
-    } else {
-        float *pf = (float *)&result->vec128;
-        pf[0] = _x;
-        pf[1] = _y;
-        pf[2] = _z;
-        pf[3] = _w;
-    }
-}
-
-static inline void vmathV4MakeFromV3Scalar( VmathVector4 *result, const VmathVector3 *xyz, float _w )
-{
-    result->vec128 = xyz->vec128;
-    _vmathVfSetElement(result->vec128, _w, 3);
-}
-
-static inline void vmathV4MakeFromV3( VmathVector4 *result, const VmathVector3 *vec )
-{
-    result->vec128 = vec->vec128;
-    result->vec128 = _vmathVfInsert(result->vec128, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), 3);
-}
-
-static inline void vmathV4MakeFromP3( VmathVector4 *result, const VmathPoint3 *pnt )
-{
-    result->vec128 = pnt->vec128;
-    result->vec128 = _vmathVfInsert(result->vec128, ((vec_float4){1.0f,1.0f,1.0f,1.0f}), 3);
-}
-
-static inline void vmathV4MakeFromQ( VmathVector4 *result, const VmathQuat *quat )
-{
-    result->vec128 = quat->vec128;
-}
-
-static inline void vmathV4MakeFromScalar( VmathVector4 *result, float scalar )
-{
-    result->vec128 = _vmathVfSplatScalar(scalar);
-}
-
-static inline void vmathV4MakeFrom128( VmathVector4 *result, vec_float4 vf4 )
-{
-    result->vec128 = vf4;
-}
-
-static inline void vmathV4MakeXAxis( VmathVector4 *result )
-{
-    result->vec128 = _VECTORMATH_UNIT_1000;
-}
-
-static inline void vmathV4MakeYAxis( VmathVector4 *result )
-{
-    result->vec128 = _VECTORMATH_UNIT_0100;
-}
-
-static inline void vmathV4MakeZAxis( VmathVector4 *result )
-{
-    result->vec128 = _VECTORMATH_UNIT_0010;
-}
-
-static inline void vmathV4MakeWAxis( VmathVector4 *result )
-{
-    result->vec128 = _VECTORMATH_UNIT_0001;
-}
-
-static inline void vmathV4Lerp( VmathVector4 *result, float t, const VmathVector4 *vec0, const VmathVector4 *vec1 )
-{
-    VmathVector4 tmpV4_0, tmpV4_1;
-    vmathV4Sub( &tmpV4_0, vec1, vec0 );
-    vmathV4ScalarMul( &tmpV4_1, &tmpV4_0, t );
-    vmathV4Add( result, vec0, &tmpV4_1 );
-}
-
-static inline void vmathV4Slerp( VmathVector4 *result, float t, const VmathVector4 *unitVec0, const VmathVector4 *unitVec1 )
-{
-    vec_float4 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
-    vec_uint4 selectMask;
-    cosAngle = _vmathVfDot4( unitVec0->vec128, unitVec1->vec128 );
-    cosAngle = vec_splat( cosAngle, 0 );
-    selectMask = (vec_uint4)vec_cmpgt( ((vec_float4){_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL}), cosAngle );
-    angle = acosf4( cosAngle );
-    tttt = _vmathVfSplatScalar(t);
-    oneMinusT = vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), tttt );
-    angles = vec_mergeh( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), tttt );
-    angles = vec_mergeh( angles, oneMinusT );
-    angles = vec_madd( angles, angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    sines = sinf4( angles );
-    scales = divf4( sines, vec_splat( sines, 0 ) );
-    scale0 = vec_sel( oneMinusT, vec_splat( scales, 1 ), selectMask );
-    scale1 = vec_sel( tttt, vec_splat( scales, 2 ), selectMask );
-    result->vec128 = vec_madd( unitVec0->vec128, scale0, vec_madd( unitVec1->vec128, scale1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-}
-
-static inline vec_float4 vmathV4Get128( const VmathVector4 *vec )
-{
-    return vec->vec128;
-}
-
-static inline void vmathV4StoreHalfFloats( const VmathVector4 *vec0, const VmathVector4 *vec1, const VmathVector4 *vec2, const VmathVector4 *vec3, vec_ushort8 *twoQuads )
-{
-    twoQuads[0] = _vmath2VfToHalfFloats(vec0->vec128, vec1->vec128);
-    twoQuads[1] = _vmath2VfToHalfFloats(vec2->vec128, vec3->vec128);
-}
-
-static inline void vmathV4SetXYZ( VmathVector4 *result, const VmathVector3 *vec )
-{
-    result->vec128 = vec_sel( vec->vec128, result->vec128, _VECTORMATH_MASK_0x000F );
-}
-
-static inline void vmathV4GetXYZ( VmathVector3 *result, const VmathVector4 *vec )
-{
-    result->vec128 = vec->vec128;
-}
-
-static inline void vmathV4SetX( VmathVector4 *result, float _x )
-{
-    _vmathVfSetElement(result->vec128, _x, 0);
-}
-
-static inline float vmathV4GetX( const VmathVector4 *vec )
-{
-    return _vmathVfGetElement(vec->vec128, 0);
-}
-
-static inline void vmathV4SetY( VmathVector4 *result, float _y )
-{
-    _vmathVfSetElement(result->vec128, _y, 1);
-}
-
-static inline float vmathV4GetY( const VmathVector4 *vec )
-{
-    return _vmathVfGetElement(vec->vec128, 1);
-}
-
-static inline void vmathV4SetZ( VmathVector4 *result, float _z )
-{
-    _vmathVfSetElement(result->vec128, _z, 2);
-}
-
-static inline float vmathV4GetZ( const VmathVector4 *vec )
-{
-    return _vmathVfGetElement(vec->vec128, 2);
-}
-
-static inline void vmathV4SetW( VmathVector4 *result, float _w )
-{
-    _vmathVfSetElement(result->vec128, _w, 3);
-}
-
-static inline float vmathV4GetW( const VmathVector4 *vec )
-{
-    return _vmathVfGetElement(vec->vec128, 3);
-}
-
-static inline void vmathV4SetElem( VmathVector4 *result, int idx, float value )
-{
-    _vmathVfSetElement(result->vec128, value, idx);
-}
-
-static inline float vmathV4GetElem( const VmathVector4 *vec, int idx )
-{
-    return _vmathVfGetElement(vec->vec128, idx);
-}
-
-static inline void vmathV4Add( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
-{
-    result->vec128 = vec_add( vec0->vec128, vec1->vec128 );
-}
-
-static inline void vmathV4Sub( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
-{
-    result->vec128 = vec_sub( vec0->vec128, vec1->vec128 );
-}
-
-static inline void vmathV4ScalarMul( VmathVector4 *result, const VmathVector4 *vec, float scalar )
-{
-    result->vec128 = vec_madd( vec->vec128, _vmathVfSplatScalar(scalar), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-static inline void vmathV4ScalarDiv( VmathVector4 *result, const VmathVector4 *vec, float scalar )
-{
-    result->vec128 = divf4( vec->vec128, _vmathVfSplatScalar(scalar) );
-}
-
-static inline void vmathV4Neg( VmathVector4 *result, const VmathVector4 *vec )
-{
-    result->vec128 = negatef4( vec->vec128 );
-}
-
-static inline void vmathV4MulPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
-{
-    result->vec128 = vec_madd( vec0->vec128, vec1->vec128, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-static inline void vmathV4DivPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
-{
-    result->vec128 = divf4( vec0->vec128, vec1->vec128 );
-}
-
-static inline void vmathV4RecipPerElem( VmathVector4 *result, const VmathVector4 *vec )
-{
-    result->vec128 = recipf4( vec->vec128 );
-}
-
-static inline void vmathV4SqrtPerElem( VmathVector4 *result, const VmathVector4 *vec )
-{
-    result->vec128 = sqrtf4( vec->vec128 );
-}
-
-static inline void vmathV4RsqrtPerElem( VmathVector4 *result, const VmathVector4 *vec )
-{
-    result->vec128 = rsqrtf4( vec->vec128 );
-}
-
-static inline void vmathV4AbsPerElem( VmathVector4 *result, const VmathVector4 *vec )
-{
-    result->vec128 = fabsf4( vec->vec128 );
-}
-
-static inline void vmathV4CopySignPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
-{
-    result->vec128 = copysignf4( vec0->vec128, vec1->vec128 );
-}
-
-static inline void vmathV4MaxPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
-{
-    result->vec128 = fmaxf4( vec0->vec128, vec1->vec128 );
-}
-
-static inline float vmathV4MaxElem( const VmathVector4 *vec )
-{
-    vec_float4 result;
-    result = fmaxf4( vec_splat( vec->vec128, 1 ), vec->vec128 );
-    result = fmaxf4( vec_splat( vec->vec128, 2 ), result );
-    result = fmaxf4( vec_splat( vec->vec128, 3 ), result );
-    return _vmathVfGetElement(result, 0);
-}
-
-static inline void vmathV4MinPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
-{
-    result->vec128 = fminf4( vec0->vec128, vec1->vec128 );
-}
-
-static inline float vmathV4MinElem( const VmathVector4 *vec )
-{
-    vec_float4 result;
-    result = fminf4( vec_splat( vec->vec128, 1 ), vec->vec128 );
-    result = fminf4( vec_splat( vec->vec128, 2 ), result );
-    result = fminf4( vec_splat( vec->vec128, 3 ), result );
-    return _vmathVfGetElement(result, 0);
-}
-
-static inline float vmathV4Sum( const VmathVector4 *vec )
-{
-    vec_float4 result;
-    result = vec_add( vec_splat( vec->vec128, 1 ), vec->vec128 );
-    result = vec_add( vec_splat( vec->vec128, 2 ), result );
-    result = vec_add( vec_splat( vec->vec128, 3 ), result );
-    return _vmathVfGetElement(result, 0);
-}
-
-static inline float vmathV4Dot( const VmathVector4 *vec0, const VmathVector4 *vec1 )
-{
-    vec_float4 result = _vmathVfDot4( vec0->vec128, vec1->vec128 );
-    return _vmathVfGetElement(result, 0);
-}
-
-static inline float vmathV4LengthSqr( const VmathVector4 *vec )
-{
-    vec_float4 result = _vmathVfDot4( vec->vec128, vec->vec128 );
-    return _vmathVfGetElement(result, 0);
-}
-
-static inline float vmathV4Length( const VmathVector4 *vec )
-{
-    return sqrtf( vmathV4LengthSqr( vec ) );
-}
-
-static inline void vmathV4Normalize( VmathVector4 *result, const VmathVector4 *vec )
-{
-    vec_float4 dot = _vmathVfDot4( vec->vec128, vec->vec128 );
-    result->vec128 = vec_madd( vec->vec128, rsqrtf4( dot ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-static inline void vmathV4Select( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1, unsigned int select1 )
-{
-    unsigned int tmp;
-    tmp = (unsigned int)-(select1 > 0);
-    result->vec128 = vec_sel( vec0->vec128, vec1->vec128, _vmathVuiSplatScalar(tmp) );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathV4Print( const VmathVector4 *vec )
-{
-    union { vec_float4 v; float s[4]; } tmp;
-    tmp.v = vec->vec128;
-    printf( "( %f %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] );
-}
-
-static inline void vmathV4Prints( const VmathVector4 *vec, const char *name )
-{
-    union { vec_float4 v; float s[4]; } tmp;
-    tmp.v = vec->vec128;
-    printf( "%s: ( %f %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] );
-}
-
-#endif
-
-static inline void vmathP3Copy( VmathPoint3 *result, const VmathPoint3 *pnt )
-{
-    result->vec128 = pnt->vec128;
-}
-
-static inline void vmathP3MakeFromElems( VmathPoint3 *result, float _x, float _y, float _z )
-{
-    if (__builtin_constant_p(_x) & __builtin_constant_p(_y) & __builtin_constant_p(_z)) {
-        result->vec128 = (vec_float4){_x, _y, _z, 0.0f};
-    } else {
-        float *pf = (float *)&result->vec128;
-        pf[0] = _x;
-        pf[1] = _y;
-        pf[2] = _z;
-    }
-}
-
-static inline void vmathP3MakeFromV3( VmathPoint3 *result, const VmathVector3 *vec )
-{
-    result->vec128 = vec->vec128;
-}
-
-static inline void vmathP3MakeFromScalar( VmathPoint3 *result, float scalar )
-{
-    result->vec128 = _vmathVfSplatScalar(scalar);
-}
-
-static inline void vmathP3MakeFrom128( VmathPoint3 *result, vec_float4 vf4 )
-{
-    result->vec128 = vf4;
-}
-
-static inline void vmathP3Lerp( VmathPoint3 *result, float t, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
-{
-    VmathVector3 tmpV3_0, tmpV3_1;
-    vmathP3Sub( &tmpV3_0, pnt1, pnt0 );
-    vmathV3ScalarMul( &tmpV3_1, &tmpV3_0, t );
-    vmathP3AddV3( result, pnt0, &tmpV3_1 );
-}
-
-static inline vec_float4 vmathP3Get128( const VmathPoint3 *pnt )
-{
-    return pnt->vec128;
-}
-
-static inline void vmathP3StoreXYZ( const VmathPoint3 *pnt, vec_float4 *quad )
-{
-    vec_float4 dstVec = *quad;
-    vec_uint4 mask = _VECTORMATH_MASK_0x000F;
-    dstVec = vec_sel(pnt->vec128, dstVec, mask);
-    *quad = dstVec;
-}
-
-static inline void vmathP3LoadXYZArray( VmathPoint3 *pnt0, VmathPoint3 *pnt1, VmathPoint3 *pnt2, VmathPoint3 *pnt3, const vec_float4 *threeQuads )
-{
-    vec_float4 xyzx, yzxy, zxyz, xyz1, xyz2, xyz3;
-    xyzx = threeQuads[0];
-    yzxy = threeQuads[1];
-    zxyz = threeQuads[2];
-    xyz1 = vec_sld( xyzx, yzxy, 12 );
-    xyz2 = vec_sld( yzxy, zxyz, 8 );
-    xyz3 = vec_sld( zxyz, zxyz, 4 );
-    pnt0->vec128 = xyzx;
-    pnt1->vec128 = xyz1;
-    pnt2->vec128 = xyz2;
-    pnt3->vec128 = xyz3;
-}
-
-static inline void vmathP3StoreXYZArray( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, const VmathPoint3 *pnt2, const VmathPoint3 *pnt3, vec_float4 *threeQuads )
-{
-    vec_float4 xyzx, yzxy, zxyz;
-    xyzx = vec_perm( pnt0->vec128, pnt1->vec128, _VECTORMATH_PERM_XYZA );
-    yzxy = vec_perm( pnt1->vec128, pnt2->vec128, _VECTORMATH_PERM_YZAB );
-    zxyz = vec_perm( pnt2->vec128, pnt3->vec128, _VECTORMATH_PERM_ZABC );
-    threeQuads[0] = xyzx;
-    threeQuads[1] = yzxy;
-    threeQuads[2] = zxyz;
-}
-
-static inline void vmathP3StoreHalfFloats( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, const VmathPoint3 *pnt2, const VmathPoint3 *pnt3, const VmathPoint3 *pnt4, const VmathPoint3 *pnt5, const VmathPoint3 *pnt6, const VmathPoint3 *pnt7, vec_ushort8 *threeQuads )
-{
-    vec_float4 xyz0[3];
-    vec_float4 xyz1[3];
-    vmathP3StoreXYZArray( pnt0, pnt1, pnt2, pnt3, xyz0 );
-    vmathP3StoreXYZArray( pnt4, pnt5, pnt6, pnt7, xyz1 );
-    threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
-    threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
-    threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
-}
-
-static inline void vmathP3SetX( VmathPoint3 *result, float _x )
-{
-    _vmathVfSetElement(result->vec128, _x, 0);
-}
-
-static inline float vmathP3GetX( const VmathPoint3 *pnt )
-{
-    return _vmathVfGetElement(pnt->vec128, 0);
-}
-
-static inline void vmathP3SetY( VmathPoint3 *result, float _y )
-{
-    _vmathVfSetElement(result->vec128, _y, 1);
-}
-
-static inline float vmathP3GetY( const VmathPoint3 *pnt )
-{
-    return _vmathVfGetElement(pnt->vec128, 1);
-}
-
-static inline void vmathP3SetZ( VmathPoint3 *result, float _z )
-{
-    _vmathVfSetElement(result->vec128, _z, 2);
-}
-
-static inline float vmathP3GetZ( const VmathPoint3 *pnt )
-{
-    return _vmathVfGetElement(pnt->vec128, 2);
-}
-
-static inline void vmathP3SetElem( VmathPoint3 *result, int idx, float value )
-{
-    _vmathVfSetElement(result->vec128, value, idx);
-}
-
-static inline float vmathP3GetElem( const VmathPoint3 *pnt, int idx )
-{
-    return _vmathVfGetElement(pnt->vec128, idx);
-}
-
-static inline void vmathP3Sub( VmathVector3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
-{
-    result->vec128 = vec_sub( pnt0->vec128, pnt1->vec128 );
-}
-
-static inline void vmathP3AddV3( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *vec1 )
-{
-    result->vec128 = vec_add( pnt->vec128, vec1->vec128 );
-}
-
-static inline void vmathP3SubV3( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *vec1 )
-{
-    result->vec128 = vec_sub( pnt->vec128, vec1->vec128 );
-}
-
-static inline void vmathP3MulPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
-{
-    result->vec128 = vec_madd( pnt0->vec128, pnt1->vec128, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-static inline void vmathP3DivPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
-{
-    result->vec128 = divf4( pnt0->vec128, pnt1->vec128 );
-}
-
-static inline void vmathP3RecipPerElem( VmathPoint3 *result, const VmathPoint3 *pnt )
-{
-    result->vec128 = recipf4( pnt->vec128 );
-}
-
-static inline void vmathP3SqrtPerElem( VmathPoint3 *result, const VmathPoint3 *pnt )
-{
-    result->vec128 = sqrtf4( pnt->vec128 );
-}
-
-static inline void vmathP3RsqrtPerElem( VmathPoint3 *result, const VmathPoint3 *pnt )
-{
-    result->vec128 = rsqrtf4( pnt->vec128 );
-}
-
-static inline void vmathP3AbsPerElem( VmathPoint3 *result, const VmathPoint3 *pnt )
-{
-    result->vec128 = fabsf4( pnt->vec128 );
-}
-
-static inline void vmathP3CopySignPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
-{
-    result->vec128 = copysignf4( pnt0->vec128, pnt1->vec128 );
-}
-
-static inline void vmathP3MaxPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
-{
-    result->vec128 = fmaxf4( pnt0->vec128, pnt1->vec128 );
-}
-
-static inline float vmathP3MaxElem( const VmathPoint3 *pnt )
-{
-    vec_float4 result;
-    result = fmaxf4( vec_splat( pnt->vec128, 1 ), pnt->vec128 );
-    result = fmaxf4( vec_splat( pnt->vec128, 2 ), result );
-    return _vmathVfGetElement(result, 0);
-}
-
-static inline void vmathP3MinPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
-{
-    result->vec128 = fminf4( pnt0->vec128, pnt1->vec128 );
-}
-
-static inline float vmathP3MinElem( const VmathPoint3 *pnt )
-{
-    vec_float4 result;
-    result = fminf4( vec_splat( pnt->vec128, 1 ), pnt->vec128 );
-    result = fminf4( vec_splat( pnt->vec128, 2 ), result );
-    return _vmathVfGetElement(result, 0);
-}
-
-static inline float vmathP3Sum( const VmathPoint3 *pnt )
-{
-    vec_float4 result;
-    result = vec_add( vec_splat( pnt->vec128, 1 ), pnt->vec128 );
-    result = vec_add( vec_splat( pnt->vec128, 2 ), result );
-    return _vmathVfGetElement(result, 0);
-}
-
-static inline void vmathP3Scale( VmathPoint3 *result, const VmathPoint3 *pnt, float scaleVal )
-{
-    VmathPoint3 tmpP3_0;
-    vmathP3MakeFromScalar( &tmpP3_0, scaleVal );
-    vmathP3MulPerElem( result, pnt, &tmpP3_0 );
-}
-
-static inline void vmathP3NonUniformScale( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *scaleVec )
-{
-    VmathPoint3 tmpP3_0;
-    vmathP3MakeFromV3( &tmpP3_0, scaleVec );
-    vmathP3MulPerElem( result, pnt, &tmpP3_0 );
-}
-
-static inline float vmathP3Projection( const VmathPoint3 *pnt, const VmathVector3 *unitVec )
-{
-    vec_float4 result = _vmathVfDot3( pnt->vec128, unitVec->vec128 );
-    return _vmathVfGetElement(result, 0);
-}
-
-static inline float vmathP3DistSqrFromOrigin( const VmathPoint3 *pnt )
-{
-    VmathVector3 tmpV3_0;
-    vmathV3MakeFromP3( &tmpV3_0, pnt );
-    return vmathV3LengthSqr( &tmpV3_0 );
-}
-
-static inline float vmathP3DistFromOrigin( const VmathPoint3 *pnt )
-{
-    VmathVector3 tmpV3_0;
-    vmathV3MakeFromP3( &tmpV3_0, pnt );
-    return vmathV3Length( &tmpV3_0 );
-}
-
-static inline float vmathP3DistSqr( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
-{
-    VmathVector3 tmpV3_0;
-    vmathP3Sub( &tmpV3_0, pnt1, pnt0 );
-    return vmathV3LengthSqr( &tmpV3_0 );
-}
-
-static inline float vmathP3Dist( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
-{
-    VmathVector3 tmpV3_0;
-    vmathP3Sub( &tmpV3_0, pnt1, pnt0 );
-    return vmathV3Length( &tmpV3_0 );
-}
-
-static inline void vmathP3Select( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, unsigned int select1 )
-{
-    unsigned int tmp;
-    tmp = (unsigned int)-(select1 > 0);
-    result->vec128 = vec_sel( pnt0->vec128, pnt1->vec128, _vmathVuiSplatScalar(tmp) );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathP3Print( const VmathPoint3 *pnt )
-{
-    union { vec_float4 v; float s[4]; } tmp;
-    tmp.v = pnt->vec128;
-    printf( "( %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2] );
-}
-
-static inline void vmathP3Prints( const VmathPoint3 *pnt, const char *name )
-{
-    union { vec_float4 v; float s[4]; } tmp;
-    tmp.v = pnt->vec128;
-    printf( "%s: ( %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2] );
-}
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_VEC_AOS_C_H
+#define _VECTORMATH_VEC_AOS_C_H
+#include <altivec.h>
+#include <simdmath.h>
+#include <stddef.h>
+#include "vec_types.h"
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*-----------------------------------------------------------------------------
+ * Constants
+ * For permutes, the words of the first operand are labeled [x,y,z,w] and those of the second operand [a,b,c,d].
+ */
+#define _VECTORMATH_PERM_X 0x00010203 /* byte indices 0-3: word x (word 0 of the first vec_perm operand) */
+#define _VECTORMATH_PERM_Y 0x04050607
+#define _VECTORMATH_PERM_Z 0x08090a0b
+#define _VECTORMATH_PERM_W 0x0c0d0e0f
+#define _VECTORMATH_PERM_A 0x10111213 /* byte indices 16-19: word a (word 0 of the second vec_perm operand) */
+#define _VECTORMATH_PERM_B 0x14151617
+#define _VECTORMATH_PERM_C 0x18191a1b
+#define _VECTORMATH_PERM_D 0x1c1d1e1f
+#define _VECTORMATH_PERM_XYZA (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A } /* full permute patterns: the name lists the source word for each result word */
+#define _VECTORMATH_PERM_ZXYW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_W }
+#define _VECTORMATH_PERM_YZXW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_W }
+#define _VECTORMATH_PERM_YZAB (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A, _VECTORMATH_PERM_B }
+#define _VECTORMATH_PERM_ZABC (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A, _VECTORMATH_PERM_B, _VECTORMATH_PERM_C }
+#define _VECTORMATH_PERM_XYAW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A, _VECTORMATH_PERM_W }
+#define _VECTORMATH_PERM_XAZW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_A, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_W }
+#define _VECTORMATH_MASK_0xF000 (vec_uint4){ 0xffffffff, 0, 0, 0 } /* select masks: all ones in exactly one word (here word 0) */
+#define _VECTORMATH_MASK_0x0F00 (vec_uint4){ 0, 0xffffffff, 0, 0 }
+#define _VECTORMATH_MASK_0x00F0 (vec_uint4){ 0, 0, 0xffffffff, 0 }
+#define _VECTORMATH_MASK_0x000F (vec_uint4){ 0, 0, 0, 0xffffffff } /* word 3 (w) only */
+#define _VECTORMATH_UNIT_1000 (vec_float4){ 1.0f, 0.0f, 0.0f, 0.0f } /* unit vectors along x, y, z and w */
+#define _VECTORMATH_UNIT_0100 (vec_float4){ 0.0f, 1.0f, 0.0f, 0.0f }
+#define _VECTORMATH_UNIT_0010 (vec_float4){ 0.0f, 0.0f, 1.0f, 0.0f }
+#define _VECTORMATH_UNIT_0001 (vec_float4){ 0.0f, 0.0f, 0.0f, 1.0f }
+#define _VECTORMATH_SLERP_TOL 0.999f /* the Slerp routines use linear weights unless the cosine of the angle is below this */
+
+/*-----------------------------------------------------------------------------
+ * Definitions
+ */
+#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
+#define _VECTORMATH_INTERNAL_FUNCTIONS
+
+static inline vec_float4 _vmathVfDot3( vec_float4 vec0, vec_float4 vec1 )
+{ /* 3-component dot product: word 0 of the return holds x0*x1 + y0*y1 + z0*z1; the other words mix in w and are not a 3-D dot */
+    vec_float4 result;
+    result = vec_madd( vec0, vec1, (vec_float4){0.0f,0.0f,0.0f,0.0f} ); /* per-word products */
+    result = vec_madd( vec_sld( vec0, vec0, 4 ), vec_sld( vec1, vec1, 4 ), result ); /* operands rotated left one word: adds the y product into word 0 */
+    return vec_madd( vec_sld( vec0, vec0, 8 ), vec_sld( vec1, vec1, 8 ), result ); /* rotated left two words: adds the z product into word 0 */
+}
+
+static inline vec_float4 _vmathVfDot4( vec_float4 vec0, vec_float4 vec1 )
+{ /* 4-component dot product; every word of the return holds the full sum */
+    vec_float4 result;
+    result = vec_madd( vec0, vec1, (vec_float4){0.0f,0.0f,0.0f,0.0f} ); /* per-word products */
+    result = vec_madd( vec_sld( vec0, vec0, 4 ), vec_sld( vec1, vec1, 4 ), result ); /* word i now holds product i + product i+1 */
+    return vec_add( vec_sld( result, result, 8 ), result ); /* add the pair sum two words away to complete all four terms */
+}
+
+static inline vec_float4 _vmathVfCross( vec_float4 vec0, vec_float4 vec1 )
+{ /* cross product of the xyz parts of vec0 and vec1; word 3 comes out as w0*w1 - w0*w1 */
+    vec_float4 tmp0, tmp1, tmp2, tmp3, result;
+    tmp0 = vec_perm( vec0, vec0, _VECTORMATH_PERM_YZXW ); /* vec0 as (y,z,x,w) */
+    tmp1 = vec_perm( vec1, vec1, _VECTORMATH_PERM_ZXYW ); /* vec1 as (z,x,y,w) */
+    tmp2 = vec_perm( vec0, vec0, _VECTORMATH_PERM_ZXYW ); /* vec0 as (z,x,y,w) */
+    tmp3 = vec_perm( vec1, vec1, _VECTORMATH_PERM_YZXW ); /* vec1 as (y,z,x,w) */
+    result = vec_madd( tmp0, tmp1, (vec_float4){0.0f,0.0f,0.0f,0.0f} ); /* first term, e.g. y0*z1 in word 0 */
+    result = vec_nmsub( tmp2, tmp3, result ); /* result - tmp2*tmp3, e.g. y0*z1 - z0*y1 in word 0 */
+    return result;
+}
+
+static inline vec_uint4 _vmathVfToHalfFloatsUnpacked(vec_float4 v)
+{ /* convert each 32-bit float to a 16-bit half-float bit pattern, returned in the low 16 bits of each 32-bit word */
+    vec_int4 bexp;
+    vec_uint4 mant, sign, hfloat;
+    vec_uint4 notZero, isInf;
+    const vec_uint4 hfloatInf = (vec_uint4){0x00007c00u,0x00007c00u,0x00007c00u,0x00007c00u}; /* half-float exponent field all ones, mantissa zero */
+    const vec_uint4 mergeMant = (vec_uint4){0x000003ffu,0x000003ffu,0x000003ffu,0x000003ffu}; /* low 10 bits: half-float mantissa field */
+    const vec_uint4 mergeSign = (vec_uint4){0x00008000u,0x00008000u,0x00008000u,0x00008000u}; /* bit 15: half-float sign bit */
+
+    sign = vec_sr((vec_uint4)v, (vec_uint4){16,16,16,16}); /* moves float bit 31 (sign) down to bit 15 */
+    mant = vec_sr((vec_uint4)v, (vec_uint4){13,13,13,13}); /* top 10 of the 23 mantissa bits land in bits 0-9; lower bits are dropped, not rounded */
+    bexp = vec_and(vec_sr((vec_int4)v, (vec_uint4){23,23,23,23}), (vec_int4){0xff,0xff,0xff,0xff}); /* 8-bit biased float exponent */
+
+    notZero = (vec_uint4)vec_cmpgt(bexp, (vec_int4){112,112,112,112}); /* exponents of 112 or less are flushed to zero below */
+    isInf = (vec_uint4)vec_cmpgt(bexp, (vec_int4){142,142,142,142}); /* exponents above 142 (including 255, i.e. float Inf/NaN) become half infinity */
+
+    bexp = vec_add(bexp, (vec_int4){-112,-112,-112,-112}); /* rebias the exponent by 112 */
+    bexp = vec_sl(bexp, (vec_uint4){10,10,10,10}); /* move it above the 10-bit mantissa field */
+
+    hfloat = vec_sel((vec_uint4)bexp, mant, mergeMant); /* exponent bits plus mantissa bits */
+    hfloat = vec_sel((vec_uint4){0,0,0,0}, hfloat, notZero); /* zero where the value is too small */
+    hfloat = vec_sel(hfloat, hfloatInf, isInf); /* infinity pattern where the value is too large */
+    hfloat = vec_sel(hfloat, sign, mergeSign); /* sign bit is merged last, so zero and infinity keep their sign */
+
+    return hfloat;
+}
+
+static inline vec_ushort8 _vmath2VfToHalfFloats(vec_float4 u, vec_float4 v)
+{ /* convert u and v to half floats and pack the eight results into one vector: u's four first, then v's four */
+    vec_uint4 hfloat_u, hfloat_v;
+    const vec_uchar16 pack = (vec_uchar16){2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31}; /* bytes 2-3 of every word of both operands; presumably the low halfword, assuming big-endian element layout */
+    hfloat_u = _vmathVfToHalfFloatsUnpacked(u);
+    hfloat_v = _vmathVfToHalfFloatsUnpacked(v);
+    return (vec_ushort8)vec_perm(hfloat_u, hfloat_v, pack);
+}
+
+#ifndef __GNUC__ /* without GNU C, make every __builtin_constant_p test evaluate to 0 so the run-time branches are always taken */
+#define __builtin_constant_p(x) 0
+#endif
+
+static inline vec_float4 _vmathVfInsert(vec_float4 dst, vec_float4 src, int slot)
+{ /* Return dst with word slot (0-3) replaced from src. The constant path takes src word 0, the mask path takes src word slot; callers in this file pass a splat, so both agree. */
+#ifdef __GNUC__
+    if (__builtin_constant_p(slot)) {
+        dst = vec_sld(dst, dst, slot<<2); /* rotate so the target word sits in word 0 */
+        dst = vec_sld(dst, src, 4); /* shift it out on the left, pulling src word 0 in on the right */
+        if (slot != 3) dst = vec_sld(dst, dst, (3-slot)<<2); /* rotate the new word round to position slot */
+        return dst;
+    } else
+#endif
+    {
+        vec_uchar16 shiftpattern = vec_lvsr( 0, (float *)(size_t)(slot<<2) ); /* slot*4 is used only as a byte offset to build a permute pattern; nothing is dereferenced */
+        vec_uint4 selectmask = (vec_uint4)vec_perm( (vec_uint4){0,0,0,0}, _VECTORMATH_MASK_0xF000, shiftpattern ); /* all ones in word slot, zero elsewhere */
+        return vec_sel( dst, src, selectmask );
+    }
+}
+
+#define _vmathVfGetElement(vec, slot) ((float *)&(vec))[slot] /* reads word slot of vec through a float pointer; vec must be addressable, slot is not range-checked */
+#ifdef _VECTORMATH_SET_CONSTS_IN_MEM
+#define _vmathVfSetElement(vec, scalar, slot) ((float *)&(vec))[slot] = scalar
+#else
+#define _vmathVfSetElement(vec, scalar, slot)                                            \
+{                                                                                        \
+    if (__builtin_constant_p(scalar)) {                                                  \
+        (vec) = _vmathVfInsert(vec, (vec_float4){scalar, scalar, scalar, scalar}, slot); \
+    } else {                                                                             \
+        ((float *)&(vec))[slot] = scalar;                                                \
+    }                                                                                    \
+}
+#endif
+
+static inline vec_float4 _vmathVfSplatScalar(float scalar)
+{ /* return a vector with scalar replicated into all four words */
+    vec_float4 result;
+    if (__builtin_constant_p(scalar)) {
+        result = (vec_float4){scalar, scalar, scalar, scalar}; /* compile-time value: build the vector directly */
+    } else {
+        result = vec_ld(0, &scalar); /* load the quadword that contains scalar */
+        result = vec_splat(vec_perm(result, result, vec_lvsl(0, &scalar)), 0); /* rotate scalar into word 0 using its address offset, then replicate it */
+    } 
+    return result;
+}
+
+static inline vec_uint4 _vmathVuiSplatScalar(unsigned int scalar)
+{ /* unsigned-int counterpart of _vmathVfSplatScalar: scalar replicated into all four words */
+    vec_uint4 result;
+    if (__builtin_constant_p(scalar)) {
+        result = (vec_uint4){scalar, scalar, scalar, scalar}; /* compile-time value: build the vector directly */
+    } else {
+        result = vec_ld(0, &scalar); /* load the quadword that contains scalar */
+        result = vec_splat(vec_perm(result, result, vec_lvsl(0, &scalar)), 0); /* rotate scalar into word 0 using its address offset, then replicate it */
+    } 
+    return result;
+}
+
+#endif
+
+static inline void vmathV3Copy( VmathVector3 *result, const VmathVector3 *vec )
+{ /* copy the whole 128-bit value of vec into result */
+    result->vec128 = vec->vec128;
+}
+
+static inline void vmathV3MakeFromElems( VmathVector3 *result, float _x, float _y, float _z )
+{ /* set result to (_x, _y, _z) */
+    if (__builtin_constant_p(_x) & __builtin_constant_p(_y) & __builtin_constant_p(_z)) {
+        result->vec128 = (vec_float4){_x, _y, _z, 0.0f}; /* all three known at compile time: build the vector directly, word 3 is 0 */
+    } else {
+        float *pf = (float *)&result->vec128; /* otherwise store the components through memory */
+        pf[0] = _x;
+        pf[1] = _y;
+        pf[2] = _z; /* word 3 is not written on this path and keeps its previous contents */
+    }
+}
+
+static inline void vmathV3MakeFromP3( VmathVector3 *result, const VmathPoint3 *pnt )
+{ /* reinterpret a point as a vector: the 128-bit value is copied unchanged */
+    result->vec128 = pnt->vec128;
+}
+
+static inline void vmathV3MakeFromScalar( VmathVector3 *result, float scalar )
+{ /* set every word of result (including word 3) to scalar */
+    result->vec128 = _vmathVfSplatScalar(scalar);
+}
+
+static inline void vmathV3MakeFrom128( VmathVector3 *result, vec_float4 vf4 )
+{ /* wrap a raw vec_float4 as a VmathVector3 without modifying it */
+    result->vec128 = vf4;
+}
+
+static inline void vmathV3MakeXAxis( VmathVector3 *result )
+{ /* set result to the unit x axis (1,0,0), with word 3 zero */
+    result->vec128 = _VECTORMATH_UNIT_1000;
+}
+
+static inline void vmathV3MakeYAxis( VmathVector3 *result )
+{ /* set result to the unit y axis (0,1,0), with word 3 zero */
+    result->vec128 = _VECTORMATH_UNIT_0100;
+}
+
+static inline void vmathV3MakeZAxis( VmathVector3 *result )
+{ /* set result to the unit z axis (0,0,1), with word 3 zero */
+    result->vec128 = _VECTORMATH_UNIT_0010;
+}
+
+static inline void vmathV3Lerp( VmathVector3 *result, float t, const VmathVector3 *vec0, const VmathVector3 *vec1 )
+{ /* linear interpolation: result = vec0 + (vec1 - vec0) * t; t is not clamped */
+    VmathVector3 tmpV3_0, tmpV3_1;
+    vmathV3Sub( &tmpV3_0, vec1, vec0 ); /* difference vec1 - vec0 */
+    vmathV3ScalarMul( &tmpV3_1, &tmpV3_0, t ); /* scaled by t */
+    vmathV3Add( result, vec0, &tmpV3_1 ); /* result is written last, so it may alias vec0 or vec1 */
+}
+
+static inline void vmathV3Slerp( VmathVector3 *result, float t, const VmathVector3 *unitVec0, const VmathVector3 *unitVec1 )
+{ /* spherical interpolation between two vectors the parameter names declare to be unit length; uses linear weights when they are nearly parallel */
+    vec_float4 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
+    vec_uint4 selectMask;
+    cosAngle = _vmathVfDot3( unitVec0->vec128, unitVec1->vec128 ); /* word 0 = cosine of the angle, given unit inputs */
+    cosAngle = vec_splat( cosAngle, 0 );
+    selectMask = (vec_uint4)vec_cmpgt( ((vec_float4){_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL}), cosAngle ); /* all ones when the cosine is below the tolerance, i.e. use the sine-based weights */
+    angle = acosf4( cosAngle );
+    tttt = _vmathVfSplatScalar(t);
+    oneMinusT = vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), tttt );
+    angles = vec_mergeh( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), tttt ); /* (1, t, 1, t) */
+    angles = vec_mergeh( angles, oneMinusT ); /* (1, 1-t, t, 1-t) */
+    angles = vec_madd( angles, angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); /* (angle, (1-t)*angle, t*angle, ...) */
+    sines = sinf4( angles );
+    scales = divf4( sines, vec_splat( sines, 0 ) ); /* each sine divided by sin(angle) */
+    scale0 = vec_sel( oneMinusT, vec_splat( scales, 1 ), selectMask ); /* weight for unitVec0: sin((1-t)*angle)/sin(angle), or 1-t in the linear case */
+    scale1 = vec_sel( tttt, vec_splat( scales, 2 ), selectMask ); /* weight for unitVec1: sin(t*angle)/sin(angle), or t in the linear case */
+    result->vec128 = vec_madd( unitVec0->vec128, scale0, vec_madd( unitVec1->vec128, scale1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ); /* unitVec0*scale0 + unitVec1*scale1 */
+}
+
+static inline vec_float4 vmathV3Get128( const VmathVector3 *vec )
+{ /* return the underlying 128-bit vector by value */
+    return vec->vec128;
+}
+
+static inline void vmathV3StoreXYZ( const VmathVector3 *vec, vec_float4 *quad )
+{ /* write x, y, z of vec into the first three words of *quad while preserving the fourth word of *quad */
+    vec_float4 dstVec = *quad;
+    vec_uint4 mask = _VECTORMATH_MASK_0x000F; /* ones in word 3 only */
+    dstVec = vec_sel(vec->vec128, dstVec, mask); /* word 3 from the old *quad, words 0-2 from vec */
+    *quad = dstVec;
+}
+
+static inline void vmathV3LoadXYZArray( VmathVector3 *vec0, VmathVector3 *vec1, VmathVector3 *vec2, VmathVector3 *vec3, const vec_float4 *threeQuads )
+{ /* unpack four xyz triples stored back to back in threeQuads[0..2] (12 floats) into four vectors; word 3 of each output holds a neighbouring float */
+    vec_float4 xyzx, yzxy, zxyz, xyz1, xyz2, xyz3;
+    xyzx = threeQuads[0]; /* x0 y0 z0 x1 */
+    yzxy = threeQuads[1]; /* y1 z1 x2 y2 */
+    zxyz = threeQuads[2]; /* z2 x3 y3 z3 */
+    xyz1 = vec_sld( xyzx, yzxy, 12 ); /* x1 y1 z1 x2 */
+    xyz2 = vec_sld( yzxy, zxyz, 8 ); /* x2 y2 z2 x3 */
+    xyz3 = vec_sld( zxyz, zxyz, 4 ); /* x3 y3 z3 z2 */
+    vec0->vec128 = xyzx;
+    vec1->vec128 = xyz1;
+    vec2->vec128 = xyz2;
+    vec3->vec128 = xyz3;
+}
+
+static inline void vmathV3StoreXYZArray( const VmathVector3 *vec0, const VmathVector3 *vec1, const VmathVector3 *vec2, const VmathVector3 *vec3, vec_float4 *threeQuads )
+{ /* pack the xyz parts of four vectors back to back into threeQuads[0..2] (12 floats); word 3 of each input is dropped */
+    vec_float4 xyzx, yzxy, zxyz;
+    xyzx = vec_perm( vec0->vec128, vec1->vec128, _VECTORMATH_PERM_XYZA ); /* x0 y0 z0 x1 */
+    yzxy = vec_perm( vec1->vec128, vec2->vec128, _VECTORMATH_PERM_YZAB ); /* y1 z1 x2 y2 */
+    zxyz = vec_perm( vec2->vec128, vec3->vec128, _VECTORMATH_PERM_ZABC ); /* z2 x3 y3 z3 */
+    threeQuads[0] = xyzx;
+    threeQuads[1] = yzxy;
+    threeQuads[2] = zxyz;
+}
+
+static inline void vmathV3StoreHalfFloats( const VmathVector3 *vec0, const VmathVector3 *vec1, const VmathVector3 *vec2, const VmathVector3 *vec3, const VmathVector3 *vec4, const VmathVector3 *vec5, const VmathVector3 *vec6, const VmathVector3 *vec7, vec_ushort8 *threeQuads )
+{ /* store the xyz parts of eight vectors as 24 packed half floats in threeQuads[0..2] */
+    vec_float4 xyz0[3]; /* packed floats of vec0..vec3 */
+    vec_float4 xyz1[3]; /* packed floats of vec4..vec7 */
+    vmathV3StoreXYZArray( vec0, vec1, vec2, vec3, xyz0 );
+    vmathV3StoreXYZArray( vec4, vec5, vec6, vec7, xyz1 );
+    threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]); /* each call converts eight floats to eight halves */
+    threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
+    threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
+}
+
+static inline void vmathV3SetX( VmathVector3 *result, float _x )
+{ /* overwrite the x component (word 0) of result; other words are unchanged */
+    _vmathVfSetElement(result->vec128, _x, 0);
+}
+
+static inline float vmathV3GetX( const VmathVector3 *vec )
+{ /* return the x component (word 0) */
+    return _vmathVfGetElement(vec->vec128, 0);
+}
+
+static inline void vmathV3SetY( VmathVector3 *result, float _y )
+{ /* overwrite the y component (word 1) of result; other words are unchanged */
+    _vmathVfSetElement(result->vec128, _y, 1);
+}
+
+static inline float vmathV3GetY( const VmathVector3 *vec )
+{ /* return the y component (word 1) */
+    return _vmathVfGetElement(vec->vec128, 1);
+}
+
+static inline void vmathV3SetZ( VmathVector3 *result, float _z )
+{ /* overwrite the z component (word 2) of result; other words are unchanged */
+    _vmathVfSetElement(result->vec128, _z, 2);
+}
+
+static inline float vmathV3GetZ( const VmathVector3 *vec )
+{ /* return the z component (word 2) */
+    return _vmathVfGetElement(vec->vec128, 2);
+}
+
+static inline void vmathV3SetElem( VmathVector3 *result, int idx, float value )
+{ /* overwrite word idx of result with value; idx is not range-checked here */
+    _vmathVfSetElement(result->vec128, value, idx);
+}
+
+static inline float vmathV3GetElem( const VmathVector3 *vec, int idx )
+{ /* return word idx of vec; idx is not range-checked here */
+    return _vmathVfGetElement(vec->vec128, idx);
+}
+
+static inline void vmathV3Add( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
+{ /* result = vec0 + vec1, computed on all four words */
+    result->vec128 = vec_add( vec0->vec128, vec1->vec128 );
+}
+
+static inline void vmathV3Sub( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
+{ /* result = vec0 - vec1, computed on all four words */
+    result->vec128 = vec_sub( vec0->vec128, vec1->vec128 );
+}
+
+static inline void vmathV3AddP3( VmathPoint3 *result, const VmathVector3 *vec, const VmathPoint3 *pnt1 )
+{ /* vector + point: the sum is stored as a point */
+    result->vec128 = vec_add( vec->vec128, pnt1->vec128 );
+}
+
+static inline void vmathV3ScalarMul( VmathVector3 *result, const VmathVector3 *vec, float scalar )
+{ /* result = vec * scalar; done as a multiply-add with a zero addend */
+    result->vec128 = vec_madd( vec->vec128, _vmathVfSplatScalar(scalar), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+}
+
+static inline void vmathV3ScalarDiv( VmathVector3 *result, const VmathVector3 *vec, float scalar )
+{ /* result = vec / scalar via divf4 on a splatted divisor; no zero check is made here */
+    result->vec128 = divf4( vec->vec128, _vmathVfSplatScalar(scalar) );
+}
+
+static inline void vmathV3Neg( VmathVector3 *result, const VmathVector3 *vec )
+{ /* result = -vec, via negatef4 on the whole register */
+    result->vec128 = negatef4( vec->vec128 );
+}
+
+static inline void vmathV3MulPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
+{ /* component-wise product vec0 * vec1; done as a multiply-add with a zero addend */
+    result->vec128 = vec_madd( vec0->vec128, vec1->vec128, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+}
+
+static inline void vmathV3DivPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
+{ /* component-wise quotient vec0 / vec1 via divf4; word 3 is divided too, and no zero check is made here */
+    result->vec128 = divf4( vec0->vec128, vec1->vec128 );
+}
+
+static inline void vmathV3RecipPerElem( VmathVector3 *result, const VmathVector3 *vec )
+{ /* component-wise reciprocal via recipf4, applied to the whole register */
+    result->vec128 = recipf4( vec->vec128 );
+}
+
+static inline void vmathV3SqrtPerElem( VmathVector3 *result, const VmathVector3 *vec )
+{ /* component-wise square root via sqrtf4, applied to the whole register */
+    result->vec128 = sqrtf4( vec->vec128 );
+}
+
+static inline void vmathV3RsqrtPerElem( VmathVector3 *result, const VmathVector3 *vec )
+{ /* component-wise reciprocal square root via rsqrtf4, applied to the whole register */
+    result->vec128 = rsqrtf4( vec->vec128 );
+}
+
+static inline void vmathV3AbsPerElem( VmathVector3 *result, const VmathVector3 *vec )
+{ /* component-wise absolute value via fabsf4, applied to the whole register */
+    result->vec128 = fabsf4( vec->vec128 );
+}
+
+static inline void vmathV3CopySignPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
+{ /* component-wise copysignf4( vec0, vec1 ); presumably magnitude from vec0 and sign from vec1, as with C copysign (confirm against simdmath) */
+    result->vec128 = copysignf4( vec0->vec128, vec1->vec128 );
+}
+
+static inline void vmathV3MaxPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
+{ /* component-wise maximum of vec0 and vec1 via fmaxf4 */
+    result->vec128 = fmaxf4( vec0->vec128, vec1->vec128 );
+}
+
+static inline float vmathV3MaxElem( const VmathVector3 *vec )
+{ /* return the largest of x, y, z; word 3 does not take part in the value read from word 0 */
+    vec_float4 result;
+    result = fmaxf4( vec_splat( vec->vec128, 1 ), vec->vec128 ); /* word 0 = max(y, x) */
+    result = fmaxf4( vec_splat( vec->vec128, 2 ), result ); /* word 0 = max(z, y, x) */
+    return _vmathVfGetElement(result, 0);
+}
+
+static inline void vmathV3MinPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
+{ /* component-wise minimum of vec0 and vec1 via fminf4 */
+    result->vec128 = fminf4( vec0->vec128, vec1->vec128 );
+}
+
+static inline float vmathV3MinElem( const VmathVector3 *vec )
+{ /* return the smallest of x, y, z; word 3 does not take part in the value read from word 0 */
+    vec_float4 result;
+    result = fminf4( vec_splat( vec->vec128, 1 ), vec->vec128 ); /* word 0 = min(y, x) */
+    result = fminf4( vec_splat( vec->vec128, 2 ), result ); /* word 0 = min(z, y, x) */
+    return _vmathVfGetElement(result, 0);
+}
+
+static inline float vmathV3Sum( const VmathVector3 *vec )
+{ /* return x + y + z */
+    vec_float4 result;
+    result = vec_add( vec_splat( vec->vec128, 1 ), vec->vec128 ); /* word 0 = y + x */
+    result = vec_add( vec_splat( vec->vec128, 2 ), result ); /* word 0 = z + y + x */
+    return _vmathVfGetElement(result, 0);
+}
+
+static inline float vmathV3Dot( const VmathVector3 *vec0, const VmathVector3 *vec1 )
+{ /* return the 3-component dot product of vec0 and vec1 as a scalar */
+    vec_float4 result = _vmathVfDot3( vec0->vec128, vec1->vec128 );
+    return _vmathVfGetElement(result, 0); /* the xyz sum is in word 0 */
+}
+
+static inline float vmathV3LengthSqr( const VmathVector3 *vec )
+{ /* return the squared length x*x + y*y + z*z */
+    vec_float4 result = _vmathVfDot3( vec->vec128, vec->vec128 );
+    return _vmathVfGetElement(result, 0); /* the xyz sum is in word 0 */
+}
+
+static inline float vmathV3Length( const VmathVector3 *vec )
+{ /* return the length of vec: scalar sqrtf of the squared length */
+    return sqrtf( vmathV3LengthSqr( vec ) );
+}
+
+static inline void vmathV3Normalize( VmathVector3 *result, const VmathVector3 *vec )
+{ /* result = vec scaled by the reciprocal square root of its squared length; a zero-length input is not special-cased here */
+    vec_float4 dot = _vmathVfDot3( vec->vec128, vec->vec128 );
+    dot = vec_splat( dot, 0 ); /* broadcast the squared length to every word */
+    result->vec128 = vec_madd( vec->vec128, rsqrtf4( dot ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+}
+
+static inline void vmathV3Cross( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
+{ /* result = vec0 x vec1 (cross product), computed by _vmathVfCross */
+    result->vec128 = _vmathVfCross( vec0->vec128, vec1->vec128 );
+}
+
+static inline void vmathV3Select( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1, unsigned int select1 )
+{ /* branch-free choice: result = vec1 when select1 is non-zero, otherwise vec0 */
+    unsigned int tmp;
+    tmp = (unsigned int)-(select1 > 0); /* 0xffffffff when select1 is non-zero, else 0 */
+    result->vec128 = vec_sel( vec0->vec128, vec1->vec128, _vmathVuiSplatScalar(tmp) ); /* the same mask in every word picks one whole vector */
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathV3Print( const VmathVector3 *vec )
+{ /* print x, y, z of vec to stdout as "( x y z )" followed by a newline */
+    union { vec_float4 v; float s[4]; } tmp; /* union gives float access to the vector words */
+    tmp.v = vec->vec128;
+    printf( "( %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2] );
+}
+
+static inline void vmathV3Prints( const VmathVector3 *vec, const char *name )
+{ /* like vmathV3Print, but prefixes the output with name and a colon */
+    union { vec_float4 v; float s[4]; } tmp; /* union gives float access to the vector words */
+    tmp.v = vec->vec128;
+    printf( "%s: ( %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2] );
+}
+
+#endif
+
+static inline void vmathV4Copy( VmathVector4 *result, const VmathVector4 *vec )
+{ /* copy the whole 128-bit value of vec into result */
+    result->vec128 = vec->vec128;
+}
+
+static inline void vmathV4MakeFromElems( VmathVector4 *result, float _x, float _y, float _z, float _w )
+{ /* set result to (_x, _y, _z, _w) */
+    if (__builtin_constant_p(_x) & __builtin_constant_p(_y) &
+        __builtin_constant_p(_z) & __builtin_constant_p(_w)) {
+        result->vec128 = (vec_float4){_x, _y, _z, _w}; /* all four known at compile time: build the vector directly */
+    } else {
+        float *pf = (float *)&result->vec128; /* otherwise store the components through memory */
+        pf[0] = _x;
+        pf[1] = _y;
+        pf[2] = _z;
+        pf[3] = _w;
+    }
+}
+
+static inline void vmathV4MakeFromV3Scalar( VmathVector4 *result, const VmathVector3 *xyz, float _w )
+{ /* result = (xyz.x, xyz.y, xyz.z, _w) */
+    result->vec128 = xyz->vec128;
+    _vmathVfSetElement(result->vec128, _w, 3); /* overwrite word 3 with _w */
+}
+
+static inline void vmathV4MakeFromV3( VmathVector4 *result, const VmathVector3 *vec )
+{ /* extend a 3-D vector to 4-D with w = 0 */
+    result->vec128 = vec->vec128;
+    result->vec128 = _vmathVfInsert(result->vec128, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), 3); /* force word 3 to 0 */
+}
+
+static inline void vmathV4MakeFromP3( VmathVector4 *result, const VmathPoint3 *pnt )
+{ /* extend a 3-D point to 4-D with w = 1 */
+    result->vec128 = pnt->vec128;
+    result->vec128 = _vmathVfInsert(result->vec128, ((vec_float4){1.0f,1.0f,1.0f,1.0f}), 3); /* force word 3 to 1 */
+}
+
+static inline void vmathV4MakeFromQ( VmathVector4 *result, const VmathQuat *quat )
+{ /* reinterpret a quaternion as a 4-D vector: the 128-bit value is copied unchanged */
+    result->vec128 = quat->vec128;
+}
+
+static inline void vmathV4MakeFromScalar( VmathVector4 *result, float scalar )
+{ /* set all four components of result to scalar */
+    result->vec128 = _vmathVfSplatScalar(scalar);
+}
+
+static inline void vmathV4MakeFrom128( VmathVector4 *result, vec_float4 vf4 )
+{ /* wrap a raw vec_float4 as a VmathVector4 without modifying it */
+    result->vec128 = vf4;
+}
+
+static inline void vmathV4MakeXAxis( VmathVector4 *result )
+{ /* set result to the unit x axis (1,0,0,0) */
+    result->vec128 = _VECTORMATH_UNIT_1000;
+}
+
+static inline void vmathV4MakeYAxis( VmathVector4 *result )
+{ /* set result to the unit y axis (0,1,0,0) */
+    result->vec128 = _VECTORMATH_UNIT_0100;
+}
+
+static inline void vmathV4MakeZAxis( VmathVector4 *result )
+{ /* set result to the unit z axis (0,0,1,0) */
+    result->vec128 = _VECTORMATH_UNIT_0010;
+}
+
+static inline void vmathV4MakeWAxis( VmathVector4 *result )
+{ /* set result to the unit w axis (0,0,0,1) */
+    result->vec128 = _VECTORMATH_UNIT_0001;
+}
+
+static inline void vmathV4Lerp( VmathVector4 *result, float t, const VmathVector4 *vec0, const VmathVector4 *vec1 )
+{ /* linear interpolation: result = vec0 + (vec1 - vec0) * t; t is not clamped */
+    VmathVector4 tmpV4_0, tmpV4_1;
+    vmathV4Sub( &tmpV4_0, vec1, vec0 ); /* difference vec1 - vec0 */
+    vmathV4ScalarMul( &tmpV4_1, &tmpV4_0, t ); /* scaled by t */
+    vmathV4Add( result, vec0, &tmpV4_1 ); /* result is written last, so it may alias vec0 or vec1 */
+}
+
+static inline void vmathV4Slerp( VmathVector4 *result, float t, const VmathVector4 *unitVec0, const VmathVector4 *unitVec1 )
+{ /* spherical interpolation between two 4-D vectors the parameter names declare to be unit length; uses linear weights when they are nearly parallel */
+    vec_float4 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
+    vec_uint4 selectMask;
+    cosAngle = _vmathVfDot4( unitVec0->vec128, unitVec1->vec128 ); /* cosine of the angle, given unit inputs */
+    cosAngle = vec_splat( cosAngle, 0 );
+    selectMask = (vec_uint4)vec_cmpgt( ((vec_float4){_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL}), cosAngle ); /* all ones when the cosine is below the tolerance, i.e. use the sine-based weights */
+    angle = acosf4( cosAngle );
+    tttt = _vmathVfSplatScalar(t);
+    oneMinusT = vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), tttt );
+    angles = vec_mergeh( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), tttt ); /* (1, t, 1, t) */
+    angles = vec_mergeh( angles, oneMinusT ); /* (1, 1-t, t, 1-t) */
+    angles = vec_madd( angles, angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); /* (angle, (1-t)*angle, t*angle, ...) */
+    sines = sinf4( angles );
+    scales = divf4( sines, vec_splat( sines, 0 ) ); /* each sine divided by sin(angle) */
+    scale0 = vec_sel( oneMinusT, vec_splat( scales, 1 ), selectMask ); /* weight for unitVec0: sin((1-t)*angle)/sin(angle), or 1-t in the linear case */
+    scale1 = vec_sel( tttt, vec_splat( scales, 2 ), selectMask ); /* weight for unitVec1: sin(t*angle)/sin(angle), or t in the linear case */
+    result->vec128 = vec_madd( unitVec0->vec128, scale0, vec_madd( unitVec1->vec128, scale1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ); /* unitVec0*scale0 + unitVec1*scale1 */
+}
+
+static inline vec_float4 vmathV4Get128( const VmathVector4 *vec )
+{ /* return the underlying 128-bit vector by value */
+    return vec->vec128;
+}
+
+static inline void vmathV4StoreHalfFloats( const VmathVector4 *vec0, const VmathVector4 *vec1, const VmathVector4 *vec2, const VmathVector4 *vec3, vec_ushort8 *twoQuads )
+{ /* store four 4-D vectors as 16 packed half floats in twoQuads[0..1] */
+    twoQuads[0] = _vmath2VfToHalfFloats(vec0->vec128, vec1->vec128); /* vec0 then vec1 */
+    twoQuads[1] = _vmath2VfToHalfFloats(vec2->vec128, vec3->vec128); /* vec2 then vec3 */
+}
+
+static inline void vmathV4SetXYZ( VmathVector4 *result, const VmathVector3 *vec )
+{   /* Replaces x, y, z of *result with those of *vec while keeping *result's own w. */
+    result->vec128 = vec_sel( vec->vec128, result->vec128, _VECTORMATH_MASK_0x000F );  /* the mask picks only the last word from result; the rest comes from vec */
+}
+
+static inline void vmathV4GetXYZ( VmathVector3 *result, const VmathVector4 *vec )
+{   /* Copies the whole quadword of *vec into the 3-D *result. */
+    result->vec128 = vec->vec128;  /* the 4th element of vec is copied along unchanged, not cleared */
+}
+
+static inline void vmathV4SetX( VmathVector4 *result, float _x )
+{   /* Stores _x into element 0 (x) of *result. */
+    _vmathVfSetElement(result->vec128, _x, 0);  /* helper is defined outside this view; it is handed the lvalue itself, so presumably a macro */
+}
+
+static inline float vmathV4GetX( const VmathVector4 *vec )
+{   /* Returns element 0 (x) of *vec as a scalar float. */
+    return _vmathVfGetElement(vec->vec128, 0);
+}
+
+static inline void vmathV4SetY( VmathVector4 *result, float _y )
+{   /* Stores _y into element 1 (y) of *result. */
+    _vmathVfSetElement(result->vec128, _y, 1);
+}
+
+static inline float vmathV4GetY( const VmathVector4 *vec )
+{   /* Returns element 1 (y) of *vec as a scalar float. */
+    return _vmathVfGetElement(vec->vec128, 1);
+}
+
+static inline void vmathV4SetZ( VmathVector4 *result, float _z )
+{   /* Stores _z into element 2 (z) of *result. */
+    _vmathVfSetElement(result->vec128, _z, 2);
+}
+
+static inline float vmathV4GetZ( const VmathVector4 *vec )
+{   /* Returns element 2 (z) of *vec as a scalar float. */
+    return _vmathVfGetElement(vec->vec128, 2);
+}
+
+static inline void vmathV4SetW( VmathVector4 *result, float _w )
+{   /* Stores _w into element 3 (w) of *result. */
+    _vmathVfSetElement(result->vec128, _w, 3);
+}
+
+static inline float vmathV4GetW( const VmathVector4 *vec )
+{   /* Returns element 3 (w) of *vec as a scalar float. */
+    return _vmathVfGetElement(vec->vec128, 3);
+}
+
+static inline void vmathV4SetElem( VmathVector4 *result, int idx, float value )
+{   /* Stores value into element idx of *result. */
+    _vmathVfSetElement(result->vec128, value, idx);  /* idx is forwarded without a range check; callers presumably pass 0..3 */
+}
+
+static inline float vmathV4GetElem( const VmathVector4 *vec, int idx )
+{   /* Returns element idx of *vec as a scalar float. */
+    return _vmathVfGetElement(vec->vec128, idx);  /* idx is forwarded without a range check; callers presumably pass 0..3 */
+}
+
+static inline void vmathV4Add( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
+{   /* Element-wise sum: *result = *vec0 + *vec1. */
+    result->vec128 = vec_add( vec0->vec128, vec1->vec128 );
+}
+
+static inline void vmathV4Sub( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
+{   /* Element-wise difference: *result = *vec0 - *vec1. */
+    result->vec128 = vec_sub( vec0->vec128, vec1->vec128 );
+}
+
+static inline void vmathV4ScalarMul( VmathVector4 *result, const VmathVector4 *vec, float scalar )
+{   /* Multiplies every element of *vec by scalar. */
+    result->vec128 = vec_madd( vec->vec128, _vmathVfSplatScalar(scalar), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );  /* multiply-add with a zero addend, i.e. a plain multiply */
+}
+
+static inline void vmathV4ScalarDiv( VmathVector4 *result, const VmathVector4 *vec, float scalar )
+{   /* Divides every element of *vec by scalar via divf4; no guard against scalar == 0 is visible here. */
+    result->vec128 = divf4( vec->vec128, _vmathVfSplatScalar(scalar) );
+}
+
+static inline void vmathV4Neg( VmathVector4 *result, const VmathVector4 *vec )
+{   /* Stores negatef4 of *vec (the element-wise negation) in *result. */
+    result->vec128 = negatef4( vec->vec128 );
+}
+
+static inline void vmathV4MulPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
+{   /* Element-wise product: result[i] = vec0[i] * vec1[i]. */
+    result->vec128 = vec_madd( vec0->vec128, vec1->vec128, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );  /* multiply-add with a zero addend */
+}
+
+static inline void vmathV4DivPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
+{   /* Element-wise quotient via divf4: result[i] = vec0[i] / vec1[i]. */
+    result->vec128 = divf4( vec0->vec128, vec1->vec128 );
+}
+
+static inline void vmathV4RecipPerElem( VmathVector4 *result, const VmathVector4 *vec )
+{   /* Element-wise reciprocal of *vec, computed by recipf4. */
+    result->vec128 = recipf4( vec->vec128 );
+}
+
+static inline void vmathV4SqrtPerElem( VmathVector4 *result, const VmathVector4 *vec )
+{   /* Element-wise square root of *vec, computed by sqrtf4. */
+    result->vec128 = sqrtf4( vec->vec128 );
+}
+
+static inline void vmathV4RsqrtPerElem( VmathVector4 *result, const VmathVector4 *vec )
+{   /* Element-wise reciprocal square root of *vec, computed by rsqrtf4. */
+    result->vec128 = rsqrtf4( vec->vec128 );
+}
+
+static inline void vmathV4AbsPerElem( VmathVector4 *result, const VmathVector4 *vec )
+{   /* Element-wise absolute value of *vec, computed by fabsf4. */
+    result->vec128 = fabsf4( vec->vec128 );
+}
+
+static inline void vmathV4CopySignPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
+{   /* Element-wise copysignf4( vec0, vec1 ); presumably magnitude from vec0 and sign from vec1, as with C copysignf - confirm against the SIMD math library. */
+    result->vec128 = copysignf4( vec0->vec128, vec1->vec128 );
+}
+
+static inline void vmathV4MaxPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
+{   /* Element-wise maximum of *vec0 and *vec1, computed by fmaxf4. */
+    result->vec128 = fmaxf4( vec0->vec128, vec1->vec128 );
+}
+
+static inline float vmathV4MaxElem( const VmathVector4 *vec )
+{   /* Returns the largest of the four elements of *vec. */
+    vec_float4 lanes = vec->vec128;
+    vec_float4 best = fmaxf4( vec_splat( lanes, 1 ), lanes );  /* element 0 now holds max(y, x) */
+    best = fmaxf4( vec_splat( lanes, 2 ), best );               /* fold in z */
+    best = fmaxf4( vec_splat( lanes, 3 ), best );               /* fold in w */
+    return _vmathVfGetElement(best, 0);                         /* only element 0 carries the full reduction */
+}
+
+static inline void vmathV4MinPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
+{   /* Element-wise minimum of *vec0 and *vec1, computed by fminf4. */
+    result->vec128 = fminf4( vec0->vec128, vec1->vec128 );
+}
+
+static inline float vmathV4MinElem( const VmathVector4 *vec )
+{   /* Returns the smallest of the four elements of *vec. */
+    vec_float4 lanes = vec->vec128;
+    vec_float4 least = fminf4( vec_splat( lanes, 1 ), lanes );  /* element 0 now holds min(y, x) */
+    least = fminf4( vec_splat( lanes, 2 ), least );              /* fold in z */
+    least = fminf4( vec_splat( lanes, 3 ), least );              /* fold in w */
+    return _vmathVfGetElement(least, 0);                         /* only element 0 carries the full reduction */
+}
+
+static inline float vmathV4Sum( const VmathVector4 *vec )
+{   /* Returns x + y + z + w of *vec. */
+    vec_float4 lanes = vec->vec128;
+    vec_float4 total = vec_add( vec_splat( lanes, 1 ), lanes );  /* element 0 = y + x */
+    total = vec_add( vec_splat( lanes, 2 ), total );              /* + z */
+    total = vec_add( vec_splat( lanes, 3 ), total );              /* + w */
+    return _vmathVfGetElement(total, 0);                          /* only element 0 carries the full sum */
+}
+
+static inline float vmathV4Dot( const VmathVector4 *vec0, const VmathVector4 *vec1 )
+{   /* Four-component dot product of *vec0 and *vec1, returned as a scalar. */
+    vec_float4 dot4 = _vmathVfDot4( vec0->vec128, vec1->vec128 );
+    return _vmathVfGetElement(dot4, 0);  /* read the product out of element 0 */
+}
+
+static inline float vmathV4LengthSqr( const VmathVector4 *vec )
+{   /* Squared length of *vec: the four-component dot product of the vector with itself. */
+    vec_float4 selfDot = _vmathVfDot4( vec->vec128, vec->vec128 );
+    return _vmathVfGetElement(selfDot, 0);  /* read the product out of element 0 */
+}
+
+static inline float vmathV4Length( const VmathVector4 *vec )
+{   /* Length of *vec: scalar sqrtf of the squared length. */
+    return sqrtf( vmathV4LengthSqr( vec ) );
+}
+
+static inline void vmathV4Normalize( VmathVector4 *result, const VmathVector4 *vec )
+{   /* Scales *vec by the reciprocal square root of its squared length; no zero-length guard is applied. */
+    vec_float4 lenSqr = _vmathVfDot4( vec->vec128, vec->vec128 );
+    result->vec128 = vec_madd( vec->vec128, rsqrtf4( lenSqr ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );  /* vec * (1/sqrt(lenSqr)) + 0 */
+}
+
+static inline void vmathV4Select( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1, unsigned int select1 )
+{
+    /* Picks *vec1 when select1 is nonzero and *vec0 otherwise, without branching on the vector data. */
+    unsigned int pickBits = ( select1 != 0 ) ? ~0u : 0u;  /* all ones or all zeros */
+    result->vec128 = vec_sel( vec0->vec128, vec1->vec128, _vmathVuiSplatScalar(pickBits) );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathV4Print( const VmathVector4 *vec )
+{   /* Debug helper: prints the four elements of *vec on one line to stdout. */
+    union { vec_float4 quad; float lane[4]; } view;
+    view.quad = vec->vec128;  /* reinterpret the quadword as four floats */
+    printf( "( %f %f %f %f )\n", view.lane[0], view.lane[1], view.lane[2], view.lane[3] );
+}
+
+static inline void vmathV4Prints( const VmathVector4 *vec, const char *name )
+{   /* Debug helper: prints "name: ( x y z w )" for *vec to stdout. */
+    union { vec_float4 quad; float lane[4]; } view;
+    view.quad = vec->vec128;  /* reinterpret the quadword as four floats */
+    printf( "%s: ( %f %f %f %f )\n", name, view.lane[0], view.lane[1], view.lane[2], view.lane[3] );
+}
+
+#endif
+
+static inline void vmathP3Copy( VmathPoint3 *result, const VmathPoint3 *pnt )
+{   /* Copies the whole quadword of *pnt into *result. */
+    result->vec128 = pnt->vec128;
+}
+
+static inline void vmathP3MakeFromElems( VmathPoint3 *result, float _x, float _y, float _z )
+{   /* Builds a point from three scalar coordinates. */
+    if (__builtin_constant_p(_x) & __builtin_constant_p(_y) & __builtin_constant_p(_z)) {  /* all three are compile-time constants */
+        result->vec128 = (vec_float4){_x, _y, _z, 0.0f};  /* vector literal; 4th element set to 0 */
+    } else {
+        float *pf = (float *)&result->vec128;  /* view the quadword as four floats and store the coordinates one by one */
+        pf[0] = _x;
+        pf[1] = _y;
+        pf[2] = _z;  /* NOTE(review): pf[3] is never written on this path, so the 4th element keeps whatever *result held before (the constant path stores 0.0f) */
+    }
+}
+
+static inline void vmathP3MakeFromV3( VmathPoint3 *result, const VmathVector3 *vec )
+{   /* Makes a point with the same quadword contents as the 3-D vector *vec. */
+    result->vec128 = vec->vec128;
+}
+
+static inline void vmathP3MakeFromScalar( VmathPoint3 *result, float scalar )
+{   /* Makes a point whose coordinates all equal scalar. */
+    result->vec128 = _vmathVfSplatScalar(scalar);  /* helper defined outside this view; presumably replicates scalar into every element */
+}
+
+static inline void vmathP3MakeFrom128( VmathPoint3 *result, vec_float4 vf4 )
+{   /* Wraps a raw vec_float4 as a point; vf4 is stored unmodified. */
+    result->vec128 = vf4;
+}
+
+static inline void vmathP3Lerp( VmathPoint3 *result, float t, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
+{   /* Linear interpolation between points: *result = *pnt0 + t * (*pnt1 - *pnt0). */
+    VmathVector3 span, step;
+    vmathP3Sub( &span, pnt1, pnt0 );        /* span = pnt1 - pnt0 (a vector) */
+    vmathV3ScalarMul( &step, &span, t );    /* step = t * span */
+    vmathP3AddV3( result, pnt0, &step );    /* result = pnt0 + step */
+}
+
+static inline vec_float4 vmathP3Get128( const VmathPoint3 *pnt )
+{   /* Returns the raw vec_float4 stored in *pnt, by value. */
+    return pnt->vec128;
+}
+
+static inline void vmathP3StoreXYZ( const VmathPoint3 *pnt, vec_float4 *quad )
+{   /* Writes x, y, z of *pnt into *quad while preserving the last word already stored in *quad. */
+    vec_uint4 keepLast = _VECTORMATH_MASK_0x000F;
+    vec_float4 merged = *quad;
+    merged = vec_sel( pnt->vec128, merged, keepLast );  /* last word from the old quad, the rest from pnt */
+    *quad = merged;
+}
+
+static inline void vmathP3LoadXYZArray( VmathPoint3 *pnt0, VmathPoint3 *pnt1, VmathPoint3 *pnt2, VmathPoint3 *pnt3, const vec_float4 *threeQuads )
+{   /* Unpacks four points from three quads packed as x0 y0 z0 x1 | y1 z1 x2 y2 | z2 x3 y3 z3. */
+    vec_float4 quadA, quadB, quadC, second, third, fourth;
+    quadA = threeQuads[0];                    /* x0 y0 z0 x1 */
+    quadB = threeQuads[1];                    /* y1 z1 x2 y2 */
+    quadC = threeQuads[2];                    /* z2 x3 y3 z3 */
+    second = vec_sld( quadA, quadB, 12 );     /* x1 y1 z1 x2 */
+    third  = vec_sld( quadB, quadC, 8 );      /* x2 y2 z2 x3 */
+    fourth = vec_sld( quadC, quadC, 4 );      /* x3 y3 z3 z2 */
+    pnt0->vec128 = quadA;                     /* 4th element of each output is leftover neighbour data */
+    pnt1->vec128 = second;
+    pnt2->vec128 = third;
+    pnt3->vec128 = fourth;
+}
+
+static inline void vmathP3StoreXYZArray( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, const VmathPoint3 *pnt2, const VmathPoint3 *pnt3, vec_float4 *threeQuads )
+{   /* Packs x, y, z of four points tightly into three quads (12 floats, no padding). */
+    vec_float4 quadA, quadB, quadC;
+    quadA = vec_perm( pnt0->vec128, pnt1->vec128, _VECTORMATH_PERM_XYZA );  /* x0 y0 z0 x1 */
+    quadB = vec_perm( pnt1->vec128, pnt2->vec128, _VECTORMATH_PERM_YZAB );  /* y1 z1 x2 y2 */
+    quadC = vec_perm( pnt2->vec128, pnt3->vec128, _VECTORMATH_PERM_ZABC );  /* z2 x3 y3 z3 */
+    threeQuads[0] = quadA;
+    threeQuads[1] = quadB;
+    threeQuads[2] = quadC;
+}
+
+static inline void vmathP3StoreHalfFloats( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, const VmathPoint3 *pnt2, const VmathPoint3 *pnt3, const VmathPoint3 *pnt4, const VmathPoint3 *pnt5, const VmathPoint3 *pnt6, const VmathPoint3 *pnt7, vec_ushort8 *threeQuads )
+{   /* Packs x, y, z of eight points into six float quads, then converts them pairwise into threeQuads[0..2]. */
+    vec_float4 firstFour[3];
+    vec_float4 lastFour[3];
+    vmathP3StoreXYZArray( pnt0, pnt1, pnt2, pnt3, firstFour );  /* points 0-3 -> three packed float quads */
+    vmathP3StoreXYZArray( pnt4, pnt5, pnt6, pnt7, lastFour );   /* points 4-7 -> three packed float quads */
+    threeQuads[0] = _vmath2VfToHalfFloats(firstFour[0], firstFour[1]);
+    threeQuads[1] = _vmath2VfToHalfFloats(firstFour[2], lastFour[0]);  /* this output quad straddles both groups */
+    threeQuads[2] = _vmath2VfToHalfFloats(lastFour[1], lastFour[2]);
+}
+
+static inline void vmathP3SetX( VmathPoint3 *result, float _x )
+{   /* Stores _x into element 0 (x) of *result. */
+    _vmathVfSetElement(result->vec128, _x, 0);  /* helper is defined outside this view; it is handed the lvalue itself, so presumably a macro */
+}
+
+static inline float vmathP3GetX( const VmathPoint3 *pnt )
+{   /* Returns element 0 (x) of *pnt as a scalar float. */
+    return _vmathVfGetElement(pnt->vec128, 0);
+}
+
+static inline void vmathP3SetY( VmathPoint3 *result, float _y )
+{   /* Stores _y into element 1 (y) of *result. */
+    _vmathVfSetElement(result->vec128, _y, 1);
+}
+
+static inline float vmathP3GetY( const VmathPoint3 *pnt )
+{   /* Returns element 1 (y) of *pnt as a scalar float. */
+    return _vmathVfGetElement(pnt->vec128, 1);
+}
+
+static inline void vmathP3SetZ( VmathPoint3 *result, float _z )
+{   /* Stores _z into element 2 (z) of *result. */
+    _vmathVfSetElement(result->vec128, _z, 2);
+}
+
+static inline float vmathP3GetZ( const VmathPoint3 *pnt )
+{   /* Returns element 2 (z) of *pnt as a scalar float. */
+    return _vmathVfGetElement(pnt->vec128, 2);
+}
+
+static inline void vmathP3SetElem( VmathPoint3 *result, int idx, float value )
+{   /* Stores value into element idx of *result. */
+    _vmathVfSetElement(result->vec128, value, idx);  /* idx is forwarded without a range check; callers presumably pass 0..2 */
+}
+
+static inline float vmathP3GetElem( const VmathPoint3 *pnt, int idx )
+{   /* Returns element idx of *pnt as a scalar float. */
+    return _vmathVfGetElement(pnt->vec128, idx);  /* idx is forwarded without a range check; callers presumably pass 0..2 */
+}
+
+static inline void vmathP3Sub( VmathVector3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
+{   /* Difference of two points, produced as a vector: *result = *pnt0 - *pnt1. */
+    result->vec128 = vec_sub( pnt0->vec128, pnt1->vec128 );
+}
+
+static inline void vmathP3AddV3( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *vec1 )
+{   /* Offsets a point by a vector: *result = *pnt + *vec1. */
+    result->vec128 = vec_add( pnt->vec128, vec1->vec128 );
+}
+
+static inline void vmathP3SubV3( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *vec1 )
+{   /* Offsets a point by the negative of a vector: *result = *pnt - *vec1. */
+    result->vec128 = vec_sub( pnt->vec128, vec1->vec128 );
+}
+
+static inline void vmathP3MulPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
+{   /* Element-wise product: result[i] = pnt0[i] * pnt1[i]. */
+    result->vec128 = vec_madd( pnt0->vec128, pnt1->vec128, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );  /* multiply-add with a zero addend */
+}
+
+static inline void vmathP3DivPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
+{   /* Element-wise quotient via divf4: result[i] = pnt0[i] / pnt1[i]. */
+    result->vec128 = divf4( pnt0->vec128, pnt1->vec128 );
+}
+
+static inline void vmathP3RecipPerElem( VmathPoint3 *result, const VmathPoint3 *pnt )
+{   /* Element-wise reciprocal of *pnt, computed by recipf4. */
+    result->vec128 = recipf4( pnt->vec128 );
+}
+
+static inline void vmathP3SqrtPerElem( VmathPoint3 *result, const VmathPoint3 *pnt )
+{   /* Element-wise square root of *pnt, computed by sqrtf4. */
+    result->vec128 = sqrtf4( pnt->vec128 );
+}
+
+static inline void vmathP3RsqrtPerElem( VmathPoint3 *result, const VmathPoint3 *pnt )
+{   /* Element-wise reciprocal square root of *pnt, computed by rsqrtf4. */
+    result->vec128 = rsqrtf4( pnt->vec128 );
+}
+
+static inline void vmathP3AbsPerElem( VmathPoint3 *result, const VmathPoint3 *pnt )
+{   /* Element-wise absolute value of *pnt, computed by fabsf4. */
+    result->vec128 = fabsf4( pnt->vec128 );
+}
+
+static inline void vmathP3CopySignPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
+{   /* Element-wise copysignf4( pnt0, pnt1 ); presumably magnitude from pnt0 and sign from pnt1, as with C copysignf - confirm against the SIMD math library. */
+    result->vec128 = copysignf4( pnt0->vec128, pnt1->vec128 );
+}
+
+static inline void vmathP3MaxPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
+{   /* Element-wise maximum of *pnt0 and *pnt1, computed by fmaxf4. */
+    result->vec128 = fmaxf4( pnt0->vec128, pnt1->vec128 );
+}
+
+static inline float vmathP3MaxElem( const VmathPoint3 *pnt )
+{   /* Returns the largest of x, y, z of *pnt; the 4th element is not consulted. */
+    vec_float4 lanes = pnt->vec128;
+    vec_float4 best = fmaxf4( vec_splat( lanes, 1 ), lanes );  /* element 0 now holds max(y, x) */
+    best = fmaxf4( vec_splat( lanes, 2 ), best );               /* fold in z */
+    return _vmathVfGetElement(best, 0);                         /* only element 0 carries the full reduction */
+}
+
+static inline void vmathP3MinPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
+{   /* Element-wise minimum of *pnt0 and *pnt1, computed by fminf4. */
+    result->vec128 = fminf4( pnt0->vec128, pnt1->vec128 );
+}
+
+static inline float vmathP3MinElem( const VmathPoint3 *pnt )
+{   /* Returns the smallest of x, y, z of *pnt; the 4th element is not consulted. */
+    vec_float4 lanes = pnt->vec128;
+    vec_float4 least = fminf4( vec_splat( lanes, 1 ), lanes );  /* element 0 now holds min(y, x) */
+    least = fminf4( vec_splat( lanes, 2 ), least );              /* fold in z */
+    return _vmathVfGetElement(least, 0);                         /* only element 0 carries the full reduction */
+}
+
+static inline float vmathP3Sum( const VmathPoint3 *pnt )
+{   /* Returns x + y + z of *pnt; the 4th element is not included. */
+    vec_float4 lanes = pnt->vec128;
+    vec_float4 total = vec_add( vec_splat( lanes, 1 ), lanes );  /* element 0 = y + x */
+    total = vec_add( vec_splat( lanes, 2 ), total );              /* + z */
+    return _vmathVfGetElement(total, 0);                          /* only element 0 carries the full sum */
+}
+
+static inline void vmathP3Scale( VmathPoint3 *result, const VmathPoint3 *pnt, float scaleVal )
+{   /* Uniform scale: multiplies every coordinate of *pnt by scaleVal. */
+    VmathPoint3 factor;
+    vmathP3MakeFromScalar( &factor, scaleVal );   /* scaleVal in every coordinate */
+    vmathP3MulPerElem( result, pnt, &factor );    /* element-wise multiply */
+}
+
+static inline void vmathP3NonUniformScale( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *scaleVec )
+{   /* Per-axis scale: multiplies each coordinate of *pnt by the matching element of *scaleVec. */
+    VmathPoint3 factors;
+    vmathP3MakeFromV3( &factors, scaleVec );      /* retype the scale vector as a point */
+    vmathP3MulPerElem( result, pnt, &factors );   /* element-wise multiply */
+}
+
+static inline float vmathP3Projection( const VmathPoint3 *pnt, const VmathVector3 *unitVec )
+{   /* Three-component dot product of *pnt with *unitVec, returned as a scalar. */
+    vec_float4 dot3 = _vmathVfDot3( pnt->vec128, unitVec->vec128 );
+    return _vmathVfGetElement(dot3, 0);  /* read the product out of element 0 */
+}
+
+static inline float vmathP3DistSqrFromOrigin( const VmathPoint3 *pnt )
+{   /* Squared distance of *pnt from the origin. */
+    VmathVector3 fromOrigin;
+    vmathV3MakeFromP3( &fromOrigin, pnt );    /* treat the point as a vector from the origin */
+    return vmathV3LengthSqr( &fromOrigin );
+}
+
+static inline float vmathP3DistFromOrigin( const VmathPoint3 *pnt )
+{   /* Distance of *pnt from the origin. */
+    VmathVector3 fromOrigin;
+    vmathV3MakeFromP3( &fromOrigin, pnt );    /* treat the point as a vector from the origin */
+    return vmathV3Length( &fromOrigin );
+}
+
+static inline float vmathP3DistSqr( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
+{   /* Squared distance between *pnt0 and *pnt1. */
+    VmathVector3 offset;
+    vmathP3Sub( &offset, pnt1, pnt0 );        /* offset = pnt1 - pnt0 */
+    return vmathV3LengthSqr( &offset );
+}
+
+static inline float vmathP3Dist( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
+{   /* Distance between *pnt0 and *pnt1. */
+    VmathVector3 offset;
+    vmathP3Sub( &offset, pnt1, pnt0 );        /* offset = pnt1 - pnt0 */
+    return vmathV3Length( &offset );
+}
+
+static inline void vmathP3Select( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, unsigned int select1 )
+{
+    /* Picks *pnt1 when select1 is nonzero and *pnt0 otherwise, without branching on the point data. */
+    unsigned int pickBits = ( select1 != 0 ) ? ~0u : 0u;  /* all ones or all zeros */
+    result->vec128 = vec_sel( pnt0->vec128, pnt1->vec128, _vmathVuiSplatScalar(pickBits) );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathP3Print( const VmathPoint3 *pnt )
+{   /* Debug helper: prints x, y, z of *pnt on one line to stdout. */
+    union { vec_float4 quad; float lane[4]; } view;
+    view.quad = pnt->vec128;  /* reinterpret the quadword as four floats; the 4th is not printed */
+    printf( "( %f %f %f )\n", view.lane[0], view.lane[1], view.lane[2] );
+}
+
+static inline void vmathP3Prints( const VmathPoint3 *pnt, const char *name )
+{   /* Debug helper: prints "name: ( x y z )" for *pnt to stdout. */
+    union { vec_float4 quad; float lane[4]; } view;
+    view.quad = pnt->vec128;  /* reinterpret the quadword as four floats; the 4th is not printed */
+    printf( "%s: ( %f %f %f )\n", name, view.lane[0], view.lane[1], view.lane[2] );
+}
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/ppu/c/vec_aos_v.h b/Extras/vectormathlibrary/include/vectormath/ppu/c/vec_aos_v.h
index 6dcc79c72..90ee6b98c 100644
--- a/Extras/vectormathlibrary/include/vectormath/ppu/c/vec_aos_v.h
+++ b/Extras/vectormathlibrary/include/vectormath/ppu/c/vec_aos_v.h
@@ -1,953 +1,953 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_VEC_AOS_V_C_H
-#define _VECTORMATH_VEC_AOS_V_C_H
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/*-----------------------------------------------------------------------------
- * Constants
- * for permutes words are labeled [x,y,z,w] [a,b,c,d]
- */
-#define _VECTORMATH_PERM_X 0x00010203
-#define _VECTORMATH_PERM_Y 0x04050607
-#define _VECTORMATH_PERM_Z 0x08090a0b
-#define _VECTORMATH_PERM_W 0x0c0d0e0f
-#define _VECTORMATH_PERM_A 0x10111213
-#define _VECTORMATH_PERM_B 0x14151617
-#define _VECTORMATH_PERM_C 0x18191a1b
-#define _VECTORMATH_PERM_D 0x1c1d1e1f
-#define _VECTORMATH_PERM_XYZA (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A }
-#define _VECTORMATH_PERM_ZXYW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_W }
-#define _VECTORMATH_PERM_YZXW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_W }
-#define _VECTORMATH_PERM_YZAB (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A, _VECTORMATH_PERM_B }
-#define _VECTORMATH_PERM_ZABC (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A, _VECTORMATH_PERM_B, _VECTORMATH_PERM_C }
-#define _VECTORMATH_PERM_XYAW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A, _VECTORMATH_PERM_W }
-#define _VECTORMATH_PERM_XAZW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_A, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_W }
-#define _VECTORMATH_MASK_0xF000 (vec_uint4){ 0xffffffff, 0, 0, 0 }
-#define _VECTORMATH_MASK_0x0F00 (vec_uint4){ 0, 0xffffffff, 0, 0 }
-#define _VECTORMATH_MASK_0x00F0 (vec_uint4){ 0, 0, 0xffffffff, 0 }
-#define _VECTORMATH_MASK_0x000F (vec_uint4){ 0, 0, 0, 0xffffffff }
-#define _VECTORMATH_UNIT_1000 (vec_float4){ 1.0f, 0.0f, 0.0f, 0.0f }
-#define _VECTORMATH_UNIT_0100 (vec_float4){ 0.0f, 1.0f, 0.0f, 0.0f }
-#define _VECTORMATH_UNIT_0010 (vec_float4){ 0.0f, 0.0f, 1.0f, 0.0f }
-#define _VECTORMATH_UNIT_0001 (vec_float4){ 0.0f, 0.0f, 0.0f, 1.0f }
-#define _VECTORMATH_SLERP_TOL 0.999f
-
-/*-----------------------------------------------------------------------------
- * Definitions
- */
-#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
-#define _VECTORMATH_INTERNAL_FUNCTIONS
-
-#endif
-
-static inline VmathVector3 vmathV3MakeFromElems_V( float _x, float _y, float _z )
-{
-    VmathVector3 result;
-    vmathV3MakeFromElems(&result, _x, _y, _z);
-    return result;
-}
-
-static inline VmathVector3 vmathV3MakeFromP3_V( VmathPoint3 pnt )
-{
-    VmathVector3 result;
-    vmathV3MakeFromP3(&result, &pnt);
-    return result;
-}
-
-static inline VmathVector3 vmathV3MakeFromScalar_V( float scalar )
-{
-    VmathVector3 result;
-    vmathV3MakeFromScalar(&result, scalar);
-    return result;
-}
-
-static inline VmathVector3 vmathV3MakeFrom128_V( vec_float4 vf4 )
-{
-    VmathVector3 result;
-    vmathV3MakeFrom128(&result, vf4);
-    return result;
-}
-
-static inline VmathVector3 vmathV3MakeXAxis_V( )
-{
-    VmathVector3 result;
-    vmathV3MakeXAxis(&result);
-    return result;
-}
-
-static inline VmathVector3 vmathV3MakeYAxis_V( )
-{
-    VmathVector3 result;
-    vmathV3MakeYAxis(&result);
-    return result;
-}
-
-static inline VmathVector3 vmathV3MakeZAxis_V( )
-{
-    VmathVector3 result;
-    vmathV3MakeZAxis(&result);
-    return result;
-}
-
-static inline VmathVector3 vmathV3Lerp_V( float t, VmathVector3 vec0, VmathVector3 vec1 )
-{
-    VmathVector3 result;
-    vmathV3Lerp(&result, t, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathVector3 vmathV3Slerp_V( float t, VmathVector3 unitVec0, VmathVector3 unitVec1 )
-{
-    VmathVector3 result;
-    vmathV3Slerp(&result, t, &unitVec0, &unitVec1);
-    return result;
-}
-
-static inline vec_float4 vmathV3Get128_V( VmathVector3 vec )
-{
-    return vmathV3Get128(&vec);
-}
-
-static inline void vmathV3StoreXYZ_V( VmathVector3 vec, vec_float4 *quad )
-{
-    vmathV3StoreXYZ(&vec, quad);
-}
-
-static inline void vmathV3LoadXYZArray_V( VmathVector3 *vec0, VmathVector3 *vec1, VmathVector3 *vec2, VmathVector3 *vec3, const vec_float4 *threeQuads )
-{
-    vmathV3LoadXYZArray(vec0, vec1, vec2, vec3, threeQuads);
-}
-
-static inline void vmathV3StoreXYZArray_V( VmathVector3 vec0, VmathVector3 vec1, VmathVector3 vec2, VmathVector3 vec3, vec_float4 *threeQuads )
-{
-    vmathV3StoreXYZArray(&vec0, &vec1, &vec2, &vec3, threeQuads);
-}
-
-static inline void vmathV3StoreHalfFloats_V( VmathVector3 vec0, VmathVector3 vec1, VmathVector3 vec2, VmathVector3 vec3, VmathVector3 vec4, VmathVector3 vec5, VmathVector3 vec6, VmathVector3 vec7, vec_ushort8 *threeQuads )
-{
-    vmathV3StoreHalfFloats(&vec0, &vec1, &vec2, &vec3, &vec4, &vec5, &vec6, &vec7, threeQuads);
-}
-
-static inline void vmathV3SetX_V( VmathVector3 *result, float _x )
-{
-    vmathV3SetX(result, _x);
-}
-
-static inline float vmathV3GetX_V( VmathVector3 vec )
-{
-    return vmathV3GetX(&vec);
-}
-
-static inline void vmathV3SetY_V( VmathVector3 *result, float _y )
-{
-    vmathV3SetY(result, _y);
-}
-
-static inline float vmathV3GetY_V( VmathVector3 vec )
-{
-    return vmathV3GetY(&vec);
-}
-
-static inline void vmathV3SetZ_V( VmathVector3 *result, float _z )
-{
-    vmathV3SetZ(result, _z);
-}
-
-static inline float vmathV3GetZ_V( VmathVector3 vec )
-{
-    return vmathV3GetZ(&vec);
-}
-
-static inline void vmathV3SetElem_V( VmathVector3 *result, int idx, float value )
-{
-    vmathV3SetElem(result, idx, value);
-}
-
-static inline float vmathV3GetElem_V( VmathVector3 vec, int idx )
-{
-    return vmathV3GetElem(&vec, idx);
-}
-
-static inline VmathVector3 vmathV3Add_V( VmathVector3 vec0, VmathVector3 vec1 )
-{
-    VmathVector3 result;
-    vmathV3Add(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathVector3 vmathV3Sub_V( VmathVector3 vec0, VmathVector3 vec1 )
-{
-    VmathVector3 result;
-    vmathV3Sub(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathPoint3 vmathV3AddP3_V( VmathVector3 vec, VmathPoint3 pnt1 )
-{
-    VmathPoint3 result;
-    vmathV3AddP3(&result, &vec, &pnt1);
-    return result;
-}
-
-static inline VmathVector3 vmathV3ScalarMul_V( VmathVector3 vec, float scalar )
-{
-    VmathVector3 result;
-    vmathV3ScalarMul(&result, &vec, scalar);
-    return result;
-}
-
-static inline VmathVector3 vmathV3ScalarDiv_V( VmathVector3 vec, float scalar )
-{
-    VmathVector3 result;
-    vmathV3ScalarDiv(&result, &vec, scalar);
-    return result;
-}
-
-static inline VmathVector3 vmathV3Neg_V( VmathVector3 vec )
-{
-    VmathVector3 result;
-    vmathV3Neg(&result, &vec);
-    return result;
-}
-
-static inline VmathVector3 vmathV3MulPerElem_V( VmathVector3 vec0, VmathVector3 vec1 )
-{
-    VmathVector3 result;
-    vmathV3MulPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathVector3 vmathV3DivPerElem_V( VmathVector3 vec0, VmathVector3 vec1 )
-{
-    VmathVector3 result;
-    vmathV3DivPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathVector3 vmathV3RecipPerElem_V( VmathVector3 vec )
-{
-    VmathVector3 result;
-    vmathV3RecipPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathVector3 vmathV3SqrtPerElem_V( VmathVector3 vec )
-{
-    VmathVector3 result;
-    vmathV3SqrtPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathVector3 vmathV3RsqrtPerElem_V( VmathVector3 vec )
-{
-    VmathVector3 result;
-    vmathV3RsqrtPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathVector3 vmathV3AbsPerElem_V( VmathVector3 vec )
-{
-    VmathVector3 result;
-    vmathV3AbsPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathVector3 vmathV3CopySignPerElem_V( VmathVector3 vec0, VmathVector3 vec1 )
-{
-    VmathVector3 result;
-    vmathV3CopySignPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathVector3 vmathV3MaxPerElem_V( VmathVector3 vec0, VmathVector3 vec1 )
-{
-    VmathVector3 result;
-    vmathV3MaxPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline float vmathV3MaxElem_V( VmathVector3 vec )
-{
-    return vmathV3MaxElem(&vec);
-}
-
-static inline VmathVector3 vmathV3MinPerElem_V( VmathVector3 vec0, VmathVector3 vec1 )
-{
-    VmathVector3 result;
-    vmathV3MinPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline float vmathV3MinElem_V( VmathVector3 vec )
-{
-    return vmathV3MinElem(&vec);
-}
-
-static inline float vmathV3Sum_V( VmathVector3 vec )
-{
-    return vmathV3Sum(&vec);
-}
-
-static inline float vmathV3Dot_V( VmathVector3 vec0, VmathVector3 vec1 )
-{
-    return vmathV3Dot(&vec0, &vec1);
-}
-
-static inline float vmathV3LengthSqr_V( VmathVector3 vec )
-{
-    return vmathV3LengthSqr(&vec);
-}
-
-static inline float vmathV3Length_V( VmathVector3 vec )
-{
-    return vmathV3Length(&vec);
-}
-
-static inline VmathVector3 vmathV3Normalize_V( VmathVector3 vec )
-{
-    VmathVector3 result;
-    vmathV3Normalize(&result, &vec);
-    return result;
-}
-
-static inline VmathVector3 vmathV3Cross_V( VmathVector3 vec0, VmathVector3 vec1 )
-{
-    VmathVector3 result;
-    vmathV3Cross(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathVector3 vmathV3Select_V( VmathVector3 vec0, VmathVector3 vec1, unsigned int select1 )
-{
-    VmathVector3 result;
-    vmathV3Select(&result, &vec0, &vec1, select1);
-    return result;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathV3Print_V( VmathVector3 vec )
-{
-    vmathV3Print(&vec);
-}
-
-static inline void vmathV3Prints_V( VmathVector3 vec, const char *name )
-{
-    vmathV3Prints(&vec, name);
-}
-
-#endif
-
-static inline VmathVector4 vmathV4MakeFromElems_V( float _x, float _y, float _z, float _w )
-{
-    VmathVector4 result;
-    vmathV4MakeFromElems(&result, _x, _y, _z, _w);
-    return result;
-}
-
-static inline VmathVector4 vmathV4MakeFromV3Scalar_V( VmathVector3 xyz, float _w )
-{
-    VmathVector4 result;
-    vmathV4MakeFromV3Scalar(&result, &xyz, _w);
-    return result;
-}
-
-static inline VmathVector4 vmathV4MakeFromV3_V( VmathVector3 vec )
-{
-    VmathVector4 result;
-    vmathV4MakeFromV3(&result, &vec);
-    return result;
-}
-
-static inline VmathVector4 vmathV4MakeFromP3_V( VmathPoint3 pnt )
-{
-    VmathVector4 result;
-    vmathV4MakeFromP3(&result, &pnt);
-    return result;
-}
-
-static inline VmathVector4 vmathV4MakeFromQ_V( VmathQuat quat )
-{
-    VmathVector4 result;
-    vmathV4MakeFromQ(&result, &quat);
-    return result;
-}
-
-static inline VmathVector4 vmathV4MakeFromScalar_V( float scalar )
-{
-    VmathVector4 result;
-    vmathV4MakeFromScalar(&result, scalar);
-    return result;
-}
-
-static inline VmathVector4 vmathV4MakeFrom128_V( vec_float4 vf4 )
-{
-    VmathVector4 result;
-    vmathV4MakeFrom128(&result, vf4);
-    return result;
-}
-
-static inline VmathVector4 vmathV4MakeXAxis_V( )
-{
-    VmathVector4 result;
-    vmathV4MakeXAxis(&result);
-    return result;
-}
-
-static inline VmathVector4 vmathV4MakeYAxis_V( )
-{
-    VmathVector4 result;
-    vmathV4MakeYAxis(&result);
-    return result;
-}
-
-static inline VmathVector4 vmathV4MakeZAxis_V( )
-{
-    VmathVector4 result;
-    vmathV4MakeZAxis(&result);
-    return result;
-}
-
-static inline VmathVector4 vmathV4MakeWAxis_V( )
-{
-    VmathVector4 result;
-    vmathV4MakeWAxis(&result);
-    return result;
-}
-
-static inline VmathVector4 vmathV4Lerp_V( float t, VmathVector4 vec0, VmathVector4 vec1 )
-{
-    VmathVector4 result;
-    vmathV4Lerp(&result, t, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathVector4 vmathV4Slerp_V( float t, VmathVector4 unitVec0, VmathVector4 unitVec1 )
-{
-    VmathVector4 result;
-    vmathV4Slerp(&result, t, &unitVec0, &unitVec1);
-    return result;
-}
-
-static inline vec_float4 vmathV4Get128_V( VmathVector4 vec )
-{
-    return vmathV4Get128(&vec);
-}
-
-static inline void vmathV4StoreHalfFloats_V( VmathVector4 vec0, VmathVector4 vec1, VmathVector4 vec2, VmathVector4 vec3, vec_ushort8 *twoQuads )
-{
-    vmathV4StoreHalfFloats(&vec0, &vec1, &vec2, &vec3, twoQuads);
-}
-
-static inline void vmathV4SetXYZ_V( VmathVector4 *result, VmathVector3 vec )
-{
-    vmathV4SetXYZ(result, &vec);
-}
-
-static inline VmathVector3 vmathV4GetXYZ_V( VmathVector4 vec )
-{
-    VmathVector3 result;
-    vmathV4GetXYZ(&result, &vec);
-    return result;
-}
-
-static inline void vmathV4SetX_V( VmathVector4 *result, float _x )
-{
-    vmathV4SetX(result, _x);
-}
-
-static inline float vmathV4GetX_V( VmathVector4 vec )
-{
-    return vmathV4GetX(&vec);
-}
-
-static inline void vmathV4SetY_V( VmathVector4 *result, float _y )
-{
-    vmathV4SetY(result, _y);
-}
-
-static inline float vmathV4GetY_V( VmathVector4 vec )
-{
-    return vmathV4GetY(&vec);
-}
-
-static inline void vmathV4SetZ_V( VmathVector4 *result, float _z )
-{
-    vmathV4SetZ(result, _z);
-}
-
-static inline float vmathV4GetZ_V( VmathVector4 vec )
-{
-    return vmathV4GetZ(&vec);
-}
-
-static inline void vmathV4SetW_V( VmathVector4 *result, float _w )
-{
-    vmathV4SetW(result, _w);
-}
-
-static inline float vmathV4GetW_V( VmathVector4 vec )
-{
-    return vmathV4GetW(&vec);
-}
-
-static inline void vmathV4SetElem_V( VmathVector4 *result, int idx, float value )
-{
-    vmathV4SetElem(result, idx, value);
-}
-
-static inline float vmathV4GetElem_V( VmathVector4 vec, int idx )
-{
-    return vmathV4GetElem(&vec, idx);
-}
-
-static inline VmathVector4 vmathV4Add_V( VmathVector4 vec0, VmathVector4 vec1 )
-{
-    VmathVector4 result;
-    vmathV4Add(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathVector4 vmathV4Sub_V( VmathVector4 vec0, VmathVector4 vec1 )
-{
-    VmathVector4 result;
-    vmathV4Sub(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathVector4 vmathV4ScalarMul_V( VmathVector4 vec, float scalar )
-{
-    VmathVector4 result;
-    vmathV4ScalarMul(&result, &vec, scalar);
-    return result;
-}
-
-static inline VmathVector4 vmathV4ScalarDiv_V( VmathVector4 vec, float scalar )
-{
-    VmathVector4 result;
-    vmathV4ScalarDiv(&result, &vec, scalar);
-    return result;
-}
-
-static inline VmathVector4 vmathV4Neg_V( VmathVector4 vec )
-{
-    VmathVector4 result;
-    vmathV4Neg(&result, &vec);
-    return result;
-}
-
-static inline VmathVector4 vmathV4MulPerElem_V( VmathVector4 vec0, VmathVector4 vec1 )
-{
-    VmathVector4 result;
-    vmathV4MulPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathVector4 vmathV4DivPerElem_V( VmathVector4 vec0, VmathVector4 vec1 )
-{
-    VmathVector4 result;
-    vmathV4DivPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathVector4 vmathV4RecipPerElem_V( VmathVector4 vec )
-{
-    VmathVector4 result;
-    vmathV4RecipPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathVector4 vmathV4SqrtPerElem_V( VmathVector4 vec )
-{
-    VmathVector4 result;
-    vmathV4SqrtPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathVector4 vmathV4RsqrtPerElem_V( VmathVector4 vec )
-{
-    VmathVector4 result;
-    vmathV4RsqrtPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathVector4 vmathV4AbsPerElem_V( VmathVector4 vec )
-{
-    VmathVector4 result;
-    vmathV4AbsPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathVector4 vmathV4CopySignPerElem_V( VmathVector4 vec0, VmathVector4 vec1 )
-{
-    VmathVector4 result;
-    vmathV4CopySignPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathVector4 vmathV4MaxPerElem_V( VmathVector4 vec0, VmathVector4 vec1 )
-{
-    VmathVector4 result;
-    vmathV4MaxPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline float vmathV4MaxElem_V( VmathVector4 vec )
-{
-    return vmathV4MaxElem(&vec);
-}
-
-static inline VmathVector4 vmathV4MinPerElem_V( VmathVector4 vec0, VmathVector4 vec1 )
-{
-    VmathVector4 result;
-    vmathV4MinPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline float vmathV4MinElem_V( VmathVector4 vec )
-{
-    return vmathV4MinElem(&vec);
-}
-
-static inline float vmathV4Sum_V( VmathVector4 vec )
-{
-    return vmathV4Sum(&vec);
-}
-
-static inline float vmathV4Dot_V( VmathVector4 vec0, VmathVector4 vec1 )
-{
-    return vmathV4Dot(&vec0, &vec1);
-}
-
-static inline float vmathV4LengthSqr_V( VmathVector4 vec )
-{
-    return vmathV4LengthSqr(&vec);
-}
-
-static inline float vmathV4Length_V( VmathVector4 vec )
-{
-    return vmathV4Length(&vec);
-}
-
-static inline VmathVector4 vmathV4Normalize_V( VmathVector4 vec )
-{
-    VmathVector4 result;
-    vmathV4Normalize(&result, &vec);
-    return result;
-}
-
-static inline VmathVector4 vmathV4Select_V( VmathVector4 vec0, VmathVector4 vec1, unsigned int select1 )
-{
-    VmathVector4 result;
-    vmathV4Select(&result, &vec0, &vec1, select1);
-    return result;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathV4Print_V( VmathVector4 vec )
-{
-    vmathV4Print(&vec);
-}
-
-static inline void vmathV4Prints_V( VmathVector4 vec, const char *name )
-{
-    vmathV4Prints(&vec, name);
-}
-
-#endif
-
-static inline VmathPoint3 vmathP3MakeFromElems_V( float _x, float _y, float _z )
-{
-    VmathPoint3 result;
-    vmathP3MakeFromElems(&result, _x, _y, _z);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3MakeFromV3_V( VmathVector3 vec )
-{
-    VmathPoint3 result;
-    vmathP3MakeFromV3(&result, &vec);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3MakeFromScalar_V( float scalar )
-{
-    VmathPoint3 result;
-    vmathP3MakeFromScalar(&result, scalar);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3MakeFrom128_V( vec_float4 vf4 )
-{
-    VmathPoint3 result;
-    vmathP3MakeFrom128(&result, vf4);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3Lerp_V( float t, VmathPoint3 pnt0, VmathPoint3 pnt1 )
-{
-    VmathPoint3 result;
-    vmathP3Lerp(&result, t, &pnt0, &pnt1);
-    return result;
-}
-
-static inline vec_float4 vmathP3Get128_V( VmathPoint3 pnt )
-{
-    return vmathP3Get128(&pnt);
-}
-
-static inline void vmathP3StoreXYZ_V( VmathPoint3 pnt, vec_float4 *quad )
-{
-    vmathP3StoreXYZ(&pnt, quad);
-}
-
-static inline void vmathP3LoadXYZArray_V( VmathPoint3 *pnt0, VmathPoint3 *pnt1, VmathPoint3 *pnt2, VmathPoint3 *pnt3, const vec_float4 *threeQuads )
-{
-    vmathP3LoadXYZArray(pnt0, pnt1, pnt2, pnt3, threeQuads);
-}
-
-static inline void vmathP3StoreXYZArray_V( VmathPoint3 pnt0, VmathPoint3 pnt1, VmathPoint3 pnt2, VmathPoint3 pnt3, vec_float4 *threeQuads )
-{
-    vmathP3StoreXYZArray(&pnt0, &pnt1, &pnt2, &pnt3, threeQuads);
-}
-
-static inline void vmathP3StoreHalfFloats_V( VmathPoint3 pnt0, VmathPoint3 pnt1, VmathPoint3 pnt2, VmathPoint3 pnt3, VmathPoint3 pnt4, VmathPoint3 pnt5, VmathPoint3 pnt6, VmathPoint3 pnt7, vec_ushort8 *threeQuads )
-{
-    vmathP3StoreHalfFloats(&pnt0, &pnt1, &pnt2, &pnt3, &pnt4, &pnt5, &pnt6, &pnt7, threeQuads);
-}
-
-static inline void vmathP3SetX_V( VmathPoint3 *result, float _x )
-{
-    vmathP3SetX(result, _x);
-}
-
-static inline float vmathP3GetX_V( VmathPoint3 pnt )
-{
-    return vmathP3GetX(&pnt);
-}
-
-static inline void vmathP3SetY_V( VmathPoint3 *result, float _y )
-{
-    vmathP3SetY(result, _y);
-}
-
-static inline float vmathP3GetY_V( VmathPoint3 pnt )
-{
-    return vmathP3GetY(&pnt);
-}
-
-static inline void vmathP3SetZ_V( VmathPoint3 *result, float _z )
-{
-    vmathP3SetZ(result, _z);
-}
-
-static inline float vmathP3GetZ_V( VmathPoint3 pnt )
-{
-    return vmathP3GetZ(&pnt);
-}
-
-static inline void vmathP3SetElem_V( VmathPoint3 *result, int idx, float value )
-{
-    vmathP3SetElem(result, idx, value);
-}
-
-static inline float vmathP3GetElem_V( VmathPoint3 pnt, int idx )
-{
-    return vmathP3GetElem(&pnt, idx);
-}
-
-static inline VmathVector3 vmathP3Sub_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
-{
-    VmathVector3 result;
-    vmathP3Sub(&result, &pnt0, &pnt1);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3AddV3_V( VmathPoint3 pnt, VmathVector3 vec1 )
-{
-    VmathPoint3 result;
-    vmathP3AddV3(&result, &pnt, &vec1);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3SubV3_V( VmathPoint3 pnt, VmathVector3 vec1 )
-{
-    VmathPoint3 result;
-    vmathP3SubV3(&result, &pnt, &vec1);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3MulPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
-{
-    VmathPoint3 result;
-    vmathP3MulPerElem(&result, &pnt0, &pnt1);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3DivPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
-{
-    VmathPoint3 result;
-    vmathP3DivPerElem(&result, &pnt0, &pnt1);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3RecipPerElem_V( VmathPoint3 pnt )
-{
-    VmathPoint3 result;
-    vmathP3RecipPerElem(&result, &pnt);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3SqrtPerElem_V( VmathPoint3 pnt )
-{
-    VmathPoint3 result;
-    vmathP3SqrtPerElem(&result, &pnt);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3RsqrtPerElem_V( VmathPoint3 pnt )
-{
-    VmathPoint3 result;
-    vmathP3RsqrtPerElem(&result, &pnt);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3AbsPerElem_V( VmathPoint3 pnt )
-{
-    VmathPoint3 result;
-    vmathP3AbsPerElem(&result, &pnt);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3CopySignPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
-{
-    VmathPoint3 result;
-    vmathP3CopySignPerElem(&result, &pnt0, &pnt1);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3MaxPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
-{
-    VmathPoint3 result;
-    vmathP3MaxPerElem(&result, &pnt0, &pnt1);
-    return result;
-}
-
-static inline float vmathP3MaxElem_V( VmathPoint3 pnt )
-{
-    return vmathP3MaxElem(&pnt);
-}
-
-static inline VmathPoint3 vmathP3MinPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
-{
-    VmathPoint3 result;
-    vmathP3MinPerElem(&result, &pnt0, &pnt1);
-    return result;
-}
-
-static inline float vmathP3MinElem_V( VmathPoint3 pnt )
-{
-    return vmathP3MinElem(&pnt);
-}
-
-static inline float vmathP3Sum_V( VmathPoint3 pnt )
-{
-    return vmathP3Sum(&pnt);
-}
-
-static inline VmathPoint3 vmathP3Scale_V( VmathPoint3 pnt, float scaleVal )
-{
-    VmathPoint3 result;
-    vmathP3Scale(&result, &pnt, scaleVal);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3NonUniformScale_V( VmathPoint3 pnt, VmathVector3 scaleVec )
-{
-    VmathPoint3 result;
-    vmathP3NonUniformScale(&result, &pnt, &scaleVec);
-    return result;
-}
-
-static inline float vmathP3Projection_V( VmathPoint3 pnt, VmathVector3 unitVec )
-{
-    return vmathP3Projection(&pnt, &unitVec);
-}
-
-static inline float vmathP3DistSqrFromOrigin_V( VmathPoint3 pnt )
-{
-    return vmathP3DistSqrFromOrigin(&pnt);
-}
-
-static inline float vmathP3DistFromOrigin_V( VmathPoint3 pnt )
-{
-    return vmathP3DistFromOrigin(&pnt);
-}
-
-static inline float vmathP3DistSqr_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
-{
-    return vmathP3DistSqr(&pnt0, &pnt1);
-}
-
-static inline float vmathP3Dist_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
-{
-    return vmathP3Dist(&pnt0, &pnt1);
-}
-
-static inline VmathPoint3 vmathP3Select_V( VmathPoint3 pnt0, VmathPoint3 pnt1, unsigned int select1 )
-{
-    VmathPoint3 result;
-    vmathP3Select(&result, &pnt0, &pnt1, select1);
-    return result;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathP3Print_V( VmathPoint3 pnt )
-{
-    vmathP3Print(&pnt);
-}
-
-static inline void vmathP3Prints_V( VmathPoint3 pnt, const char *name )
-{
-    vmathP3Prints(&pnt, name);
-}
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_VEC_AOS_V_C_H
+#define _VECTORMATH_VEC_AOS_V_C_H
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*-----------------------------------------------------------------------------
+ * Constants
+ * For permutes, 32-bit words are labeled [x,y,z,w] (byte indices 0x00-0x0f) and [a,b,c,d] (byte indices 0x10-0x1f).
+ */
+#define _VECTORMATH_PERM_X 0x00010203
+#define _VECTORMATH_PERM_Y 0x04050607
+#define _VECTORMATH_PERM_Z 0x08090a0b
+#define _VECTORMATH_PERM_W 0x0c0d0e0f
+#define _VECTORMATH_PERM_A 0x10111213
+#define _VECTORMATH_PERM_B 0x14151617
+#define _VECTORMATH_PERM_C 0x18191a1b
+#define _VECTORMATH_PERM_D 0x1c1d1e1f
+#define _VECTORMATH_PERM_XYZA (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A }
+#define _VECTORMATH_PERM_ZXYW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_W }
+#define _VECTORMATH_PERM_YZXW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_W }
+#define _VECTORMATH_PERM_YZAB (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A, _VECTORMATH_PERM_B }
+#define _VECTORMATH_PERM_ZABC (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A, _VECTORMATH_PERM_B, _VECTORMATH_PERM_C }
+#define _VECTORMATH_PERM_XYAW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A, _VECTORMATH_PERM_W }
+#define _VECTORMATH_PERM_XAZW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_A, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_W }
+#define _VECTORMATH_MASK_0xF000 (vec_uint4){ 0xffffffff, 0, 0, 0 }
+#define _VECTORMATH_MASK_0x0F00 (vec_uint4){ 0, 0xffffffff, 0, 0 }
+#define _VECTORMATH_MASK_0x00F0 (vec_uint4){ 0, 0, 0xffffffff, 0 }
+#define _VECTORMATH_MASK_0x000F (vec_uint4){ 0, 0, 0, 0xffffffff }
+#define _VECTORMATH_UNIT_1000 (vec_float4){ 1.0f, 0.0f, 0.0f, 0.0f }
+#define _VECTORMATH_UNIT_0100 (vec_float4){ 0.0f, 1.0f, 0.0f, 0.0f }
+#define _VECTORMATH_UNIT_0010 (vec_float4){ 0.0f, 0.0f, 1.0f, 0.0f }
+#define _VECTORMATH_UNIT_0001 (vec_float4){ 0.0f, 0.0f, 0.0f, 1.0f }
+#define _VECTORMATH_SLERP_TOL 0.999f
+
+/*-----------------------------------------------------------------------------
+ * Definitions
+ */
+#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
+#define _VECTORMATH_INTERNAL_FUNCTIONS
+
+#endif
+
+static inline VmathVector3 vmathV3MakeFromElems_V( float _x, float _y, float _z )
+{   /* by-value form: vmathV3MakeFromElems fills a local, which is then returned */
+    VmathVector3 out;
+    vmathV3MakeFromElems(&out, _x, _y, _z);
+    return out;
+}
+
+static inline VmathVector3 vmathV3MakeFromP3_V( VmathPoint3 pnt )
+{   /* by-value form: the copy of pnt is passed by address to vmathV3MakeFromP3 */
+    VmathVector3 out;
+    vmathV3MakeFromP3(&out, &pnt);
+    return out;
+}
+
+static inline VmathVector3 vmathV3MakeFromScalar_V( float scalar )
+{   /* by-value form: vmathV3MakeFromScalar fills a local, which is then returned */
+    VmathVector3 out;
+    vmathV3MakeFromScalar(&out, scalar);
+    return out;
+}
+
+static inline VmathVector3 vmathV3MakeFrom128_V( vec_float4 vf4 )
+{   /* by-value form: vf4 is forwarded as-is to vmathV3MakeFrom128 */
+    VmathVector3 out;
+    vmathV3MakeFrom128(&out, vf4);
+    return out;
+}
+
+static inline VmathVector3 vmathV3MakeXAxis_V( void )
+{   /* returns by value what vmathV3MakeXAxis writes; (void) makes this a true prototype in C */
+    VmathVector3 result;
+    vmathV3MakeXAxis(&result);
+    return result;
+}
+
+static inline VmathVector3 vmathV3MakeYAxis_V( void )
+{   /* returns by value what vmathV3MakeYAxis writes; (void) makes this a true prototype in C */
+    VmathVector3 result;
+    vmathV3MakeYAxis(&result);
+    return result;
+}
+
+static inline VmathVector3 vmathV3MakeZAxis_V( void )
+{   /* returns by value what vmathV3MakeZAxis writes; (void) makes this a true prototype in C */
+    VmathVector3 result;
+    vmathV3MakeZAxis(&result);
+    return result;
+}
+
+static inline VmathVector3 vmathV3Lerp_V( float t, VmathVector3 vec0, VmathVector3 vec1 )
+{   /* by-value form: t is forwarded, vec0/vec1 copies go to vmathV3Lerp by address */
+    VmathVector3 out;
+    vmathV3Lerp(&out, t, &vec0, &vec1);
+    return out;
+}
+
+static inline VmathVector3 vmathV3Slerp_V( float t, VmathVector3 unitVec0, VmathVector3 unitVec1 )
+{   /* by-value form: t is forwarded, both vector copies go to vmathV3Slerp by address */
+    VmathVector3 out;
+    vmathV3Slerp(&out, t, &unitVec0, &unitVec1);
+    return out;
+}
+
+static inline vec_float4 vmathV3Get128_V( VmathVector3 vec )
+{   /* by-value form: returns vmathV3Get128 applied to the address of the local copy */
+    return vmathV3Get128(&vec);
+}
+
+static inline void vmathV3StoreXYZ_V( VmathVector3 vec, vec_float4 *quad )
+{   /* by-value form: vec copy goes in by address, quad pointer is forwarded unchanged */
+    vmathV3StoreXYZ(&vec, quad);
+}
+
+static inline void vmathV3LoadXYZArray_V( VmathVector3 *vec0, VmathVector3 *vec1, VmathVector3 *vec2, VmathVector3 *vec3, const vec_float4 *threeQuads )
+{   /* pure pass-through: every pointer argument is handed to vmathV3LoadXYZArray unchanged */
+    vmathV3LoadXYZArray(vec0, vec1, vec2, vec3, threeQuads);
+}
+
+static inline void vmathV3StoreXYZArray_V( VmathVector3 vec0, VmathVector3 vec1, VmathVector3 vec2, VmathVector3 vec3, vec_float4 *threeQuads )
+{   /* by-value form: the four vector copies go in by address, threeQuads is forwarded unchanged */
+    vmathV3StoreXYZArray(&vec0, &vec1, &vec2, &vec3, threeQuads);
+}
+
+static inline void vmathV3StoreHalfFloats_V( VmathVector3 vec0, VmathVector3 vec1, VmathVector3 vec2, VmathVector3 vec3, VmathVector3 vec4, VmathVector3 vec5, VmathVector3 vec6, VmathVector3 vec7, vec_ushort8 *threeQuads )
+{   /* by-value form: the eight vector copies go in by address, threeQuads is forwarded unchanged */
+    vmathV3StoreHalfFloats(&vec0, &vec1, &vec2, &vec3, &vec4, &vec5, &vec6, &vec7, threeQuads);
+}
+
+static inline void vmathV3SetX_V( VmathVector3 *result, float _x )
+{   /* pure pass-through to vmathV3SetX; result is still taken by pointer here */
+    vmathV3SetX(result, _x);
+}
+
+static inline float vmathV3GetX_V( VmathVector3 vec )
+{   /* by-value form: returns vmathV3GetX applied to the address of the local copy */
+    return vmathV3GetX(&vec);
+}
+
+static inline void vmathV3SetY_V( VmathVector3 *result, float _y )
+{   /* pure pass-through to vmathV3SetY; result is still taken by pointer here */
+    vmathV3SetY(result, _y);
+}
+
+static inline float vmathV3GetY_V( VmathVector3 vec )
+{   /* by-value form: returns vmathV3GetY applied to the address of the local copy */
+    return vmathV3GetY(&vec);
+}
+
+static inline void vmathV3SetZ_V( VmathVector3 *result, float _z )
+{   /* pure pass-through to vmathV3SetZ; result is still taken by pointer here */
+    vmathV3SetZ(result, _z);
+}
+
+static inline float vmathV3GetZ_V( VmathVector3 vec )
+{   /* by-value form: returns vmathV3GetZ applied to the address of the local copy */
+    return vmathV3GetZ(&vec);
+}
+
+static inline void vmathV3SetElem_V( VmathVector3 *result, int idx, float value )
+{   /* pure pass-through to vmathV3SetElem; idx is not checked at this level */
+    vmathV3SetElem(result, idx, value);
+}
+
+static inline float vmathV3GetElem_V( VmathVector3 vec, int idx )
+{   /* by-value form: forwards &vec and idx to vmathV3GetElem; idx is not checked at this level */
+    return vmathV3GetElem(&vec, idx);
+}
+
+static inline VmathVector3 vmathV3Add_V( VmathVector3 vec0, VmathVector3 vec1 )
+{   /* by-value form: operand copies go to vmathV3Add by address, result is returned by value */
+    VmathVector3 out;
+    vmathV3Add(&out, &vec0, &vec1);
+    return out;
+}
+
+static inline VmathVector3 vmathV3Sub_V( VmathVector3 vec0, VmathVector3 vec1 )
+{   /* by-value form: operand copies go to vmathV3Sub by address, result is returned by value */
+    VmathVector3 out;
+    vmathV3Sub(&out, &vec0, &vec1);
+    return out;
+}
+
+static inline VmathPoint3 vmathV3AddP3_V( VmathVector3 vec, VmathPoint3 pnt1 )
+{   /* by-value form: vec and pnt1 copies go to vmathV3AddP3 by address; the result is a point */
+    VmathPoint3 out;
+    vmathV3AddP3(&out, &vec, &pnt1);
+    return out;
+}
+
+static inline VmathVector3 vmathV3ScalarMul_V( VmathVector3 vec, float scalar )
+{   /* by-value form: &vec and scalar are forwarded to vmathV3ScalarMul */
+    VmathVector3 out;
+    vmathV3ScalarMul(&out, &vec, scalar);
+    return out;
+}
+
+static inline VmathVector3 vmathV3ScalarDiv_V( VmathVector3 vec, float scalar )
+{   /* by-value form: &vec and scalar are forwarded to vmathV3ScalarDiv; scalar is not checked here */
+    VmathVector3 out;
+    vmathV3ScalarDiv(&out, &vec, scalar);
+    return out;
+}
+
+static inline VmathVector3 vmathV3Neg_V( VmathVector3 vec )
+{   /* by-value form: the copy of vec goes to vmathV3Neg by address */
+    VmathVector3 out;
+    vmathV3Neg(&out, &vec);
+    return out;
+}
+
+static inline VmathVector3 vmathV3MulPerElem_V( VmathVector3 vec0, VmathVector3 vec1 )
+{   /* by-value form: operand copies go to vmathV3MulPerElem by address */
+    VmathVector3 out;
+    vmathV3MulPerElem(&out, &vec0, &vec1);
+    return out;
+}
+
+static inline VmathVector3 vmathV3DivPerElem_V( VmathVector3 vec0, VmathVector3 vec1 )
+{   /* by-value form: operand copies go to vmathV3DivPerElem by address */
+    VmathVector3 out;
+    vmathV3DivPerElem(&out, &vec0, &vec1);
+    return out;
+}
+
+static inline VmathVector3 vmathV3RecipPerElem_V( VmathVector3 vec )
+{   /* by-value form: the copy of vec goes to vmathV3RecipPerElem by address */
+    VmathVector3 out;
+    vmathV3RecipPerElem(&out, &vec);
+    return out;
+}
+
+static inline VmathVector3 vmathV3SqrtPerElem_V( VmathVector3 vec )
+{   /* by-value form: the copy of vec goes to vmathV3SqrtPerElem by address */
+    VmathVector3 out;
+    vmathV3SqrtPerElem(&out, &vec);
+    return out;
+}
+
+static inline VmathVector3 vmathV3RsqrtPerElem_V( VmathVector3 vec )
+{   /* by-value form: the copy of vec goes to vmathV3RsqrtPerElem by address */
+    VmathVector3 out;
+    vmathV3RsqrtPerElem(&out, &vec);
+    return out;
+}
+
+static inline VmathVector3 vmathV3AbsPerElem_V( VmathVector3 vec )
+{   /* by-value form: the copy of vec goes to vmathV3AbsPerElem by address */
+    VmathVector3 out;
+    vmathV3AbsPerElem(&out, &vec);
+    return out;
+}
+
+static inline VmathVector3 vmathV3CopySignPerElem_V( VmathVector3 vec0, VmathVector3 vec1 )
+{   /* by-value form: operand copies go to vmathV3CopySignPerElem by address, in the same order */
+    VmathVector3 out;
+    vmathV3CopySignPerElem(&out, &vec0, &vec1);
+    return out;
+}
+
+static inline VmathVector3 vmathV3MaxPerElem_V( VmathVector3 vec0, VmathVector3 vec1 )
+{   /* by-value form: operand copies go to vmathV3MaxPerElem by address */
+    VmathVector3 out;
+    vmathV3MaxPerElem(&out, &vec0, &vec1);
+    return out;
+}
+
+static inline float vmathV3MaxElem_V( VmathVector3 vec )
+{   /* by-value form: returns vmathV3MaxElem applied to the address of the local copy */
+    return vmathV3MaxElem(&vec);
+}
+
+static inline VmathVector3 vmathV3MinPerElem_V( VmathVector3 vec0, VmathVector3 vec1 )
+{   /* by-value form: operand copies go to vmathV3MinPerElem by address */
+    VmathVector3 out;
+    vmathV3MinPerElem(&out, &vec0, &vec1);
+    return out;
+}
+
+static inline float vmathV3MinElem_V( VmathVector3 vec )
+{   /* by-value form: returns vmathV3MinElem applied to the address of the local copy */
+    return vmathV3MinElem(&vec);
+}
+
+static inline float vmathV3Sum_V( VmathVector3 vec )
+{   /* by-value form: returns vmathV3Sum applied to the address of the local copy */
+    return vmathV3Sum(&vec);
+}
+
+static inline float vmathV3Dot_V( VmathVector3 vec0, VmathVector3 vec1 )
+{   /* by-value form: both operand copies go to vmathV3Dot by address */
+    return vmathV3Dot(&vec0, &vec1);
+}
+
+static inline float vmathV3LengthSqr_V( VmathVector3 vec )
+{   /* by-value form: returns vmathV3LengthSqr applied to the address of the local copy */
+    return vmathV3LengthSqr(&vec);
+}
+
+static inline float vmathV3Length_V( VmathVector3 vec )
+{   /* by-value form: returns vmathV3Length applied to the address of the local copy */
+    return vmathV3Length(&vec);
+}
+
+static inline VmathVector3 vmathV3Normalize_V( VmathVector3 vec )
+{   /* by-value form: the copy of vec goes to vmathV3Normalize by address */
+    VmathVector3 out;
+    vmathV3Normalize(&out, &vec);
+    return out;
+}
+
+static inline VmathVector3 vmathV3Cross_V( VmathVector3 vec0, VmathVector3 vec1 )
+{   /* by-value form: operand copies go to vmathV3Cross by address, in the same order */
+    VmathVector3 out;
+    vmathV3Cross(&out, &vec0, &vec1);
+    return out;
+}
+
+static inline VmathVector3 vmathV3Select_V( VmathVector3 vec0, VmathVector3 vec1, unsigned int select1 )
+{   /* by-value form: operand copies go in by address, select1 is forwarded to vmathV3Select */
+    VmathVector3 out;
+    vmathV3Select(&out, &vec0, &vec1, select1);
+    return out;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathV3Print_V( VmathVector3 vec )
+{   /* compiled only under _VECTORMATH_DEBUG; forwards the address of the copy to vmathV3Print */
+    vmathV3Print(&vec);
+}
+
+static inline void vmathV3Prints_V( VmathVector3 vec, const char *name )
+{   /* compiled only under _VECTORMATH_DEBUG; forwards &vec and name to vmathV3Prints */
+    vmathV3Prints(&vec, name);
+}
+
+#endif
+
+static inline VmathVector4 vmathV4MakeFromElems_V( float _x, float _y, float _z, float _w )
+{   /* by-value form: vmathV4MakeFromElems fills a local, which is then returned */
+    VmathVector4 out;
+    vmathV4MakeFromElems(&out, _x, _y, _z, _w);
+    return out;
+}
+
+static inline VmathVector4 vmathV4MakeFromV3Scalar_V( VmathVector3 xyz, float _w )
+{   /* by-value form: &xyz and _w are forwarded to vmathV4MakeFromV3Scalar */
+    VmathVector4 out;
+    vmathV4MakeFromV3Scalar(&out, &xyz, _w);
+    return out;
+}
+
+static inline VmathVector4 vmathV4MakeFromV3_V( VmathVector3 vec )
+{   /* by-value form: the copy of vec goes to vmathV4MakeFromV3 by address */
+    VmathVector4 out;
+    vmathV4MakeFromV3(&out, &vec);
+    return out;
+}
+
+static inline VmathVector4 vmathV4MakeFromP3_V( VmathPoint3 pnt )
+{   /* by-value form: the copy of pnt goes to vmathV4MakeFromP3 by address */
+    VmathVector4 out;
+    vmathV4MakeFromP3(&out, &pnt);
+    return out;
+}
+
+static inline VmathVector4 vmathV4MakeFromQ_V( VmathQuat quat )
+{   /* by-value form: the copy of quat goes to vmathV4MakeFromQ by address */
+    VmathVector4 out;
+    vmathV4MakeFromQ(&out, &quat);
+    return out;
+}
+
+static inline VmathVector4 vmathV4MakeFromScalar_V( float scalar )
+{   /* by-value form: vmathV4MakeFromScalar fills a local, which is then returned */
+    VmathVector4 out;
+    vmathV4MakeFromScalar(&out, scalar);
+    return out;
+}
+
+static inline VmathVector4 vmathV4MakeFrom128_V( vec_float4 vf4 )
+{   /* by-value form: vf4 is forwarded as-is to vmathV4MakeFrom128 */
+    VmathVector4 out;
+    vmathV4MakeFrom128(&out, vf4);
+    return out;
+}
+
+static inline VmathVector4 vmathV4MakeXAxis_V( void )
+{   /* returns by value what vmathV4MakeXAxis writes; (void) makes this a true prototype in C */
+    VmathVector4 result;
+    vmathV4MakeXAxis(&result);
+    return result;
+}
+
+static inline VmathVector4 vmathV4MakeYAxis_V( void )
+{   /* returns by value what vmathV4MakeYAxis writes; (void) makes this a true prototype in C */
+    VmathVector4 result;
+    vmathV4MakeYAxis(&result);
+    return result;
+}
+
+static inline VmathVector4 vmathV4MakeZAxis_V( void )
+{   /* returns by value what vmathV4MakeZAxis writes; (void) makes this a true prototype in C */
+    VmathVector4 result;
+    vmathV4MakeZAxis(&result);
+    return result;
+}
+
+static inline VmathVector4 vmathV4MakeWAxis_V( void )
+{   /* returns by value what vmathV4MakeWAxis writes; (void) makes this a true prototype in C */
+    VmathVector4 result;
+    vmathV4MakeWAxis(&result);
+    return result;
+}
+
+static inline VmathVector4 vmathV4Lerp_V( float t, VmathVector4 vec0, VmathVector4 vec1 )
+{   /* by-value form: t is forwarded, vec0/vec1 copies go to vmathV4Lerp by address */
+    VmathVector4 out;
+    vmathV4Lerp(&out, t, &vec0, &vec1);
+    return out;
+}
+
+static inline VmathVector4 vmathV4Slerp_V( float t, VmathVector4 unitVec0, VmathVector4 unitVec1 )
+{   /* by-value form: t is forwarded, both vector copies go to vmathV4Slerp by address */
+    VmathVector4 out;
+    vmathV4Slerp(&out, t, &unitVec0, &unitVec1);
+    return out;
+}
+
+static inline vec_float4 vmathV4Get128_V( VmathVector4 vec )
+{   /* by-value form: returns vmathV4Get128 applied to the address of the local copy */
+    return vmathV4Get128(&vec);
+}
+
+static inline void vmathV4StoreHalfFloats_V( VmathVector4 vec0, VmathVector4 vec1, VmathVector4 vec2, VmathVector4 vec3, vec_ushort8 *twoQuads )
+{   /* by-value form: the four vector copies go in by address, twoQuads is forwarded unchanged */
+    vmathV4StoreHalfFloats(&vec0, &vec1, &vec2, &vec3, twoQuads);
+}
+
+static inline void vmathV4SetXYZ_V( VmathVector4 *result, VmathVector3 vec )
+{   /* result pointer is forwarded unchanged; only vec is taken by value and passed by address */
+    vmathV4SetXYZ(result, &vec);
+}
+
+static inline VmathVector3 vmathV4GetXYZ_V( VmathVector4 vec )
+{   /* by-value form: vmathV4GetXYZ writes a 3-vector local, which is then returned */
+    VmathVector3 out;
+    vmathV4GetXYZ(&out, &vec);
+    return out;
+}
+
+static inline void vmathV4SetX_V( VmathVector4 *result, float _x )
+{   /* pure pass-through to vmathV4SetX; result is still taken by pointer here */
+    vmathV4SetX(result, _x);
+}
+
+static inline float vmathV4GetX_V( VmathVector4 vec )
+{   /* by-value form: returns vmathV4GetX applied to the address of the local copy */
+    return vmathV4GetX(&vec);
+}
+
+static inline void vmathV4SetY_V( VmathVector4 *result, float _y )
+{   /* pure pass-through to vmathV4SetY; result is still taken by pointer here */
+    vmathV4SetY(result, _y);
+}
+
+static inline float vmathV4GetY_V( VmathVector4 vec )
+{   /* by-value form: returns vmathV4GetY applied to the address of the local copy */
+    return vmathV4GetY(&vec);
+}
+
+static inline void vmathV4SetZ_V( VmathVector4 *result, float _z )
+{   /* pure pass-through to vmathV4SetZ; result is still taken by pointer here */
+    vmathV4SetZ(result, _z);
+}
+
+static inline float vmathV4GetZ_V( VmathVector4 vec )
+{   /* by-value form: returns vmathV4GetZ applied to the address of the local copy */
+    return vmathV4GetZ(&vec);
+}
+
+static inline void vmathV4SetW_V( VmathVector4 *result, float _w )
+{   /* pure pass-through to vmathV4SetW; result is still taken by pointer here */
+    vmathV4SetW(result, _w);
+}
+
+static inline float vmathV4GetW_V( VmathVector4 vec )
+{   /* by-value form: returns vmathV4GetW applied to the address of the local copy */
+    return vmathV4GetW(&vec);
+}
+
+static inline void vmathV4SetElem_V( VmathVector4 *result, int idx, float value )
+{   /* pure pass-through to vmathV4SetElem; idx is not checked at this level */
+    vmathV4SetElem(result, idx, value);
+}
+
+static inline float vmathV4GetElem_V( VmathVector4 vec, int idx )
+{   /* by-value form: forwards &vec and idx to vmathV4GetElem; idx is not checked at this level */
+    return vmathV4GetElem(&vec, idx);
+}
+
+static inline VmathVector4 vmathV4Add_V( VmathVector4 vec0, VmathVector4 vec1 )
+{   /* by-value form: operand copies go to vmathV4Add by address, result is returned by value */
+    VmathVector4 out;
+    vmathV4Add(&out, &vec0, &vec1);
+    return out;
+}
+
+static inline VmathVector4 vmathV4Sub_V( VmathVector4 vec0, VmathVector4 vec1 )
+{   /* by-value form: operand copies go to vmathV4Sub by address, result is returned by value */
+    VmathVector4 out;
+    vmathV4Sub(&out, &vec0, &vec1);
+    return out;
+}
+
+static inline VmathVector4 vmathV4ScalarMul_V( VmathVector4 vec, float scalar )
+{   /* by-value form: &vec and scalar are forwarded to vmathV4ScalarMul */
+    VmathVector4 out;
+    vmathV4ScalarMul(&out, &vec, scalar);
+    return out;
+}
+
+static inline VmathVector4 vmathV4ScalarDiv_V( VmathVector4 vec, float scalar )
+{   /* by-value form: &vec and scalar are forwarded to vmathV4ScalarDiv; scalar is not checked here */
+    VmathVector4 out;
+    vmathV4ScalarDiv(&out, &vec, scalar);
+    return out;
+}
+
+static inline VmathVector4 vmathV4Neg_V( VmathVector4 vec )
+{   /* by-value form: the copy of vec goes to vmathV4Neg by address */
+    VmathVector4 out;
+    vmathV4Neg(&out, &vec);
+    return out;
+}
+
+static inline VmathVector4 vmathV4MulPerElem_V( VmathVector4 vec0, VmathVector4 vec1 )
+{   /* by-value form: operand copies go to vmathV4MulPerElem by address */
+    VmathVector4 out;
+    vmathV4MulPerElem(&out, &vec0, &vec1);
+    return out;
+}
+
+static inline VmathVector4 vmathV4DivPerElem_V( VmathVector4 vec0, VmathVector4 vec1 )
+{   /* by-value form: operand copies go to vmathV4DivPerElem by address */
+    VmathVector4 out;
+    vmathV4DivPerElem(&out, &vec0, &vec1);
+    return out;
+}
+
+static inline VmathVector4 vmathV4RecipPerElem_V( VmathVector4 vec )
+{   /* by-value form: the copy of vec goes to vmathV4RecipPerElem by address */
+    VmathVector4 out;
+    vmathV4RecipPerElem(&out, &vec);
+    return out;
+}
+
+static inline VmathVector4 vmathV4SqrtPerElem_V( VmathVector4 vec )
+{   /* by-value form: the copy of vec goes to vmathV4SqrtPerElem by address */
+    VmathVector4 out;
+    vmathV4SqrtPerElem(&out, &vec);
+    return out;
+}
+
+static inline VmathVector4 vmathV4RsqrtPerElem_V( VmathVector4 vec )
+{   /* by-value form: the copy of vec goes to vmathV4RsqrtPerElem by address */
+    VmathVector4 out;
+    vmathV4RsqrtPerElem(&out, &vec);
+    return out;
+}
+
+static inline VmathVector4 vmathV4AbsPerElem_V( VmathVector4 vec )
+{   /* by-value form: the copy of vec goes to vmathV4AbsPerElem by address */
+    VmathVector4 out;
+    vmathV4AbsPerElem(&out, &vec);
+    return out;
+}
+
+static inline VmathVector4 vmathV4CopySignPerElem_V( VmathVector4 vec0, VmathVector4 vec1 )
+{   /* by-value form: operand copies go to vmathV4CopySignPerElem by address, in the same order */
+    VmathVector4 out;
+    vmathV4CopySignPerElem(&out, &vec0, &vec1);
+    return out;
+}
+
+static inline VmathVector4 vmathV4MaxPerElem_V( VmathVector4 vec0, VmathVector4 vec1 )
+{   /* by-value form: operand copies go to vmathV4MaxPerElem by address */
+    VmathVector4 out;
+    vmathV4MaxPerElem(&out, &vec0, &vec1);
+    return out;
+}
+
+static inline float vmathV4MaxElem_V( VmathVector4 vec )
+{   /* by-value form: returns vmathV4MaxElem applied to the address of the local copy */
+    return vmathV4MaxElem(&vec);
+}
+
+static inline VmathVector4 vmathV4MinPerElem_V( VmathVector4 vec0, VmathVector4 vec1 )
+{   /* by-value form: operand copies go to vmathV4MinPerElem by address */
+    VmathVector4 out;
+    vmathV4MinPerElem(&out, &vec0, &vec1);
+    return out;
+}
+
+static inline float vmathV4MinElem_V( VmathVector4 vec )
+{   /* by-value form: returns vmathV4MinElem applied to the address of the local copy */
+    return vmathV4MinElem(&vec);
+}
+
+static inline float vmathV4Sum_V( VmathVector4 vec )
+{   /* by-value form: returns vmathV4Sum applied to the address of the local copy */
+    return vmathV4Sum(&vec);
+}
+
+static inline float vmathV4Dot_V( VmathVector4 vec0, VmathVector4 vec1 )
+{   /* by-value form: both operand copies go to vmathV4Dot by address */
+    return vmathV4Dot(&vec0, &vec1);
+}
+
+static inline float vmathV4LengthSqr_V( VmathVector4 vec )
+{   /* by-value form: returns vmathV4LengthSqr applied to the address of the local copy */
+    return vmathV4LengthSqr(&vec);
+}
+
+static inline float vmathV4Length_V( VmathVector4 vec )
+{   /* by-value form: returns vmathV4Length applied to the address of the local copy */
+    return vmathV4Length(&vec);
+}
+
+static inline VmathVector4 vmathV4Normalize_V( VmathVector4 vec )
+{   /* by-value form: the copy of vec goes to vmathV4Normalize by address */
+    VmathVector4 out;
+    vmathV4Normalize(&out, &vec);
+    return out;
+}
+
+static inline VmathVector4 vmathV4Select_V( VmathVector4 vec0, VmathVector4 vec1, unsigned int select1 )
+{   /* by-value form: operand copies go in by address, select1 is forwarded to vmathV4Select */
+    VmathVector4 out;
+    vmathV4Select(&out, &vec0, &vec1, select1);
+    return out;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathV4Print_V( VmathVector4 vec )
+{   /* compiled only under _VECTORMATH_DEBUG; forwards the address of the copy to vmathV4Print */
+    vmathV4Print(&vec);
+}
+
+static inline void vmathV4Prints_V( VmathVector4 vec, const char *name )
+{   /* compiled only under _VECTORMATH_DEBUG; forwards &vec and name to vmathV4Prints */
+    vmathV4Prints(&vec, name);
+}
+
+#endif
+
+static inline VmathPoint3 vmathP3MakeFromElems_V( float _x, float _y, float _z )
+{   /* by-value form: vmathP3MakeFromElems fills a local, which is then returned */
+    VmathPoint3 out;
+    vmathP3MakeFromElems(&out, _x, _y, _z);
+    return out;
+}
+
+static inline VmathPoint3 vmathP3MakeFromV3_V( VmathVector3 vec )
+{   /* by-value form: the copy of vec goes to vmathP3MakeFromV3 by address */
+    VmathPoint3 out;
+    vmathP3MakeFromV3(&out, &vec);
+    return out;
+}
+
+static inline VmathPoint3 vmathP3MakeFromScalar_V( float scalar )
+{   /* by-value form: vmathP3MakeFromScalar fills a local, which is then returned */
+    VmathPoint3 out;
+    vmathP3MakeFromScalar(&out, scalar);
+    return out;
+}
+
+static inline VmathPoint3 vmathP3MakeFrom128_V( vec_float4 vf4 )
+{   /* by-value form: vf4 is forwarded as-is to vmathP3MakeFrom128 */
+    VmathPoint3 out;
+    vmathP3MakeFrom128(&out, vf4);
+    return out;
+}
+
+static inline VmathPoint3 vmathP3Lerp_V( float t, VmathPoint3 pnt0, VmathPoint3 pnt1 )
+{   /* by-value form: t is forwarded, pnt0/pnt1 copies go to vmathP3Lerp by address */
+    VmathPoint3 out;
+    vmathP3Lerp(&out, t, &pnt0, &pnt1);
+    return out;
+}
+
+static inline vec_float4 vmathP3Get128_V( VmathPoint3 pnt )
+{   /* by-value form: returns vmathP3Get128 applied to the address of the local copy */
+    return vmathP3Get128(&pnt);
+}
+
+static inline void vmathP3StoreXYZ_V( VmathPoint3 pnt, vec_float4 *quad )
+{   /* by-value form: pnt copy goes in by address, quad pointer is forwarded unchanged */
+    vmathP3StoreXYZ(&pnt, quad);
+}
+
+static inline void vmathP3LoadXYZArray_V( VmathPoint3 *pnt0, VmathPoint3 *pnt1, VmathPoint3 *pnt2, VmathPoint3 *pnt3, const vec_float4 *threeQuads )
+{   /* pure pass-through: every pointer argument is handed to vmathP3LoadXYZArray unchanged */
+    vmathP3LoadXYZArray(pnt0, pnt1, pnt2, pnt3, threeQuads);
+}
+
+static inline void vmathP3StoreXYZArray_V( VmathPoint3 pnt0, VmathPoint3 pnt1, VmathPoint3 pnt2, VmathPoint3 pnt3, vec_float4 *threeQuads )
+{   /* by-value form: the four point copies go in by address, threeQuads is forwarded unchanged */
+    vmathP3StoreXYZArray(&pnt0, &pnt1, &pnt2, &pnt3, threeQuads);
+}
+
+static inline void vmathP3StoreHalfFloats_V( VmathPoint3 pnt0, VmathPoint3 pnt1, VmathPoint3 pnt2, VmathPoint3 pnt3, VmathPoint3 pnt4, VmathPoint3 pnt5, VmathPoint3 pnt6, VmathPoint3 pnt7, vec_ushort8 *threeQuads )
+{   /* by-value form: the eight point copies go in by address, threeQuads is forwarded unchanged */
+    vmathP3StoreHalfFloats(&pnt0, &pnt1, &pnt2, &pnt3, &pnt4, &pnt5, &pnt6, &pnt7, threeQuads);
+}
+
+static inline void vmathP3SetX_V( VmathPoint3 *result, float _x )
+{   /* pure pass-through to vmathP3SetX; result is still taken by pointer here */
+    vmathP3SetX(result, _x);
+}
+
+static inline float vmathP3GetX_V( VmathPoint3 pnt )
+{   /* by-value form: returns vmathP3GetX applied to the address of the local copy */
+    return vmathP3GetX(&pnt);
+}
+
+static inline void vmathP3SetY_V( VmathPoint3 *result, float _y )
+{   /* pure pass-through to vmathP3SetY; result is still taken by pointer here */
+    vmathP3SetY(result, _y);
+}
+
+static inline float vmathP3GetY_V( VmathPoint3 pnt )
+{   /* by-value form: returns vmathP3GetY applied to the address of the local copy */
+    return vmathP3GetY(&pnt);
+}
+
+static inline void vmathP3SetZ_V( VmathPoint3 *result, float _z )
+{   /* pure pass-through to vmathP3SetZ; result is still taken by pointer here */
+    vmathP3SetZ(result, _z);
+}
+
+static inline float vmathP3GetZ_V( VmathPoint3 pnt )
+{   /* by-value form: returns vmathP3GetZ applied to the address of the local copy */
+    return vmathP3GetZ(&pnt);
+}
+
+static inline void vmathP3SetElem_V( VmathPoint3 *result, int idx, float value )
+{   /* pure pass-through to vmathP3SetElem; idx is not checked at this level */
+    vmathP3SetElem(result, idx, value);
+}
+
+static inline float vmathP3GetElem_V( VmathPoint3 pnt, int idx )
+{   /* By-value form of vmathP3GetElem: pnt is passed by address of the local copy; idx is forwarded unchanged (no range check at this level). */
+    return vmathP3GetElem(&pnt, idx);
+}
+
+static inline VmathVector3 vmathP3Sub_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
+{   /* Return-by-value form of vmathP3Sub: takes two points, hands back a VmathVector3. */
+    VmathVector3 outVec;                /* written by the call below */
+    vmathP3Sub( &outVec, &pnt0, &pnt1 );
+    return outVec;
+}
+
+static inline VmathPoint3 vmathP3AddV3_V( VmathPoint3 pnt, VmathVector3 vec1 )
+{   /* Return-by-value form of vmathP3AddV3: pnt and vec1 are local copies passed by address. */
+    VmathPoint3 outPnt;                 /* written by the call below */
+    vmathP3AddV3( &outPnt, &pnt, &vec1 );
+    return outPnt;
+}
+
+static inline VmathPoint3 vmathP3SubV3_V( VmathPoint3 pnt, VmathVector3 vec1 )
+{   /* Return-by-value form of vmathP3SubV3: pnt and vec1 are local copies passed by address. */
+    VmathPoint3 outPnt;                 /* written by the call below */
+    vmathP3SubV3( &outPnt, &pnt, &vec1 );
+    return outPnt;
+}
+
+static inline VmathPoint3 vmathP3MulPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
+{   /* Return-by-value form of vmathP3MulPerElem: pnt0/pnt1 are local copies passed by address. */
+    VmathPoint3 outPnt;                 /* written by the call below */
+    vmathP3MulPerElem( &outPnt, &pnt0, &pnt1 );
+    return outPnt;
+}
+
+static inline VmathPoint3 vmathP3DivPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
+{   /* Return-by-value form of vmathP3DivPerElem: pnt0/pnt1 are local copies passed by address. */
+    VmathPoint3 outPnt;                 /* written by the call below */
+    vmathP3DivPerElem( &outPnt, &pnt0, &pnt1 );
+    return outPnt;
+}
+
+static inline VmathPoint3 vmathP3RecipPerElem_V( VmathPoint3 pnt )
+{   /* Return-by-value form of vmathP3RecipPerElem: pnt is a local copy passed by address. */
+    VmathPoint3 outPnt;                 /* written by the call below */
+    vmathP3RecipPerElem( &outPnt, &pnt );
+    return outPnt;
+}
+
+static inline VmathPoint3 vmathP3SqrtPerElem_V( VmathPoint3 pnt )
+{   /* Return-by-value form of vmathP3SqrtPerElem: pnt is a local copy passed by address. */
+    VmathPoint3 outPnt;                 /* written by the call below */
+    vmathP3SqrtPerElem( &outPnt, &pnt );
+    return outPnt;
+}
+
+static inline VmathPoint3 vmathP3RsqrtPerElem_V( VmathPoint3 pnt )
+{   /* Return-by-value form of vmathP3RsqrtPerElem: pnt is a local copy passed by address. */
+    VmathPoint3 outPnt;                 /* written by the call below */
+    vmathP3RsqrtPerElem( &outPnt, &pnt );
+    return outPnt;
+}
+
+static inline VmathPoint3 vmathP3AbsPerElem_V( VmathPoint3 pnt )
+{   /* Return-by-value form of vmathP3AbsPerElem: pnt is a local copy passed by address. */
+    VmathPoint3 outPnt;                 /* written by the call below */
+    vmathP3AbsPerElem( &outPnt, &pnt );
+    return outPnt;
+}
+
+static inline VmathPoint3 vmathP3CopySignPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
+{   /* Return-by-value form of vmathP3CopySignPerElem: pnt0/pnt1 are local copies passed by address, in that order. */
+    VmathPoint3 outPnt;                 /* written by the call below */
+    vmathP3CopySignPerElem( &outPnt, &pnt0, &pnt1 );
+    return outPnt;
+}
+
+static inline VmathPoint3 vmathP3MaxPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
+{   /* Return-by-value form of vmathP3MaxPerElem: pnt0/pnt1 are local copies passed by address. */
+    VmathPoint3 outPnt;                 /* written by the call below */
+    vmathP3MaxPerElem( &outPnt, &pnt0, &pnt1 );
+    return outPnt;
+}
+
+static inline float vmathP3MaxElem_V( VmathPoint3 pnt )
+{   /* By-value form of vmathP3MaxElem: passes the address of the local copy and returns the callee's float result. */
+    return vmathP3MaxElem(&pnt);
+}
+
+static inline VmathPoint3 vmathP3MinPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
+{   /* Return-by-value form of vmathP3MinPerElem: pnt0/pnt1 are local copies passed by address. */
+    VmathPoint3 outPnt;                 /* written by the call below */
+    vmathP3MinPerElem( &outPnt, &pnt0, &pnt1 );
+    return outPnt;
+}
+
+static inline float vmathP3MinElem_V( VmathPoint3 pnt )
+{   /* By-value form of vmathP3MinElem: passes the address of the local copy and returns the callee's float result. */
+    return vmathP3MinElem(&pnt);
+}
+
+static inline float vmathP3Sum_V( VmathPoint3 pnt )
+{   /* By-value form of vmathP3Sum: passes the address of the local copy and returns the callee's float result. */
+    return vmathP3Sum(&pnt);
+}
+
+static inline VmathPoint3 vmathP3Scale_V( VmathPoint3 pnt, float scaleVal )
+{   /* Return-by-value form of vmathP3Scale: pnt is passed by address of the local copy, scaleVal as-is. */
+    VmathPoint3 outPnt;                 /* written by the call below */
+    vmathP3Scale( &outPnt, &pnt, scaleVal );
+    return outPnt;
+}
+
+static inline VmathPoint3 vmathP3NonUniformScale_V( VmathPoint3 pnt, VmathVector3 scaleVec )
+{   /* Return-by-value form of vmathP3NonUniformScale: pnt and scaleVec are local copies passed by address. */
+    VmathPoint3 outPnt;                 /* written by the call below */
+    vmathP3NonUniformScale( &outPnt, &pnt, &scaleVec );
+    return outPnt;
+}
+
+static inline float vmathP3Projection_V( VmathPoint3 pnt, VmathVector3 unitVec )
+{   /* By-value form of vmathP3Projection: both arguments are local copies passed by address; unitVec is not checked or normalized here. */
+    return vmathP3Projection(&pnt, &unitVec);
+}
+
+static inline float vmathP3DistSqrFromOrigin_V( VmathPoint3 pnt )
+{   /* By-value form of vmathP3DistSqrFromOrigin: passes the address of the local copy and returns the callee's float result. */
+    return vmathP3DistSqrFromOrigin(&pnt);
+}
+
+static inline float vmathP3DistFromOrigin_V( VmathPoint3 pnt )
+{   /* By-value form of vmathP3DistFromOrigin: passes the address of the local copy and returns the callee's float result. */
+    return vmathP3DistFromOrigin(&pnt);
+}
+
+static inline float vmathP3DistSqr_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
+{   /* By-value form of vmathP3DistSqr: pnt0/pnt1 are local copies passed by address; returns the callee's float result. */
+    return vmathP3DistSqr(&pnt0, &pnt1);
+}
+
+static inline float vmathP3Dist_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
+{   /* By-value form of vmathP3Dist: pnt0/pnt1 are local copies passed by address; returns the callee's float result. */
+    return vmathP3Dist(&pnt0, &pnt1);
+}
+
+static inline VmathPoint3 vmathP3Select_V( VmathPoint3 pnt0, VmathPoint3 pnt1, unsigned int select1 )
+{   /* Return-by-value form of vmathP3Select: pnt0/pnt1 are passed by address of the local copies, select1 as-is. */
+    VmathPoint3 outPnt;                 /* written by the call below */
+    vmathP3Select( &outPnt, &pnt0, &pnt1, select1 );
+    return outPnt;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathP3Print_V( VmathPoint3 pnt )
+{   /* By-value form of vmathP3Print; only compiled when _VECTORMATH_DEBUG is defined. */
+    vmathP3Print(&pnt);
+}
+
+static inline void vmathP3Prints_V( VmathPoint3 pnt, const char *name )
+{   /* By-value form of vmathP3Prints; name is forwarded unchanged. Only compiled when _VECTORMATH_DEBUG is defined. */
+    vmathP3Prints(&pnt, name);
+}
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/ppu/c/vec_soa.h b/Extras/vectormathlibrary/include/vectormath/ppu/c/vec_soa.h
index 1cda25747..6433666e6 100644
--- a/Extras/vectormathlibrary/include/vectormath/ppu/c/vec_soa.h
+++ b/Extras/vectormathlibrary/include/vectormath/ppu/c/vec_soa.h
@@ -1,1223 +1,1223 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_VEC_SOA_C_H
-#define _VECTORMATH_VEC_SOA_C_H
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/*-----------------------------------------------------------------------------
- * Constants
- * for permutes, words are labeled [x,y,z,w] [a,b,c,d]
- */
-#define _VECTORMATH_PERM_X 0x00010203
-#define _VECTORMATH_PERM_Y 0x04050607
-#define _VECTORMATH_PERM_Z 0x08090a0b
-#define _VECTORMATH_PERM_W 0x0c0d0e0f
-#define _VECTORMATH_PERM_A 0x10111213
-#define _VECTORMATH_PERM_B 0x14151617
-#define _VECTORMATH_PERM_C 0x18191a1b
-#define _VECTORMATH_PERM_D 0x1c1d1e1f
-#define _VECTORMATH_PERM_ZBWX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B, _VECTORMATH_PERM_W, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PERM_XCYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_C, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PERM_ZDWX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_D, _VECTORMATH_PERM_W, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PERM_ZCXA ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_A })
-#define _VECTORMATH_PERM_XBZD ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_B, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_D })
-#define _VECTORMATH_PERM_WDYB ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_W, _VECTORMATH_PERM_D, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_B })
-#define _VECTORMATH_PERM_ZBXD ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B, _VECTORMATH_PERM_X, _VECTORMATH_PERM_D })
-#define _VECTORMATH_PERM_WCYA ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_W, _VECTORMATH_PERM_C, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A })
-#define _VECTORMATH_PERM_XDZB ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_D, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B })
-#define _VECTORMATH_SLERP_TOL 0.999f
-
-/*-----------------------------------------------------------------------------
- * Definitions
- */
-#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
-#define _VECTORMATH_INTERNAL_FUNCTIONS
-
-#endif
-
-static inline void vmathSoaV3Copy( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
-{
-    result->x = vec->x;
-    result->y = vec->y;
-    result->z = vec->z;
-}
-
-static inline void vmathSoaV3MakeFromElems( VmathSoaVector3 *result, vec_float4 _x, vec_float4 _y, vec_float4 _z )
-{
-    result->x = _x;
-    result->y = _y;
-    result->z = _z;
-}
-
-static inline void vmathSoaV3MakeFromP3( VmathSoaVector3 *result, const VmathSoaPoint3 *pnt )
-{
-    result->x = pnt->x;
-    result->y = pnt->y;
-    result->z = pnt->z;
-}
-
-static inline void vmathSoaV3MakeFromScalar( VmathSoaVector3 *result, vec_float4 scalar )
-{
-    result->x = scalar;
-    result->y = scalar;
-    result->z = scalar;
-}
-
-static inline void vmathSoaV3MakeFromAos( VmathSoaVector3 *result, const VmathVector3 *vec )
-{
-    vec_float4 vec128 = vec->vec128;
-    result->x = vec_splat( vec128, 0 );
-    result->y = vec_splat( vec128, 1 );
-    result->z = vec_splat( vec128, 2 );
-}
-
-static inline void vmathSoaV3MakeFrom4Aos( VmathSoaVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1, const VmathVector3 *vec2, const VmathVector3 *vec3 )
-{
-    vec_float4 tmp0, tmp1, tmp2, tmp3;
-    tmp0 = vec_mergeh( vec0->vec128, vec2->vec128 );
-    tmp1 = vec_mergeh( vec1->vec128, vec3->vec128 );
-    tmp2 = vec_mergel( vec0->vec128, vec2->vec128 );
-    tmp3 = vec_mergel( vec1->vec128, vec3->vec128 );
-    result->x = vec_mergeh( tmp0, tmp1 );
-    result->y = vec_mergel( tmp0, tmp1 );
-    result->z = vec_mergeh( tmp2, tmp3 );
-}
-
-static inline void vmathSoaV3MakeXAxis( VmathSoaVector3 *result )
-{
-    vmathSoaV3MakeFromElems( result, ((vec_float4){1.0f,1.0f,1.0f,1.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-static inline void vmathSoaV3MakeYAxis( VmathSoaVector3 *result )
-{
-    vmathSoaV3MakeFromElems( result, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){1.0f,1.0f,1.0f,1.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-static inline void vmathSoaV3MakeZAxis( VmathSoaVector3 *result )
-{
-    vmathSoaV3MakeFromElems( result, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){1.0f,1.0f,1.0f,1.0f}) );
-}
-
-static inline void vmathSoaV3Lerp( VmathSoaVector3 *result, vec_float4 t, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
-{
-    VmathSoaVector3 tmpV3_0, tmpV3_1;
-    vmathSoaV3Sub( &tmpV3_0, vec1, vec0 );
-    vmathSoaV3ScalarMul( &tmpV3_1, &tmpV3_0, t );
-    vmathSoaV3Add( result, vec0, &tmpV3_1 );
-}
-
-static inline void vmathSoaV3Slerp( VmathSoaVector3 *result, vec_float4 t, const VmathSoaVector3 *unitVec0, const VmathSoaVector3 *unitVec1 )
-{
-    VmathSoaVector3 tmpV3_0, tmpV3_1;
-    vec_float4 recipSinAngle, scale0, scale1, cosAngle, angle;
-    vec_uint4 selectMask;
-    cosAngle = vmathSoaV3Dot( unitVec0, unitVec1 );
-    selectMask = (vec_uint4)vec_cmpgt( (vec_float4){_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL}, cosAngle );
-    angle = acosf4( cosAngle );
-    recipSinAngle = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sinf4( angle ) );
-    scale0 = vec_sel( vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), t ), vec_madd( sinf4( vec_madd( vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), t ), angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), recipSinAngle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), selectMask );
-    scale1 = vec_sel( t, vec_madd( sinf4( vec_madd( t, angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), recipSinAngle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), selectMask );
-    vmathSoaV3ScalarMul( &tmpV3_0, unitVec0, scale0 );
-    vmathSoaV3ScalarMul( &tmpV3_1, unitVec1, scale1 );
-    vmathSoaV3Add( result, &tmpV3_0, &tmpV3_1 );
-}
-
-static inline void vmathSoaV3Get4Aos( const VmathSoaVector3 *vec, VmathVector3 *result0, VmathVector3 *result1, VmathVector3 *result2, VmathVector3 *result3 )
-{
-    vec_float4 tmp0, tmp1;
-    tmp0 = vec_mergeh( vec->x, vec->z );
-    tmp1 = vec_mergel( vec->x, vec->z );
-    vmathV3MakeFrom128( result0, vec_mergeh( tmp0, vec->y ) );
-    vmathV3MakeFrom128( result1, vec_perm( tmp0, vec->y, _VECTORMATH_PERM_ZBWX ) );
-    vmathV3MakeFrom128( result2, vec_perm( tmp1, vec->y, _VECTORMATH_PERM_XCYX ) );
-    vmathV3MakeFrom128( result3, vec_perm( tmp1, vec->y, _VECTORMATH_PERM_ZDWX ) );
-}
-
-static inline void vmathSoaV3LoadXYZArray( VmathSoaVector3 *vec, const vec_float4 *threeQuads )
-{
-    vec_float4 xyxy, yzyz, zxzx, xyzx, yzxy, zxyz;
-    xyzx = threeQuads[0];
-    yzxy = threeQuads[1];
-    zxyz = threeQuads[2];
-    xyxy = vec_sld( yzxy, xyzx, 8 );
-    zxzx = vec_sld( xyzx, zxyz, 8 );
-    yzyz = vec_sld( zxyz, yzxy, 8 );
-    vmathSoaV3SetX( vec, vec_perm( xyxy, zxzx, _VECTORMATH_PERM_ZBXD ) );
-    vmathSoaV3SetY( vec, vec_perm( xyxy, yzyz, _VECTORMATH_PERM_WCYA ) );
-    vmathSoaV3SetZ( vec, vec_perm( zxzx, yzyz, _VECTORMATH_PERM_XDZB ) );
-}
-
-static inline void vmathSoaV3StoreXYZArray( const VmathSoaVector3 *vec, vec_float4 *threeQuads )
-{
-    vec_float4 xyzx, yzxy, zxyz, xyxy, zxzx, yzyz;
-    xyxy = vec_perm( vec->x, vec->y, _VECTORMATH_PERM_ZCXA );
-    zxzx = vec_perm( vec->z, vec->x, _VECTORMATH_PERM_XBZD );
-    yzyz = vec_perm( vec->y, vec->z, _VECTORMATH_PERM_WDYB );
-    xyzx = vec_sld( xyxy, zxzx, 8 );
-    yzxy = vec_sld( yzyz, xyxy, 8 );
-    zxyz = vec_sld( zxzx, yzyz, 8 );
-    threeQuads[0] = xyzx;
-    threeQuads[1] = yzxy;
-    threeQuads[2] = zxyz;
-}
-
-static inline void vmathSoaV3StoreHalfFloats( const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1, vec_ushort8 *threeQuads )
-{
-    vec_float4 xyz0[3];
-    vec_float4 xyz1[3];
-    vmathSoaV3StoreXYZArray( vec0, xyz0 );
-    vmathSoaV3StoreXYZArray( vec1, xyz1 );
-    threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
-    threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
-    threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
-}
-
-static inline void vmathSoaV3SetX( VmathSoaVector3 *result, vec_float4 _x )
-{
-    result->x = _x;
-}
-
-static inline vec_float4 vmathSoaV3GetX( const VmathSoaVector3 *vec )
-{
-    return vec->x;
-}
-
-static inline void vmathSoaV3SetY( VmathSoaVector3 *result, vec_float4 _y )
-{
-    result->y = _y;
-}
-
-static inline vec_float4 vmathSoaV3GetY( const VmathSoaVector3 *vec )
-{
-    return vec->y;
-}
-
-static inline void vmathSoaV3SetZ( VmathSoaVector3 *result, vec_float4 _z )
-{
-    result->z = _z;
-}
-
-static inline vec_float4 vmathSoaV3GetZ( const VmathSoaVector3 *vec )
-{
-    return vec->z;
-}
-
-static inline void vmathSoaV3SetElem( VmathSoaVector3 *result, int idx, vec_float4 value )
-{
-    *(&result->x + idx) = value;
-}
-
-static inline vec_float4 vmathSoaV3GetElem( const VmathSoaVector3 *vec, int idx )
-{
-    return *(&vec->x + idx);
-}
-
-static inline void vmathSoaV3Add( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
-{
-    result->x = vec_add( vec0->x, vec1->x );
-    result->y = vec_add( vec0->y, vec1->y );
-    result->z = vec_add( vec0->z, vec1->z );
-}
-
-static inline void vmathSoaV3Sub( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
-{
-    result->x = vec_sub( vec0->x, vec1->x );
-    result->y = vec_sub( vec0->y, vec1->y );
-    result->z = vec_sub( vec0->z, vec1->z );
-}
-
-static inline void vmathSoaV3AddP3( VmathSoaPoint3 *result, const VmathSoaVector3 *vec, const VmathSoaPoint3 *pnt1 )
-{
-    result->x = vec_add( vec->x, pnt1->x );
-    result->y = vec_add( vec->y, pnt1->y );
-    result->z = vec_add( vec->z, pnt1->z );
-}
-
-static inline void vmathSoaV3ScalarMul( VmathSoaVector3 *result, const VmathSoaVector3 *vec, vec_float4 scalar )
-{
-    result->x = vec_madd( vec->x, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result->y = vec_madd( vec->y, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result->z = vec_madd( vec->z, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-static inline void vmathSoaV3ScalarDiv( VmathSoaVector3 *result, const VmathSoaVector3 *vec, vec_float4 scalar )
-{
-    result->x = divf4( vec->x, scalar );
-    result->y = divf4( vec->y, scalar );
-    result->z = divf4( vec->z, scalar );
-}
-
-static inline void vmathSoaV3Neg( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
-{
-    result->x = negatef4( vec->x );
-    result->y = negatef4( vec->y );
-    result->z = negatef4( vec->z );
-}
-
-static inline void vmathSoaV3MulPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
-{
-    result->x = vec_madd( vec0->x, vec1->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result->y = vec_madd( vec0->y, vec1->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result->z = vec_madd( vec0->z, vec1->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-static inline void vmathSoaV3DivPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
-{
-    result->x = divf4( vec0->x, vec1->x );
-    result->y = divf4( vec0->y, vec1->y );
-    result->z = divf4( vec0->z, vec1->z );
-}
-
-static inline void vmathSoaV3RecipPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
-{
-    result->x = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec->x );
-    result->y = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec->y );
-    result->z = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec->z );
-}
-
-static inline void vmathSoaV3SqrtPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
-{
-    result->x = sqrtf4( vec->x );
-    result->y = sqrtf4( vec->y );
-    result->z = sqrtf4( vec->z );
-}
-
-static inline void vmathSoaV3RsqrtPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
-{
-    result->x = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( vec->x ) );
-    result->y = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( vec->y ) );
-    result->z = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( vec->z ) );
-}
-
-static inline void vmathSoaV3AbsPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
-{
-    result->x = fabsf4( vec->x );
-    result->y = fabsf4( vec->y );
-    result->z = fabsf4( vec->z );
-}
-
-static inline void vmathSoaV3CopySignPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
-{
-    result->x = copysignf4( vec0->x, vec1->x );
-    result->y = copysignf4( vec0->y, vec1->y );
-    result->z = copysignf4( vec0->z, vec1->z );
-}
-
-static inline void vmathSoaV3MaxPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
-{
-    result->x = fmaxf4( vec0->x, vec1->x );
-    result->y = fmaxf4( vec0->y, vec1->y );
-    result->z = fmaxf4( vec0->z, vec1->z );
-}
-
-static inline vec_float4 vmathSoaV3MaxElem( const VmathSoaVector3 *vec )
-{
-    vec_float4 result;
-    result = fmaxf4( vec->x, vec->y );
-    result = fmaxf4( vec->z, result );
-    return result;
-}
-
-static inline void vmathSoaV3MinPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
-{
-    result->x = fminf4( vec0->x, vec1->x );
-    result->y = fminf4( vec0->y, vec1->y );
-    result->z = fminf4( vec0->z, vec1->z );
-}
-
-static inline vec_float4 vmathSoaV3MinElem( const VmathSoaVector3 *vec )
-{
-    vec_float4 result;
-    result = fminf4( vec->x, vec->y );
-    result = fminf4( vec->z, result );
-    return result;
-}
-
-static inline vec_float4 vmathSoaV3Sum( const VmathSoaVector3 *vec )
-{
-    vec_float4 result;
-    result = vec_add( vec->x, vec->y );
-    result = vec_add( result, vec->z );
-    return result;
-}
-
-static inline vec_float4 vmathSoaV3Dot( const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
-{
-    vec_float4 result;
-    result = vec_madd( vec0->x, vec1->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result = vec_add( result, vec_madd( vec0->y, vec1->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    result = vec_add( result, vec_madd( vec0->z, vec1->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    return result;
-}
-
-static inline vec_float4 vmathSoaV3LengthSqr( const VmathSoaVector3 *vec )
-{
-    vec_float4 result;
-    result = vec_madd( vec->x, vec->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result = vec_add( result, vec_madd( vec->y, vec->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    result = vec_add( result, vec_madd( vec->z, vec->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    return result;
-}
-
-static inline vec_float4 vmathSoaV3Length( const VmathSoaVector3 *vec )
-{
-    return sqrtf4( vmathSoaV3LengthSqr( vec ) );
-}
-
-static inline void vmathSoaV3Normalize( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
-{
-    vec_float4 lenSqr, lenInv;
-    lenSqr = vmathSoaV3LengthSqr( vec );
-    lenInv = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( lenSqr ) );
-    result->x = vec_madd( vec->x, lenInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result->y = vec_madd( vec->y, lenInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result->z = vec_madd( vec->z, lenInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-static inline void vmathSoaV3Cross( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
-{
-    vec_float4 tmpX, tmpY, tmpZ;
-    tmpX = vec_sub( vec_madd( vec0->y, vec1->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( vec0->z, vec1->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmpY = vec_sub( vec_madd( vec0->z, vec1->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( vec0->x, vec1->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmpZ = vec_sub( vec_madd( vec0->x, vec1->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( vec0->y, vec1->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    vmathSoaV3MakeFromElems( result, tmpX, tmpY, tmpZ );
-}
-
-static inline void vmathSoaV3Select( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1, vec_uint4 select1 )
-{
-    result->x = vec_sel( vec0->x, vec1->x, select1 );
-    result->y = vec_sel( vec0->y, vec1->y, select1 );
-    result->z = vec_sel( vec0->z, vec1->z, select1 );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathSoaV3Print( const VmathSoaVector3 *vec )
-{
-    VmathVector3 vec0, vec1, vec2, vec3;
-    vmathSoaV3Get4Aos( vec, &vec0, &vec1, &vec2, &vec3 );
-    printf("slot 0:\n");
-    vmathV3Print( &vec0 );
-    printf("slot 1:\n");
-    vmathV3Print( &vec1 );
-    printf("slot 2:\n");
-    vmathV3Print( &vec2 );
-    printf("slot 3:\n");
-    vmathV3Print( &vec3 );
-}
-
-static inline void vmathSoaV3Prints( const VmathSoaVector3 *vec, const char *name )
-{
-    VmathVector3 vec0, vec1, vec2, vec3;
-    printf( "%s:\n", name );
-    vmathSoaV3Get4Aos( vec, &vec0, &vec1, &vec2, &vec3 );
-    printf("slot 0:\n");
-    vmathV3Print( &vec0 );
-    printf("slot 1:\n");
-    vmathV3Print( &vec1 );
-    printf("slot 2:\n");
-    vmathV3Print( &vec2 );
-    printf("slot 3:\n");
-    vmathV3Print( &vec3 );
-}
-
-#endif
-
-static inline void vmathSoaV4Copy( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
-{
-    result->x = vec->x;
-    result->y = vec->y;
-    result->z = vec->z;
-    result->w = vec->w;
-}
-
-static inline void vmathSoaV4MakeFromElems( VmathSoaVector4 *result, vec_float4 _x, vec_float4 _y, vec_float4 _z, vec_float4 _w )
-{
-    result->x = _x;
-    result->y = _y;
-    result->z = _z;
-    result->w = _w;
-}
-
-static inline void vmathSoaV4MakeFromV3Scalar( VmathSoaVector4 *result, const VmathSoaVector3 *xyz, vec_float4 _w )
-{
-    vmathSoaV4SetXYZ( result, xyz );
-    vmathSoaV4SetW( result, _w );
-}
-
-static inline void vmathSoaV4MakeFromV3( VmathSoaVector4 *result, const VmathSoaVector3 *vec )
-{
-    result->x = vec->x;
-    result->y = vec->y;
-    result->z = vec->z;
-    result->w = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-}
-
-static inline void vmathSoaV4MakeFromP3( VmathSoaVector4 *result, const VmathSoaPoint3 *pnt )
-{
-    result->x = pnt->x;
-    result->y = pnt->y;
-    result->z = pnt->z;
-    result->w = ((vec_float4){1.0f,1.0f,1.0f,1.0f});
-}
-
-static inline void vmathSoaV4MakeFromQ( VmathSoaVector4 *result, const VmathSoaQuat *quat )
-{
-    result->x = quat->x;
-    result->y = quat->y;
-    result->z = quat->z;
-    result->w = quat->w;
-}
-
-static inline void vmathSoaV4MakeFromScalar( VmathSoaVector4 *result, vec_float4 scalar )
-{
-    result->x = scalar;
-    result->y = scalar;
-    result->z = scalar;
-    result->w = scalar;
-}
-
-static inline void vmathSoaV4MakeFromAos( VmathSoaVector4 *result, const VmathVector4 *vec )
-{
-    vec_float4 vec128 = vec->vec128;
-    result->x = vec_splat( vec128, 0 );
-    result->y = vec_splat( vec128, 1 );
-    result->z = vec_splat( vec128, 2 );
-    result->w = vec_splat( vec128, 3 );
-}
-
-static inline void vmathSoaV4MakeFrom4Aos( VmathSoaVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1, const VmathVector4 *vec2, const VmathVector4 *vec3 )
-{
-    vec_float4 tmp0, tmp1, tmp2, tmp3;
-    tmp0 = vec_mergeh( vec0->vec128, vec2->vec128 );
-    tmp1 = vec_mergeh( vec1->vec128, vec3->vec128 );
-    tmp2 = vec_mergel( vec0->vec128, vec2->vec128 );
-    tmp3 = vec_mergel( vec1->vec128, vec3->vec128 );
-    result->x = vec_mergeh( tmp0, tmp1 );
-    result->y = vec_mergel( tmp0, tmp1 );
-    result->z = vec_mergeh( tmp2, tmp3 );
-    result->w = vec_mergel( tmp2, tmp3 );
-}
-
-static inline void vmathSoaV4MakeXAxis( VmathSoaVector4 *result )
-{
-    vmathSoaV4MakeFromElems( result, ((vec_float4){1.0f,1.0f,1.0f,1.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-static inline void vmathSoaV4MakeYAxis( VmathSoaVector4 *result )
-{
-    vmathSoaV4MakeFromElems( result, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){1.0f,1.0f,1.0f,1.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-static inline void vmathSoaV4MakeZAxis( VmathSoaVector4 *result )
-{
-    vmathSoaV4MakeFromElems( result, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){1.0f,1.0f,1.0f,1.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-static inline void vmathSoaV4MakeWAxis( VmathSoaVector4 *result )
-{
-    vmathSoaV4MakeFromElems( result, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){1.0f,1.0f,1.0f,1.0f}) );
-}
-
-static inline void vmathSoaV4Lerp( VmathSoaVector4 *result, vec_float4 t, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
-{
-    VmathSoaVector4 tmpV4_0, tmpV4_1;
-    vmathSoaV4Sub( &tmpV4_0, vec1, vec0 );
-    vmathSoaV4ScalarMul( &tmpV4_1, &tmpV4_0, t );
-    vmathSoaV4Add( result, vec0, &tmpV4_1 );
-}
-
-static inline void vmathSoaV4Slerp( VmathSoaVector4 *result, vec_float4 t, const VmathSoaVector4 *unitVec0, const VmathSoaVector4 *unitVec1 )
-{
-    VmathSoaVector4 tmpV4_0, tmpV4_1;
-    vec_float4 recipSinAngle, scale0, scale1, cosAngle, angle;
-    vec_uint4 selectMask;
-    cosAngle = vmathSoaV4Dot( unitVec0, unitVec1 );
-    selectMask = (vec_uint4)vec_cmpgt( (vec_float4){_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL}, cosAngle );
-    angle = acosf4( cosAngle );
-    recipSinAngle = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sinf4( angle ) );
-    scale0 = vec_sel( vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), t ), vec_madd( sinf4( vec_madd( vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), t ), angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), recipSinAngle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), selectMask );
-    scale1 = vec_sel( t, vec_madd( sinf4( vec_madd( t, angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), recipSinAngle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), selectMask );
-    vmathSoaV4ScalarMul( &tmpV4_0, unitVec0, scale0 );
-    vmathSoaV4ScalarMul( &tmpV4_1, unitVec1, scale1 );
-    vmathSoaV4Add( result, &tmpV4_0, &tmpV4_1 );
-}
-
-static inline void vmathSoaV4Get4Aos( const VmathSoaVector4 *vec, VmathVector4 *result0, VmathVector4 *result1, VmathVector4 *result2, VmathVector4 *result3 )
-{
-    vec_float4 tmp0, tmp1, tmp2, tmp3;
-    tmp0 = vec_mergeh( vec->x, vec->z );
-    tmp1 = vec_mergeh( vec->y, vec->w );
-    tmp2 = vec_mergel( vec->x, vec->z );
-    tmp3 = vec_mergel( vec->y, vec->w );
-    vmathV4MakeFrom128( result0, vec_mergeh( tmp0, tmp1 ) );
-    vmathV4MakeFrom128( result1, vec_mergel( tmp0, tmp1 ) );
-    vmathV4MakeFrom128( result2, vec_mergeh( tmp2, tmp3 ) );
-    vmathV4MakeFrom128( result3, vec_mergel( tmp2, tmp3 ) );
-}
-
-static inline void vmathSoaV4StoreHalfFloats( const VmathSoaVector4 *vec, vec_ushort8 *twoQuads )
-{
-    VmathVector4 v0, v1, v2, v3;
-    vmathSoaV4Get4Aos( vec, &v0, &v1, &v2, &v3 );
-    twoQuads[0] = _vmath2VfToHalfFloats(v0.vec128, v1.vec128);
-    twoQuads[1] = _vmath2VfToHalfFloats(v2.vec128, v3.vec128);
-}
-
-static inline void vmathSoaV4SetXYZ( VmathSoaVector4 *result, const VmathSoaVector3 *vec )
-{
-    result->x = vec->x;
-    result->y = vec->y;
-    result->z = vec->z;
-}
-
-static inline void vmathSoaV4GetXYZ( VmathSoaVector3 *result, const VmathSoaVector4 *vec )
-{
-    vmathSoaV3MakeFromElems( result, vec->x, vec->y, vec->z );
-}
-
-static inline void vmathSoaV4SetX( VmathSoaVector4 *result, vec_float4 _x )
-{
-    result->x = _x;
-}
-
-static inline vec_float4 vmathSoaV4GetX( const VmathSoaVector4 *vec )
-{
-    return vec->x;
-}
-
-static inline void vmathSoaV4SetY( VmathSoaVector4 *result, vec_float4 _y )
-{
-    result->y = _y;
-}
-
-static inline vec_float4 vmathSoaV4GetY( const VmathSoaVector4 *vec )
-{
-    return vec->y;
-}
-
-static inline void vmathSoaV4SetZ( VmathSoaVector4 *result, vec_float4 _z )
-{
-    result->z = _z;
-}
-
-static inline vec_float4 vmathSoaV4GetZ( const VmathSoaVector4 *vec )
-{
-    return vec->z;
-}
-
-static inline void vmathSoaV4SetW( VmathSoaVector4 *result, vec_float4 _w )
-{
-    result->w = _w;
-}
-
-static inline vec_float4 vmathSoaV4GetW( const VmathSoaVector4 *vec )
-{
-    return vec->w;
-}
-
-static inline void vmathSoaV4SetElem( VmathSoaVector4 *result, int idx, vec_float4 value )
-{
-    *(&result->x + idx) = value;
-}
-
-static inline vec_float4 vmathSoaV4GetElem( const VmathSoaVector4 *vec, int idx )
-{
-    return *(&vec->x + idx);
-}
-
-static inline void vmathSoaV4Add( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
-{
-    result->x = vec_add( vec0->x, vec1->x );
-    result->y = vec_add( vec0->y, vec1->y );
-    result->z = vec_add( vec0->z, vec1->z );
-    result->w = vec_add( vec0->w, vec1->w );
-}
-
-static inline void vmathSoaV4Sub( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
-{
-    result->x = vec_sub( vec0->x, vec1->x );
-    result->y = vec_sub( vec0->y, vec1->y );
-    result->z = vec_sub( vec0->z, vec1->z );
-    result->w = vec_sub( vec0->w, vec1->w );
-}
-
-static inline void vmathSoaV4ScalarMul( VmathSoaVector4 *result, const VmathSoaVector4 *vec, vec_float4 scalar )
-{
-    result->x = vec_madd( vec->x, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result->y = vec_madd( vec->y, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result->z = vec_madd( vec->z, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result->w = vec_madd( vec->w, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-static inline void vmathSoaV4ScalarDiv( VmathSoaVector4 *result, const VmathSoaVector4 *vec, vec_float4 scalar )
-{
-    result->x = divf4( vec->x, scalar );
-    result->y = divf4( vec->y, scalar );
-    result->z = divf4( vec->z, scalar );
-    result->w = divf4( vec->w, scalar );
-}
-
-static inline void vmathSoaV4Neg( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
-{
-    result->x = negatef4( vec->x );
-    result->y = negatef4( vec->y );
-    result->z = negatef4( vec->z );
-    result->w = negatef4( vec->w );
-}
-
-static inline void vmathSoaV4MulPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
-{
-    result->x = vec_madd( vec0->x, vec1->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result->y = vec_madd( vec0->y, vec1->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result->z = vec_madd( vec0->z, vec1->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result->w = vec_madd( vec0->w, vec1->w, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-static inline void vmathSoaV4DivPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
-{
-    result->x = divf4( vec0->x, vec1->x );
-    result->y = divf4( vec0->y, vec1->y );
-    result->z = divf4( vec0->z, vec1->z );
-    result->w = divf4( vec0->w, vec1->w );
-}
-
-static inline void vmathSoaV4RecipPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
-{
-    result->x = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec->x );
-    result->y = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec->y );
-    result->z = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec->z );
-    result->w = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec->w );
-}
-
-static inline void vmathSoaV4SqrtPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
-{
-    result->x = sqrtf4( vec->x );
-    result->y = sqrtf4( vec->y );
-    result->z = sqrtf4( vec->z );
-    result->w = sqrtf4( vec->w );
-}
-
-static inline void vmathSoaV4RsqrtPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
-{
-    result->x = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( vec->x ) );
-    result->y = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( vec->y ) );
-    result->z = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( vec->z ) );
-    result->w = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( vec->w ) );
-}
-
-static inline void vmathSoaV4AbsPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
-{
-    result->x = fabsf4( vec->x );
-    result->y = fabsf4( vec->y );
-    result->z = fabsf4( vec->z );
-    result->w = fabsf4( vec->w );
-}
-
-static inline void vmathSoaV4CopySignPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
-{
-    result->x = copysignf4( vec0->x, vec1->x );
-    result->y = copysignf4( vec0->y, vec1->y );
-    result->z = copysignf4( vec0->z, vec1->z );
-    result->w = copysignf4( vec0->w, vec1->w );
-}
-
-static inline void vmathSoaV4MaxPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
-{
-    result->x = fmaxf4( vec0->x, vec1->x );
-    result->y = fmaxf4( vec0->y, vec1->y );
-    result->z = fmaxf4( vec0->z, vec1->z );
-    result->w = fmaxf4( vec0->w, vec1->w );
-}
-
-static inline vec_float4 vmathSoaV4MaxElem( const VmathSoaVector4 *vec )
-{
-    vec_float4 result;
-    result = fmaxf4( vec->x, vec->y );
-    result = fmaxf4( vec->z, result );
-    result = fmaxf4( vec->w, result );
-    return result;
-}
-
-static inline void vmathSoaV4MinPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
-{
-    result->x = fminf4( vec0->x, vec1->x );
-    result->y = fminf4( vec0->y, vec1->y );
-    result->z = fminf4( vec0->z, vec1->z );
-    result->w = fminf4( vec0->w, vec1->w );
-}
-
-static inline vec_float4 vmathSoaV4MinElem( const VmathSoaVector4 *vec )
-{
-    vec_float4 result;
-    result = fminf4( vec->x, vec->y );
-    result = fminf4( vec->z, result );
-    result = fminf4( vec->w, result );
-    return result;
-}
-
-static inline vec_float4 vmathSoaV4Sum( const VmathSoaVector4 *vec )
-{
-    vec_float4 result;
-    result = vec_add( vec->x, vec->y );
-    result = vec_add( result, vec->z );
-    result = vec_add( result, vec->w );
-    return result;
-}
-
-static inline vec_float4 vmathSoaV4Dot( const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
-{
-    vec_float4 result;
-    result = vec_madd( vec0->x, vec1->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result = vec_add( result, vec_madd( vec0->y, vec1->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    result = vec_add( result, vec_madd( vec0->z, vec1->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    result = vec_add( result, vec_madd( vec0->w, vec1->w, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    return result;
-}
-
-static inline vec_float4 vmathSoaV4LengthSqr( const VmathSoaVector4 *vec )
-{
-    vec_float4 result;
-    result = vec_madd( vec->x, vec->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result = vec_add( result, vec_madd( vec->y, vec->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    result = vec_add( result, vec_madd( vec->z, vec->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    result = vec_add( result, vec_madd( vec->w, vec->w, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    return result;
-}
-
-static inline vec_float4 vmathSoaV4Length( const VmathSoaVector4 *vec )
-{
-    return sqrtf4( vmathSoaV4LengthSqr( vec ) );
-}
-
-static inline void vmathSoaV4Normalize( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
-{
-    vec_float4 lenSqr, lenInv;
-    lenSqr = vmathSoaV4LengthSqr( vec );
-    lenInv = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( lenSqr ) );
-    result->x = vec_madd( vec->x, lenInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result->y = vec_madd( vec->y, lenInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result->z = vec_madd( vec->z, lenInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result->w = vec_madd( vec->w, lenInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-static inline void vmathSoaV4Select( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1, vec_uint4 select1 )
-{
-    result->x = vec_sel( vec0->x, vec1->x, select1 );
-    result->y = vec_sel( vec0->y, vec1->y, select1 );
-    result->z = vec_sel( vec0->z, vec1->z, select1 );
-    result->w = vec_sel( vec0->w, vec1->w, select1 );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathSoaV4Print( const VmathSoaVector4 *vec )
-{
-    VmathVector4 vec0, vec1, vec2, vec3;
-    vmathSoaV4Get4Aos( vec, &vec0, &vec1, &vec2, &vec3 );
-    printf("slot 0:\n");
-    vmathV4Print( &vec0 );
-    printf("slot 1:\n");
-    vmathV4Print( &vec1 );
-    printf("slot 2:\n");
-    vmathV4Print( &vec2 );
-    printf("slot 3:\n");
-    vmathV4Print( &vec3 );
-}
-
-static inline void vmathSoaV4Prints( const VmathSoaVector4 *vec, const char *name )
-{
-    VmathVector4 vec0, vec1, vec2, vec3;
-    printf( "%s:\n", name );
-    vmathSoaV4Get4Aos( vec, &vec0, &vec1, &vec2, &vec3 );
-    printf("slot 0:\n");
-    vmathV4Print( &vec0 );
-    printf("slot 1:\n");
-    vmathV4Print( &vec1 );
-    printf("slot 2:\n");
-    vmathV4Print( &vec2 );
-    printf("slot 3:\n");
-    vmathV4Print( &vec3 );
-}
-
-#endif
-
-static inline void vmathSoaP3Copy( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt )
-{
-    result->x = pnt->x;
-    result->y = pnt->y;
-    result->z = pnt->z;
-}
-
-static inline void vmathSoaP3MakeFromElems( VmathSoaPoint3 *result, vec_float4 _x, vec_float4 _y, vec_float4 _z )
-{
-    result->x = _x;
-    result->y = _y;
-    result->z = _z;
-}
-
-static inline void vmathSoaP3MakeFromV3( VmathSoaPoint3 *result, const VmathSoaVector3 *vec )
-{
-    result->x = vec->x;
-    result->y = vec->y;
-    result->z = vec->z;
-}
-
-static inline void vmathSoaP3MakeFromScalar( VmathSoaPoint3 *result, vec_float4 scalar )
-{
-    result->x = scalar;
-    result->y = scalar;
-    result->z = scalar;
-}
-
-static inline void vmathSoaP3MakeFromAos( VmathSoaPoint3 *result, const VmathPoint3 *pnt )
-{
-    vec_float4 vec128 = pnt->vec128;
-    result->x = vec_splat( vec128, 0 );
-    result->y = vec_splat( vec128, 1 );
-    result->z = vec_splat( vec128, 2 );
-}
-
-static inline void vmathSoaP3MakeFrom4Aos( VmathSoaPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, const VmathPoint3 *pnt2, const VmathPoint3 *pnt3 )
-{
-    vec_float4 tmp0, tmp1, tmp2, tmp3;
-    tmp0 = vec_mergeh( pnt0->vec128, pnt2->vec128 );
-    tmp1 = vec_mergeh( pnt1->vec128, pnt3->vec128 );
-    tmp2 = vec_mergel( pnt0->vec128, pnt2->vec128 );
-    tmp3 = vec_mergel( pnt1->vec128, pnt3->vec128 );
-    result->x = vec_mergeh( tmp0, tmp1 );
-    result->y = vec_mergel( tmp0, tmp1 );
-    result->z = vec_mergeh( tmp2, tmp3 );
-}
-
-static inline void vmathSoaP3Lerp( VmathSoaPoint3 *result, vec_float4 t, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
-{
-    VmathSoaVector3 tmpV3_0, tmpV3_1;
-    vmathSoaP3Sub( &tmpV3_0, pnt1, pnt0 );
-    vmathSoaV3ScalarMul( &tmpV3_1, &tmpV3_0, t );
-    vmathSoaP3AddV3( result, pnt0, &tmpV3_1 );
-}
-
-static inline void vmathSoaP3Get4Aos( const VmathSoaPoint3 *pnt, VmathPoint3 *result0, VmathPoint3 *result1, VmathPoint3 *result2, VmathPoint3 *result3 )
-{
-    vec_float4 tmp0, tmp1;
-    tmp0 = vec_mergeh( pnt->x, pnt->z );
-    tmp1 = vec_mergel( pnt->x, pnt->z );
-    vmathP3MakeFrom128( result0, vec_mergeh( tmp0, pnt->y ) );
-    vmathP3MakeFrom128( result1, vec_perm( tmp0, pnt->y, _VECTORMATH_PERM_ZBWX ) );
-    vmathP3MakeFrom128( result2, vec_perm( tmp1, pnt->y, _VECTORMATH_PERM_XCYX ) );
-    vmathP3MakeFrom128( result3, vec_perm( tmp1, pnt->y, _VECTORMATH_PERM_ZDWX ) );
-}
-
-static inline void vmathSoaP3LoadXYZArray( VmathSoaPoint3 *vec, const vec_float4 *threeQuads )
-{
-    vec_float4 xyxy, yzyz, zxzx, xyzx, yzxy, zxyz;
-    xyzx = threeQuads[0];
-    yzxy = threeQuads[1];
-    zxyz = threeQuads[2];
-    xyxy = vec_sld( yzxy, xyzx, 8 );
-    zxzx = vec_sld( xyzx, zxyz, 8 );
-    yzyz = vec_sld( zxyz, yzxy, 8 );
-    vmathSoaP3SetX( vec, vec_perm( xyxy, zxzx, _VECTORMATH_PERM_ZBXD ) );
-    vmathSoaP3SetY( vec, vec_perm( xyxy, yzyz, _VECTORMATH_PERM_WCYA ) );
-    vmathSoaP3SetZ( vec, vec_perm( zxzx, yzyz, _VECTORMATH_PERM_XDZB ) );
-}
-
-static inline void vmathSoaP3StoreXYZArray( const VmathSoaPoint3 *vec, vec_float4 *threeQuads )
-{
-    vec_float4 xyzx, yzxy, zxyz, xyxy, zxzx, yzyz;
-    xyxy = vec_perm( vec->x, vec->y, _VECTORMATH_PERM_ZCXA );
-    zxzx = vec_perm( vec->z, vec->x, _VECTORMATH_PERM_XBZD );
-    yzyz = vec_perm( vec->y, vec->z, _VECTORMATH_PERM_WDYB );
-    xyzx = vec_sld( xyxy, zxzx, 8 );
-    yzxy = vec_sld( yzyz, xyxy, 8 );
-    zxyz = vec_sld( zxzx, yzyz, 8 );
-    threeQuads[0] = xyzx;
-    threeQuads[1] = yzxy;
-    threeQuads[2] = zxyz;
-}
-
-static inline void vmathSoaP3StoreHalfFloats( const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1, vec_ushort8 *threeQuads )
-{
-    vec_float4 xyz0[3];
-    vec_float4 xyz1[3];
-    vmathSoaP3StoreXYZArray( pnt0, xyz0 );
-    vmathSoaP3StoreXYZArray( pnt1, xyz1 );
-    threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
-    threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
-    threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
-}
-
-static inline void vmathSoaP3SetX( VmathSoaPoint3 *result, vec_float4 _x )
-{
-    result->x = _x;
-}
-
-static inline vec_float4 vmathSoaP3GetX( const VmathSoaPoint3 *pnt )
-{
-    return pnt->x;
-}
-
-static inline void vmathSoaP3SetY( VmathSoaPoint3 *result, vec_float4 _y )
-{
-    result->y = _y;
-}
-
-static inline vec_float4 vmathSoaP3GetY( const VmathSoaPoint3 *pnt )
-{
-    return pnt->y;
-}
-
-static inline void vmathSoaP3SetZ( VmathSoaPoint3 *result, vec_float4 _z )
-{
-    result->z = _z;
-}
-
-static inline vec_float4 vmathSoaP3GetZ( const VmathSoaPoint3 *pnt )
-{
-    return pnt->z;
-}
-
-static inline void vmathSoaP3SetElem( VmathSoaPoint3 *result, int idx, vec_float4 value )
-{
-    *(&result->x + idx) = value;
-}
-
-static inline vec_float4 vmathSoaP3GetElem( const VmathSoaPoint3 *pnt, int idx )
-{
-    return *(&pnt->x + idx);
-}
-
-static inline void vmathSoaP3Sub( VmathSoaVector3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
-{
-    result->x = vec_sub( pnt0->x, pnt1->x );
-    result->y = vec_sub( pnt0->y, pnt1->y );
-    result->z = vec_sub( pnt0->z, pnt1->z );
-}
-
-static inline void vmathSoaP3AddV3( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, const VmathSoaVector3 *vec1 )
-{
-    result->x = vec_add( pnt->x, vec1->x );
-    result->y = vec_add( pnt->y, vec1->y );
-    result->z = vec_add( pnt->z, vec1->z );
-}
-
-static inline void vmathSoaP3SubV3( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, const VmathSoaVector3 *vec1 )
-{
-    result->x = vec_sub( pnt->x, vec1->x );
-    result->y = vec_sub( pnt->y, vec1->y );
-    result->z = vec_sub( pnt->z, vec1->z );
-}
-
-static inline void vmathSoaP3MulPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
-{
-    result->x = vec_madd( pnt0->x, pnt1->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result->y = vec_madd( pnt0->y, pnt1->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result->z = vec_madd( pnt0->z, pnt1->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-static inline void vmathSoaP3DivPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
-{
-    result->x = divf4( pnt0->x, pnt1->x );
-    result->y = divf4( pnt0->y, pnt1->y );
-    result->z = divf4( pnt0->z, pnt1->z );
-}
-
-static inline void vmathSoaP3RecipPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt )
-{
-    result->x = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), pnt->x );
-    result->y = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), pnt->y );
-    result->z = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), pnt->z );
-}
-
-static inline void vmathSoaP3SqrtPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt )
-{
-    result->x = sqrtf4( pnt->x );
-    result->y = sqrtf4( pnt->y );
-    result->z = sqrtf4( pnt->z );
-}
-
-static inline void vmathSoaP3RsqrtPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt )
-{
-    result->x = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( pnt->x ) );
-    result->y = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( pnt->y ) );
-    result->z = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( pnt->z ) );
-}
-
-static inline void vmathSoaP3AbsPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt )
-{
-    result->x = fabsf4( pnt->x );
-    result->y = fabsf4( pnt->y );
-    result->z = fabsf4( pnt->z );
-}
-
-static inline void vmathSoaP3CopySignPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
-{
-    result->x = copysignf4( pnt0->x, pnt1->x );
-    result->y = copysignf4( pnt0->y, pnt1->y );
-    result->z = copysignf4( pnt0->z, pnt1->z );
-}
-
-static inline void vmathSoaP3MaxPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
-{
-    result->x = fmaxf4( pnt0->x, pnt1->x );
-    result->y = fmaxf4( pnt0->y, pnt1->y );
-    result->z = fmaxf4( pnt0->z, pnt1->z );
-}
-
-static inline vec_float4 vmathSoaP3MaxElem( const VmathSoaPoint3 *pnt )
-{
-    vec_float4 result;
-    result = fmaxf4( pnt->x, pnt->y );
-    result = fmaxf4( pnt->z, result );
-    return result;
-}
-
-static inline void vmathSoaP3MinPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
-{
-    result->x = fminf4( pnt0->x, pnt1->x );
-    result->y = fminf4( pnt0->y, pnt1->y );
-    result->z = fminf4( pnt0->z, pnt1->z );
-}
-
-static inline vec_float4 vmathSoaP3MinElem( const VmathSoaPoint3 *pnt )
-{
-    vec_float4 result;
-    result = fminf4( pnt->x, pnt->y );
-    result = fminf4( pnt->z, result );
-    return result;
-}
-
-static inline vec_float4 vmathSoaP3Sum( const VmathSoaPoint3 *pnt )
-{
-    vec_float4 result;
-    result = vec_add( pnt->x, pnt->y );
-    result = vec_add( result, pnt->z );
-    return result;
-}
-
-static inline void vmathSoaP3Scale( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, vec_float4 scaleVal )
-{
-    VmathSoaPoint3 tmpP3_0;
-    vmathSoaP3MakeFromScalar( &tmpP3_0, scaleVal );
-    vmathSoaP3MulPerElem( result, pnt, &tmpP3_0 );
-}
-
-static inline void vmathSoaP3NonUniformScale( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, const VmathSoaVector3 *scaleVec )
-{
-    VmathSoaPoint3 tmpP3_0;
-    vmathSoaP3MakeFromV3( &tmpP3_0, scaleVec );
-    vmathSoaP3MulPerElem( result, pnt, &tmpP3_0 );
-}
-
-static inline vec_float4 vmathSoaP3Projection( const VmathSoaPoint3 *pnt, const VmathSoaVector3 *unitVec )
-{
-    vec_float4 result;
-    result = vec_madd( pnt->x, unitVec->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result = vec_add( result, vec_madd( pnt->y, unitVec->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    result = vec_add( result, vec_madd( pnt->z, unitVec->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    return result;
-}
-
-static inline vec_float4 vmathSoaP3DistSqrFromOrigin( const VmathSoaPoint3 *pnt )
-{
-    VmathSoaVector3 tmpV3_0;
-    vmathSoaV3MakeFromP3( &tmpV3_0, pnt );
-    return vmathSoaV3LengthSqr( &tmpV3_0 );
-}
-
-static inline vec_float4 vmathSoaP3DistFromOrigin( const VmathSoaPoint3 *pnt )
-{
-    VmathSoaVector3 tmpV3_0;
-    vmathSoaV3MakeFromP3( &tmpV3_0, pnt );
-    return vmathSoaV3Length( &tmpV3_0 );
-}
-
-static inline vec_float4 vmathSoaP3DistSqr( const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
-{
-    VmathSoaVector3 tmpV3_0;
-    vmathSoaP3Sub( &tmpV3_0, pnt1, pnt0 );
-    return vmathSoaV3LengthSqr( &tmpV3_0 );
-}
-
-static inline vec_float4 vmathSoaP3Dist( const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
-{
-    VmathSoaVector3 tmpV3_0;
-    vmathSoaP3Sub( &tmpV3_0, pnt1, pnt0 );
-    return vmathSoaV3Length( &tmpV3_0 );
-}
-
-static inline void vmathSoaP3Select( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1, vec_uint4 select1 )
-{
-    result->x = vec_sel( pnt0->x, pnt1->x, select1 );
-    result->y = vec_sel( pnt0->y, pnt1->y, select1 );
-    result->z = vec_sel( pnt0->z, pnt1->z, select1 );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathSoaP3Print( const VmathSoaPoint3 *pnt )
-{
-    VmathPoint3 vec0, vec1, vec2, vec3;
-    vmathSoaP3Get4Aos( pnt, &vec0, &vec1, &vec2, &vec3 );
-    printf("slot 0:\n");
-    vmathP3Print( &vec0 );
-    printf("slot 1:\n");
-    vmathP3Print( &vec1 );
-    printf("slot 2:\n");
-    vmathP3Print( &vec2 );
-    printf("slot 3:\n");
-    vmathP3Print( &vec3 );
-}
-
-static inline void vmathSoaP3Prints( const VmathSoaPoint3 *pnt, const char *name )
-{
-    VmathPoint3 vec0, vec1, vec2, vec3;
-    printf( "%s:\n", name );
-    vmathSoaP3Get4Aos( pnt, &vec0, &vec1, &vec2, &vec3 );
-    printf("slot 0:\n");
-    vmathP3Print( &vec0 );
-    printf("slot 1:\n");
-    vmathP3Print( &vec1 );
-    printf("slot 2:\n");
-    vmathP3Print( &vec2 );
-    printf("slot 3:\n");
-    vmathP3Print( &vec3 );
-}
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_VEC_SOA_C_H
+#define _VECTORMATH_VEC_SOA_C_H
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*-----------------------------------------------------------------------------
+ * Constants
+ * for permutes, words are labeled [x,y,z,w] [a,b,c,d]
+ */
+#define _VECTORMATH_PERM_X 0x00010203
+#define _VECTORMATH_PERM_Y 0x04050607
+#define _VECTORMATH_PERM_Z 0x08090a0b
+#define _VECTORMATH_PERM_W 0x0c0d0e0f
+#define _VECTORMATH_PERM_A 0x10111213
+#define _VECTORMATH_PERM_B 0x14151617
+#define _VECTORMATH_PERM_C 0x18191a1b
+#define _VECTORMATH_PERM_D 0x1c1d1e1f
+#define _VECTORMATH_PERM_ZBWX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B, _VECTORMATH_PERM_W, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_XCYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_C, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_ZDWX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_D, _VECTORMATH_PERM_W, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_ZCXA ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_A })
+#define _VECTORMATH_PERM_XBZD ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_B, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_D })
+#define _VECTORMATH_PERM_WDYB ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_W, _VECTORMATH_PERM_D, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_B })
+#define _VECTORMATH_PERM_ZBXD ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B, _VECTORMATH_PERM_X, _VECTORMATH_PERM_D })
+#define _VECTORMATH_PERM_WCYA ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_W, _VECTORMATH_PERM_C, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A })
+#define _VECTORMATH_PERM_XDZB ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_D, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B })
+#define _VECTORMATH_SLERP_TOL 0.999f
+
+/*-----------------------------------------------------------------------------
+ * Definitions
+ */
+#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
+#define _VECTORMATH_INTERNAL_FUNCTIONS
+
+#endif
+
+static inline void vmathSoaV3Copy( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
+{
+    result->x = vec->x;
+    result->y = vec->y;
+    result->z = vec->z;
+}
+
+static inline void vmathSoaV3MakeFromElems( VmathSoaVector3 *result, vec_float4 _x, vec_float4 _y, vec_float4 _z )
+{
+    result->x = _x;
+    result->y = _y;
+    result->z = _z;
+}
+
+static inline void vmathSoaV3MakeFromP3( VmathSoaVector3 *result, const VmathSoaPoint3 *pnt )
+{
+    result->x = pnt->x;
+    result->y = pnt->y;
+    result->z = pnt->z;
+}
+
+static inline void vmathSoaV3MakeFromScalar( VmathSoaVector3 *result, vec_float4 scalar )
+{
+    result->x = scalar;
+    result->y = scalar;
+    result->z = scalar;
+}
+
+static inline void vmathSoaV3MakeFromAos( VmathSoaVector3 *result, const VmathVector3 *vec )
+{
+    vec_float4 vec128 = vec->vec128;
+    result->x = vec_splat( vec128, 0 );
+    result->y = vec_splat( vec128, 1 );
+    result->z = vec_splat( vec128, 2 );
+}
+
+static inline void vmathSoaV3MakeFrom4Aos( VmathSoaVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1, const VmathVector3 *vec2, const VmathVector3 *vec3 )
+{
+    vec_float4 tmp0, tmp1, tmp2, tmp3;
+    tmp0 = vec_mergeh( vec0->vec128, vec2->vec128 );
+    tmp1 = vec_mergeh( vec1->vec128, vec3->vec128 );
+    tmp2 = vec_mergel( vec0->vec128, vec2->vec128 );
+    tmp3 = vec_mergel( vec1->vec128, vec3->vec128 );
+    result->x = vec_mergeh( tmp0, tmp1 );
+    result->y = vec_mergel( tmp0, tmp1 );
+    result->z = vec_mergeh( tmp2, tmp3 );
+}
+
+static inline void vmathSoaV3MakeXAxis( VmathSoaVector3 *result )
+{
+    vmathSoaV3MakeFromElems( result, ((vec_float4){1.0f,1.0f,1.0f,1.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+}
+
+static inline void vmathSoaV3MakeYAxis( VmathSoaVector3 *result )
+{
+    vmathSoaV3MakeFromElems( result, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){1.0f,1.0f,1.0f,1.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+}
+
+static inline void vmathSoaV3MakeZAxis( VmathSoaVector3 *result )
+{
+    vmathSoaV3MakeFromElems( result, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){1.0f,1.0f,1.0f,1.0f}) );
+}
+
+static inline void vmathSoaV3Lerp( VmathSoaVector3 *result, vec_float4 t, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
+{
+    VmathSoaVector3 tmpV3_0, tmpV3_1;
+    vmathSoaV3Sub( &tmpV3_0, vec1, vec0 );
+    vmathSoaV3ScalarMul( &tmpV3_1, &tmpV3_0, t );
+    vmathSoaV3Add( result, vec0, &tmpV3_1 );
+}
+
+static inline void vmathSoaV3Slerp( VmathSoaVector3 *result, vec_float4 t, const VmathSoaVector3 *unitVec0, const VmathSoaVector3 *unitVec1 )
+{
+    VmathSoaVector3 tmpV3_0, tmpV3_1;
+    vec_float4 recipSinAngle, scale0, scale1, cosAngle, angle;
+    vec_uint4 selectMask;
+    cosAngle = vmathSoaV3Dot( unitVec0, unitVec1 );
+    selectMask = (vec_uint4)vec_cmpgt( (vec_float4){_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL}, cosAngle );
+    angle = acosf4( cosAngle );
+    recipSinAngle = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sinf4( angle ) );
+    scale0 = vec_sel( vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), t ), vec_madd( sinf4( vec_madd( vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), t ), angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), recipSinAngle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), selectMask );
+    scale1 = vec_sel( t, vec_madd( sinf4( vec_madd( t, angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), recipSinAngle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), selectMask );
+    vmathSoaV3ScalarMul( &tmpV3_0, unitVec0, scale0 );
+    vmathSoaV3ScalarMul( &tmpV3_1, unitVec1, scale1 );
+    vmathSoaV3Add( result, &tmpV3_0, &tmpV3_1 );
+}
+
+static inline void vmathSoaV3Get4Aos( const VmathSoaVector3 *vec, VmathVector3 *result0, VmathVector3 *result1, VmathVector3 *result2, VmathVector3 *result3 )
+{
+    vec_float4 tmp0, tmp1;
+    tmp0 = vec_mergeh( vec->x, vec->z );
+    tmp1 = vec_mergel( vec->x, vec->z );
+    vmathV3MakeFrom128( result0, vec_mergeh( tmp0, vec->y ) );
+    vmathV3MakeFrom128( result1, vec_perm( tmp0, vec->y, _VECTORMATH_PERM_ZBWX ) );
+    vmathV3MakeFrom128( result2, vec_perm( tmp1, vec->y, _VECTORMATH_PERM_XCYX ) );
+    vmathV3MakeFrom128( result3, vec_perm( tmp1, vec->y, _VECTORMATH_PERM_ZDWX ) );
+}
+
+static inline void vmathSoaV3LoadXYZArray( VmathSoaVector3 *vec, const vec_float4 *threeQuads )
+{
+    vec_float4 xyxy, yzyz, zxzx, xyzx, yzxy, zxyz;
+    xyzx = threeQuads[0];
+    yzxy = threeQuads[1];
+    zxyz = threeQuads[2];
+    xyxy = vec_sld( yzxy, xyzx, 8 );
+    zxzx = vec_sld( xyzx, zxyz, 8 );
+    yzyz = vec_sld( zxyz, yzxy, 8 );
+    vmathSoaV3SetX( vec, vec_perm( xyxy, zxzx, _VECTORMATH_PERM_ZBXD ) );
+    vmathSoaV3SetY( vec, vec_perm( xyxy, yzyz, _VECTORMATH_PERM_WCYA ) );
+    vmathSoaV3SetZ( vec, vec_perm( zxzx, yzyz, _VECTORMATH_PERM_XDZB ) );
+}
+
+static inline void vmathSoaV3StoreXYZArray( const VmathSoaVector3 *vec, vec_float4 *threeQuads )
+{
+    vec_float4 xyzx, yzxy, zxyz, xyxy, zxzx, yzyz;
+    xyxy = vec_perm( vec->x, vec->y, _VECTORMATH_PERM_ZCXA );
+    zxzx = vec_perm( vec->z, vec->x, _VECTORMATH_PERM_XBZD );
+    yzyz = vec_perm( vec->y, vec->z, _VECTORMATH_PERM_WDYB );
+    xyzx = vec_sld( xyxy, zxzx, 8 );
+    yzxy = vec_sld( yzyz, xyxy, 8 );
+    zxyz = vec_sld( zxzx, yzyz, 8 );
+    threeQuads[0] = xyzx;
+    threeQuads[1] = yzxy;
+    threeQuads[2] = zxyz;
+}
+
+static inline void vmathSoaV3StoreHalfFloats( const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1, vec_ushort8 *threeQuads )
+{
+    vec_float4 xyz0[3];
+    vec_float4 xyz1[3];
+    vmathSoaV3StoreXYZArray( vec0, xyz0 );
+    vmathSoaV3StoreXYZArray( vec1, xyz1 );
+    threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
+    threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
+    threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
+}
+
+static inline void vmathSoaV3SetX( VmathSoaVector3 *result, vec_float4 _x )
+{
+    result->x = _x;
+}
+
+static inline vec_float4 vmathSoaV3GetX( const VmathSoaVector3 *vec )
+{
+    return vec->x;
+}
+
+static inline void vmathSoaV3SetY( VmathSoaVector3 *result, vec_float4 _y )
+{
+    result->y = _y;
+}
+
+static inline vec_float4 vmathSoaV3GetY( const VmathSoaVector3 *vec )
+{
+    return vec->y;
+}
+
+static inline void vmathSoaV3SetZ( VmathSoaVector3 *result, vec_float4 _z )
+{
+    result->z = _z;
+}
+
+static inline vec_float4 vmathSoaV3GetZ( const VmathSoaVector3 *vec )
+{
+    return vec->z;
+}
+
+static inline void vmathSoaV3SetElem( VmathSoaVector3 *result, int idx, vec_float4 value )
+{
+    *(&result->x + idx) = value;
+}
+
+static inline vec_float4 vmathSoaV3GetElem( const VmathSoaVector3 *vec, int idx )
+{
+    return *(&vec->x + idx);
+}
+
+static inline void vmathSoaV3Add( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
+{
+    result->x = vec_add( vec0->x, vec1->x );
+    result->y = vec_add( vec0->y, vec1->y );
+    result->z = vec_add( vec0->z, vec1->z );
+}
+
+static inline void vmathSoaV3Sub( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
+{
+    result->x = vec_sub( vec0->x, vec1->x );
+    result->y = vec_sub( vec0->y, vec1->y );
+    result->z = vec_sub( vec0->z, vec1->z );
+}
+
+static inline void vmathSoaV3AddP3( VmathSoaPoint3 *result, const VmathSoaVector3 *vec, const VmathSoaPoint3 *pnt1 )
+{
+    result->x = vec_add( vec->x, pnt1->x );
+    result->y = vec_add( vec->y, pnt1->y );
+    result->z = vec_add( vec->z, pnt1->z );
+}
+
+static inline void vmathSoaV3ScalarMul( VmathSoaVector3 *result, const VmathSoaVector3 *vec, vec_float4 scalar )
+{
+    result->x = vec_madd( vec->x, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    result->y = vec_madd( vec->y, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    result->z = vec_madd( vec->z, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+}
+
+static inline void vmathSoaV3ScalarDiv( VmathSoaVector3 *result, const VmathSoaVector3 *vec, vec_float4 scalar )
+{
+    result->x = divf4( vec->x, scalar );
+    result->y = divf4( vec->y, scalar );
+    result->z = divf4( vec->z, scalar );
+}
+
+static inline void vmathSoaV3Neg( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
+{
+    result->x = negatef4( vec->x );
+    result->y = negatef4( vec->y );
+    result->z = negatef4( vec->z );
+}
+
+static inline void vmathSoaV3MulPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
+{
+    result->x = vec_madd( vec0->x, vec1->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    result->y = vec_madd( vec0->y, vec1->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    result->z = vec_madd( vec0->z, vec1->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+}
+
+static inline void vmathSoaV3DivPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
+{
+    result->x = divf4( vec0->x, vec1->x );
+    result->y = divf4( vec0->y, vec1->y );
+    result->z = divf4( vec0->z, vec1->z );
+}
+
+static inline void vmathSoaV3RecipPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
+{   /* Component-wise reciprocal, computed as divf4( 1.0f, component ). */
+    result->x = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec->x );
+    result->y = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec->y );
+    result->z = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec->z );
+}
+
+static inline void vmathSoaV3SqrtPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
+{   /* Stores sqrtf4 of each component of vec into the matching component of result. */
+    result->x = sqrtf4( vec->x );
+    result->y = sqrtf4( vec->y );
+    result->z = sqrtf4( vec->z );
+}
+
+static inline void vmathSoaV3RsqrtPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
+{   /* Component-wise reciprocal square root, computed as divf4( 1.0f, sqrtf4( component ) ). */
+    result->x = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( vec->x ) );
+    result->y = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( vec->y ) );
+    result->z = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( vec->z ) );
+}
+
+static inline void vmathSoaV3AbsPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
+{   /* Stores fabsf4 of each component of vec into the matching component of result. */
+    result->x = fabsf4( vec->x );
+    result->y = fabsf4( vec->y );
+    result->z = fabsf4( vec->z );
+}
+
+static inline void vmathSoaV3CopySignPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
+{   /* Applies copysignf4( vec0, vec1 ) per component; presumably magnitude from vec0, sign from vec1 - confirm in simdmath. */
+    result->x = copysignf4( vec0->x, vec1->x );
+    result->y = copysignf4( vec0->y, vec1->y );
+    result->z = copysignf4( vec0->z, vec1->z );
+}
+
+static inline void vmathSoaV3MaxPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
+{   /* Component-wise maximum of vec0 and vec1, taken with fmaxf4. */
+    result->x = fmaxf4( vec0->x, vec1->x );
+    result->y = fmaxf4( vec0->y, vec1->y );
+    result->z = fmaxf4( vec0->z, vec1->z );
+}
+
+static inline vec_float4 vmathSoaV3MaxElem( const VmathSoaVector3 *vec )
+{
+    /* Maximum of the x, y and z components, folded with fmaxf4. */
+    const vec_float4 maxXY = fmaxf4( vec->x, vec->y );
+    /* z goes in as the first fmaxf4 operand, the running maximum as the second. */
+    return fmaxf4( vec->z, maxXY );
+}
+
+static inline void vmathSoaV3MinPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
+{   /* Component-wise minimum of vec0 and vec1, taken with fminf4. */
+    result->x = fminf4( vec0->x, vec1->x );
+    result->y = fminf4( vec0->y, vec1->y );
+    result->z = fminf4( vec0->z, vec1->z );
+}
+
+static inline vec_float4 vmathSoaV3MinElem( const VmathSoaVector3 *vec )
+{
+    /* Minimum of the x, y and z components, folded with fminf4. */
+    const vec_float4 minXY = fminf4( vec->x, vec->y );
+    /* z goes in as the first fminf4 operand, the running minimum as the second. */
+    return fminf4( vec->z, minXY );
+}
+
+static inline vec_float4 vmathSoaV3Sum( const VmathSoaVector3 *vec )
+{
+    /* Sum of the three components, evaluated as (x + y) + z. */
+    const vec_float4 sumXY = vec_add( vec->x, vec->y );
+    /* z is added last, so the order of the floating-point additions is fixed. */
+    return vec_add( sumXY, vec->z );
+}
+
+static inline vec_float4 vmathSoaV3Dot( const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
+{
+    /* Dot product: each product is a vec_madd with a zero addend, summed as (x + y) + z. */
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    const vec_float4 prodX = vec_madd( vec0->x, vec1->x, zero );
+    const vec_float4 sumXY = vec_add( prodX, vec_madd( vec0->y, vec1->y, zero ) );
+    return vec_add( sumXY, vec_madd( vec0->z, vec1->z, zero ) );
+}
+
+static inline vec_float4 vmathSoaV3LengthSqr( const VmathSoaVector3 *vec )
+{
+    /* Squared length: each square is a vec_madd with a zero addend, summed as (x + y) + z. */
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    const vec_float4 sqrX = vec_madd( vec->x, vec->x, zero );
+    const vec_float4 sumXY = vec_add( sqrX, vec_madd( vec->y, vec->y, zero ) );
+    return vec_add( sumXY, vec_madd( vec->z, vec->z, zero ) );
+}
+
+static inline vec_float4 vmathSoaV3Length( const VmathSoaVector3 *vec )
+{   /* Length of vec: sqrtf4 applied to vmathSoaV3LengthSqr( vec ). */
+    return sqrtf4( vmathSoaV3LengthSqr( vec ) );
+}
+
+static inline void vmathSoaV3Normalize( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
+{
+    /* Scales vec by 1/sqrt(lengthSqr); the factor is computed before any store, and zero length is not guarded. */
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    const vec_float4 invLen = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( vmathSoaV3LengthSqr( vec ) ) );
+    result->x = vec_madd( vec->x, invLen, zero );
+    result->y = vec_madd( vec->y, invLen, zero );
+    result->z = vec_madd( vec->z, invLen, zero );
+}
+
+static inline void vmathSoaV3Cross( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
+{   /* Cross product vec0 x vec1; every product is a vec_madd with a zero addend. */
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    const vec_float4 crossX = vec_sub( vec_madd( vec0->y, vec1->z, zero ), vec_madd( vec0->z, vec1->y, zero ) );
+    const vec_float4 crossY = vec_sub( vec_madd( vec0->z, vec1->x, zero ), vec_madd( vec0->x, vec1->z, zero ) );
+    const vec_float4 crossZ = vec_sub( vec_madd( vec0->x, vec1->y, zero ), vec_madd( vec0->y, vec1->x, zero ) );
+    vmathSoaV3MakeFromElems( result, crossX, crossY, crossZ ); /* written only after all inputs are read */
+}
+
+static inline void vmathSoaV3Select( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1, vec_uint4 select1 )
+{   /* Per component, vec_sel takes bits from vec1 where select1 is set and from vec0 elsewhere; one mask serves x, y and z. */
+    result->x = vec_sel( vec0->x, vec1->x, select1 );
+    result->y = vec_sel( vec0->y, vec1->y, select1 );
+    result->z = vec_sel( vec0->z, vec1->z, select1 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathSoaV3Print( const VmathSoaVector3 *vec )
+{   /* Debug helper: unpacks vec into four AoS vectors and prints each under its slot label. */
+    VmathVector3 slot0, slot1, slot2, slot3;
+    vmathSoaV3Get4Aos( vec, &slot0, &slot1, &slot2, &slot3 );
+    printf("slot 0:\n");
+    vmathV3Print( &slot0 );
+    printf("slot 1:\n");
+    vmathV3Print( &slot1 );
+    printf("slot 2:\n");
+    vmathV3Print( &slot2 );
+    printf("slot 3:\n");
+    vmathV3Print( &slot3 );
+}
+
+static inline void vmathSoaV3Prints( const VmathSoaVector3 *vec, const char *name )
+{   /* Debug helper: prints name, then the four AoS vectors unpacked from vec under slot labels. */
+    VmathVector3 slot0, slot1, slot2, slot3;
+    printf( "%s:\n", name );
+    vmathSoaV3Get4Aos( vec, &slot0, &slot1, &slot2, &slot3 );
+    printf("slot 0:\n");
+    vmathV3Print( &slot0 );
+    printf("slot 1:\n");
+    vmathV3Print( &slot1 );
+    printf("slot 2:\n");
+    vmathV3Print( &slot2 );
+    printf("slot 3:\n");
+    vmathV3Print( &slot3 );
+}
+
+#endif
+
+static inline void vmathSoaV4Copy( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
+{   /* Copies x, y, z and w from vec to result. */
+    result->x = vec->x;
+    result->y = vec->y;
+    result->z = vec->z;
+    result->w = vec->w;
+}
+
+static inline void vmathSoaV4MakeFromElems( VmathSoaVector4 *result, vec_float4 _x, vec_float4 _y, vec_float4 _z, vec_float4 _w )
+{   /* Builds result from four component quads: _x, _y, _z and _w. */
+    result->x = _x;
+    result->y = _y;
+    result->z = _z;
+    result->w = _w;
+}
+
+static inline void vmathSoaV4MakeFromV3Scalar( VmathSoaVector4 *result, const VmathSoaVector3 *xyz, vec_float4 _w )
+{   /* Builds result from a 3-vector and a separate w: x, y, z come from xyz, w from _w. */
+    vmathSoaV4SetXYZ( result, xyz );
+    vmathSoaV4SetW( result, _w );
+}
+
+static inline void vmathSoaV4MakeFromV3( VmathSoaVector4 *result, const VmathSoaVector3 *vec )
+{   /* Extends a 3-vector to a 4-vector: x, y, z are copied and w is set to 0.0f. */
+    result->x = vec->x;
+    result->y = vec->y;
+    result->z = vec->z;
+    result->w = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+}
+
+static inline void vmathSoaV4MakeFromP3( VmathSoaVector4 *result, const VmathSoaPoint3 *pnt )
+{   /* Extends a point to a 4-vector: x, y, z are copied and w is set to 1.0f. */
+    result->x = pnt->x;
+    result->y = pnt->y;
+    result->z = pnt->z;
+    result->w = ((vec_float4){1.0f,1.0f,1.0f,1.0f});
+}
+
+static inline void vmathSoaV4MakeFromQ( VmathSoaVector4 *result, const VmathSoaQuat *quat )
+{   /* Copies the x, y, z and w components of quat into result unchanged. */
+    result->x = quat->x;
+    result->y = quat->y;
+    result->z = quat->z;
+    result->w = quat->w;
+}
+
+static inline void vmathSoaV4MakeFromScalar( VmathSoaVector4 *result, vec_float4 scalar )
+{   /* Sets all four components of result to the same quad, scalar. */
+    result->x = scalar;
+    result->y = scalar;
+    result->z = scalar;
+    result->w = scalar;
+}
+
+static inline void vmathSoaV4MakeFromAos( VmathSoaVector4 *result, const VmathVector4 *vec )
+{   /* Replicates one AoS vector: element i of vec->vec128 is splatted across component i of result. */
+    const vec_float4 packed = vec->vec128;
+    result->x = vec_splat( packed, 0 );
+    result->y = vec_splat( packed, 1 );
+    result->z = vec_splat( packed, 2 );
+    result->w = vec_splat( packed, 3 );
+}
+
+static inline void vmathSoaV4MakeFrom4Aos( VmathSoaVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1, const VmathVector4 *vec2, const VmathVector4 *vec3 )
+{   /* 4x4 transpose of four AoS vectors into SoA form using two rounds of merges. */
+    vec_float4 tmp0, tmp1, tmp2, tmp3;
+    tmp0 = vec_mergeh( vec0->vec128, vec2->vec128 ); /* x0 x2 y0 y2 */
+    tmp1 = vec_mergeh( vec1->vec128, vec3->vec128 ); /* x1 x3 y1 y3 */
+    tmp2 = vec_mergel( vec0->vec128, vec2->vec128 ); /* z0 z2 w0 w2 */
+    tmp3 = vec_mergel( vec1->vec128, vec3->vec128 ); /* z1 z3 w1 w3 */
+    result->x = vec_mergeh( tmp0, tmp1 ); /* x0 x1 x2 x3 */
+    result->y = vec_mergel( tmp0, tmp1 ); /* y0 y1 y2 y3 */
+    result->z = vec_mergeh( tmp2, tmp3 ); /* z0 z1 z2 z3 */
+    result->w = vec_mergel( tmp2, tmp3 ); /* w0 w1 w2 w3 */
+}
+
+static inline void vmathSoaV4MakeXAxis( VmathSoaVector4 *result )
+{   /* Sets result to (1, 0, 0, 0): x is all 1.0f, the other components all 0.0f. */
+    vmathSoaV4MakeFromElems( result, ((vec_float4){1.0f,1.0f,1.0f,1.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+}
+
+static inline void vmathSoaV4MakeYAxis( VmathSoaVector4 *result )
+{   /* Sets result to (0, 1, 0, 0): y is all 1.0f, the other components all 0.0f. */
+    vmathSoaV4MakeFromElems( result, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){1.0f,1.0f,1.0f,1.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+}
+
+static inline void vmathSoaV4MakeZAxis( VmathSoaVector4 *result )
+{   /* Sets result to (0, 0, 1, 0): z is all 1.0f, the other components all 0.0f. */
+    vmathSoaV4MakeFromElems( result, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){1.0f,1.0f,1.0f,1.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+}
+
+static inline void vmathSoaV4MakeWAxis( VmathSoaVector4 *result )
+{   /* Sets result to (0, 0, 0, 1): w is all 1.0f, the other components all 0.0f. */
+    vmathSoaV4MakeFromElems( result, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){1.0f,1.0f,1.0f,1.0f}) );
+}
+
+static inline void vmathSoaV4Lerp( VmathSoaVector4 *result, vec_float4 t, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
+{   /* Linear interpolation: result = vec0 + t * (vec1 - vec0). */
+    VmathSoaVector4 delta, scaledDelta;
+    vmathSoaV4Sub( &delta, vec1, vec0 );
+    vmathSoaV4ScalarMul( &scaledDelta, &delta, t );
+    vmathSoaV4Add( result, vec0, &scaledDelta );
+}
+
+static inline void vmathSoaV4Slerp( VmathSoaVector4 *result, vec_float4 t, const VmathSoaVector4 *unitVec0, const VmathSoaVector4 *unitVec1 )
+{   /* Spherical interpolation: result = scale0 * unitVec0 + scale1 * unitVec1, with a linear fallback chosen by mask. */
+    VmathSoaVector4 tmpV4_0, tmpV4_1;
+    vec_float4 recipSinAngle, scale0, scale1, cosAngle, angle;
+    vec_uint4 selectMask;
+    cosAngle = vmathSoaV4Dot( unitVec0, unitVec1 );
+    selectMask = (vec_uint4)vec_cmpgt( (vec_float4){_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL}, cosAngle ); /* set where _VECTORMATH_SLERP_TOL > cosAngle */
+    angle = acosf4( cosAngle );
+    recipSinAngle = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sinf4( angle ) );
+    scale0 = vec_sel( vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), t ), vec_madd( sinf4( vec_madd( vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), t ), angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), recipSinAngle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), selectMask ); /* mask set: sin((1-t)*angle)/sin(angle); else 1-t */
+    scale1 = vec_sel( t, vec_madd( sinf4( vec_madd( t, angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), recipSinAngle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), selectMask ); /* mask set: sin(t*angle)/sin(angle); else t */
+    vmathSoaV4ScalarMul( &tmpV4_0, unitVec0, scale0 );
+    vmathSoaV4ScalarMul( &tmpV4_1, unitVec1, scale1 );
+    vmathSoaV4Add( result, &tmpV4_0, &tmpV4_1 );
+}
+
+static inline void vmathSoaV4Get4Aos( const VmathSoaVector4 *vec, VmathVector4 *result0, VmathVector4 *result1, VmathVector4 *result2, VmathVector4 *result3 )
+{   /* 4x4 transpose from SoA form back to four AoS vectors; resultN receives slot N. */
+    vec_float4 tmp0, tmp1, tmp2, tmp3;
+    tmp0 = vec_mergeh( vec->x, vec->z ); /* x0 z0 x1 z1 */
+    tmp1 = vec_mergeh( vec->y, vec->w ); /* y0 w0 y1 w1 */
+    tmp2 = vec_mergel( vec->x, vec->z ); /* x2 z2 x3 z3 */
+    tmp3 = vec_mergel( vec->y, vec->w ); /* y2 w2 y3 w3 */
+    vmathV4MakeFrom128( result0, vec_mergeh( tmp0, tmp1 ) ); /* x0 y0 z0 w0 */
+    vmathV4MakeFrom128( result1, vec_mergel( tmp0, tmp1 ) ); /* x1 y1 z1 w1 */
+    vmathV4MakeFrom128( result2, vec_mergeh( tmp2, tmp3 ) ); /* x2 y2 z2 w2 */
+    vmathV4MakeFrom128( result3, vec_mergel( tmp2, tmp3 ) ); /* x3 y3 z3 w3 */
+}
+
+static inline void vmathSoaV4StoreHalfFloats( const VmathSoaVector4 *vec, vec_ushort8 *twoQuads )
+{   /* Unpacks vec to four AoS vectors and writes two quads; NOTE(review): _vmath2VfToHalfFloats presumably packs two float quads into eight half floats - helper not shown here. */
+    VmathVector4 v0, v1, v2, v3;
+    vmathSoaV4Get4Aos( vec, &v0, &v1, &v2, &v3 );
+    twoQuads[0] = _vmath2VfToHalfFloats(v0.vec128, v1.vec128);
+    twoQuads[1] = _vmath2VfToHalfFloats(v2.vec128, v3.vec128);
+}
+
+static inline void vmathSoaV4SetXYZ( VmathSoaVector4 *result, const VmathSoaVector3 *vec )
+{   /* Overwrites x, y and z of result from vec; result->w is left untouched. */
+    result->x = vec->x;
+    result->y = vec->y;
+    result->z = vec->z;
+}
+
+static inline void vmathSoaV4GetXYZ( VmathSoaVector3 *result, const VmathSoaVector4 *vec )
+{   /* Extracts the x, y and z components of vec into a 3-vector; w is dropped. */
+    vmathSoaV3MakeFromElems( result, vec->x, vec->y, vec->z );
+}
+
+static inline void vmathSoaV4SetX( VmathSoaVector4 *result, vec_float4 _x )
+{   /* Replaces only the x component of result. */
+    result->x = _x;
+}
+
+static inline vec_float4 vmathSoaV4GetX( const VmathSoaVector4 *vec )
+{   /* Returns the x component quad of vec. */
+    return vec->x;
+}
+
+static inline void vmathSoaV4SetY( VmathSoaVector4 *result, vec_float4 _y )
+{   /* Replaces only the y component of result. */
+    result->y = _y;
+}
+
+static inline vec_float4 vmathSoaV4GetY( const VmathSoaVector4 *vec )
+{   /* Returns the y component quad of vec. */
+    return vec->y;
+}
+
+static inline void vmathSoaV4SetZ( VmathSoaVector4 *result, vec_float4 _z )
+{   /* Replaces only the z component of result. */
+    result->z = _z;
+}
+
+static inline vec_float4 vmathSoaV4GetZ( const VmathSoaVector4 *vec )
+{   /* Returns the z component quad of vec. */
+    return vec->z;
+}
+
+static inline void vmathSoaV4SetW( VmathSoaVector4 *result, vec_float4 _w )
+{   /* Replaces only the w component of result. */
+    result->w = _w;
+}
+
+static inline vec_float4 vmathSoaV4GetW( const VmathSoaVector4 *vec )
+{   /* Returns the w component quad of vec. */
+    return vec->w;
+}
+
+static inline void vmathSoaV4SetElem( VmathSoaVector4 *result, int idx, vec_float4 value )
+{   /* Writes the component at offset idx from x; idx is not range-checked. NOTE(review): assumes x, y, z, w are consecutive members - struct not shown. */
+    *(&result->x + idx) = value;
+}
+
+static inline vec_float4 vmathSoaV4GetElem( const VmathSoaVector4 *vec, int idx )
+{   /* Reads the component at offset idx from x; idx is not range-checked. NOTE(review): assumes x, y, z, w are consecutive members - struct not shown. */
+    return *(&vec->x + idx);
+}
+
+static inline void vmathSoaV4Add( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
+{   /* Component-wise sum vec0 + vec1, one vec_add per component. */
+    result->x = vec_add( vec0->x, vec1->x );
+    result->y = vec_add( vec0->y, vec1->y );
+    result->z = vec_add( vec0->z, vec1->z );
+    result->w = vec_add( vec0->w, vec1->w );
+}
+
+static inline void vmathSoaV4Sub( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
+{   /* Component-wise difference vec0 - vec1, one vec_sub per component. */
+    result->x = vec_sub( vec0->x, vec1->x );
+    result->y = vec_sub( vec0->y, vec1->y );
+    result->z = vec_sub( vec0->z, vec1->z );
+    result->w = vec_sub( vec0->w, vec1->w );
+}
+
+static inline void vmathSoaV4ScalarMul( VmathSoaVector4 *result, const VmathSoaVector4 *vec, vec_float4 scalar )
+{   /* Multiplies x, y, z and w by scalar; each multiply is a vec_madd with a zero addend. */
+    result->x = vec_madd( vec->x, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    result->y = vec_madd( vec->y, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    result->z = vec_madd( vec->z, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    result->w = vec_madd( vec->w, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+}
+
+static inline void vmathSoaV4ScalarDiv( VmathSoaVector4 *result, const VmathSoaVector4 *vec, vec_float4 scalar )
+{   /* Divides x, y, z and w by scalar with divf4; the divisor is not checked for zero here. */
+    result->x = divf4( vec->x, scalar );
+    result->y = divf4( vec->y, scalar );
+    result->z = divf4( vec->z, scalar );
+    result->w = divf4( vec->w, scalar );
+}
+
+static inline void vmathSoaV4Neg( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
+{   /* Stores negatef4 of each component of vec into the matching component of result. */
+    result->x = negatef4( vec->x );
+    result->y = negatef4( vec->y );
+    result->z = negatef4( vec->z );
+    result->w = negatef4( vec->w );
+}
+
+static inline void vmathSoaV4MulPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
+{   /* Component-wise product vec0 * vec1; each multiply is a vec_madd with a zero addend. */
+    result->x = vec_madd( vec0->x, vec1->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    result->y = vec_madd( vec0->y, vec1->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    result->z = vec_madd( vec0->z, vec1->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    result->w = vec_madd( vec0->w, vec1->w, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+}
+
+static inline void vmathSoaV4DivPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
+{   /* Component-wise quotient vec0 / vec1 via divf4; vec1 is not checked for zeros here. */
+    result->x = divf4( vec0->x, vec1->x );
+    result->y = divf4( vec0->y, vec1->y );
+    result->z = divf4( vec0->z, vec1->z );
+    result->w = divf4( vec0->w, vec1->w );
+}
+
+static inline void vmathSoaV4RecipPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
+{   /* Component-wise reciprocal, computed as divf4( 1.0f, component ). */
+    result->x = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec->x );
+    result->y = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec->y );
+    result->z = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec->z );
+    result->w = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec->w );
+}
+
+static inline void vmathSoaV4SqrtPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
+{   /* Stores sqrtf4 of each component of vec into the matching component of result. */
+    result->x = sqrtf4( vec->x );
+    result->y = sqrtf4( vec->y );
+    result->z = sqrtf4( vec->z );
+    result->w = sqrtf4( vec->w );
+}
+
+static inline void vmathSoaV4RsqrtPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
+{   /* Component-wise reciprocal square root, computed as divf4( 1.0f, sqrtf4( component ) ). */
+    result->x = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( vec->x ) );
+    result->y = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( vec->y ) );
+    result->z = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( vec->z ) );
+    result->w = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( vec->w ) );
+}
+
+static inline void vmathSoaV4AbsPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
+{   /* Stores fabsf4 of each component of vec into the matching component of result. */
+    result->x = fabsf4( vec->x );
+    result->y = fabsf4( vec->y );
+    result->z = fabsf4( vec->z );
+    result->w = fabsf4( vec->w );
+}
+
+static inline void vmathSoaV4CopySignPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
+{   /* Applies copysignf4( vec0, vec1 ) per component; presumably magnitude from vec0, sign from vec1 - confirm in simdmath. */
+    result->x = copysignf4( vec0->x, vec1->x );
+    result->y = copysignf4( vec0->y, vec1->y );
+    result->z = copysignf4( vec0->z, vec1->z );
+    result->w = copysignf4( vec0->w, vec1->w );
+}
+
+static inline void vmathSoaV4MaxPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
+{   /* Component-wise maximum of vec0 and vec1, taken with fmaxf4. */
+    result->x = fmaxf4( vec0->x, vec1->x );
+    result->y = fmaxf4( vec0->y, vec1->y );
+    result->z = fmaxf4( vec0->z, vec1->z );
+    result->w = fmaxf4( vec0->w, vec1->w );
+}
+
+static inline vec_float4 vmathSoaV4MaxElem( const VmathSoaVector4 *vec )
+{
+    /* Maximum of the x, y, z and w components, folded with fmaxf4 in that order. */
+    const vec_float4 maxXY = fmaxf4( vec->x, vec->y );
+    const vec_float4 maxXYZ = fmaxf4( vec->z, maxXY );
+    /* Each later component goes in as the first fmaxf4 operand. */
+    return fmaxf4( vec->w, maxXYZ );
+}
+
+static inline void vmathSoaV4MinPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
+{   /* Component-wise minimum of vec0 and vec1, taken with fminf4. */
+    result->x = fminf4( vec0->x, vec1->x );
+    result->y = fminf4( vec0->y, vec1->y );
+    result->z = fminf4( vec0->z, vec1->z );
+    result->w = fminf4( vec0->w, vec1->w );
+}
+
+static inline vec_float4 vmathSoaV4MinElem( const VmathSoaVector4 *vec )
+{
+    /* Minimum of the x, y, z and w components, folded with fminf4 in that order. */
+    const vec_float4 minXY = fminf4( vec->x, vec->y );
+    const vec_float4 minXYZ = fminf4( vec->z, minXY );
+    /* Each later component goes in as the first fminf4 operand. */
+    return fminf4( vec->w, minXYZ );
+}
+
+static inline vec_float4 vmathSoaV4Sum( const VmathSoaVector4 *vec )
+{
+    /* Sum of the four components, evaluated as ((x + y) + z) + w. */
+    const vec_float4 sumXY = vec_add( vec->x, vec->y );
+    const vec_float4 sumXYZ = vec_add( sumXY, vec->z );
+    /* w is added last, so the order of the floating-point additions is fixed. */
+    return vec_add( sumXYZ, vec->w );
+}
+
+static inline vec_float4 vmathSoaV4Dot( const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
+{
+    /* Dot product: each product is a vec_madd with a zero addend, summed as ((x + y) + z) + w. */
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    const vec_float4 prodX = vec_madd( vec0->x, vec1->x, zero );
+    const vec_float4 sumXY = vec_add( prodX, vec_madd( vec0->y, vec1->y, zero ) );
+    const vec_float4 sumXYZ = vec_add( sumXY, vec_madd( vec0->z, vec1->z, zero ) );
+    return vec_add( sumXYZ, vec_madd( vec0->w, vec1->w, zero ) );
+}
+
+static inline vec_float4 vmathSoaV4LengthSqr( const VmathSoaVector4 *vec )
+{
+    /* Squared length: each square is a vec_madd with a zero addend, summed as ((x + y) + z) + w. */
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    const vec_float4 sqrX = vec_madd( vec->x, vec->x, zero );
+    const vec_float4 sumXY = vec_add( sqrX, vec_madd( vec->y, vec->y, zero ) );
+    const vec_float4 sumXYZ = vec_add( sumXY, vec_madd( vec->z, vec->z, zero ) );
+    return vec_add( sumXYZ, vec_madd( vec->w, vec->w, zero ) );
+}
+
+static inline vec_float4 vmathSoaV4Length( const VmathSoaVector4 *vec )
+{   /* Length of vec: sqrtf4 applied to vmathSoaV4LengthSqr( vec ). */
+    return sqrtf4( vmathSoaV4LengthSqr( vec ) );
+}
+
+static inline void vmathSoaV4Normalize( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
+{
+    /* Scales vec by 1/sqrt(lengthSqr); the factor is computed before any store, and zero length is not guarded. */
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    const vec_float4 invLen = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( vmathSoaV4LengthSqr( vec ) ) );
+    result->x = vec_madd( vec->x, invLen, zero );
+    result->y = vec_madd( vec->y, invLen, zero );
+    result->z = vec_madd( vec->z, invLen, zero );
+    result->w = vec_madd( vec->w, invLen, zero );
+}
+
+static inline void vmathSoaV4Select( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1, vec_uint4 select1 )
+{   /* Per component, vec_sel takes bits from vec1 where select1 is set and from vec0 elsewhere; one mask serves x, y, z and w. */
+    result->x = vec_sel( vec0->x, vec1->x, select1 );
+    result->y = vec_sel( vec0->y, vec1->y, select1 );
+    result->z = vec_sel( vec0->z, vec1->z, select1 );
+    result->w = vec_sel( vec0->w, vec1->w, select1 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathSoaV4Print( const VmathSoaVector4 *vec )
+{   /* Debug helper: unpacks vec into four AoS vectors and prints each under its slot label. */
+    VmathVector4 slot0, slot1, slot2, slot3;
+    vmathSoaV4Get4Aos( vec, &slot0, &slot1, &slot2, &slot3 );
+    printf("slot 0:\n");
+    vmathV4Print( &slot0 );
+    printf("slot 1:\n");
+    vmathV4Print( &slot1 );
+    printf("slot 2:\n");
+    vmathV4Print( &slot2 );
+    printf("slot 3:\n");
+    vmathV4Print( &slot3 );
+}
+
+static inline void vmathSoaV4Prints( const VmathSoaVector4 *vec, const char *name )
+{   /* Debug helper: prints name, then the four AoS vectors unpacked from vec under slot labels. */
+    VmathVector4 slot0, slot1, slot2, slot3;
+    printf( "%s:\n", name );
+    vmathSoaV4Get4Aos( vec, &slot0, &slot1, &slot2, &slot3 );
+    printf("slot 0:\n");
+    vmathV4Print( &slot0 );
+    printf("slot 1:\n");
+    vmathV4Print( &slot1 );
+    printf("slot 2:\n");
+    vmathV4Print( &slot2 );
+    printf("slot 3:\n");
+    vmathV4Print( &slot3 );
+}
+
+#endif
+
+static inline void vmathSoaP3Copy( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt )
+{   /* Copies x, y and z from pnt to result. */
+    result->x = pnt->x;
+    result->y = pnt->y;
+    result->z = pnt->z;
+}
+
+static inline void vmathSoaP3MakeFromElems( VmathSoaPoint3 *result, vec_float4 _x, vec_float4 _y, vec_float4 _z )
+{   /* Builds result from three component quads: _x, _y and _z. */
+    result->x = _x;
+    result->y = _y;
+    result->z = _z;
+}
+
+static inline void vmathSoaP3MakeFromV3( VmathSoaPoint3 *result, const VmathSoaVector3 *vec )
+{   /* Reinterprets a 3-vector as a point by copying x, y and z unchanged. */
+    result->x = vec->x;
+    result->y = vec->y;
+    result->z = vec->z;
+}
+
+static inline void vmathSoaP3MakeFromScalar( VmathSoaPoint3 *result, vec_float4 scalar )
+{   /* Sets all three components of result to the same quad, scalar. */
+    result->x = scalar;
+    result->y = scalar;
+    result->z = scalar;
+}
+
+static inline void vmathSoaP3MakeFromAos( VmathSoaPoint3 *result, const VmathPoint3 *pnt )
+{   /* Replicates one AoS point: elements 0, 1, 2 of pnt->vec128 are splatted across x, y, z of result. */
+    const vec_float4 packed = pnt->vec128;
+    result->x = vec_splat( packed, 0 );
+    result->y = vec_splat( packed, 1 );
+    result->z = vec_splat( packed, 2 );
+}
+
+static inline void vmathSoaP3MakeFrom4Aos( VmathSoaPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, const VmathPoint3 *pnt2, const VmathPoint3 *pnt3 )
+{   /* Transposes four AoS points into SoA form; the fourth element of each input quad is not used in result. */
+    vec_float4 tmp0, tmp1, tmp2, tmp3;
+    tmp0 = vec_mergeh( pnt0->vec128, pnt2->vec128 ); /* x0 x2 y0 y2 */
+    tmp1 = vec_mergeh( pnt1->vec128, pnt3->vec128 ); /* x1 x3 y1 y3 */
+    tmp2 = vec_mergel( pnt0->vec128, pnt2->vec128 ); /* z0 z2 .. .. */
+    tmp3 = vec_mergel( pnt1->vec128, pnt3->vec128 ); /* z1 z3 .. .. */
+    result->x = vec_mergeh( tmp0, tmp1 ); /* x0 x1 x2 x3 */
+    result->y = vec_mergel( tmp0, tmp1 ); /* y0 y1 y2 y3 */
+    result->z = vec_mergeh( tmp2, tmp3 ); /* z0 z1 z2 z3 */
+}
+
+static inline void vmathSoaP3Lerp( VmathSoaPoint3 *result, vec_float4 t, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
+{   /* Linear interpolation between points: result = pnt0 + t * (pnt1 - pnt0). */
+    VmathSoaVector3 delta, scaledDelta;
+    vmathSoaP3Sub( &delta, pnt1, pnt0 );
+    vmathSoaV3ScalarMul( &scaledDelta, &delta, t );
+    vmathSoaP3AddV3( result, pnt0, &scaledDelta );
+}
+
+static inline void vmathSoaP3Get4Aos( const VmathSoaPoint3 *pnt, VmathPoint3 *result0, VmathPoint3 *result1, VmathPoint3 *result2, VmathPoint3 *result3 )
+{   /* Unpacks the SoA point into four AoS points; resultN gets x, y, z of slot N, its 4th element is a leftover value. */
+    vec_float4 tmp0, tmp1;
+    tmp0 = vec_mergeh( pnt->x, pnt->z ); /* x0 z0 x1 z1 */
+    tmp1 = vec_mergel( pnt->x, pnt->z ); /* x2 z2 x3 z3 */
+    vmathP3MakeFrom128( result0, vec_mergeh( tmp0, pnt->y ) ); /* x0 y0 z0 y1 */
+    vmathP3MakeFrom128( result1, vec_perm( tmp0, pnt->y, _VECTORMATH_PERM_ZBWX ) ); /* presumably x1 y1 z1 x0, going by the constant's name */
+    vmathP3MakeFrom128( result2, vec_perm( tmp1, pnt->y, _VECTORMATH_PERM_XCYX ) ); /* presumably x2 y2 z2 x2 */
+    vmathP3MakeFrom128( result3, vec_perm( tmp1, pnt->y, _VECTORMATH_PERM_ZDWX ) ); /* presumably x3 y3 z3 x2 */
+}
+
+static inline void vmathSoaP3LoadXYZArray( VmathSoaPoint3 *vec, const vec_float4 *threeQuads )
+{   /* Loads three quads into SoA x, y, z; NOTE(review): layout x0 y0 z0 x1 | y1 z1 x2 y2 | z2 x3 y3 z3 is implied by the local names - confirm. */
+    vec_float4 xyxy, yzyz, zxzx, xyzx, yzxy, zxyz;
+    xyzx = threeQuads[0];
+    yzxy = threeQuads[1];
+    zxyz = threeQuads[2];
+    xyxy = vec_sld( yzxy, xyzx, 8 ); /* x2 y2 x0 y0 under the assumed layout */
+    zxzx = vec_sld( xyzx, zxyz, 8 ); /* z0 x1 z2 x3 */
+    yzyz = vec_sld( zxyz, yzxy, 8 ); /* y3 z3 y1 z1 */
+    vmathSoaP3SetX( vec, vec_perm( xyxy, zxzx, _VECTORMATH_PERM_ZBXD ) ); /* x0 x1 x2 x3, if the constant matches its name */
+    vmathSoaP3SetY( vec, vec_perm( xyxy, yzyz, _VECTORMATH_PERM_WCYA ) ); /* y0 y1 y2 y3 */
+    vmathSoaP3SetZ( vec, vec_perm( zxzx, yzyz, _VECTORMATH_PERM_XDZB ) ); /* z0 z1 z2 z3 */
+}
+
+static inline void vmathSoaP3StoreXYZArray( const VmathSoaPoint3 *vec, vec_float4 *threeQuads )
+{   /* Writes SoA x, y, z out as three quads; NOTE(review): output layout x0 y0 z0 x1 | y1 z1 x2 y2 | z2 x3 y3 z3 is implied by the local names - confirm. */
+    vec_float4 xyzx, yzxy, zxyz, xyxy, zxzx, yzyz;
+    xyxy = vec_perm( vec->x, vec->y, _VECTORMATH_PERM_ZCXA ); /* x2 y2 x0 y0, if the constant matches its name */
+    zxzx = vec_perm( vec->z, vec->x, _VECTORMATH_PERM_XBZD ); /* z0 x1 z2 x3 */
+    yzyz = vec_perm( vec->y, vec->z, _VECTORMATH_PERM_WDYB ); /* y3 z3 y1 z1 */
+    xyzx = vec_sld( xyxy, zxzx, 8 ); /* x0 y0 z0 x1 */
+    yzxy = vec_sld( yzyz, xyxy, 8 ); /* y1 z1 x2 y2 */
+    zxyz = vec_sld( zxzx, yzyz, 8 ); /* z2 x3 y3 z3 */
+    threeQuads[0] = xyzx;
+    threeQuads[1] = yzxy;
+    threeQuads[2] = zxyz;
+}
+
+static inline void vmathSoaP3StoreHalfFloats( const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1, vec_ushort8 *threeQuads )
+{   /* Stores two SoA points as three output quads; NOTE(review): _vmath2VfToHalfFloats presumably packs two float quads into eight half floats - helper not shown here. */
+    vec_float4 xyz0[3];
+    vec_float4 xyz1[3];
+    vmathSoaP3StoreXYZArray( pnt0, xyz0 );
+    vmathSoaP3StoreXYZArray( pnt1, xyz1 );
+    threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
+    threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]); /* middle quad straddles pnt0 and pnt1 */
+    threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
+}
+
+static inline void vmathSoaP3SetX( VmathSoaPoint3 *result, vec_float4 _x )
+{   /* Replaces only the x component of result. */
+    result->x = _x;
+}
+
+static inline vec_float4 vmathSoaP3GetX( const VmathSoaPoint3 *pnt )
+{   /* Returns the x component quad of pnt. */
+    return pnt->x;
+}
+
+static inline void vmathSoaP3SetY( VmathSoaPoint3 *result, vec_float4 _y )
+{   /* Replaces only the y component of result. */
+    result->y = _y;
+}
+
+static inline vec_float4 vmathSoaP3GetY( const VmathSoaPoint3 *pnt )
+{   /* Returns the y component quad of pnt. */
+    return pnt->y;
+}
+
+static inline void vmathSoaP3SetZ( VmathSoaPoint3 *result, vec_float4 _z )
+{   /* Replaces only the z component of result. */
+    result->z = _z;
+}
+
+static inline vec_float4 vmathSoaP3GetZ( const VmathSoaPoint3 *pnt )
+{   /* Returns the z component quad of pnt. */
+    return pnt->z;
+}
+
+static inline void vmathSoaP3SetElem( VmathSoaPoint3 *result, int idx, vec_float4 value )
+{   /* Writes the component at offset idx from x; idx is not range-checked. NOTE(review): assumes x, y, z are consecutive members - struct not shown. */
+    *(&result->x + idx) = value;
+}
+
+static inline vec_float4 vmathSoaP3GetElem( const VmathSoaPoint3 *pnt, int idx )
+{   /* Reads the component at offset idx from x; idx is not range-checked. NOTE(review): assumes x, y, z are consecutive members - struct not shown. */
+    return *(&pnt->x + idx);
+}
+
+static inline void vmathSoaP3Sub( VmathSoaVector3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
+{   /* Difference of two points, pnt0 - pnt1, stored as a vector (one vec_sub per component). */
+    result->x = vec_sub( pnt0->x, pnt1->x );
+    result->y = vec_sub( pnt0->y, pnt1->y );
+    result->z = vec_sub( pnt0->z, pnt1->z );
+}
+
+static inline void vmathSoaP3AddV3( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, const VmathSoaVector3 *vec1 )
+{   /* Stores pnt + vec1 into the point *result, one vec_add per component. */
+    result->x = vec_add( pnt->x, vec1->x );
+    result->y = vec_add( pnt->y, vec1->y );
+    result->z = vec_add( pnt->z, vec1->z );
+}
+
+static inline void vmathSoaP3SubV3( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, const VmathSoaVector3 *vec1 )
+{   /* Stores pnt - vec1 into the point *result, one vec_sub per component. */
+    result->x = vec_sub( pnt->x, vec1->x );
+    result->y = vec_sub( pnt->y, vec1->y );
+    result->z = vec_sub( pnt->z, vec1->z );
+}
+
+static inline void vmathSoaP3MulPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
+{   /* Component-wise product pnt0 * pnt1; each multiply is a vec_madd with a zero addend. */
+    result->x = vec_madd( pnt0->x, pnt1->x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    result->y = vec_madd( pnt0->y, pnt1->y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    result->z = vec_madd( pnt0->z, pnt1->z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+}
+
+static inline void vmathSoaP3DivPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
+{   /* Component-wise quotient pnt0 / pnt1 via divf4; pnt1 is not checked for zeros here. */
+    result->x = divf4( pnt0->x, pnt1->x );
+    result->y = divf4( pnt0->y, pnt1->y );
+    result->z = divf4( pnt0->z, pnt1->z );
+}
+
+static inline void vmathSoaP3RecipPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt )
+{   /* Component-wise reciprocal, computed as divf4( 1.0f, component ). */
+    result->x = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), pnt->x );
+    result->y = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), pnt->y );
+    result->z = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), pnt->z );
+}
+
+static inline void vmathSoaP3SqrtPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt )
+{   /* Stores sqrtf4 of each component of pnt into the matching component of result. */
+    result->x = sqrtf4( pnt->x );
+    result->y = sqrtf4( pnt->y );
+    result->z = sqrtf4( pnt->z );
+}
+
+static inline void vmathSoaP3RsqrtPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt )
+{   /* Component-wise reciprocal square root, computed as divf4( 1.0f, sqrtf4( component ) ). */
+    result->x = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( pnt->x ) );
+    result->y = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( pnt->y ) );
+    result->z = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( pnt->z ) );
+}
+
+static inline void vmathSoaP3AbsPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt )
+{   /* Stores fabsf4 of each component of pnt into the matching component of result. */
+    result->x = fabsf4( pnt->x );
+    result->y = fabsf4( pnt->y );
+    result->z = fabsf4( pnt->z );
+}
+
+static inline void vmathSoaP3CopySignPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
+{   /* Applies copysignf4( pnt0, pnt1 ) per component; presumably magnitude from pnt0, sign from pnt1 - confirm in simdmath. */
+    result->x = copysignf4( pnt0->x, pnt1->x );
+    result->y = copysignf4( pnt0->y, pnt1->y );
+    result->z = copysignf4( pnt0->z, pnt1->z );
+}
+
+static inline void vmathSoaP3MaxPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
+{   /* Component-wise maximum of pnt0 and pnt1, taken with fmaxf4. */
+    result->x = fmaxf4( pnt0->x, pnt1->x );
+    result->y = fmaxf4( pnt0->y, pnt1->y );
+    result->z = fmaxf4( pnt0->z, pnt1->z );
+}
+
+static inline vec_float4 vmathSoaP3MaxElem( const VmathSoaPoint3 *pnt )
+{
+    /* Maximum of the x, y and z components, folded with fmaxf4. */
+    const vec_float4 maxXY = fmaxf4( pnt->x, pnt->y );
+    /* z goes in as the first fmaxf4 operand, the running maximum as the second. */
+    return fmaxf4( pnt->z, maxXY );
+}
+
+static inline void vmathSoaP3MinPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
+{   /* Component-wise minimum of pnt0 and pnt1, taken with fminf4. */
+    result->x = fminf4( pnt0->x, pnt1->x );
+    result->y = fminf4( pnt0->y, pnt1->y );
+    result->z = fminf4( pnt0->z, pnt1->z );
+}
+
+static inline vec_float4 vmathSoaP3MinElem( const VmathSoaPoint3 *pnt )
+{
+    /* Minimum of the x, y and z components, folded with fminf4. */
+    const vec_float4 minXY = fminf4( pnt->x, pnt->y );
+    /* z goes in as the first fminf4 operand, the running minimum as the second. */
+    return fminf4( pnt->z, minXY );
+}
+
+static inline vec_float4 vmathSoaP3Sum( const VmathSoaPoint3 *pnt )
+{
+    /* Sum of the three components, evaluated as (x + y) + z. */
+    const vec_float4 sumXY = vec_add( pnt->x, pnt->y );
+    /* z is added last, so the order of the floating-point additions is fixed. */
+    return vec_add( sumXY, pnt->z );
+}
+
+static inline void vmathSoaP3Scale( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, vec_float4 scaleVal )
+{   /* Uniform scale: broadcasts scaleVal into a temporary point, then multiplies per element. */
+    VmathSoaPoint3 uniformScale;
+    vmathSoaP3MakeFromScalar( &uniformScale, scaleVal );
+    vmathSoaP3MulPerElem( result, pnt, &uniformScale );
+}
+
+static inline void vmathSoaP3NonUniformScale( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, const VmathSoaVector3 *scaleVec )
+{   /* Per-axis scale: copies scaleVec into a temporary point, then multiplies per element. */
+    VmathSoaPoint3 axisScale;
+    vmathSoaP3MakeFromV3( &axisScale, scaleVec );
+    vmathSoaP3MulPerElem( result, pnt, &axisScale );
+}
+
+static inline vec_float4 vmathSoaP3Projection( const VmathSoaPoint3 *pnt, const VmathSoaVector3 *unitVec )
+{
+    /* Dot product of pnt with unitVec: vec_madd products with a zero addend, summed as (x + y) + z. */
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    const vec_float4 prodX = vec_madd( pnt->x, unitVec->x, zero );
+    const vec_float4 sumXY = vec_add( prodX, vec_madd( pnt->y, unitVec->y, zero ) );
+    return vec_add( sumXY, vec_madd( pnt->z, unitVec->z, zero ) );
+}
+
+static inline vec_float4 vmathSoaP3DistSqrFromOrigin( const VmathSoaPoint3 *pnt )
+{   /* Squared distance from the origin: pnt is copied to a vector and its squared length returned. */
+    VmathSoaVector3 asVector;
+    vmathSoaV3MakeFromP3( &asVector, pnt );
+    return vmathSoaV3LengthSqr( &asVector );
+}
+
+static inline vec_float4 vmathSoaP3DistFromOrigin( const VmathSoaPoint3 *pnt )
+{   /* Distance from the origin: pnt is copied to a vector and its length returned. */
+    VmathSoaVector3 asVector;
+    vmathSoaV3MakeFromP3( &asVector, pnt );
+    return vmathSoaV3Length( &asVector );
+}
+
+static inline vec_float4 vmathSoaP3DistSqr( const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
+{   /* Squared distance between two points: squared length of (pnt1 - pnt0). */
+    VmathSoaVector3 offset;
+    vmathSoaP3Sub( &offset, pnt1, pnt0 );
+    return vmathSoaV3LengthSqr( &offset );
+}
+
+static inline vec_float4 vmathSoaP3Dist( const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
+{   /* Distance between two points: length of (pnt1 - pnt0). */
+    VmathSoaVector3 offset;
+    vmathSoaP3Sub( &offset, pnt1, pnt0 );
+    return vmathSoaV3Length( &offset );
+}
+
+static inline void vmathSoaP3Select( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1, vec_uint4 select1 )
+{   /* Per component, vec_sel takes bits from pnt1 where select1 is set and from pnt0 elsewhere; one mask serves x, y and z. */
+    result->x = vec_sel( pnt0->x, pnt1->x, select1 );
+    result->y = vec_sel( pnt0->y, pnt1->y, select1 );
+    result->z = vec_sel( pnt0->z, pnt1->z, select1 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathSoaP3Print( const VmathSoaPoint3 *pnt )
+{   /* Debug helper: unpacks pnt into four AoS points and prints each under its slot label. */
+    VmathPoint3 slot0, slot1, slot2, slot3;
+    vmathSoaP3Get4Aos( pnt, &slot0, &slot1, &slot2, &slot3 );
+    printf("slot 0:\n");
+    vmathP3Print( &slot0 );
+    printf("slot 1:\n");
+    vmathP3Print( &slot1 );
+    printf("slot 2:\n");
+    vmathP3Print( &slot2 );
+    printf("slot 3:\n");
+    vmathP3Print( &slot3 );
+}
+
+static inline void vmathSoaP3Prints( const VmathSoaPoint3 *pnt, const char *name )
+{   /* Debug helper: prints name, then the four AoS points unpacked from pnt under slot labels. */
+    VmathPoint3 slot0, slot1, slot2, slot3;
+    printf( "%s:\n", name );
+    vmathSoaP3Get4Aos( pnt, &slot0, &slot1, &slot2, &slot3 );
+    printf("slot 0:\n");
+    vmathP3Print( &slot0 );
+    printf("slot 1:\n");
+    vmathP3Print( &slot1 );
+    printf("slot 2:\n");
+    vmathP3Print( &slot2 );
+    printf("slot 3:\n");
+    vmathP3Print( &slot3 );
+}
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/ppu/c/vec_soa_v.h b/Extras/vectormathlibrary/include/vectormath/ppu/c/vec_soa_v.h
index 75d7bb6bc..57805b654 100644
--- a/Extras/vectormathlibrary/include/vectormath/ppu/c/vec_soa_v.h
+++ b/Extras/vectormathlibrary/include/vectormath/ppu/c/vec_soa_v.h
@@ -1,958 +1,958 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_VEC_SOA_V_C_H
-#define _VECTORMATH_VEC_SOA_V_C_H
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/*-----------------------------------------------------------------------------
- * Constants
- * for permutes, words are labeled [x,y,z,w] [a,b,c,d]
- */
-#define _VECTORMATH_PERM_X 0x00010203
-#define _VECTORMATH_PERM_Y 0x04050607
-#define _VECTORMATH_PERM_Z 0x08090a0b
-#define _VECTORMATH_PERM_W 0x0c0d0e0f
-#define _VECTORMATH_PERM_A 0x10111213
-#define _VECTORMATH_PERM_B 0x14151617
-#define _VECTORMATH_PERM_C 0x18191a1b
-#define _VECTORMATH_PERM_D 0x1c1d1e1f
-#define _VECTORMATH_PERM_ZBWX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B, _VECTORMATH_PERM_W, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PERM_XCYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_C, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PERM_ZDWX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_D, _VECTORMATH_PERM_W, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PERM_ZCXA ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_A })
-#define _VECTORMATH_PERM_XBZD ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_B, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_D })
-#define _VECTORMATH_PERM_WDYB ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_W, _VECTORMATH_PERM_D, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_B })
-#define _VECTORMATH_PERM_ZBXD ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B, _VECTORMATH_PERM_X, _VECTORMATH_PERM_D })
-#define _VECTORMATH_PERM_WCYA ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_W, _VECTORMATH_PERM_C, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A })
-#define _VECTORMATH_PERM_XDZB ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_D, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B })
-#define _VECTORMATH_SLERP_TOL 0.999f
-
-/*-----------------------------------------------------------------------------
- * Definitions
- */
-#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
-#define _VECTORMATH_INTERNAL_FUNCTIONS
-
-#endif
-
-static inline VmathSoaVector3 vmathSoaV3MakeFromElems_V( vec_float4 _x, vec_float4 _y, vec_float4 _z )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3MakeFromElems(&result, _x, _y, _z);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3MakeFromP3_V( VmathSoaPoint3 pnt )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3MakeFromP3(&result, &pnt);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3MakeFromScalar_V( vec_float4 scalar )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3MakeFromScalar(&result, scalar);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3MakeFromAos_V( VmathVector3 vec )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3MakeFromAos(&result, &vec);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3MakeFrom4Aos_V( VmathVector3 vec0, VmathVector3 vec1, VmathVector3 vec2, VmathVector3 vec3 )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3MakeFrom4Aos(&result, &vec0, &vec1, &vec2, &vec3);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3MakeXAxis_V( )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3MakeXAxis(&result);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3MakeYAxis_V( )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3MakeYAxis(&result);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3MakeZAxis_V( )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3MakeZAxis(&result);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3Lerp_V( vec_float4 t, VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3Lerp(&result, t, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3Slerp_V( vec_float4 t, VmathSoaVector3 unitVec0, VmathSoaVector3 unitVec1 )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3Slerp(&result, t, &unitVec0, &unitVec1);
-    return result;
-}
-
-static inline void vmathSoaV3Get4Aos_V( VmathSoaVector3 vec, VmathVector3 *result0, VmathVector3 *result1, VmathVector3 *result2, VmathVector3 *result3 )
-{
-    vmathSoaV3Get4Aos(&vec, result0, result1, result2, result3);
-}
-
-static inline void vmathSoaV3LoadXYZArray_V( VmathSoaVector3 *vec, const vec_float4 *threeQuads )
-{
-    vmathSoaV3LoadXYZArray(vec, threeQuads);
-}
-
-static inline void vmathSoaV3StoreXYZArray_V( VmathSoaVector3 vec, vec_float4 *threeQuads )
-{
-    vmathSoaV3StoreXYZArray(&vec, threeQuads);
-}
-
-static inline void vmathSoaV3StoreHalfFloats_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1, vec_ushort8 *threeQuads )
-{
-    vmathSoaV3StoreHalfFloats(&vec0, &vec1, threeQuads);
-}
-
-static inline void vmathSoaV3SetX_V( VmathSoaVector3 *result, vec_float4 _x )
-{
-    vmathSoaV3SetX(result, _x);
-}
-
-static inline vec_float4 vmathSoaV3GetX_V( VmathSoaVector3 vec )
-{
-    return vmathSoaV3GetX(&vec);
-}
-
-static inline void vmathSoaV3SetY_V( VmathSoaVector3 *result, vec_float4 _y )
-{
-    vmathSoaV3SetY(result, _y);
-}
-
-static inline vec_float4 vmathSoaV3GetY_V( VmathSoaVector3 vec )
-{
-    return vmathSoaV3GetY(&vec);
-}
-
-static inline void vmathSoaV3SetZ_V( VmathSoaVector3 *result, vec_float4 _z )
-{
-    vmathSoaV3SetZ(result, _z);
-}
-
-static inline vec_float4 vmathSoaV3GetZ_V( VmathSoaVector3 vec )
-{
-    return vmathSoaV3GetZ(&vec);
-}
-
-static inline void vmathSoaV3SetElem_V( VmathSoaVector3 *result, int idx, vec_float4 value )
-{
-    vmathSoaV3SetElem(result, idx, value);
-}
-
-static inline vec_float4 vmathSoaV3GetElem_V( VmathSoaVector3 vec, int idx )
-{
-    return vmathSoaV3GetElem(&vec, idx);
-}
-
-static inline VmathSoaVector3 vmathSoaV3Add_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3Add(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3Sub_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3Sub(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathSoaPoint3 vmathSoaV3AddP3_V( VmathSoaVector3 vec, VmathSoaPoint3 pnt1 )
-{
-    VmathSoaPoint3 result;
-    vmathSoaV3AddP3(&result, &vec, &pnt1);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3ScalarMul_V( VmathSoaVector3 vec, vec_float4 scalar )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3ScalarMul(&result, &vec, scalar);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3ScalarDiv_V( VmathSoaVector3 vec, vec_float4 scalar )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3ScalarDiv(&result, &vec, scalar);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3Neg_V( VmathSoaVector3 vec )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3Neg(&result, &vec);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3MulPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3MulPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3DivPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3DivPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3RecipPerElem_V( VmathSoaVector3 vec )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3RecipPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3SqrtPerElem_V( VmathSoaVector3 vec )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3SqrtPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3RsqrtPerElem_V( VmathSoaVector3 vec )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3RsqrtPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3AbsPerElem_V( VmathSoaVector3 vec )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3AbsPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3CopySignPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3CopySignPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3MaxPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3MaxPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline vec_float4 vmathSoaV3MaxElem_V( VmathSoaVector3 vec )
-{
-    return vmathSoaV3MaxElem(&vec);
-}
-
-static inline VmathSoaVector3 vmathSoaV3MinPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3MinPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline vec_float4 vmathSoaV3MinElem_V( VmathSoaVector3 vec )
-{
-    return vmathSoaV3MinElem(&vec);
-}
-
-static inline vec_float4 vmathSoaV3Sum_V( VmathSoaVector3 vec )
-{
-    return vmathSoaV3Sum(&vec);
-}
-
-static inline vec_float4 vmathSoaV3Dot_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
-{
-    return vmathSoaV3Dot(&vec0, &vec1);
-}
-
-static inline vec_float4 vmathSoaV3LengthSqr_V( VmathSoaVector3 vec )
-{
-    return vmathSoaV3LengthSqr(&vec);
-}
-
-static inline vec_float4 vmathSoaV3Length_V( VmathSoaVector3 vec )
-{
-    return vmathSoaV3Length(&vec);
-}
-
-static inline VmathSoaVector3 vmathSoaV3Normalize_V( VmathSoaVector3 vec )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3Normalize(&result, &vec);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3Cross_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3Cross(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3Select_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1, vec_uint4 select1 )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3Select(&result, &vec0, &vec1, select1);
-    return result;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathSoaV3Print_V( VmathSoaVector3 vec )
-{
-    vmathSoaV3Print(&vec);
-}
-
-static inline void vmathSoaV3Prints_V( VmathSoaVector3 vec, const char *name )
-{
-    vmathSoaV3Prints(&vec, name);
-}
-
-#endif
-
-static inline VmathSoaVector4 vmathSoaV4MakeFromElems_V( vec_float4 _x, vec_float4 _y, vec_float4 _z, vec_float4 _w )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4MakeFromElems(&result, _x, _y, _z, _w);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4MakeFromV3Scalar_V( VmathSoaVector3 xyz, vec_float4 _w )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4MakeFromV3Scalar(&result, &xyz, _w);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4MakeFromV3_V( VmathSoaVector3 vec )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4MakeFromV3(&result, &vec);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4MakeFromP3_V( VmathSoaPoint3 pnt )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4MakeFromP3(&result, &pnt);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4MakeFromQ_V( VmathSoaQuat quat )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4MakeFromQ(&result, &quat);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4MakeFromScalar_V( vec_float4 scalar )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4MakeFromScalar(&result, scalar);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4MakeFromAos_V( VmathVector4 vec )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4MakeFromAos(&result, &vec);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4MakeFrom4Aos_V( VmathVector4 vec0, VmathVector4 vec1, VmathVector4 vec2, VmathVector4 vec3 )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4MakeFrom4Aos(&result, &vec0, &vec1, &vec2, &vec3);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4MakeXAxis_V( )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4MakeXAxis(&result);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4MakeYAxis_V( )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4MakeYAxis(&result);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4MakeZAxis_V( )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4MakeZAxis(&result);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4MakeWAxis_V( )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4MakeWAxis(&result);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4Lerp_V( vec_float4 t, VmathSoaVector4 vec0, VmathSoaVector4 vec1 )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4Lerp(&result, t, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4Slerp_V( vec_float4 t, VmathSoaVector4 unitVec0, VmathSoaVector4 unitVec1 )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4Slerp(&result, t, &unitVec0, &unitVec1);
-    return result;
-}
-
-static inline void vmathSoaV4Get4Aos_V( VmathSoaVector4 vec, VmathVector4 *result0, VmathVector4 *result1, VmathVector4 *result2, VmathVector4 *result3 )
-{
-    vmathSoaV4Get4Aos(&vec, result0, result1, result2, result3);
-}
-
-static inline void vmathSoaV4StoreHalfFloats_V( VmathSoaVector4 vec, vec_ushort8 *twoQuads )
-{
-    vmathSoaV4StoreHalfFloats(&vec, twoQuads);
-}
-
-static inline void vmathSoaV4SetXYZ_V( VmathSoaVector4 *result, VmathSoaVector3 vec )
-{
-    vmathSoaV4SetXYZ(result, &vec);
-}
-
-static inline VmathSoaVector3 vmathSoaV4GetXYZ_V( VmathSoaVector4 vec )
-{
-    VmathSoaVector3 result;
-    vmathSoaV4GetXYZ(&result, &vec);
-    return result;
-}
-
-static inline void vmathSoaV4SetX_V( VmathSoaVector4 *result, vec_float4 _x )
-{
-    vmathSoaV4SetX(result, _x);
-}
-
-static inline vec_float4 vmathSoaV4GetX_V( VmathSoaVector4 vec )
-{
-    return vmathSoaV4GetX(&vec);
-}
-
-static inline void vmathSoaV4SetY_V( VmathSoaVector4 *result, vec_float4 _y )
-{
-    vmathSoaV4SetY(result, _y);
-}
-
-static inline vec_float4 vmathSoaV4GetY_V( VmathSoaVector4 vec )
-{
-    return vmathSoaV4GetY(&vec);
-}
-
-static inline void vmathSoaV4SetZ_V( VmathSoaVector4 *result, vec_float4 _z )
-{
-    vmathSoaV4SetZ(result, _z);
-}
-
-static inline vec_float4 vmathSoaV4GetZ_V( VmathSoaVector4 vec )
-{
-    return vmathSoaV4GetZ(&vec);
-}
-
-static inline void vmathSoaV4SetW_V( VmathSoaVector4 *result, vec_float4 _w )
-{
-    vmathSoaV4SetW(result, _w);
-}
-
-static inline vec_float4 vmathSoaV4GetW_V( VmathSoaVector4 vec )
-{
-    return vmathSoaV4GetW(&vec);
-}
-
-static inline void vmathSoaV4SetElem_V( VmathSoaVector4 *result, int idx, vec_float4 value )
-{
-    vmathSoaV4SetElem(result, idx, value);
-}
-
-static inline vec_float4 vmathSoaV4GetElem_V( VmathSoaVector4 vec, int idx )
-{
-    return vmathSoaV4GetElem(&vec, idx);
-}
-
-static inline VmathSoaVector4 vmathSoaV4Add_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4Add(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4Sub_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4Sub(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4ScalarMul_V( VmathSoaVector4 vec, vec_float4 scalar )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4ScalarMul(&result, &vec, scalar);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4ScalarDiv_V( VmathSoaVector4 vec, vec_float4 scalar )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4ScalarDiv(&result, &vec, scalar);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4Neg_V( VmathSoaVector4 vec )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4Neg(&result, &vec);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4MulPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4MulPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4DivPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4DivPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4RecipPerElem_V( VmathSoaVector4 vec )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4RecipPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4SqrtPerElem_V( VmathSoaVector4 vec )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4SqrtPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4RsqrtPerElem_V( VmathSoaVector4 vec )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4RsqrtPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4AbsPerElem_V( VmathSoaVector4 vec )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4AbsPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4CopySignPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4CopySignPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4MaxPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4MaxPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline vec_float4 vmathSoaV4MaxElem_V( VmathSoaVector4 vec )
-{
-    return vmathSoaV4MaxElem(&vec);
-}
-
-static inline VmathSoaVector4 vmathSoaV4MinPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4MinPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline vec_float4 vmathSoaV4MinElem_V( VmathSoaVector4 vec )
-{
-    return vmathSoaV4MinElem(&vec);
-}
-
-static inline vec_float4 vmathSoaV4Sum_V( VmathSoaVector4 vec )
-{
-    return vmathSoaV4Sum(&vec);
-}
-
-static inline vec_float4 vmathSoaV4Dot_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 )
-{
-    return vmathSoaV4Dot(&vec0, &vec1);
-}
-
-static inline vec_float4 vmathSoaV4LengthSqr_V( VmathSoaVector4 vec )
-{
-    return vmathSoaV4LengthSqr(&vec);
-}
-
-static inline vec_float4 vmathSoaV4Length_V( VmathSoaVector4 vec )
-{
-    return vmathSoaV4Length(&vec);
-}
-
-static inline VmathSoaVector4 vmathSoaV4Normalize_V( VmathSoaVector4 vec )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4Normalize(&result, &vec);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4Select_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1, vec_uint4 select1 )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4Select(&result, &vec0, &vec1, select1);
-    return result;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathSoaV4Print_V( VmathSoaVector4 vec )
-{
-    vmathSoaV4Print(&vec);
-}
-
-static inline void vmathSoaV4Prints_V( VmathSoaVector4 vec, const char *name )
-{
-    vmathSoaV4Prints(&vec, name);
-}
-
-#endif
-
-static inline VmathSoaPoint3 vmathSoaP3MakeFromElems_V( vec_float4 _x, vec_float4 _y, vec_float4 _z )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3MakeFromElems(&result, _x, _y, _z);
-    return result;
-}
-
-static inline VmathSoaPoint3 vmathSoaP3MakeFromV3_V( VmathSoaVector3 vec )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3MakeFromV3(&result, &vec);
-    return result;
-}
-
-static inline VmathSoaPoint3 vmathSoaP3MakeFromScalar_V( vec_float4 scalar )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3MakeFromScalar(&result, scalar);
-    return result;
-}
-
-static inline VmathSoaPoint3 vmathSoaP3MakeFromAos_V( VmathPoint3 pnt )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3MakeFromAos(&result, &pnt);
-    return result;
-}
-
-static inline VmathSoaPoint3 vmathSoaP3MakeFrom4Aos_V( VmathPoint3 pnt0, VmathPoint3 pnt1, VmathPoint3 pnt2, VmathPoint3 pnt3 )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3MakeFrom4Aos(&result, &pnt0, &pnt1, &pnt2, &pnt3);
-    return result;
-}
-
-static inline VmathSoaPoint3 vmathSoaP3Lerp_V( vec_float4 t, VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3Lerp(&result, t, &pnt0, &pnt1);
-    return result;
-}
-
-static inline void vmathSoaP3Get4Aos_V( VmathSoaPoint3 pnt, VmathPoint3 *result0, VmathPoint3 *result1, VmathPoint3 *result2, VmathPoint3 *result3 )
-{
-    vmathSoaP3Get4Aos(&pnt, result0, result1, result2, result3);
-}
-
-static inline void vmathSoaP3LoadXYZArray_V( VmathSoaPoint3 *vec, const vec_float4 *threeQuads )
-{
-    vmathSoaP3LoadXYZArray(vec, threeQuads);
-}
-
-static inline void vmathSoaP3StoreXYZArray_V( VmathSoaPoint3 vec, vec_float4 *threeQuads )
-{
-    vmathSoaP3StoreXYZArray(&vec, threeQuads);
-}
-
-static inline void vmathSoaP3StoreHalfFloats_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1, vec_ushort8 *threeQuads )
-{
-    vmathSoaP3StoreHalfFloats(&pnt0, &pnt1, threeQuads);
-}
-
-static inline void vmathSoaP3SetX_V( VmathSoaPoint3 *result, vec_float4 _x )
-{
-    vmathSoaP3SetX(result, _x);
-}
-
-static inline vec_float4 vmathSoaP3GetX_V( VmathSoaPoint3 pnt )
-{
-    return vmathSoaP3GetX(&pnt);
-}
-
-static inline void vmathSoaP3SetY_V( VmathSoaPoint3 *result, vec_float4 _y )
-{
-    vmathSoaP3SetY(result, _y);
-}
-
-static inline vec_float4 vmathSoaP3GetY_V( VmathSoaPoint3 pnt )
-{
-    return vmathSoaP3GetY(&pnt);
-}
-
-static inline void vmathSoaP3SetZ_V( VmathSoaPoint3 *result, vec_float4 _z )
-{
-    vmathSoaP3SetZ(result, _z);
-}
-
-static inline vec_float4 vmathSoaP3GetZ_V( VmathSoaPoint3 pnt )
-{
-    return vmathSoaP3GetZ(&pnt);
-}
-
-static inline void vmathSoaP3SetElem_V( VmathSoaPoint3 *result, int idx, vec_float4 value )
-{
-    vmathSoaP3SetElem(result, idx, value);
-}
-
-static inline vec_float4 vmathSoaP3GetElem_V( VmathSoaPoint3 pnt, int idx )
-{
-    return vmathSoaP3GetElem(&pnt, idx);
-}
-
-static inline VmathSoaVector3 vmathSoaP3Sub_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 )
-{
-    VmathSoaVector3 result;
-    vmathSoaP3Sub(&result, &pnt0, &pnt1);
-    return result;
-}
-
-static inline VmathSoaPoint3 vmathSoaP3AddV3_V( VmathSoaPoint3 pnt, VmathSoaVector3 vec1 )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3AddV3(&result, &pnt, &vec1);
-    return result;
-}
-
-static inline VmathSoaPoint3 vmathSoaP3SubV3_V( VmathSoaPoint3 pnt, VmathSoaVector3 vec1 )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3SubV3(&result, &pnt, &vec1);
-    return result;
-}
-
-static inline VmathSoaPoint3 vmathSoaP3MulPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3MulPerElem(&result, &pnt0, &pnt1);
-    return result;
-}
-
-static inline VmathSoaPoint3 vmathSoaP3DivPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3DivPerElem(&result, &pnt0, &pnt1);
-    return result;
-}
-
-static inline VmathSoaPoint3 vmathSoaP3RecipPerElem_V( VmathSoaPoint3 pnt )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3RecipPerElem(&result, &pnt);
-    return result;
-}
-
-static inline VmathSoaPoint3 vmathSoaP3SqrtPerElem_V( VmathSoaPoint3 pnt )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3SqrtPerElem(&result, &pnt);
-    return result;
-}
-
-static inline VmathSoaPoint3 vmathSoaP3RsqrtPerElem_V( VmathSoaPoint3 pnt )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3RsqrtPerElem(&result, &pnt);
-    return result;
-}
-
-static inline VmathSoaPoint3 vmathSoaP3AbsPerElem_V( VmathSoaPoint3 pnt )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3AbsPerElem(&result, &pnt);
-    return result;
-}
-
-static inline VmathSoaPoint3 vmathSoaP3CopySignPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3CopySignPerElem(&result, &pnt0, &pnt1);
-    return result;
-}
-
-static inline VmathSoaPoint3 vmathSoaP3MaxPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3MaxPerElem(&result, &pnt0, &pnt1);
-    return result;
-}
-
-static inline vec_float4 vmathSoaP3MaxElem_V( VmathSoaPoint3 pnt )
-{
-    return vmathSoaP3MaxElem(&pnt);
-}
-
-static inline VmathSoaPoint3 vmathSoaP3MinPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3MinPerElem(&result, &pnt0, &pnt1);
-    return result;
-}
-
-static inline vec_float4 vmathSoaP3MinElem_V( VmathSoaPoint3 pnt )
-{
-    return vmathSoaP3MinElem(&pnt);
-}
-
-static inline vec_float4 vmathSoaP3Sum_V( VmathSoaPoint3 pnt )
-{
-    return vmathSoaP3Sum(&pnt);
-}
-
-static inline VmathSoaPoint3 vmathSoaP3Scale_V( VmathSoaPoint3 pnt, vec_float4 scaleVal )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3Scale(&result, &pnt, scaleVal);
-    return result;
-}
-
-static inline VmathSoaPoint3 vmathSoaP3NonUniformScale_V( VmathSoaPoint3 pnt, VmathSoaVector3 scaleVec )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3NonUniformScale(&result, &pnt, &scaleVec);
-    return result;
-}
-
-static inline vec_float4 vmathSoaP3Projection_V( VmathSoaPoint3 pnt, VmathSoaVector3 unitVec )
-{
-    return vmathSoaP3Projection(&pnt, &unitVec);
-}
-
-static inline vec_float4 vmathSoaP3DistSqrFromOrigin_V( VmathSoaPoint3 pnt )
-{
-    return vmathSoaP3DistSqrFromOrigin(&pnt);
-}
-
-static inline vec_float4 vmathSoaP3DistFromOrigin_V( VmathSoaPoint3 pnt )
-{
-    return vmathSoaP3DistFromOrigin(&pnt);
-}
-
-static inline vec_float4 vmathSoaP3DistSqr_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 )
-{
-    return vmathSoaP3DistSqr(&pnt0, &pnt1);
-}
-
-static inline vec_float4 vmathSoaP3Dist_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 )
-{
-    return vmathSoaP3Dist(&pnt0, &pnt1);
-}
-
-static inline VmathSoaPoint3 vmathSoaP3Select_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1, vec_uint4 select1 )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3Select(&result, &pnt0, &pnt1, select1);
-    return result;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathSoaP3Print_V( VmathSoaPoint3 pnt )
-{
-    vmathSoaP3Print(&pnt);
-}
-
-static inline void vmathSoaP3Prints_V( VmathSoaPoint3 pnt, const char *name )
-{
-    vmathSoaP3Prints(&pnt, name);
-}
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_VEC_SOA_V_C_H
+#define _VECTORMATH_VEC_SOA_V_C_H
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*-----------------------------------------------------------------------------
+ * Constants
+ * for permutes, words are labeled [x,y,z,w] [a,b,c,d]
+ */
+#define _VECTORMATH_PERM_X 0x00010203 /* byte indices 0x00-0x03: word x of the first labelled group */
+#define _VECTORMATH_PERM_Y 0x04050607 /* byte indices 0x04-0x07: word y */
+#define _VECTORMATH_PERM_Z 0x08090a0b /* byte indices 0x08-0x0b: word z */
+#define _VECTORMATH_PERM_W 0x0c0d0e0f /* byte indices 0x0c-0x0f: word w */
+#define _VECTORMATH_PERM_A 0x10111213 /* byte indices 0x10-0x13: word a of the second labelled group */
+#define _VECTORMATH_PERM_B 0x14151617 /* byte indices 0x14-0x17: word b */
+#define _VECTORMATH_PERM_C 0x18191a1b /* byte indices 0x18-0x1b: word c */
+#define _VECTORMATH_PERM_D 0x1c1d1e1f /* byte indices 0x1c-0x1f: word d */
+#define _VECTORMATH_PERM_ZBWX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B, _VECTORMATH_PERM_W, _VECTORMATH_PERM_X }) /* composite controls: four word selectors each, named after the words they pick */
+#define _VECTORMATH_PERM_XCYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_C, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_ZDWX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_D, _VECTORMATH_PERM_W, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_ZCXA ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_A })
+#define _VECTORMATH_PERM_XBZD ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_B, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_D })
+#define _VECTORMATH_PERM_WDYB ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_W, _VECTORMATH_PERM_D, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_B })
+#define _VECTORMATH_PERM_ZBXD ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B, _VECTORMATH_PERM_X, _VECTORMATH_PERM_D })
+#define _VECTORMATH_PERM_WCYA ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_W, _VECTORMATH_PERM_C, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A })
+#define _VECTORMATH_PERM_XDZB ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_D, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B })
+#define _VECTORMATH_SLERP_TOL 0.999f /* NOTE(review): not referenced in this part of the header; presumably the slerp cutoff - confirm against the Slerp routines */
+
+/*-----------------------------------------------------------------------------
+ * Definitions
+ */
+#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
+#define _VECTORMATH_INTERNAL_FUNCTIONS
+
+#endif
+
+static inline VmathSoaVector3 vmathSoaV3MakeFromElems_V( vec_float4 _x, vec_float4 _y, vec_float4 _z )
+{   /* By-value form of vmathSoaV3MakeFromElems: fills a local from _x, _y, _z and returns it. */
+    VmathSoaVector3 built;
+    vmathSoaV3MakeFromElems( &built, _x, _y, _z );
+    return built;
+}
+
+static inline VmathSoaVector3 vmathSoaV3MakeFromP3_V( VmathSoaPoint3 pnt )
+{   /* By-value form of vmathSoaV3MakeFromP3: passes the address of the pnt copy and returns the filled local. */
+    VmathSoaVector3 built;
+    vmathSoaV3MakeFromP3( &built, &pnt );
+    return built;
+}
+
+static inline VmathSoaVector3 vmathSoaV3MakeFromScalar_V( vec_float4 scalar )
+{   /* By-value form of vmathSoaV3MakeFromScalar: fills a local from scalar and returns it. */
+    VmathSoaVector3 built;
+    vmathSoaV3MakeFromScalar( &built, scalar );
+    return built;
+}
+
+static inline VmathSoaVector3 vmathSoaV3MakeFromAos_V( VmathVector3 vec )
+{   /* By-value form of vmathSoaV3MakeFromAos: passes the address of the vec copy and returns the filled local. */
+    VmathSoaVector3 built;
+    vmathSoaV3MakeFromAos( &built, &vec );
+    return built;
+}
+
+static inline VmathSoaVector3 vmathSoaV3MakeFrom4Aos_V( VmathVector3 vec0, VmathVector3 vec1, VmathVector3 vec2, VmathVector3 vec3 )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3MakeFrom4Aos(&result, &vec0, &vec1, &vec2, &vec3);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3MakeXAxis_V( void ) /* by-value form of vmathSoaV3MakeXAxis; (void) makes this a real prototype in C */
+{
+    VmathSoaVector3 result;
+    vmathSoaV3MakeXAxis(&result); /* local is handed over by address, then returned by value */
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3MakeYAxis_V( void ) /* by-value form of vmathSoaV3MakeYAxis; (void) makes this a real prototype in C */
+{
+    VmathSoaVector3 result;
+    vmathSoaV3MakeYAxis(&result); /* local is handed over by address, then returned by value */
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3MakeZAxis_V( void ) /* by-value form of vmathSoaV3MakeZAxis; (void) makes this a real prototype in C */
+{
+    VmathSoaVector3 result;
+    vmathSoaV3MakeZAxis(&result); /* local is handed over by address, then returned by value */
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3Lerp_V( vec_float4 t, VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3Lerp(&result, t, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3Slerp_V( vec_float4 t, VmathSoaVector3 unitVec0, VmathSoaVector3 unitVec1 )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3Slerp(&result, t, &unitVec0, &unitVec1);
+    return result;
+}
+
+static inline void vmathSoaV3Get4Aos_V( VmathSoaVector3 vec, VmathVector3 *result0, VmathVector3 *result1, VmathVector3 *result2, VmathVector3 *result3 )
+{
+    vmathSoaV3Get4Aos(&vec, result0, result1, result2, result3);
+}
+
+static inline void vmathSoaV3LoadXYZArray_V( VmathSoaVector3 *vec, const vec_float4 *threeQuads )
+{
+    vmathSoaV3LoadXYZArray(vec, threeQuads);
+}
+
+static inline void vmathSoaV3StoreXYZArray_V( VmathSoaVector3 vec, vec_float4 *threeQuads )
+{
+    vmathSoaV3StoreXYZArray(&vec, threeQuads);
+}
+
+static inline void vmathSoaV3StoreHalfFloats_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1, vec_ushort8 *threeQuads )
+{
+    vmathSoaV3StoreHalfFloats(&vec0, &vec1, threeQuads);
+}
+
+static inline void vmathSoaV3SetX_V( VmathSoaVector3 *result, vec_float4 _x )
+{
+    vmathSoaV3SetX(result, _x);
+}
+
+static inline vec_float4 vmathSoaV3GetX_V( VmathSoaVector3 vec )
+{
+    return vmathSoaV3GetX(&vec);
+}
+
+static inline void vmathSoaV3SetY_V( VmathSoaVector3 *result, vec_float4 _y )
+{
+    vmathSoaV3SetY(result, _y);
+}
+
+static inline vec_float4 vmathSoaV3GetY_V( VmathSoaVector3 vec )
+{
+    return vmathSoaV3GetY(&vec);
+}
+
+static inline void vmathSoaV3SetZ_V( VmathSoaVector3 *result, vec_float4 _z )
+{
+    vmathSoaV3SetZ(result, _z);
+}
+
+static inline vec_float4 vmathSoaV3GetZ_V( VmathSoaVector3 vec )
+{
+    return vmathSoaV3GetZ(&vec);
+}
+
+static inline void vmathSoaV3SetElem_V( VmathSoaVector3 *result, int idx, vec_float4 value )
+{
+    vmathSoaV3SetElem(result, idx, value);
+}
+
+static inline vec_float4 vmathSoaV3GetElem_V( VmathSoaVector3 vec, int idx )
+{
+    return vmathSoaV3GetElem(&vec, idx);
+}
+
+static inline VmathSoaVector3 vmathSoaV3Add_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3Add(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3Sub_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3Sub(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathSoaPoint3 vmathSoaV3AddP3_V( VmathSoaVector3 vec, VmathSoaPoint3 pnt1 )
+{
+    VmathSoaPoint3 result;
+    vmathSoaV3AddP3(&result, &vec, &pnt1);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3ScalarMul_V( VmathSoaVector3 vec, vec_float4 scalar )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3ScalarMul(&result, &vec, scalar);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3ScalarDiv_V( VmathSoaVector3 vec, vec_float4 scalar )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3ScalarDiv(&result, &vec, scalar);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3Neg_V( VmathSoaVector3 vec )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3Neg(&result, &vec);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3MulPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3MulPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3DivPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3DivPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3RecipPerElem_V( VmathSoaVector3 vec )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3RecipPerElem(&result, &vec);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3SqrtPerElem_V( VmathSoaVector3 vec )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3SqrtPerElem(&result, &vec);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3RsqrtPerElem_V( VmathSoaVector3 vec )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3RsqrtPerElem(&result, &vec);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3AbsPerElem_V( VmathSoaVector3 vec )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3AbsPerElem(&result, &vec);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3CopySignPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3CopySignPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3MaxPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3MaxPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline vec_float4 vmathSoaV3MaxElem_V( VmathSoaVector3 vec )
+{
+    return vmathSoaV3MaxElem(&vec);
+}
+
+static inline VmathSoaVector3 vmathSoaV3MinPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3MinPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline vec_float4 vmathSoaV3MinElem_V( VmathSoaVector3 vec )
+{
+    return vmathSoaV3MinElem(&vec);
+}
+
+static inline vec_float4 vmathSoaV3Sum_V( VmathSoaVector3 vec )
+{
+    return vmathSoaV3Sum(&vec);
+}
+
+static inline vec_float4 vmathSoaV3Dot_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
+{
+    return vmathSoaV3Dot(&vec0, &vec1);
+}
+
+static inline vec_float4 vmathSoaV3LengthSqr_V( VmathSoaVector3 vec )
+{
+    return vmathSoaV3LengthSqr(&vec);
+}
+
+static inline vec_float4 vmathSoaV3Length_V( VmathSoaVector3 vec )
+{
+    return vmathSoaV3Length(&vec);
+}
+
+static inline VmathSoaVector3 vmathSoaV3Normalize_V( VmathSoaVector3 vec )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3Normalize(&result, &vec);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3Cross_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3Cross(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3Select_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1, vec_uint4 select1 )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3Select(&result, &vec0, &vec1, select1);
+    return result;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathSoaV3Print_V( VmathSoaVector3 vec )
+{
+    vmathSoaV3Print(&vec);
+}
+
+static inline void vmathSoaV3Prints_V( VmathSoaVector3 vec, const char *name )
+{
+    vmathSoaV3Prints(&vec, name);
+}
+
+#endif
+
+static inline VmathSoaVector4 vmathSoaV4MakeFromElems_V( vec_float4 _x, vec_float4 _y, vec_float4 _z, vec_float4 _w )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4MakeFromElems(&result, _x, _y, _z, _w);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4MakeFromV3Scalar_V( VmathSoaVector3 xyz, vec_float4 _w )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4MakeFromV3Scalar(&result, &xyz, _w);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4MakeFromV3_V( VmathSoaVector3 vec )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4MakeFromV3(&result, &vec);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4MakeFromP3_V( VmathSoaPoint3 pnt )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4MakeFromP3(&result, &pnt);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4MakeFromQ_V( VmathSoaQuat quat )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4MakeFromQ(&result, &quat);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4MakeFromScalar_V( vec_float4 scalar )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4MakeFromScalar(&result, scalar);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4MakeFromAos_V( VmathVector4 vec )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4MakeFromAos(&result, &vec);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4MakeFrom4Aos_V( VmathVector4 vec0, VmathVector4 vec1, VmathVector4 vec2, VmathVector4 vec3 )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4MakeFrom4Aos(&result, &vec0, &vec1, &vec2, &vec3);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4MakeXAxis_V( void ) /* by-value form of vmathSoaV4MakeXAxis; (void) makes this a real prototype in C */
+{
+    VmathSoaVector4 result;
+    vmathSoaV4MakeXAxis(&result); /* local is handed over by address, then returned by value */
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4MakeYAxis_V( void ) /* by-value form of vmathSoaV4MakeYAxis; (void) makes this a real prototype in C */
+{
+    VmathSoaVector4 result;
+    vmathSoaV4MakeYAxis(&result); /* local is handed over by address, then returned by value */
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4MakeZAxis_V( void ) /* by-value form of vmathSoaV4MakeZAxis; (void) makes this a real prototype in C */
+{
+    VmathSoaVector4 result;
+    vmathSoaV4MakeZAxis(&result); /* local is handed over by address, then returned by value */
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4MakeWAxis_V( void ) /* by-value form of vmathSoaV4MakeWAxis; (void) makes this a real prototype in C */
+{
+    VmathSoaVector4 result;
+    vmathSoaV4MakeWAxis(&result); /* local is handed over by address, then returned by value */
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4Lerp_V( vec_float4 t, VmathSoaVector4 vec0, VmathSoaVector4 vec1 )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4Lerp(&result, t, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4Slerp_V( vec_float4 t, VmathSoaVector4 unitVec0, VmathSoaVector4 unitVec1 )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4Slerp(&result, t, &unitVec0, &unitVec1);
+    return result;
+}
+
+static inline void vmathSoaV4Get4Aos_V( VmathSoaVector4 vec, VmathVector4 *result0, VmathVector4 *result1, VmathVector4 *result2, VmathVector4 *result3 )
+{
+    vmathSoaV4Get4Aos(&vec, result0, result1, result2, result3);
+}
+
+static inline void vmathSoaV4StoreHalfFloats_V( VmathSoaVector4 vec, vec_ushort8 *twoQuads )
+{
+    vmathSoaV4StoreHalfFloats(&vec, twoQuads);
+}
+
+static inline void vmathSoaV4SetXYZ_V( VmathSoaVector4 *result, VmathSoaVector3 vec )
+{
+    vmathSoaV4SetXYZ(result, &vec);
+}
+
+static inline VmathSoaVector3 vmathSoaV4GetXYZ_V( VmathSoaVector4 vec )
+{
+    VmathSoaVector3 result;
+    vmathSoaV4GetXYZ(&result, &vec);
+    return result;
+}
+
+static inline void vmathSoaV4SetX_V( VmathSoaVector4 *result, vec_float4 _x )
+{
+    vmathSoaV4SetX(result, _x);
+}
+
+static inline vec_float4 vmathSoaV4GetX_V( VmathSoaVector4 vec )
+{
+    return vmathSoaV4GetX(&vec);
+}
+
+static inline void vmathSoaV4SetY_V( VmathSoaVector4 *result, vec_float4 _y )
+{
+    vmathSoaV4SetY(result, _y);
+}
+
+static inline vec_float4 vmathSoaV4GetY_V( VmathSoaVector4 vec )
+{
+    return vmathSoaV4GetY(&vec);
+}
+
+static inline void vmathSoaV4SetZ_V( VmathSoaVector4 *result, vec_float4 _z )
+{
+    vmathSoaV4SetZ(result, _z);
+}
+
+static inline vec_float4 vmathSoaV4GetZ_V( VmathSoaVector4 vec )
+{
+    return vmathSoaV4GetZ(&vec);
+}
+
+static inline void vmathSoaV4SetW_V( VmathSoaVector4 *result, vec_float4 _w )
+{
+    vmathSoaV4SetW(result, _w);
+}
+
+static inline vec_float4 vmathSoaV4GetW_V( VmathSoaVector4 vec )
+{
+    return vmathSoaV4GetW(&vec);
+}
+
+static inline void vmathSoaV4SetElem_V( VmathSoaVector4 *result, int idx, vec_float4 value )
+{
+    vmathSoaV4SetElem(result, idx, value);
+}
+
+static inline vec_float4 vmathSoaV4GetElem_V( VmathSoaVector4 vec, int idx )
+{
+    return vmathSoaV4GetElem(&vec, idx);
+}
+
+static inline VmathSoaVector4 vmathSoaV4Add_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4Add(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4Sub_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4Sub(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4ScalarMul_V( VmathSoaVector4 vec, vec_float4 scalar )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4ScalarMul(&result, &vec, scalar);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4ScalarDiv_V( VmathSoaVector4 vec, vec_float4 scalar )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4ScalarDiv(&result, &vec, scalar);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4Neg_V( VmathSoaVector4 vec )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4Neg(&result, &vec);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4MulPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4MulPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4DivPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4DivPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4RecipPerElem_V( VmathSoaVector4 vec )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4RecipPerElem(&result, &vec);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4SqrtPerElem_V( VmathSoaVector4 vec )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4SqrtPerElem(&result, &vec);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4RsqrtPerElem_V( VmathSoaVector4 vec )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4RsqrtPerElem(&result, &vec);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4AbsPerElem_V( VmathSoaVector4 vec )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4AbsPerElem(&result, &vec);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4CopySignPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4CopySignPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4MaxPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4MaxPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline vec_float4 vmathSoaV4MaxElem_V( VmathSoaVector4 vec )
+{
+    return vmathSoaV4MaxElem(&vec);
+}
+
+static inline VmathSoaVector4 vmathSoaV4MinPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4MinPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline vec_float4 vmathSoaV4MinElem_V( VmathSoaVector4 vec )
+{
+    return vmathSoaV4MinElem(&vec);
+}
+
+static inline vec_float4 vmathSoaV4Sum_V( VmathSoaVector4 vec )
+{
+    return vmathSoaV4Sum(&vec);
+}
+
+static inline vec_float4 vmathSoaV4Dot_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 )
+{
+    return vmathSoaV4Dot(&vec0, &vec1);
+}
+
+static inline vec_float4 vmathSoaV4LengthSqr_V( VmathSoaVector4 vec )
+{
+    return vmathSoaV4LengthSqr(&vec);
+}
+
+static inline vec_float4 vmathSoaV4Length_V( VmathSoaVector4 vec )
+{
+    return vmathSoaV4Length(&vec);
+}
+
+static inline VmathSoaVector4 vmathSoaV4Normalize_V( VmathSoaVector4 vec )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4Normalize(&result, &vec);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4Select_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1, vec_uint4 select1 )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4Select(&result, &vec0, &vec1, select1);
+    return result;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathSoaV4Print_V( VmathSoaVector4 vec )
+{
+    vmathSoaV4Print(&vec);
+}
+
+static inline void vmathSoaV4Prints_V( VmathSoaVector4 vec, const char *name )
+{
+    vmathSoaV4Prints(&vec, name);
+}
+
+#endif
+
+static inline VmathSoaPoint3 vmathSoaP3MakeFromElems_V( vec_float4 _x, vec_float4 _y, vec_float4 _z )
+{
+    VmathSoaPoint3 result;
+    vmathSoaP3MakeFromElems(&result, _x, _y, _z);
+    return result;
+}
+
+static inline VmathSoaPoint3 vmathSoaP3MakeFromV3_V( VmathSoaVector3 vec )
+{
+    VmathSoaPoint3 result;
+    vmathSoaP3MakeFromV3(&result, &vec);
+    return result;
+}
+
+static inline VmathSoaPoint3 vmathSoaP3MakeFromScalar_V( vec_float4 scalar )
+{
+    VmathSoaPoint3 result;
+    vmathSoaP3MakeFromScalar(&result, scalar);
+    return result;
+}
+
+static inline VmathSoaPoint3 vmathSoaP3MakeFromAos_V( VmathPoint3 pnt )
+{
+    VmathSoaPoint3 result;
+    vmathSoaP3MakeFromAos(&result, &pnt);
+    return result;
+}
+
+static inline VmathSoaPoint3 vmathSoaP3MakeFrom4Aos_V( VmathPoint3 pnt0, VmathPoint3 pnt1, VmathPoint3 pnt2, VmathPoint3 pnt3 )
+{
+    VmathSoaPoint3 result;
+    vmathSoaP3MakeFrom4Aos(&result, &pnt0, &pnt1, &pnt2, &pnt3);
+    return result;
+}
+
+static inline VmathSoaPoint3 vmathSoaP3Lerp_V( vec_float4 t, VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 )
+{
+    VmathSoaPoint3 result;
+    vmathSoaP3Lerp(&result, t, &pnt0, &pnt1);
+    return result;
+}
+
+static inline void vmathSoaP3Get4Aos_V( VmathSoaPoint3 pnt, VmathPoint3 *result0, VmathPoint3 *result1, VmathPoint3 *result2, VmathPoint3 *result3 )
+{
+    vmathSoaP3Get4Aos(&pnt, result0, result1, result2, result3);
+}
+
+static inline void vmathSoaP3LoadXYZArray_V( VmathSoaPoint3 *vec, const vec_float4 *threeQuads )
+{
+    vmathSoaP3LoadXYZArray(vec, threeQuads);
+}
+
+static inline void vmathSoaP3StoreXYZArray_V( VmathSoaPoint3 vec, vec_float4 *threeQuads )
+{
+    vmathSoaP3StoreXYZArray(&vec, threeQuads);
+}
+
+static inline void vmathSoaP3StoreHalfFloats_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1, vec_ushort8 *threeQuads )
+{
+    vmathSoaP3StoreHalfFloats(&pnt0, &pnt1, threeQuads);
+}
+
+static inline void vmathSoaP3SetX_V( VmathSoaPoint3 *result, vec_float4 _x )
+{
+    vmathSoaP3SetX(result, _x);
+}
+
+static inline vec_float4 vmathSoaP3GetX_V( VmathSoaPoint3 pnt )
+{
+    return vmathSoaP3GetX(&pnt);
+}
+
+static inline void vmathSoaP3SetY_V( VmathSoaPoint3 *result, vec_float4 _y )
+{
+    vmathSoaP3SetY(result, _y);
+}
+
+static inline vec_float4 vmathSoaP3GetY_V( VmathSoaPoint3 pnt )
+{
+    return vmathSoaP3GetY(&pnt);
+}
+
+static inline void vmathSoaP3SetZ_V( VmathSoaPoint3 *result, vec_float4 _z )
+{
+    vmathSoaP3SetZ(result, _z);
+}
+
+static inline vec_float4 vmathSoaP3GetZ_V( VmathSoaPoint3 pnt )
+{
+    return vmathSoaP3GetZ(&pnt);
+}
+
+static inline void vmathSoaP3SetElem_V( VmathSoaPoint3 *result, int idx, vec_float4 value )
+{
+    vmathSoaP3SetElem(result, idx, value);
+}
+
+static inline vec_float4 vmathSoaP3GetElem_V( VmathSoaPoint3 pnt, int idx )
+{
+    return vmathSoaP3GetElem(&pnt, idx);
+}
+
+static inline VmathSoaVector3 vmathSoaP3Sub_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 )
+{
+    VmathSoaVector3 result;
+    vmathSoaP3Sub(&result, &pnt0, &pnt1);
+    return result;
+}
+
+static inline VmathSoaPoint3 vmathSoaP3AddV3_V( VmathSoaPoint3 pnt, VmathSoaVector3 vec1 )
+{
+    VmathSoaPoint3 result;
+    vmathSoaP3AddV3(&result, &pnt, &vec1);
+    return result;
+}
+
+static inline VmathSoaPoint3 vmathSoaP3SubV3_V( VmathSoaPoint3 pnt, VmathSoaVector3 vec1 )
+{
+    VmathSoaPoint3 result;
+    vmathSoaP3SubV3(&result, &pnt, &vec1);
+    return result;
+}
+
+static inline VmathSoaPoint3 vmathSoaP3MulPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 )
+{
+    VmathSoaPoint3 result;
+    vmathSoaP3MulPerElem(&result, &pnt0, &pnt1);
+    return result;
+}
+
+static inline VmathSoaPoint3 vmathSoaP3DivPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 )
+{
+    VmathSoaPoint3 result;
+    vmathSoaP3DivPerElem(&result, &pnt0, &pnt1);
+    return result;
+}
+
+static inline VmathSoaPoint3 vmathSoaP3RecipPerElem_V( VmathSoaPoint3 pnt )
+{
+    VmathSoaPoint3 result;
+    vmathSoaP3RecipPerElem(&result, &pnt);
+    return result;
+}
+
+static inline VmathSoaPoint3 vmathSoaP3SqrtPerElem_V( VmathSoaPoint3 pnt )
+{
+    VmathSoaPoint3 result;
+    vmathSoaP3SqrtPerElem(&result, &pnt);
+    return result;
+}
+
+static inline VmathSoaPoint3 vmathSoaP3RsqrtPerElem_V( VmathSoaPoint3 pnt )
+{
+    VmathSoaPoint3 result;
+    vmathSoaP3RsqrtPerElem(&result, &pnt);
+    return result;
+}
+
+static inline VmathSoaPoint3 vmathSoaP3AbsPerElem_V( VmathSoaPoint3 pnt )
+{
+    VmathSoaPoint3 result;
+    vmathSoaP3AbsPerElem(&result, &pnt);
+    return result;
+}
+
+static inline VmathSoaPoint3 vmathSoaP3CopySignPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 )
+{
+    VmathSoaPoint3 result;
+    vmathSoaP3CopySignPerElem(&result, &pnt0, &pnt1);
+    return result;
+}
+
+static inline VmathSoaPoint3 vmathSoaP3MaxPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 )
+{
+    VmathSoaPoint3 result;
+    vmathSoaP3MaxPerElem(&result, &pnt0, &pnt1);
+    return result;
+}
+
+static inline vec_float4 vmathSoaP3MaxElem_V( VmathSoaPoint3 pnt )
+{
+    return vmathSoaP3MaxElem(&pnt);
+}
+
+static inline VmathSoaPoint3 vmathSoaP3MinPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 )
+{
+    VmathSoaPoint3 result;
+    vmathSoaP3MinPerElem(&result, &pnt0, &pnt1);
+    return result;
+}
+
+static inline vec_float4 vmathSoaP3MinElem_V( VmathSoaPoint3 pnt )
+{
+    return vmathSoaP3MinElem(&pnt);
+}
+
+static inline vec_float4 vmathSoaP3Sum_V( VmathSoaPoint3 pnt )
+{
+    return vmathSoaP3Sum(&pnt);
+}
+
+static inline VmathSoaPoint3 vmathSoaP3Scale_V( VmathSoaPoint3 pnt, vec_float4 scaleVal )
+{
+    VmathSoaPoint3 result;
+    vmathSoaP3Scale(&result, &pnt, scaleVal);
+    return result;
+}
+
+static inline VmathSoaPoint3 vmathSoaP3NonUniformScale_V( VmathSoaPoint3 pnt, VmathSoaVector3 scaleVec )
+{
+    VmathSoaPoint3 result;
+    vmathSoaP3NonUniformScale(&result, &pnt, &scaleVec);
+    return result;
+}
+
+static inline vec_float4 vmathSoaP3Projection_V( VmathSoaPoint3 pnt, VmathSoaVector3 unitVec )
+{
+    return vmathSoaP3Projection(&pnt, &unitVec);
+}
+
+static inline vec_float4 vmathSoaP3DistSqrFromOrigin_V( VmathSoaPoint3 pnt )
+{
+    return vmathSoaP3DistSqrFromOrigin(&pnt);
+}
+
+static inline vec_float4 vmathSoaP3DistFromOrigin_V( VmathSoaPoint3 pnt )
+{
+    return vmathSoaP3DistFromOrigin(&pnt);
+}
+
+static inline vec_float4 vmathSoaP3DistSqr_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 )
+{
+    return vmathSoaP3DistSqr(&pnt0, &pnt1);
+}
+
+static inline vec_float4 vmathSoaP3Dist_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 )
+{
+    return vmathSoaP3Dist(&pnt0, &pnt1);
+}
+
+static inline VmathSoaPoint3 vmathSoaP3Select_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1, vec_uint4 select1 )
+{
+    VmathSoaPoint3 result;
+    vmathSoaP3Select(&result, &pnt0, &pnt1, select1);
+    return result;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathSoaP3Print_V( VmathSoaPoint3 pnt )
+{
+    vmathSoaP3Print(&pnt);
+}
+
+static inline void vmathSoaP3Prints_V( VmathSoaPoint3 pnt, const char *name )
+{
+    vmathSoaP3Prints(&pnt, name);
+}
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/ppu/c/vec_types.h b/Extras/vectormathlibrary/include/vectormath/ppu/c/vec_types.h
new file mode 100644
index 000000000..932fb6b4f
--- /dev/null
+++ b/Extras/vectormathlibrary/include/vectormath/ppu/c/vec_types.h
@@ -0,0 +1,55 @@
+/* (C) Copyright
+   Sony Computer Entertainment, Inc.,
+   2001,2002,2003,2004,2005,2006,2007.
+
+   This file is free software; you can redistribute it and/or modify it under
+   the terms of the GNU General Public License as published by the Free
+   Software Foundation; either version 2 of the License, or (at your option) 
+   any later version.
+
+   This file is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+   for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this file; see the file COPYING.  If not, write to the Free
+   Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
+   02110-1301, USA.  */
+
+/* As a special exception, if you include this header file into source files 
+   compiled by GCC, this header file does not by itself cause  the resulting 
+   executable to be covered by the GNU General Public License.  This exception 
+   does not however invalidate any other reasons why the executable file might be 
+   covered by the GNU General Public License.  */ 
+
+/* Single token vector data types for the PowerPC SIMD/Vector Multi-media 
+   eXtension */
+
+#ifndef _VECTORMATH_VEC_TYPES_H_
+#define _VECTORMATH_VEC_TYPES_H_	1
+
+#define qword		__vector unsigned char
+
+#define vec_uchar16	__vector unsigned char
+#define vec_char16	__vector signed char
+#define vec_bchar16	__vector bool char
+
+#define vec_ushort8	__vector unsigned short
+#define vec_short8	__vector signed short
+#define vec_bshort8	__vector bool short
+
+#define vec_pixel8	__vector pixel
+
+#define vec_uint4	__vector unsigned int
+#define vec_int4	__vector signed int
+#define vec_bint4	__vector bool int
+
+#define vec_float4	__vector float
+
+#define vec_ullong2	__vector bool char	/* same expansion as vec_bchar16 above */
+#define vec_llong2	__vector bool short	/* same expansion as vec_bshort8 above */
+/* NOTE(review): vec_ullong2, vec_llong2 and vec_double2 expand to bool vector types, not 64-bit-element vectors; presumably placeholders so shared code still compiles -- TODO confirm before relying on element width. */
+#define vec_double2	__vector bool int	/* same expansion as vec_bint4 above */
+
+#endif /* _VECTORMATH_VEC_TYPES_H_ */
diff --git a/Extras/vectormathlibrary/include/vectormath/ppu/c/vectormath_aos.h b/Extras/vectormathlibrary/include/vectormath/ppu/c/vectormath_aos.h
index 025e6fe88..119e2d292 100644
--- a/Extras/vectormathlibrary/include/vectormath/ppu/c/vectormath_aos.h
+++ b/Extras/vectormathlibrary/include/vectormath/ppu/c/vectormath_aos.h
@@ -1,1960 +1,1960 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_AOS_C_H
-#define _VECTORMATH_AOS_C_H
-
-#include <math.h>
-#include <altivec.h>
-#include <vec_types.h>
-#include <simdmath.h>
-
-#ifdef _VECTORMATH_DEBUG
-#include <stdio.h>
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-#ifndef _VECTORMATH_AOS_C_TYPES_H
-#define _VECTORMATH_AOS_C_TYPES_H
-
-/* A 3-D vector in array-of-structures format
- */
-typedef struct _VmathVector3
-{
-    vec_float4 vec128;
-} VmathVector3;
-
-/* A 4-D vector in array-of-structures format
- */
-typedef struct _VmathVector4
-{
-    vec_float4 vec128;
-} VmathVector4;
-
-/* A 3-D point in array-of-structures format
- */
-typedef struct _VmathPoint3
-{
-    vec_float4 vec128;
-} VmathPoint3;
-
-/* A quaternion in array-of-structures format
- */
-typedef struct _VmathQuat
-{
-    vec_float4 vec128;
-} VmathQuat;
-
-/* A 3x3 matrix in array-of-structures format
- */
-typedef struct _VmathMatrix3
-{
-    VmathVector3 col0;
-    VmathVector3 col1;
-    VmathVector3 col2;
-} VmathMatrix3;
-
-/* A 4x4 matrix in array-of-structures format
- */
-typedef struct _VmathMatrix4
-{
-    VmathVector4 col0;
-    VmathVector4 col1;
-    VmathVector4 col2;
-    VmathVector4 col3;
-} VmathMatrix4;
-
-/* A 3x4 transformation matrix in array-of-structures format
- */
-typedef struct _VmathTransform3
-{
-    VmathVector3 col0;
-    VmathVector3 col1;
-    VmathVector3 col2;
-    VmathVector3 col3;
-} VmathTransform3;
-
-#endif
-
-/*
- * Copy a 3-D vector
- */
-static inline void vmathV3Copy( VmathVector3 *result, const VmathVector3 *vec );
-
-/*
- * Construct a 3-D vector from x, y, and z elements
- */
-static inline void vmathV3MakeFromElems( VmathVector3 *result, float x, float y, float z );
-
-/*
- * Copy elements from a 3-D point into a 3-D vector
- */
-static inline void vmathV3MakeFromP3( VmathVector3 *result, const VmathPoint3 *pnt );
-
-/*
- * Set all elements of a 3-D vector to the same scalar value
- */
-static inline void vmathV3MakeFromScalar( VmathVector3 *result, float scalar );
-
-/*
- * Set vector float data in a 3-D vector
- */
-static inline void vmathV3MakeFrom128( VmathVector3 *result, vec_float4 vf4 );
-
-/*
- * Get vector float data from a 3-D vector
- */
-static inline vec_float4 vmathV3Get128( const VmathVector3 *vec );
-
-/*
- * Set the x element of a 3-D vector
- */
-static inline void vmathV3SetX( VmathVector3 *result, float x );
-
-/*
- * Set the y element of a 3-D vector
- */
-static inline void vmathV3SetY( VmathVector3 *result, float y );
-
-/*
- * Set the z element of a 3-D vector
- */
-static inline void vmathV3SetZ( VmathVector3 *result, float z );
-
-/*
- * Get the x element of a 3-D vector
- */
-static inline float vmathV3GetX( const VmathVector3 *vec );
-
-/*
- * Get the y element of a 3-D vector
- */
-static inline float vmathV3GetY( const VmathVector3 *vec );
-
-/*
- * Get the z element of a 3-D vector
- */
-static inline float vmathV3GetZ( const VmathVector3 *vec );
-
-/*
- * Set an x, y, or z element of a 3-D vector by index
- */
-static inline void vmathV3SetElem( VmathVector3 *result, int idx, float value );
-
-/*
- * Get an x, y, or z element of a 3-D vector by index
- */
-static inline float vmathV3GetElem( const VmathVector3 *vec, int idx );
-
-/*
- * Add two 3-D vectors
- */
-static inline void vmathV3Add( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
-
-/*
- * Subtract a 3-D vector from another 3-D vector
- */
-static inline void vmathV3Sub( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
-
-/*
- * Add a 3-D vector to a 3-D point
- */
-static inline void vmathV3AddP3( VmathPoint3 *result, const VmathVector3 *vec, const VmathPoint3 *pnt );
-
-/*
- * Multiply a 3-D vector by a scalar
- */
-static inline void vmathV3ScalarMul( VmathVector3 *result, const VmathVector3 *vec, float scalar );
-
-/*
- * Divide a 3-D vector by a scalar
- */
-static inline void vmathV3ScalarDiv( VmathVector3 *result, const VmathVector3 *vec, float scalar );
-
-/*
- * Negate all elements of a 3-D vector
- */
-static inline void vmathV3Neg( VmathVector3 *result, const VmathVector3 *vec );
-
-/*
- * Construct x axis
- */
-static inline void vmathV3MakeXAxis( VmathVector3 *result );
-
-/*
- * Construct y axis
- */
-static inline void vmathV3MakeYAxis( VmathVector3 *result );
-
-/*
- * Construct z axis
- */
-static inline void vmathV3MakeZAxis( VmathVector3 *result );
-
-/*
- * Multiply two 3-D vectors per element
- */
-static inline void vmathV3MulPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
-
-/*
- * Divide two 3-D vectors per element
- * NOTE: 
- * Floating-point behavior matches standard library function divf4.
- */
-static inline void vmathV3DivPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
-
-/*
- * Compute the reciprocal of a 3-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function recipf4.
- */
-static inline void vmathV3RecipPerElem( VmathVector3 *result, const VmathVector3 *vec );
-
-/*
- * Compute the square root of a 3-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function sqrtf4.
- */
-static inline void vmathV3SqrtPerElem( VmathVector3 *result, const VmathVector3 *vec );
-
-/*
- * Compute the reciprocal square root of a 3-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function rsqrtf4.
- */
-static inline void vmathV3RsqrtPerElem( VmathVector3 *result, const VmathVector3 *vec );
-
-/*
- * Compute the absolute value of a 3-D vector per element
- */
-static inline void vmathV3AbsPerElem( VmathVector3 *result, const VmathVector3 *vec );
-
-/*
- * Copy sign from one 3-D vector to another, per element
- */
-static inline void vmathV3CopySignPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
-
-/*
- * Maximum of two 3-D vectors per element
- */
-static inline void vmathV3MaxPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
-
-/*
- * Minimum of two 3-D vectors per element
- */
-static inline void vmathV3MinPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
-
-/*
- * Maximum element of a 3-D vector
- */
-static inline float vmathV3MaxElem( const VmathVector3 *vec );
-
-/*
- * Minimum element of a 3-D vector
- */
-static inline float vmathV3MinElem( const VmathVector3 *vec );
-
-/*
- * Compute the sum of all elements of a 3-D vector
- */
-static inline float vmathV3Sum( const VmathVector3 *vec );
-
-/*
- * Compute the dot product of two 3-D vectors
- */
-static inline float vmathV3Dot( const VmathVector3 *vec0, const VmathVector3 *vec1 );
-
-/*
- * Compute the square of the length of a 3-D vector
- */
-static inline float vmathV3LengthSqr( const VmathVector3 *vec );
-
-/*
- * Compute the length of a 3-D vector
- */
-static inline float vmathV3Length( const VmathVector3 *vec );
-
-/*
- * Normalize a 3-D vector
- * NOTE: 
- * The result is unpredictable when all elements of vec are at or near zero.
- */
-static inline void vmathV3Normalize( VmathVector3 *result, const VmathVector3 *vec );
-
-/*
- * Compute cross product of two 3-D vectors
- */
-static inline void vmathV3Cross( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
-
-/*
- * Outer product of two 3-D vectors
- */
-static inline void vmathV3Outer( VmathMatrix3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
-
-/*
- * Pre-multiply a row vector by a 3x3 matrix
- * NOTE: 
- * Slower than column post-multiply.
- */
-static inline void vmathV3RowMul( VmathVector3 *result, const VmathVector3 *vec, const VmathMatrix3 *mat );
-
-/*
- * Cross-product matrix of a 3-D vector
- */
-static inline void vmathV3CrossMatrix( VmathMatrix3 *result, const VmathVector3 *vec );
-
-/*
- * Create cross-product matrix and multiply
- * NOTE: 
- * Faster than separately creating a cross-product matrix and multiplying.
- */
-static inline void vmathV3CrossMatrixMul( VmathMatrix3 *result, const VmathVector3 *vec, const VmathMatrix3 *mat );
-
-/*
- * Linear interpolation between two 3-D vectors
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline void vmathV3Lerp( VmathVector3 *result, float t, const VmathVector3 *vec0, const VmathVector3 *vec1 );
-
-/*
- * Spherical linear interpolation between two 3-D vectors
- * NOTE: 
- * The result is unpredictable if the vectors point in opposite directions.
- * Does not clamp t between 0 and 1.
- */
-static inline void vmathV3Slerp( VmathVector3 *result, float t, const VmathVector3 *unitVec0, const VmathVector3 *unitVec1 );
-
-/*
- * Conditionally select between two 3-D vectors
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- * However, the transfer of select1 to a VMX register may use more processing time than a branch.
- */
-static inline void vmathV3Select( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1, unsigned int select1 );
-
-/*
- * Store x, y, and z elements of a 3-D vector in the first three words of a quadword.
- * The value of the fourth word (the word with the highest address) remains unchanged
- */
-static inline void vmathV3StoreXYZ( const VmathVector3 *vec, vec_float4 *quad );
-
-/*
- * Load four three-float 3-D vectors, stored in three quadwords
- */
-static inline void vmathV3LoadXYZArray( VmathVector3 *vec0, VmathVector3 *vec1, VmathVector3 *vec2, VmathVector3 *vec3, const vec_float4 *threeQuads );
-
-/*
- * Store four 3-D vectors in three quadwords
- */
-static inline void vmathV3StoreXYZArray( const VmathVector3 *vec0, const VmathVector3 *vec1, const VmathVector3 *vec2, const VmathVector3 *vec3, vec_float4 *threeQuads );
-
-/*
- * Store eight 3-D vectors as half-floats
- */
-static inline void vmathV3StoreHalfFloats( const VmathVector3 *vec0, const VmathVector3 *vec1, const VmathVector3 *vec2, const VmathVector3 *vec3, const VmathVector3 *vec4, const VmathVector3 *vec5, const VmathVector3 *vec6, const VmathVector3 *vec7, vec_ushort8 *threeQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3-D vector
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathV3Print( const VmathVector3 *vec );
-
-/*
- * Print a 3-D vector and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathV3Prints( const VmathVector3 *vec, const char *name );
-
-#endif
-
-/*
- * Copy a 4-D vector
- */
-static inline void vmathV4Copy( VmathVector4 *result, const VmathVector4 *vec );
-
-/*
- * Construct a 4-D vector from x, y, z, and w elements
- */
-static inline void vmathV4MakeFromElems( VmathVector4 *result, float x, float y, float z, float w );
-
-/*
- * Construct a 4-D vector from a 3-D vector and a scalar
- */
-static inline void vmathV4MakeFromV3Scalar( VmathVector4 *result, const VmathVector3 *xyz, float w );
-
-/*
- * Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0
- */
-static inline void vmathV4MakeFromV3( VmathVector4 *result, const VmathVector3 *vec );
-
-/*
- * Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1
- */
-static inline void vmathV4MakeFromP3( VmathVector4 *result, const VmathPoint3 *pnt );
-
-/*
- * Copy elements from a quaternion into a 4-D vector
- */
-static inline void vmathV4MakeFromQ( VmathVector4 *result, const VmathQuat *quat );
-
-/*
- * Set all elements of a 4-D vector to the same scalar value
- */
-static inline void vmathV4MakeFromScalar( VmathVector4 *result, float scalar );
-
-/*
- * Set vector float data in a 4-D vector
- */
-static inline void vmathV4MakeFrom128( VmathVector4 *result, vec_float4 vf4 );
-
-/*
- * Get vector float data from a 4-D vector
- */
-static inline vec_float4 vmathV4Get128( const VmathVector4 *vec );
-
-/*
- * Set the x, y, and z elements of a 4-D vector
- * NOTE: 
- * This function does not change the w element.
- */
-static inline void vmathV4SetXYZ( VmathVector4 *result, const VmathVector3 *vec );
-
-/*
- * Get the x, y, and z elements of a 4-D vector
- */
-static inline void vmathV4GetXYZ( VmathVector3 *result, const VmathVector4 *vec );
-
-/*
- * Set the x element of a 4-D vector
- */
-static inline void vmathV4SetX( VmathVector4 *result, float x );
-
-/*
- * Set the y element of a 4-D vector
- */
-static inline void vmathV4SetY( VmathVector4 *result, float y );
-
-/*
- * Set the z element of a 4-D vector
- */
-static inline void vmathV4SetZ( VmathVector4 *result, float z );
-
-/*
- * Set the w element of a 4-D vector
- */
-static inline void vmathV4SetW( VmathVector4 *result, float w );
-
-/*
- * Get the x element of a 4-D vector
- */
-static inline float vmathV4GetX( const VmathVector4 *vec );
-
-/*
- * Get the y element of a 4-D vector
- */
-static inline float vmathV4GetY( const VmathVector4 *vec );
-
-/*
- * Get the z element of a 4-D vector
- */
-static inline float vmathV4GetZ( const VmathVector4 *vec );
-
-/*
- * Get the w element of a 4-D vector
- */
-static inline float vmathV4GetW( const VmathVector4 *vec );
-
-/*
- * Set an x, y, z, or w element of a 4-D vector by index
- */
-static inline void vmathV4SetElem( VmathVector4 *result, int idx, float value );
-
-/*
- * Get an x, y, z, or w element of a 4-D vector by index
- */
-static inline float vmathV4GetElem( const VmathVector4 *vec, int idx );
-
-/*
- * Add two 4-D vectors
- */
-static inline void vmathV4Add( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
-
-/*
- * Subtract a 4-D vector from another 4-D vector
- */
-static inline void vmathV4Sub( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
-
-/*
- * Multiply a 4-D vector by a scalar
- */
-static inline void vmathV4ScalarMul( VmathVector4 *result, const VmathVector4 *vec, float scalar );
-
-/*
- * Divide a 4-D vector by a scalar
- */
-static inline void vmathV4ScalarDiv( VmathVector4 *result, const VmathVector4 *vec, float scalar );
-
-/*
- * Negate all elements of a 4-D vector
- */
-static inline void vmathV4Neg( VmathVector4 *result, const VmathVector4 *vec );
-
-/*
- * Construct x axis
- */
-static inline void vmathV4MakeXAxis( VmathVector4 *result );
-
-/*
- * Construct y axis
- */
-static inline void vmathV4MakeYAxis( VmathVector4 *result );
-
-/*
- * Construct z axis
- */
-static inline void vmathV4MakeZAxis( VmathVector4 *result );
-
-/*
- * Construct w axis
- */
-static inline void vmathV4MakeWAxis( VmathVector4 *result );
-
-/*
- * Multiply two 4-D vectors per element
- */
-static inline void vmathV4MulPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
-
-/*
- * Divide two 4-D vectors per element
- * NOTE: 
- * Floating-point behavior matches standard library function divf4.
- */
-static inline void vmathV4DivPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
-
-/*
- * Compute the reciprocal of a 4-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function recipf4.
- */
-static inline void vmathV4RecipPerElem( VmathVector4 *result, const VmathVector4 *vec );
-
-/*
- * Compute the square root of a 4-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function sqrtf4.
- */
-static inline void vmathV4SqrtPerElem( VmathVector4 *result, const VmathVector4 *vec );
-
-/*
- * Compute the reciprocal square root of a 4-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function rsqrtf4.
- */
-static inline void vmathV4RsqrtPerElem( VmathVector4 *result, const VmathVector4 *vec );
-
-/*
- * Compute the absolute value of a 4-D vector per element
- */
-static inline void vmathV4AbsPerElem( VmathVector4 *result, const VmathVector4 *vec );
-
-/*
- * Copy sign from one 4-D vector to another, per element
- */
-static inline void vmathV4CopySignPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
-
-/*
- * Maximum of two 4-D vectors per element
- */
-static inline void vmathV4MaxPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
-
-/*
- * Minimum of two 4-D vectors per element
- */
-static inline void vmathV4MinPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
-
-/*
- * Maximum element of a 4-D vector
- */
-static inline float vmathV4MaxElem( const VmathVector4 *vec );
-
-/*
- * Minimum element of a 4-D vector
- */
-static inline float vmathV4MinElem( const VmathVector4 *vec );
-
-/*
- * Compute the sum of all elements of a 4-D vector
- */
-static inline float vmathV4Sum( const VmathVector4 *vec );
-
-/*
- * Compute the dot product of two 4-D vectors
- */
-static inline float vmathV4Dot( const VmathVector4 *vec0, const VmathVector4 *vec1 );
-
-/*
- * Compute the square of the length of a 4-D vector
- */
-static inline float vmathV4LengthSqr( const VmathVector4 *vec );
-
-/*
- * Compute the length of a 4-D vector
- */
-static inline float vmathV4Length( const VmathVector4 *vec );
-
-/*
- * Normalize a 4-D vector
- * NOTE: 
- * The result is unpredictable when all elements of vec are at or near zero.
- */
-static inline void vmathV4Normalize( VmathVector4 *result, const VmathVector4 *vec );
-
-/*
- * Outer product of two 4-D vectors
- */
-static inline void vmathV4Outer( VmathMatrix4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
-
-/*
- * Linear interpolation between two 4-D vectors
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline void vmathV4Lerp( VmathVector4 *result, float t, const VmathVector4 *vec0, const VmathVector4 *vec1 );
-
-/*
- * Spherical linear interpolation between two 4-D vectors
- * NOTE: 
- * The result is unpredictable if the vectors point in opposite directions.
- * Does not clamp t between 0 and 1.
- */
-static inline void vmathV4Slerp( VmathVector4 *result, float t, const VmathVector4 *unitVec0, const VmathVector4 *unitVec1 );
-
-/*
- * Conditionally select between two 4-D vectors
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- * However, the transfer of select1 to a VMX register may use more processing time than a branch.
- */
-static inline void vmathV4Select( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1, unsigned int select1 );
-
-/*
- * Store four 4-D vectors as half-floats
- */
-static inline void vmathV4StoreHalfFloats( const VmathVector4 *vec0, const VmathVector4 *vec1, const VmathVector4 *vec2, const VmathVector4 *vec3, vec_ushort8 *twoQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 4-D vector
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathV4Print( const VmathVector4 *vec );
-
-/*
- * Print a 4-D vector and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathV4Prints( const VmathVector4 *vec, const char *name );
-
-#endif
-
-/*
- * Copy a 3-D point
- */
-static inline void vmathP3Copy( VmathPoint3 *result, const VmathPoint3 *pnt );
-
-/*
- * Construct a 3-D point from x, y, and z elements
- */
-static inline void vmathP3MakeFromElems( VmathPoint3 *result, float x, float y, float z );
-
-/*
- * Copy elements from a 3-D vector into a 3-D point
- */
-static inline void vmathP3MakeFromV3( VmathPoint3 *result, const VmathVector3 *vec );
-
-/*
- * Set all elements of a 3-D point to the same scalar value
- */
-static inline void vmathP3MakeFromScalar( VmathPoint3 *result, float scalar );
-
-/*
- * Set vector float data in a 3-D point
- */
-static inline void vmathP3MakeFrom128( VmathPoint3 *result, vec_float4 vf4 );
-
-/*
- * Get vector float data from a 3-D point
- */
-static inline vec_float4 vmathP3Get128( const VmathPoint3 *pnt );
-
-/*
- * Set the x element of a 3-D point
- */
-static inline void vmathP3SetX( VmathPoint3 *result, float x );
-
-/*
- * Set the y element of a 3-D point
- */
-static inline void vmathP3SetY( VmathPoint3 *result, float y );
-
-/*
- * Set the z element of a 3-D point
- */
-static inline void vmathP3SetZ( VmathPoint3 *result, float z );
-
-/*
- * Get the x element of a 3-D point
- */
-static inline float vmathP3GetX( const VmathPoint3 *pnt );
-
-/*
- * Get the y element of a 3-D point
- */
-static inline float vmathP3GetY( const VmathPoint3 *pnt );
-
-/*
- * Get the z element of a 3-D point
- */
-static inline float vmathP3GetZ( const VmathPoint3 *pnt );
-
-/*
- * Set an x, y, or z element of a 3-D point by index
- */
-static inline void vmathP3SetElem( VmathPoint3 *result, int idx, float value );
-
-/*
- * Get an x, y, or z element of a 3-D point by index
- */
-static inline float vmathP3GetElem( const VmathPoint3 *pnt, int idx );
-
-/*
- * Subtract a 3-D point from another 3-D point
- */
-static inline void vmathP3Sub( VmathVector3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
-
-/*
- * Add a 3-D point to a 3-D vector
- */
-static inline void vmathP3AddV3( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *vec );
-
-/*
- * Subtract a 3-D vector from a 3-D point
- */
-static inline void vmathP3SubV3( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *vec );
-
-/*
- * Multiply two 3-D points per element
- */
-static inline void vmathP3MulPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
-
-/*
- * Divide two 3-D points per element
- * NOTE: 
- * Floating-point behavior matches standard library function divf4.
- */
-static inline void vmathP3DivPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
-
-/*
- * Compute the reciprocal of a 3-D point per element
- * NOTE: 
- * Floating-point behavior matches standard library function recipf4.
- */
-static inline void vmathP3RecipPerElem( VmathPoint3 *result, const VmathPoint3 *pnt );
-
-/*
- * Compute the square root of a 3-D point per element
- * NOTE: 
- * Floating-point behavior matches standard library function sqrtf4.
- */
-static inline void vmathP3SqrtPerElem( VmathPoint3 *result, const VmathPoint3 *pnt );
-
-/*
- * Compute the reciprocal square root of a 3-D point per element
- * NOTE: 
- * Floating-point behavior matches standard library function rsqrtf4.
- */
-static inline void vmathP3RsqrtPerElem( VmathPoint3 *result, const VmathPoint3 *pnt );
-
-/*
- * Compute the absolute value of a 3-D point per element
- */
-static inline void vmathP3AbsPerElem( VmathPoint3 *result, const VmathPoint3 *pnt );
-
-/*
- * Copy sign from one 3-D point to another, per element
- */
-static inline void vmathP3CopySignPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
-
-/*
- * Maximum of two 3-D points per element
- */
-static inline void vmathP3MaxPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
-
-/*
- * Minimum of two 3-D points per element
- */
-static inline void vmathP3MinPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
-
-/*
- * Maximum element of a 3-D point
- */
-static inline float vmathP3MaxElem( const VmathPoint3 *pnt );
-
-/*
- * Minimum element of a 3-D point
- */
-static inline float vmathP3MinElem( const VmathPoint3 *pnt );
-
-/*
- * Compute the sum of all elements of a 3-D point
- */
-static inline float vmathP3Sum( const VmathPoint3 *pnt );
-
-/*
- * Apply uniform scale to a 3-D point
- */
-static inline void vmathP3Scale( VmathPoint3 *result, const VmathPoint3 *pnt, float scaleVal );
-
-/*
- * Apply non-uniform scale to a 3-D point
- */
-static inline void vmathP3NonUniformScale( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *scaleVec );
-
-/*
- * Scalar projection of a 3-D point on a unit-length 3-D vector
- */
-static inline float vmathP3Projection( const VmathPoint3 *pnt, const VmathVector3 *unitVec );
-
-/*
- * Compute the square of the distance of a 3-D point from the coordinate-system origin
- */
-static inline float vmathP3DistSqrFromOrigin( const VmathPoint3 *pnt );
-
-/*
- * Compute the distance of a 3-D point from the coordinate-system origin
- */
-static inline float vmathP3DistFromOrigin( const VmathPoint3 *pnt );
-
-/*
- * Compute the square of the distance between two 3-D points
- */
-static inline float vmathP3DistSqr( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
-
-/*
- * Compute the distance between two 3-D points
- */
-static inline float vmathP3Dist( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
-
-/*
- * Linear interpolation between two 3-D points
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline void vmathP3Lerp( VmathPoint3 *result, float t, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
-
-/*
- * Conditionally select between two 3-D points
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- * However, the transfer of select1 to a VMX register may use more processing time than a branch.
- */
-static inline void vmathP3Select( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, unsigned int select1 );
-
-/*
- * Store x, y, and z elements of a 3-D point in the first three words of a quadword.
- * The value of the fourth word (the word with the highest address) remains unchanged
- */
-static inline void vmathP3StoreXYZ( const VmathPoint3 *pnt, vec_float4 *quad );
-
-/*
- * Load four three-float 3-D points, stored in three quadwords
- */
-static inline void vmathP3LoadXYZArray( VmathPoint3 *pnt0, VmathPoint3 *pnt1, VmathPoint3 *pnt2, VmathPoint3 *pnt3, const vec_float4 *threeQuads );
-
-/*
- * Store four 3-D points in three quadwords
- */
-static inline void vmathP3StoreXYZArray( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, const VmathPoint3 *pnt2, const VmathPoint3 *pnt3, vec_float4 *threeQuads );
-
-/*
- * Store eight 3-D points as half-floats
- */
-static inline void vmathP3StoreHalfFloats( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, const VmathPoint3 *pnt2, const VmathPoint3 *pnt3, const VmathPoint3 *pnt4, const VmathPoint3 *pnt5, const VmathPoint3 *pnt6, const VmathPoint3 *pnt7, vec_ushort8 *threeQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3-D point
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathP3Print( const VmathPoint3 *pnt );
-
-/*
- * Print a 3-D point and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathP3Prints( const VmathPoint3 *pnt, const char *name );
-
-#endif
-
-/*
- * Copy a quaternion
- */
-static inline void vmathQCopy( VmathQuat *result, const VmathQuat *quat );
-
-/*
- * Construct a quaternion from x, y, z, and w elements
- */
-static inline void vmathQMakeFromElems( VmathQuat *result, float x, float y, float z, float w );
-
-/*
- * Construct a quaternion from a 3-D vector and a scalar
- */
-static inline void vmathQMakeFromV3Scalar( VmathQuat *result, const VmathVector3 *xyz, float w );
-
-/*
- * Copy elements from a 4-D vector into a quaternion
- */
-static inline void vmathQMakeFromV4( VmathQuat *result, const VmathVector4 *vec );
-
-/*
- * Convert a rotation matrix to a unit-length quaternion
- */
-static inline void vmathQMakeFromM3( VmathQuat *result, const VmathMatrix3 *rotMat );
-
-/*
- * Set all elements of a quaternion to the same scalar value
- */
-static inline void vmathQMakeFromScalar( VmathQuat *result, float scalar );
-
-/*
- * Set vector float data in a quaternion
- */
-static inline void vmathQMakeFrom128( VmathQuat *result, vec_float4 vf4 );
-
-/*
- * Get vector float data from a quaternion
- */
-static inline vec_float4 vmathQGet128( const VmathQuat *quat );
-
-/*
- * Set the x, y, and z elements of a quaternion
- * NOTE: 
- * This function does not change the w element.
- */
-static inline void vmathQSetXYZ( VmathQuat *result, const VmathVector3 *vec );
-
-/*
- * Get the x, y, and z elements of a quaternion
- */
-static inline void vmathQGetXYZ( VmathVector3 *result, const VmathQuat *quat );
-
-/*
- * Set the x element of a quaternion
- */
-static inline void vmathQSetX( VmathQuat *result, float x );
-
-/*
- * Set the y element of a quaternion
- */
-static inline void vmathQSetY( VmathQuat *result, float y );
-
-/*
- * Set the z element of a quaternion
- */
-static inline void vmathQSetZ( VmathQuat *result, float z );
-
-/*
- * Set the w element of a quaternion
- */
-static inline void vmathQSetW( VmathQuat *result, float w );
-
-/*
- * Get the x element of a quaternion
- */
-static inline float vmathQGetX( const VmathQuat *quat );
-
-/*
- * Get the y element of a quaternion
- */
-static inline float vmathQGetY( const VmathQuat *quat );
-
-/*
- * Get the z element of a quaternion
- */
-static inline float vmathQGetZ( const VmathQuat *quat );
-
-/*
- * Get the w element of a quaternion
- */
-static inline float vmathQGetW( const VmathQuat *quat );
-
-/*
- * Set an x, y, z, or w element of a quaternion by index
- */
-static inline void vmathQSetElem( VmathQuat *result, int idx, float value );
-
-/*
- * Get an x, y, z, or w element of a quaternion by index
- */
-static inline float vmathQGetElem( const VmathQuat *quat, int idx );
-
-/*
- * Add two quaternions
- */
-static inline void vmathQAdd( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1 );
-
-/*
- * Subtract a quaternion from another quaternion
- */
-static inline void vmathQSub( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1 );
-
-/*
- * Multiply two quaternions
- */
-static inline void vmathQMul( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1 );
-
-/*
- * Multiply a quaternion by a scalar
- */
-static inline void vmathQScalarMul( VmathQuat *result, const VmathQuat *quat, float scalar );
-
-/*
- * Divide a quaternion by a scalar
- */
-static inline void vmathQScalarDiv( VmathQuat *result, const VmathQuat *quat, float scalar );
-
-/*
- * Negate all elements of a quaternion
- */
-static inline void vmathQNeg( VmathQuat *result, const VmathQuat *quat );
-
-/*
- * Construct an identity quaternion
- */
-static inline void vmathQMakeIdentity( VmathQuat *result );
-
-/*
- * Construct a quaternion to rotate between two unit-length 3-D vectors
- * NOTE: 
- * The result is unpredictable if unitVec0 and unitVec1 point in opposite directions.
- */
-static inline void vmathQMakeRotationArc( VmathQuat *result, const VmathVector3 *unitVec0, const VmathVector3 *unitVec1 );
-
-/*
- * Construct a quaternion to rotate around a unit-length 3-D vector
- */
-static inline void vmathQMakeRotationAxis( VmathQuat *result, float radians, const VmathVector3 *unitVec );
-
-/*
- * Construct a quaternion to rotate around the x axis
- */
-static inline void vmathQMakeRotationX( VmathQuat *result, float radians );
-
-/*
- * Construct a quaternion to rotate around the y axis
- */
-static inline void vmathQMakeRotationY( VmathQuat *result, float radians );
-
-/*
- * Construct a quaternion to rotate around the z axis
- */
-static inline void vmathQMakeRotationZ( VmathQuat *result, float radians );
-
-/*
- * Compute the conjugate of a quaternion
- */
-static inline void vmathQConj( VmathQuat *result, const VmathQuat *quat );
-
-/*
- * Use a unit-length quaternion to rotate a 3-D vector
- */
-static inline void vmathQRotate( VmathVector3 *result, const VmathQuat *unitQuat, const VmathVector3 *vec );
-
-/*
- * Compute the dot product of two quaternions
- */
-static inline float vmathQDot( const VmathQuat *quat0, const VmathQuat *quat1 );
-
-/*
- * Compute the norm of a quaternion
- */
-static inline float vmathQNorm( const VmathQuat *quat );
-
-/*
- * Compute the length of a quaternion
- */
-static inline float vmathQLength( const VmathQuat *quat );
-
-/*
- * Normalize a quaternion
- * NOTE: 
- * The result is unpredictable when all elements of quat are at or near zero.
- */
-static inline void vmathQNormalize( VmathQuat *result, const VmathQuat *quat );
-
-/*
- * Linear interpolation between two quaternions
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline void vmathQLerp( VmathQuat *result, float t, const VmathQuat *quat0, const VmathQuat *quat1 );
-
-/*
- * Spherical linear interpolation between two quaternions
- * NOTE: 
- * Interpolates along the shortest path between orientations.
- * Does not clamp t between 0 and 1.
- */
-static inline void vmathQSlerp( VmathQuat *result, float t, const VmathQuat *unitQuat0, const VmathQuat *unitQuat1 );
-
-/*
- * Spherical quadrangle interpolation
- */
-static inline void vmathQSquad( VmathQuat *result, float t, const VmathQuat *unitQuat0, const VmathQuat *unitQuat1, const VmathQuat *unitQuat2, const VmathQuat *unitQuat3 );
-
-/*
- * Conditionally select between two quaternions
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- * However, the transfer of select1 to a VMX register may use more processing time than a branch.
- */
-static inline void vmathQSelect( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1, unsigned int select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a quaternion
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathQPrint( const VmathQuat *quat );
-
-/*
- * Print a quaternion and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathQPrints( const VmathQuat *quat, const char *name );
-
-#endif
-
-/*
- * Copy a 3x3 matrix
- */
-static inline void vmathM3Copy( VmathMatrix3 *result, const VmathMatrix3 *mat );
-
-/*
- * Construct a 3x3 matrix containing the specified columns
- */
-static inline void vmathM3MakeFromCols( VmathMatrix3 *result, const VmathVector3 *col0, const VmathVector3 *col1, const VmathVector3 *col2 );
-
-/*
- * Construct a 3x3 rotation matrix from a unit-length quaternion
- */
-static inline void vmathM3MakeFromQ( VmathMatrix3 *result, const VmathQuat *unitQuat );
-
-/*
- * Set all elements of a 3x3 matrix to the same scalar value
- */
-static inline void vmathM3MakeFromScalar( VmathMatrix3 *result, float scalar );
-
-/*
- * Set column 0 of a 3x3 matrix
- */
-static inline void vmathM3SetCol0( VmathMatrix3 *result, const VmathVector3 *col0 );
-
-/*
- * Set column 1 of a 3x3 matrix
- */
-static inline void vmathM3SetCol1( VmathMatrix3 *result, const VmathVector3 *col1 );
-
-/*
- * Set column 2 of a 3x3 matrix
- */
-static inline void vmathM3SetCol2( VmathMatrix3 *result, const VmathVector3 *col2 );
-
-/*
- * Get column 0 of a 3x3 matrix
- */
-static inline void vmathM3GetCol0( VmathVector3 *result, const VmathMatrix3 *mat );
-
-/*
- * Get column 1 of a 3x3 matrix
- */
-static inline void vmathM3GetCol1( VmathVector3 *result, const VmathMatrix3 *mat );
-
-/*
- * Get column 2 of a 3x3 matrix
- */
-static inline void vmathM3GetCol2( VmathVector3 *result, const VmathMatrix3 *mat );
-
-/*
- * Set the column of a 3x3 matrix referred to by the specified index
- */
-static inline void vmathM3SetCol( VmathMatrix3 *result, int col, const VmathVector3 *vec );
-
-/*
- * Set the row of a 3x3 matrix referred to by the specified index
- */
-static inline void vmathM3SetRow( VmathMatrix3 *result, int row, const VmathVector3 *vec );
-
-/*
- * Get the column of a 3x3 matrix referred to by the specified index
- */
-static inline void vmathM3GetCol( VmathVector3 *result, const VmathMatrix3 *mat, int col );
-
-/*
- * Get the row of a 3x3 matrix referred to by the specified index
- */
-static inline void vmathM3GetRow( VmathVector3 *result, const VmathMatrix3 *mat, int row );
-
-/*
- * Set the element of a 3x3 matrix referred to by column and row indices
- */
-static inline void vmathM3SetElem( VmathMatrix3 *result, int col, int row, float val );
-
-/*
- * Get the element of a 3x3 matrix referred to by column and row indices
- */
-static inline float vmathM3GetElem( const VmathMatrix3 *mat, int col, int row );
-
-/*
- * Add two 3x3 matrices
- */
-static inline void vmathM3Add( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 );
-
-/*
- * Subtract a 3x3 matrix from another 3x3 matrix
- */
-static inline void vmathM3Sub( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 );
-
-/*
- * Negate all elements of a 3x3 matrix
- */
-static inline void vmathM3Neg( VmathMatrix3 *result, const VmathMatrix3 *mat );
-
-/*
- * Multiply a 3x3 matrix by a scalar
- */
-static inline void vmathM3ScalarMul( VmathMatrix3 *result, const VmathMatrix3 *mat, float scalar );
-
-/*
- * Multiply a 3x3 matrix by a 3-D vector
- */
-static inline void vmathM3MulV3( VmathVector3 *result, const VmathMatrix3 *mat, const VmathVector3 *vec );
-
-/*
- * Multiply two 3x3 matrices
- */
-static inline void vmathM3Mul( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 );
-
-/*
- * Construct an identity 3x3 matrix
- */
-static inline void vmathM3MakeIdentity( VmathMatrix3 *result );
-
-/*
- * Construct a 3x3 matrix to rotate around the x axis
- */
-static inline void vmathM3MakeRotationX( VmathMatrix3 *result, float radians );
-
-/*
- * Construct a 3x3 matrix to rotate around the y axis
- */
-static inline void vmathM3MakeRotationY( VmathMatrix3 *result, float radians );
-
-/*
- * Construct a 3x3 matrix to rotate around the z axis
- */
-static inline void vmathM3MakeRotationZ( VmathMatrix3 *result, float radians );
-
-/*
- * Construct a 3x3 matrix to rotate around the x, y, and z axes
- */
-static inline void vmathM3MakeRotationZYX( VmathMatrix3 *result, const VmathVector3 *radiansXYZ );
-
-/*
- * Construct a 3x3 matrix to rotate around a unit-length 3-D vector
- */
-static inline void vmathM3MakeRotationAxis( VmathMatrix3 *result, float radians, const VmathVector3 *unitVec );
-
-/*
- * Construct a rotation matrix from a unit-length quaternion
- */
-static inline void vmathM3MakeRotationQ( VmathMatrix3 *result, const VmathQuat *unitQuat );
-
-/*
- * Construct a 3x3 matrix to perform scaling
- */
-static inline void vmathM3MakeScale( VmathMatrix3 *result, const VmathVector3 *scaleVec );
-
-/*
- * Append (post-multiply) a scale transformation to a 3x3 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline void vmathM3AppendScale( VmathMatrix3 *result, const VmathMatrix3 *mat, const VmathVector3 *scaleVec );
-
-/*
- * Prepend (pre-multiply) a scale transformation to a 3x3 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline void vmathM3PrependScale( VmathMatrix3 *result, const VmathVector3 *scaleVec, const VmathMatrix3 *mat );
-
-/*
- * Multiply two 3x3 matrices per element
- */
-static inline void vmathM3MulPerElem( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 );
-
-/*
- * Compute the absolute value of a 3x3 matrix per element
- */
-static inline void vmathM3AbsPerElem( VmathMatrix3 *result, const VmathMatrix3 *mat );
-
-/*
- * Transpose of a 3x3 matrix
- */
-static inline void vmathM3Transpose( VmathMatrix3 *result, const VmathMatrix3 *mat );
-
-/*
- * Compute the inverse of a 3x3 matrix
- * NOTE: 
- * Result is unpredictable when the determinant of mat is equal to or near 0.
- */
-static inline void vmathM3Inverse( VmathMatrix3 *result, const VmathMatrix3 *mat );
-
-/*
- * Determinant of a 3x3 matrix
- */
-static inline float vmathM3Determinant( const VmathMatrix3 *mat );
-
-/*
- * Conditionally select between two 3x3 matrices
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- * However, the transfer of select1 to a VMX register may use more processing time than a branch.
- */
-static inline void vmathM3Select( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1, unsigned int select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3x3 matrix
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathM3Print( const VmathMatrix3 *mat );
-
-/*
- * Print a 3x3 matrix and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathM3Prints( const VmathMatrix3 *mat, const char *name );
-
-#endif
-
-/*
- * Copy a 4x4 matrix
- */
-static inline void vmathM4Copy( VmathMatrix4 *result, const VmathMatrix4 *mat );
-
-/*
- * Construct a 4x4 matrix containing the specified columns
- */
-static inline void vmathM4MakeFromCols( VmathMatrix4 *result, const VmathVector4 *col0, const VmathVector4 *col1, const VmathVector4 *col2, const VmathVector4 *col3 );
-
-/*
- * Construct a 4x4 matrix from a 3x4 transformation matrix
- */
-static inline void vmathM4MakeFromT3( VmathMatrix4 *result, const VmathTransform3 *mat );
-
-/*
- * Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector
- */
-static inline void vmathM4MakeFromM3V3( VmathMatrix4 *result, const VmathMatrix3 *mat, const VmathVector3 *translateVec );
-
-/*
- * Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector
- */
-static inline void vmathM4MakeFromQV3( VmathMatrix4 *result, const VmathQuat *unitQuat, const VmathVector3 *translateVec );
-
-/*
- * Set all elements of a 4x4 matrix to the same scalar value
- */
-static inline void vmathM4MakeFromScalar( VmathMatrix4 *result, float scalar );
-
-/*
- * Set the upper-left 3x3 submatrix
- * NOTE: 
- * This function does not change the bottom row elements.
- */
-static inline void vmathM4SetUpper3x3( VmathMatrix4 *result, const VmathMatrix3 *mat3 );
-
-/*
- * Get the upper-left 3x3 submatrix of a 4x4 matrix
- */
-static inline void vmathM4GetUpper3x3( VmathMatrix3 *result, const VmathMatrix4 *mat );
-
-/*
- * Set translation component
- * NOTE: 
- * This function does not change the bottom row elements.
- */
-static inline void vmathM4SetTranslation( VmathMatrix4 *result, const VmathVector3 *translateVec );
-
-/*
- * Get the translation component of a 4x4 matrix
- */
-static inline void vmathM4GetTranslation( VmathVector3 *result, const VmathMatrix4 *mat );
-
-/*
- * Set column 0 of a 4x4 matrix
- */
-static inline void vmathM4SetCol0( VmathMatrix4 *result, const VmathVector4 *col0 );
-
-/*
- * Set column 1 of a 4x4 matrix
- */
-static inline void vmathM4SetCol1( VmathMatrix4 *result, const VmathVector4 *col1 );
-
-/*
- * Set column 2 of a 4x4 matrix
- */
-static inline void vmathM4SetCol2( VmathMatrix4 *result, const VmathVector4 *col2 );
-
-/*
- * Set column 3 of a 4x4 matrix
- */
-static inline void vmathM4SetCol3( VmathMatrix4 *result, const VmathVector4 *col3 );
-
-/*
- * Get column 0 of a 4x4 matrix
- */
-static inline void vmathM4GetCol0( VmathVector4 *result, const VmathMatrix4 *mat );
-
-/*
- * Get column 1 of a 4x4 matrix
- */
-static inline void vmathM4GetCol1( VmathVector4 *result, const VmathMatrix4 *mat );
-
-/*
- * Get column 2 of a 4x4 matrix
- */
-static inline void vmathM4GetCol2( VmathVector4 *result, const VmathMatrix4 *mat );
-
-/*
- * Get column 3 of a 4x4 matrix
- */
-static inline void vmathM4GetCol3( VmathVector4 *result, const VmathMatrix4 *mat );
-
-/*
- * Set the column of a 4x4 matrix referred to by the specified index
- */
-static inline void vmathM4SetCol( VmathMatrix4 *result, int col, const VmathVector4 *vec );
-
-/*
- * Set the row of a 4x4 matrix referred to by the specified index
- */
-static inline void vmathM4SetRow( VmathMatrix4 *result, int row, const VmathVector4 *vec );
-
-/*
- * Get the column of a 4x4 matrix referred to by the specified index
- */
-static inline void vmathM4GetCol( VmathVector4 *result, const VmathMatrix4 *mat, int col );
-
-/*
- * Get the row of a 4x4 matrix referred to by the specified index
- */
-static inline void vmathM4GetRow( VmathVector4 *result, const VmathMatrix4 *mat, int row );
-
-/*
- * Set the element of a 4x4 matrix referred to by column and row indices
- */
-static inline void vmathM4SetElem( VmathMatrix4 *result, int col, int row, float val );
-
-/*
- * Get the element of a 4x4 matrix referred to by column and row indices
- */
-static inline float vmathM4GetElem( const VmathMatrix4 *mat, int col, int row );
-
-/*
- * Add two 4x4 matrices
- */
-static inline void vmathM4Add( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 );
-
-/*
- * Subtract a 4x4 matrix from another 4x4 matrix
- */
-static inline void vmathM4Sub( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 );
-
-/*
- * Negate all elements of a 4x4 matrix
- */
-static inline void vmathM4Neg( VmathMatrix4 *result, const VmathMatrix4 *mat );
-
-/*
- * Multiply a 4x4 matrix by a scalar
- */
-static inline void vmathM4ScalarMul( VmathMatrix4 *result, const VmathMatrix4 *mat, float scalar );
-
-/*
- * Multiply a 4x4 matrix by a 4-D vector
- */
-static inline void vmathM4MulV4( VmathVector4 *result, const VmathMatrix4 *mat, const VmathVector4 *vec );
-
-/*
- * Multiply a 4x4 matrix by a 3-D vector
- */
-static inline void vmathM4MulV3( VmathVector4 *result, const VmathMatrix4 *mat, const VmathVector3 *vec );
-
-/*
- * Multiply a 4x4 matrix by a 3-D point
- */
-static inline void vmathM4MulP3( VmathVector4 *result, const VmathMatrix4 *mat, const VmathPoint3 *pnt );
-
-/*
- * Multiply two 4x4 matrices
- */
-static inline void vmathM4Mul( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 );
-
-/*
- * Multiply a 4x4 matrix by a 3x4 transformation matrix
- */
-static inline void vmathM4MulT3( VmathMatrix4 *result, const VmathMatrix4 *mat, const VmathTransform3 *tfrm );
-
-/*
- * Construct an identity 4x4 matrix
- */
-static inline void vmathM4MakeIdentity( VmathMatrix4 *result );
-
-/*
- * Construct a 4x4 matrix to rotate around the x axis
- */
-static inline void vmathM4MakeRotationX( VmathMatrix4 *result, float radians );
-
-/*
- * Construct a 4x4 matrix to rotate around the y axis
- */
-static inline void vmathM4MakeRotationY( VmathMatrix4 *result, float radians );
-
-/*
- * Construct a 4x4 matrix to rotate around the z axis
- */
-static inline void vmathM4MakeRotationZ( VmathMatrix4 *result, float radians );
-
-/*
- * Construct a 4x4 matrix to rotate around the x, y, and z axes
- */
-static inline void vmathM4MakeRotationZYX( VmathMatrix4 *result, const VmathVector3 *radiansXYZ );
-
-/*
- * Construct a 4x4 matrix to rotate around a unit-length 3-D vector
- */
-static inline void vmathM4MakeRotationAxis( VmathMatrix4 *result, float radians, const VmathVector3 *unitVec );
-
-/*
- * Construct a rotation matrix from a unit-length quaternion
- */
-static inline void vmathM4MakeRotationQ( VmathMatrix4 *result, const VmathQuat *unitQuat );
-
-/*
- * Construct a 4x4 matrix to perform scaling
- */
-static inline void vmathM4MakeScale( VmathMatrix4 *result, const VmathVector3 *scaleVec );
-
-/*
- * Construct a 4x4 matrix to perform translation
- */
-static inline void vmathM4MakeTranslation( VmathMatrix4 *result, const VmathVector3 *translateVec );
-
-/*
- * Construct viewing matrix based on eye position, position looked at, and up direction
- */
-static inline void vmathM4MakeLookAt( VmathMatrix4 *result, const VmathPoint3 *eyePos, const VmathPoint3 *lookAtPos, const VmathVector3 *upVec );
-
-/*
- * Construct a perspective projection matrix
- */
-static inline void vmathM4MakePerspective( VmathMatrix4 *result, float fovyRadians, float aspect, float zNear, float zFar );
-
-/*
- * Construct a perspective projection matrix based on frustum
- */
-static inline void vmathM4MakeFrustum( VmathMatrix4 *result, float left, float right, float bottom, float top, float zNear, float zFar );
-
-/*
- * Construct an orthographic projection matrix
- */
-static inline void vmathM4MakeOrthographic( VmathMatrix4 *result, float left, float right, float bottom, float top, float zNear, float zFar );
-
-/*
- * Append (post-multiply) a scale transformation to a 4x4 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline void vmathM4AppendScale( VmathMatrix4 *result, const VmathMatrix4 *mat, const VmathVector3 *scaleVec );
-
-/*
- * Prepend (pre-multiply) a scale transformation to a 4x4 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline void vmathM4PrependScale( VmathMatrix4 *result, const VmathVector3 *scaleVec, const VmathMatrix4 *mat );
-
-/*
- * Multiply two 4x4 matrices per element
- */
-static inline void vmathM4MulPerElem( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 );
-
-/*
- * Compute the absolute value of a 4x4 matrix per element
- */
-static inline void vmathM4AbsPerElem( VmathMatrix4 *result, const VmathMatrix4 *mat );
-
-/*
- * Transpose of a 4x4 matrix
- */
-static inline void vmathM4Transpose( VmathMatrix4 *result, const VmathMatrix4 *mat );
-
-/*
- * Compute the inverse of a 4x4 matrix
- * NOTE: 
- * Result is unpredictable when the determinant of mat is equal to or near 0.
- */
-static inline void vmathM4Inverse( VmathMatrix4 *result, const VmathMatrix4 *mat );
-
-/*
- * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix
- * NOTE: 
- * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.  The result is unpredictable when the determinant of mat is equal to or near 0.
- */
-static inline void vmathM4AffineInverse( VmathMatrix4 *result, const VmathMatrix4 *mat );
-
-/*
- * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix
- * NOTE: 
- * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.
- */
-static inline void vmathM4OrthoInverse( VmathMatrix4 *result, const VmathMatrix4 *mat );
-
-/*
- * Determinant of a 4x4 matrix
- */
-static inline float vmathM4Determinant( const VmathMatrix4 *mat );
-
-/*
- * Conditionally select between two 4x4 matrices
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- * However, the transfer of select1 to a VMX register may use more processing time than a branch.
- */
-static inline void vmathM4Select( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1, unsigned int select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 4x4 matrix
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathM4Print( const VmathMatrix4 *mat );
-
-/*
- * Print a 4x4 matrix and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathM4Prints( const VmathMatrix4 *mat, const char *name );
-
-#endif
-
-/*
- * Copy a 3x4 transformation matrix
- */
-static inline void vmathT3Copy( VmathTransform3 *result, const VmathTransform3 *tfrm );
-
-/*
- * Construct a 3x4 transformation matrix containing the specified columns
- */
-static inline void vmathT3MakeFromCols( VmathTransform3 *result, const VmathVector3 *col0, const VmathVector3 *col1, const VmathVector3 *col2, const VmathVector3 *col3 );
-
-/*
- * Construct a 3x4 transformation matrix from a 3x3 matrix and a 3-D vector
- */
-static inline void vmathT3MakeFromM3V3( VmathTransform3 *result, const VmathMatrix3 *tfrm, const VmathVector3 *translateVec );
-
-/*
- * Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector
- */
-static inline void vmathT3MakeFromQV3( VmathTransform3 *result, const VmathQuat *unitQuat, const VmathVector3 *translateVec );
-
-/*
- * Set all elements of a 3x4 transformation matrix to the same scalar value
- */
-static inline void vmathT3MakeFromScalar( VmathTransform3 *result, float scalar );
-
-/*
- * Set the upper-left 3x3 submatrix
- */
-static inline void vmathT3SetUpper3x3( VmathTransform3 *result, const VmathMatrix3 *mat3 );
-
-/*
- * Get the upper-left 3x3 submatrix of a 3x4 transformation matrix
- */
-static inline void vmathT3GetUpper3x3( VmathMatrix3 *result, const VmathTransform3 *tfrm );
-
-/*
- * Set translation component
- */
-static inline void vmathT3SetTranslation( VmathTransform3 *result, const VmathVector3 *translateVec );
-
-/*
- * Get the translation component of a 3x4 transformation matrix
- */
-static inline void vmathT3GetTranslation( VmathVector3 *result, const VmathTransform3 *tfrm );
-
-/*
- * Set column 0 of a 3x4 transformation matrix
- */
-static inline void vmathT3SetCol0( VmathTransform3 *result, const VmathVector3 *col0 );
-
-/*
- * Set column 1 of a 3x4 transformation matrix
- */
-static inline void vmathT3SetCol1( VmathTransform3 *result, const VmathVector3 *col1 );
-
-/*
- * Set column 2 of a 3x4 transformation matrix
- */
-static inline void vmathT3SetCol2( VmathTransform3 *result, const VmathVector3 *col2 );
-
-/*
- * Set column 3 of a 3x4 transformation matrix
- */
-static inline void vmathT3SetCol3( VmathTransform3 *result, const VmathVector3 *col3 );
-
-/*
- * Get column 0 of a 3x4 transformation matrix
- */
-static inline void vmathT3GetCol0( VmathVector3 *result, const VmathTransform3 *tfrm );
-
-/*
- * Get column 1 of a 3x4 transformation matrix
- */
-static inline void vmathT3GetCol1( VmathVector3 *result, const VmathTransform3 *tfrm );
-
-/*
- * Get column 2 of a 3x4 transformation matrix
- */
-static inline void vmathT3GetCol2( VmathVector3 *result, const VmathTransform3 *tfrm );
-
-/*
- * Get column 3 of a 3x4 transformation matrix
- */
-static inline void vmathT3GetCol3( VmathVector3 *result, const VmathTransform3 *tfrm );
-
-/*
- * Set the column of a 3x4 transformation matrix referred to by the specified index
- */
-static inline void vmathT3SetCol( VmathTransform3 *result, int col, const VmathVector3 *vec );
-
-/*
- * Set the row of a 3x4 transformation matrix referred to by the specified index
- */
-static inline void vmathT3SetRow( VmathTransform3 *result, int row, const VmathVector4 *vec );
-
-/*
- * Get the column of a 3x4 transformation matrix referred to by the specified index
- */
-static inline void vmathT3GetCol( VmathVector3 *result, const VmathTransform3 *tfrm, int col );
-
-/*
- * Get the row of a 3x4 transformation matrix referred to by the specified index
- */
-static inline void vmathT3GetRow( VmathVector4 *result, const VmathTransform3 *tfrm, int row );
-
-/*
- * Set the element of a 3x4 transformation matrix referred to by column and row indices
- */
-static inline void vmathT3SetElem( VmathTransform3 *result, int col, int row, float val );
-
-/*
- * Get the element of a 3x4 transformation matrix referred to by column and row indices
- */
-static inline float vmathT3GetElem( const VmathTransform3 *tfrm, int col, int row );
-
-/*
- * Multiply a 3x4 transformation matrix by a 3-D vector
- */
-static inline void vmathT3MulV3( VmathVector3 *result, const VmathTransform3 *tfrm, const VmathVector3 *vec );
-
-/*
- * Multiply a 3x4 transformation matrix by a 3-D point
- */
-static inline void vmathT3MulP3( VmathPoint3 *result, const VmathTransform3 *tfrm, const VmathPoint3 *pnt );
-
-/*
- * Multiply two 3x4 transformation matrices
- */
-static inline void vmathT3Mul( VmathTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1 );
-
-/*
- * Construct an identity 3x4 transformation matrix
- */
-static inline void vmathT3MakeIdentity( VmathTransform3 *result );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the x axis
- */
-static inline void vmathT3MakeRotationX( VmathTransform3 *result, float radians );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the y axis
- */
-static inline void vmathT3MakeRotationY( VmathTransform3 *result, float radians );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the z axis
- */
-static inline void vmathT3MakeRotationZ( VmathTransform3 *result, float radians );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the x, y, and z axes
- */
-static inline void vmathT3MakeRotationZYX( VmathTransform3 *result, const VmathVector3 *radiansXYZ );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector
- */
-static inline void vmathT3MakeRotationAxis( VmathTransform3 *result, float radians, const VmathVector3 *unitVec );
-
-/*
- * Construct a rotation matrix from a unit-length quaternion
- */
-static inline void vmathT3MakeRotationQ( VmathTransform3 *result, const VmathQuat *unitQuat );
-
-/*
- * Construct a 3x4 transformation matrix to perform scaling
- */
-static inline void vmathT3MakeScale( VmathTransform3 *result, const VmathVector3 *scaleVec );
-
-/*
- * Construct a 3x4 transformation matrix to perform translation
- */
-static inline void vmathT3MakeTranslation( VmathTransform3 *result, const VmathVector3 *translateVec );
-
-/*
- * Append (post-multiply) a scale transformation to a 3x4 transformation matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline void vmathT3AppendScale( VmathTransform3 *result, const VmathTransform3 *tfrm, const VmathVector3 *scaleVec );
-
-/*
- * Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline void vmathT3PrependScale( VmathTransform3 *result, const VmathVector3 *scaleVec, const VmathTransform3 *tfrm );
-
-/*
- * Multiply two 3x4 transformation matrices per element
- */
-static inline void vmathT3MulPerElem( VmathTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1 );
-
-/*
- * Compute the absolute value of a 3x4 transformation matrix per element
- */
-static inline void vmathT3AbsPerElem( VmathTransform3 *result, const VmathTransform3 *tfrm );
-
-/*
- * Inverse of a 3x4 transformation matrix
- * NOTE: 
- * Result is unpredictable when the determinant of the left 3x3 submatrix is equal to or near 0.
- */
-static inline void vmathT3Inverse( VmathTransform3 *result, const VmathTransform3 *tfrm );
-
-/*
- * Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix
- * NOTE: 
- * This can be used to achieve better performance than a general inverse when the specified 3x4 transformation matrix meets the given restrictions.
- */
-static inline void vmathT3OrthoInverse( VmathTransform3 *result, const VmathTransform3 *tfrm );
-
-/*
- * Conditionally select between two 3x4 transformation matrices
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- * However, the transfer of select1 to a VMX register may use more processing time than a branch.
- */
-static inline void vmathT3Select( VmathTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1, unsigned int select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3x4 transformation matrix
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathT3Print( const VmathTransform3 *tfrm );
-
-/*
- * Print a 3x4 transformation matrix and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathT3Prints( const VmathTransform3 *tfrm, const char *name );
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#include "vec_aos.h"
-#include "quat_aos.h"
-#include "mat_aos.h"
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_AOS_C_PPU_H
+#define _VECTORMATH_AOS_C_PPU_H
+
+#include <math.h>
+#include <altivec.h>
+#include <simdmath.h>
+#include "vec_types.h"
+
+#ifdef _VECTORMATH_DEBUG
+#include <stdio.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+#ifndef _VECTORMATH_AOS_C_TYPES_H
+#define _VECTORMATH_AOS_C_TYPES_H
+
+/* A 3-D vector in array-of-structures format.
+ * Wraps a single vec_float4; NOTE(review): the fourth slot is presumably unused padding -- confirm against vec_aos.h. */
+typedef struct _VmathVector3 /* NOTE(review): the _Vmath* tags start with '_' + uppercase, which C reserves for the implementation (C11 7.1.3) */
+{
+    vec_float4 vec128; /* the only member: raw SIMD storage for the vector */
+} VmathVector3;
+
+/* A 4-D vector in array-of-structures format.
+ * Wraps a single vec_float4; NOTE(review): presumably x, y, z, w occupy slots 0..3 in that order -- confirm against vec_aos.h. */
+typedef struct _VmathVector4
+{
+    vec_float4 vec128; /* the only member: raw SIMD storage for the vector */
+} VmathVector4;
+
+/* A 3-D point in array-of-structures format.
+ * Same single-vec_float4 layout as VmathVector3, but declared as a distinct struct type so points and vectors are not interchangeable. */
+typedef struct _VmathPoint3
+{
+    vec_float4 vec128; /* the only member: raw SIMD storage for the point */
+} VmathPoint3;
+
+/* A quaternion in array-of-structures format.
+ * Wraps a single vec_float4; NOTE(review): component order is presumably x, y, z, w -- confirm against quat_aos.h. */
+typedef struct _VmathQuat
+{
+    vec_float4 vec128; /* the only member: raw SIMD storage for the quaternion */
+} VmathQuat;
+
+/* A 3x3 matrix in array-of-structures format.
+ * Stored as three VmathVector3 columns, so each column occupies one vec_float4. */
+typedef struct _VmathMatrix3
+{
+    VmathVector3 col0; /* column 0 */
+    VmathVector3 col1; /* column 1 */
+    VmathVector3 col2; /* column 2 */
+} VmathMatrix3;
+
+/* A 4x4 matrix in array-of-structures format.
+ * Stored as four VmathVector4 columns, so each column occupies one vec_float4. */
+typedef struct _VmathMatrix4
+{
+    VmathVector4 col0; /* column 0 */
+    VmathVector4 col1; /* column 1 */
+    VmathVector4 col2; /* column 2 */
+    VmathVector4 col3; /* column 3 */
+} VmathMatrix4;
+
+/* A 3x4 transformation matrix in array-of-structures format.
+ * Stored as four VmathVector3 columns (three rows, four columns). */
+typedef struct _VmathTransform3
+{
+    VmathVector3 col0; /* column 0 */
+    VmathVector3 col1; /* column 1 */
+    VmathVector3 col2; /* column 2 */
+    VmathVector3 col3; /* column 3; NOTE(review): presumably the translation component -- confirm against mat_aos.h */
+} VmathTransform3;
+
+#endif
+
+/*
+ * Copy a 3-D vector
+ */
+static inline void vmathV3Copy( VmathVector3 *result, const VmathVector3 *vec );
+
+/*
+ * Construct a 3-D vector from x, y, and z elements
+ */
+static inline void vmathV3MakeFromElems( VmathVector3 *result, float x, float y, float z );
+
+/*
+ * Copy elements from a 3-D point into a 3-D vector
+ */
+static inline void vmathV3MakeFromP3( VmathVector3 *result, const VmathPoint3 *pnt );
+
+/*
+ * Set all elements of a 3-D vector to the same scalar value
+ */
+static inline void vmathV3MakeFromScalar( VmathVector3 *result, float scalar );
+
+/*
+ * Set vector float data in a 3-D vector
+ */
+static inline void vmathV3MakeFrom128( VmathVector3 *result, vec_float4 vf4 );
+
+/*
+ * Get vector float data from a 3-D vector
+ */
+static inline vec_float4 vmathV3Get128( const VmathVector3 *vec );
+
+/*
+ * Set the x element of a 3-D vector
+ */
+static inline void vmathV3SetX( VmathVector3 *result, float x );
+
+/*
+ * Set the y element of a 3-D vector
+ */
+static inline void vmathV3SetY( VmathVector3 *result, float y );
+
+/*
+ * Set the z element of a 3-D vector
+ */
+static inline void vmathV3SetZ( VmathVector3 *result, float z );
+
+/*
+ * Get the x element of a 3-D vector
+ */
+static inline float vmathV3GetX( const VmathVector3 *vec );
+
+/*
+ * Get the y element of a 3-D vector
+ */
+static inline float vmathV3GetY( const VmathVector3 *vec );
+
+/*
+ * Get the z element of a 3-D vector
+ */
+static inline float vmathV3GetZ( const VmathVector3 *vec );
+
+/*
+ * Set an x, y, or z element of a 3-D vector by index
+ */
+static inline void vmathV3SetElem( VmathVector3 *result, int idx, float value );
+
+/*
+ * Get an x, y, or z element of a 3-D vector by index
+ */
+static inline float vmathV3GetElem( const VmathVector3 *vec, int idx );
+
+/*
+ * Add two 3-D vectors
+ */
+static inline void vmathV3Add( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
+
+/*
+ * Subtract a 3-D vector from another 3-D vector
+ */
+static inline void vmathV3Sub( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
+
+/*
+ * Add a 3-D vector (vec) to a 3-D point (pnt); the output written through result is a 3-D point
+ */
+static inline void vmathV3AddP3( VmathPoint3 *result, const VmathVector3 *vec, const VmathPoint3 *pnt );
+
+/*
+ * Multiply a 3-D vector by a scalar
+ */
+static inline void vmathV3ScalarMul( VmathVector3 *result, const VmathVector3 *vec, float scalar );
+
+/*
+ * Divide a 3-D vector by a scalar
+ */
+static inline void vmathV3ScalarDiv( VmathVector3 *result, const VmathVector3 *vec, float scalar );
+
+/*
+ * Negate all elements of a 3-D vector
+ */
+static inline void vmathV3Neg( VmathVector3 *result, const VmathVector3 *vec );
+
+/*
+ * Construct x axis
+ */
+static inline void vmathV3MakeXAxis( VmathVector3 *result );
+
+/*
+ * Construct y axis
+ */
+static inline void vmathV3MakeYAxis( VmathVector3 *result );
+
+/*
+ * Construct z axis
+ */
+static inline void vmathV3MakeZAxis( VmathVector3 *result );
+
+/*
+ * Multiply two 3-D vectors per element
+ */
+static inline void vmathV3MulPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
+
+/*
+ * Divide two 3-D vectors per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function divf4.
+ */
+static inline void vmathV3DivPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
+
+/*
+ * Compute the reciprocal of a 3-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function recipf4.
+ */
+static inline void vmathV3RecipPerElem( VmathVector3 *result, const VmathVector3 *vec );
+
+/*
+ * Compute the square root of a 3-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function sqrtf4.
+ */
+static inline void vmathV3SqrtPerElem( VmathVector3 *result, const VmathVector3 *vec );
+
+/*
+ * Compute the reciprocal square root of a 3-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function rsqrtf4.
+ */
+static inline void vmathV3RsqrtPerElem( VmathVector3 *result, const VmathVector3 *vec );
+
+/*
+ * Compute the absolute value of a 3-D vector per element
+ */
+static inline void vmathV3AbsPerElem( VmathVector3 *result, const VmathVector3 *vec );
+
+/*
+ * Copy sign from one 3-D vector to another, per element
+ */
+static inline void vmathV3CopySignPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
+
+/*
+ * Maximum of two 3-D vectors per element
+ */
+static inline void vmathV3MaxPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
+
+/*
+ * Minimum of two 3-D vectors per element
+ */
+static inline void vmathV3MinPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
+
+/*
+ * Maximum element of a 3-D vector
+ */
+static inline float vmathV3MaxElem( const VmathVector3 *vec );
+
+/*
+ * Minimum element of a 3-D vector
+ */
+static inline float vmathV3MinElem( const VmathVector3 *vec );
+
+/*
+ * Compute the sum of all elements of a 3-D vector
+ */
+static inline float vmathV3Sum( const VmathVector3 *vec );
+
+/*
+ * Compute the dot product of two 3-D vectors
+ */
+static inline float vmathV3Dot( const VmathVector3 *vec0, const VmathVector3 *vec1 );
+
+/*
+ * Compute the square of the length of a 3-D vector
+ */
+static inline float vmathV3LengthSqr( const VmathVector3 *vec );
+
+/*
+ * Compute the length of a 3-D vector
+ */
+static inline float vmathV3Length( const VmathVector3 *vec );
+
+/*
+ * Normalize a 3-D vector
+ * NOTE: 
+ * The result is unpredictable when all elements of vec are at or near zero.
+ */
+static inline void vmathV3Normalize( VmathVector3 *result, const VmathVector3 *vec );
+
+/*
+ * Compute cross product of two 3-D vectors
+ */
+static inline void vmathV3Cross( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
+
+/*
+ * Outer product of two 3-D vectors
+ */
+static inline void vmathV3Outer( VmathMatrix3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
+
+/*
+ * Pre-multiply a row vector by a 3x3 matrix
+ *
+ * result: receives the resulting 3-D vector
+ * vec:    input vector, treated as a row vector (read-only)
+ * mat:    input 3x3 matrix (read-only)
+ *
+ * NOTE:
+ * Slower than column post-multiply.
+ * NOTE(review): "column post-multiply" presumably refers to vmathM3MulV3;
+ * confirm.
+ */
+static inline void vmathV3RowMul( VmathVector3 *result, const VmathVector3 *vec, const VmathMatrix3 *mat );
+
+/*
+ * Cross-product matrix of a 3-D vector
+ *
+ * result: receives the 3x3 cross-product matrix
+ * vec:    vector the matrix is built from (read-only)
+ *
+ * NOTE(review): presumably the matrix M for which M * v equals
+ * cross( vec, v ); confirm against the implementation.
+ */
+static inline void vmathV3CrossMatrix( VmathMatrix3 *result, const VmathVector3 *vec );
+
+/*
+ * Create cross-product matrix and multiply
+ *
+ * result: receives the 3x3 matrix product
+ * vec:    vector the cross-product matrix is built from (read-only)
+ * mat:    3x3 matrix it is multiplied with (read-only)
+ *
+ * NOTE:
+ * Faster than separately creating a cross-product matrix and multiplying.
+ * NOTE(review): presumably the cross-product matrix is the left operand
+ * and mat the right operand; confirm against the implementation.
+ */
+static inline void vmathV3CrossMatrixMul( VmathMatrix3 *result, const VmathVector3 *vec, const VmathMatrix3 *mat );
+
+/*
+ * Linear interpolation between two 3-D vectors
+ *
+ * result:     receives the interpolated 3-D vector
+ * t:          interpolation parameter
+ * vec0, vec1: the two vectors interpolated between (read-only)
+ *
+ * NOTE:
+ * Does not clamp t between 0 and 1.
+ * NOTE(review): presumably t = 0 yields vec0 and t = 1 yields vec1;
+ * confirm against the implementation.
+ */
+static inline void vmathV3Lerp( VmathVector3 *result, float t, const VmathVector3 *vec0, const VmathVector3 *vec1 );
+
+/*
+ * Spherical linear interpolation between two 3-D vectors
+ *
+ * result:             receives the interpolated 3-D vector
+ * t:                  interpolation parameter
+ * unitVec0, unitVec1: the two vectors interpolated between (read-only);
+ *                     the parameter names indicate they are expected to
+ *                     be unit-length
+ *
+ * NOTE:
+ * The result is unpredictable if the vectors point in opposite directions.
+ * Does not clamp t between 0 and 1.
+ */
+static inline void vmathV3Slerp( VmathVector3 *result, float t, const VmathVector3 *unitVec0, const VmathVector3 *unitVec1 );
+
+/*
+ * Conditionally select between two 3-D vectors
+ *
+ * result:     receives the selected 3-D vector
+ * vec0, vec1: the two candidate vectors (read-only)
+ * select1:    selector value
+ *             NOTE(review): presumably zero selects vec0 and non-zero
+ *             selects vec1; confirm against the implementation.
+ *
+ * NOTE:
+ * This function uses a conditional select instruction to avoid a branch.
+ * However, the transfer of select1 to a VMX register may use more processing time than a branch.
+ */
+static inline void vmathV3Select( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1, unsigned int select1 );
+
+/*
+ * Store x, y, and z elements of a 3-D vector in the first three words of a quadword.
+ * The value of the fourth word (the word with the highest address) remains unchanged.
+ *
+ * vec:  source 3-D vector (read-only)
+ * quad: destination quadword; only its first three words are written
+ */
+static inline void vmathV3StoreXYZ( const VmathVector3 *vec, vec_float4 *quad );
+
+/*
+ * Load four three-float 3-D vectors, stored in three quadwords
+ *
+ * vec0..vec3: receive the four loaded 3-D vectors
+ * threeQuads: source data (read-only); points to the three quadwords
+ *             holding the four x, y, z triples
+ *             NOTE(review): presumably packed contiguously as
+ *             x0 y0 z0 x1 | y1 z1 x2 y2 | z2 x3 y3 z3; confirm.
+ */
+static inline void vmathV3LoadXYZArray( VmathVector3 *vec0, VmathVector3 *vec1, VmathVector3 *vec2, VmathVector3 *vec3, const vec_float4 *threeQuads );
+
+/*
+ * Store four 3-D vectors in three quadwords
+ *
+ * vec0..vec3: the four source 3-D vectors (read-only)
+ * threeQuads: destination; points to the three quadwords that receive
+ *             the four x, y, z triples
+ *             NOTE(review): presumably packed contiguously as
+ *             x0 y0 z0 x1 | y1 z1 x2 y2 | z2 x3 y3 z3; confirm.
+ */
+static inline void vmathV3StoreXYZArray( const VmathVector3 *vec0, const VmathVector3 *vec1, const VmathVector3 *vec2, const VmathVector3 *vec3, vec_float4 *threeQuads );
+
+/*
+ * Store eight 3-D vectors as half-floats
+ *
+ * vec0..vec7: the eight source 3-D vectors (read-only)
+ * threeQuads: destination; the parameter name indicates that three
+ *             quadwords are written
+ *             NOTE(review): 8 vectors x 3 elements = 24 half-floats,
+ *             which fills three quadwords if each vec_ushort8 holds
+ *             eight 16-bit values; confirm.
+ */
+static inline void vmathV3StoreHalfFloats( const VmathVector3 *vec0, const VmathVector3 *vec1, const VmathVector3 *vec2, const VmathVector3 *vec3, const VmathVector3 *vec4, const VmathVector3 *vec5, const VmathVector3 *vec6, const VmathVector3 *vec7, vec_ushort8 *threeQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3-D vector
+ *
+ * vec: 3-D vector to print (read-only)
+ *
+ * NOTE:
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathV3Print( const VmathVector3 *vec );
+
+/*
+ * Print a 3-D vector and an associated string identifier
+ *
+ * vec:  3-D vector to print (read-only)
+ * name: string identifier printed along with the vector
+ *
+ * NOTE:
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathV3Prints( const VmathVector3 *vec, const char *name );
+
+#endif /* _VECTORMATH_DEBUG */
+
+/*
+ * Copy a 4-D vector
+ */
+static inline void vmathV4Copy( VmathVector4 *result, const VmathVector4 *vec );
+
+/*
+ * Construct a 4-D vector from x, y, z, and w elements
+ */
+static inline void vmathV4MakeFromElems( VmathVector4 *result, float x, float y, float z, float w );
+
+/*
+ * Construct a 4-D vector from a 3-D vector and a scalar
+ */
+static inline void vmathV4MakeFromV3Scalar( VmathVector4 *result, const VmathVector3 *xyz, float w );
+
+/*
+ * Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0
+ */
+static inline void vmathV4MakeFromV3( VmathVector4 *result, const VmathVector3 *vec );
+
+/*
+ * Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1
+ */
+static inline void vmathV4MakeFromP3( VmathVector4 *result, const VmathPoint3 *pnt );
+
+/*
+ * Copy elements from a quaternion into a 4-D vector
+ */
+static inline void vmathV4MakeFromQ( VmathVector4 *result, const VmathQuat *quat );
+
+/*
+ * Set all elements of a 4-D vector to the same scalar value
+ */
+static inline void vmathV4MakeFromScalar( VmathVector4 *result, float scalar );
+
+/*
+ * Set vector float data in a 4-D vector
+ */
+static inline void vmathV4MakeFrom128( VmathVector4 *result, vec_float4 vf4 );
+
+/*
+ * Get vector float data from a 4-D vector
+ */
+static inline vec_float4 vmathV4Get128( const VmathVector4 *vec );
+
+/*
+ * Set the x, y, and z elements of a 4-D vector
+ * NOTE: 
+ * This function does not change the w element.
+ */
+static inline void vmathV4SetXYZ( VmathVector4 *result, const VmathVector3 *vec );
+
+/*
+ * Get the x, y, and z elements of a 4-D vector
+ */
+static inline void vmathV4GetXYZ( VmathVector3 *result, const VmathVector4 *vec );
+
+/*
+ * Set the x element of a 4-D vector
+ */
+static inline void vmathV4SetX( VmathVector4 *result, float x );
+
+/*
+ * Set the y element of a 4-D vector
+ */
+static inline void vmathV4SetY( VmathVector4 *result, float y );
+
+/*
+ * Set the z element of a 4-D vector
+ */
+static inline void vmathV4SetZ( VmathVector4 *result, float z );
+
+/*
+ * Set the w element of a 4-D vector
+ */
+static inline void vmathV4SetW( VmathVector4 *result, float w );
+
+/*
+ * Get the x element of a 4-D vector
+ */
+static inline float vmathV4GetX( const VmathVector4 *vec );
+
+/*
+ * Get the y element of a 4-D vector
+ */
+static inline float vmathV4GetY( const VmathVector4 *vec );
+
+/*
+ * Get the z element of a 4-D vector
+ */
+static inline float vmathV4GetZ( const VmathVector4 *vec );
+
+/*
+ * Get the w element of a 4-D vector
+ */
+static inline float vmathV4GetW( const VmathVector4 *vec );
+
+/*
+ * Set an x, y, z, or w element of a 4-D vector by index
+ *
+ * result: 4-D vector whose element is set
+ * idx:    index of the element to set
+ *         NOTE(review): presumably 0 = x, 1 = y, 2 = z, 3 = w; confirm.
+ * value:  new value for the selected element
+ */
+static inline void vmathV4SetElem( VmathVector4 *result, int idx, float value );
+
+/*
+ * Get an x, y, z, or w element of a 4-D vector by index
+ *
+ * vec: 4-D vector to read (read-only)
+ * idx: index of the element to return
+ *      NOTE(review): presumably 0 = x, 1 = y, 2 = z, 3 = w; confirm.
+ *
+ * Returns the selected element.
+ */
+static inline float vmathV4GetElem( const VmathVector4 *vec, int idx );
+
+/*
+ * Add two 4-D vectors
+ */
+static inline void vmathV4Add( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
+
+/*
+ * Subtract a 4-D vector from another 4-D vector
+ */
+static inline void vmathV4Sub( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
+
+/*
+ * Multiply a 4-D vector by a scalar
+ */
+static inline void vmathV4ScalarMul( VmathVector4 *result, const VmathVector4 *vec, float scalar );
+
+/*
+ * Divide a 4-D vector by a scalar
+ */
+static inline void vmathV4ScalarDiv( VmathVector4 *result, const VmathVector4 *vec, float scalar );
+
+/*
+ * Negate all elements of a 4-D vector
+ */
+static inline void vmathV4Neg( VmathVector4 *result, const VmathVector4 *vec );
+
+/*
+ * Construct x axis
+ */
+static inline void vmathV4MakeXAxis( VmathVector4 *result );
+
+/*
+ * Construct y axis
+ */
+static inline void vmathV4MakeYAxis( VmathVector4 *result );
+
+/*
+ * Construct z axis
+ */
+static inline void vmathV4MakeZAxis( VmathVector4 *result );
+
+/*
+ * Construct w axis
+ */
+static inline void vmathV4MakeWAxis( VmathVector4 *result );
+
+/*
+ * Multiply two 4-D vectors per element
+ */
+static inline void vmathV4MulPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
+
+/*
+ * Divide two 4-D vectors per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function divf4.
+ */
+static inline void vmathV4DivPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
+
+/*
+ * Compute the reciprocal of a 4-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function recipf4.
+ */
+static inline void vmathV4RecipPerElem( VmathVector4 *result, const VmathVector4 *vec );
+
+/*
+ * Compute the square root of a 4-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function sqrtf4.
+ */
+static inline void vmathV4SqrtPerElem( VmathVector4 *result, const VmathVector4 *vec );
+
+/*
+ * Compute the reciprocal square root of a 4-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function rsqrtf4.
+ */
+static inline void vmathV4RsqrtPerElem( VmathVector4 *result, const VmathVector4 *vec );
+
+/*
+ * Compute the absolute value of a 4-D vector per element
+ */
+static inline void vmathV4AbsPerElem( VmathVector4 *result, const VmathVector4 *vec );
+
+/*
+ * Copy sign from one 4-D vector to another, per element
+ */
+static inline void vmathV4CopySignPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
+
+/*
+ * Maximum of two 4-D vectors per element
+ */
+static inline void vmathV4MaxPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
+
+/*
+ * Minimum of two 4-D vectors per element
+ */
+static inline void vmathV4MinPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
+
+/*
+ * Maximum element of a 4-D vector
+ */
+static inline float vmathV4MaxElem( const VmathVector4 *vec );
+
+/*
+ * Minimum element of a 4-D vector
+ */
+static inline float vmathV4MinElem( const VmathVector4 *vec );
+
+/*
+ * Compute the sum of all elements of a 4-D vector
+ */
+static inline float vmathV4Sum( const VmathVector4 *vec );
+
+/*
+ * Compute the dot product of two 4-D vectors
+ */
+static inline float vmathV4Dot( const VmathVector4 *vec0, const VmathVector4 *vec1 );
+
+/*
+ * Compute the square of the length of a 4-D vector
+ */
+static inline float vmathV4LengthSqr( const VmathVector4 *vec );
+
+/*
+ * Compute the length of a 4-D vector
+ */
+static inline float vmathV4Length( const VmathVector4 *vec );
+
+/*
+ * Normalize a 4-D vector
+ * NOTE: 
+ * The result is unpredictable when all elements of vec are at or near zero.
+ */
+static inline void vmathV4Normalize( VmathVector4 *result, const VmathVector4 *vec );
+
+/*
+ * Outer product of two 4-D vectors
+ */
+static inline void vmathV4Outer( VmathMatrix4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
+
+/*
+ * Linear interpolation between two 4-D vectors
+ *
+ * result:     receives the interpolated 4-D vector
+ * t:          interpolation parameter
+ * vec0, vec1: the two vectors interpolated between (read-only)
+ *
+ * NOTE:
+ * Does not clamp t between 0 and 1.
+ * NOTE(review): presumably t = 0 yields vec0 and t = 1 yields vec1;
+ * confirm against the implementation.
+ */
+static inline void vmathV4Lerp( VmathVector4 *result, float t, const VmathVector4 *vec0, const VmathVector4 *vec1 );
+
+/*
+ * Spherical linear interpolation between two 4-D vectors
+ *
+ * result:             receives the interpolated 4-D vector
+ * t:                  interpolation parameter
+ * unitVec0, unitVec1: the two vectors interpolated between (read-only);
+ *                     the parameter names indicate they are expected to
+ *                     be unit-length
+ *
+ * NOTE:
+ * The result is unpredictable if the vectors point in opposite directions.
+ * Does not clamp t between 0 and 1.
+ */
+static inline void vmathV4Slerp( VmathVector4 *result, float t, const VmathVector4 *unitVec0, const VmathVector4 *unitVec1 );
+
+/*
+ * Conditionally select between two 4-D vectors
+ *
+ * result:     receives the selected 4-D vector
+ * vec0, vec1: the two candidate vectors (read-only)
+ * select1:    selector value
+ *             NOTE(review): presumably zero selects vec0 and non-zero
+ *             selects vec1; confirm against the implementation.
+ *
+ * NOTE:
+ * This function uses a conditional select instruction to avoid a branch.
+ * However, the transfer of select1 to a VMX register may use more processing time than a branch.
+ */
+static inline void vmathV4Select( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1, unsigned int select1 );
+
+/*
+ * Store four 4-D vectors as half-floats
+ *
+ * vec0..vec3: the four source 4-D vectors (read-only)
+ * twoQuads:   destination; the parameter name indicates that two
+ *             quadwords are written
+ *             NOTE(review): 4 vectors x 4 elements = 16 half-floats,
+ *             which fills two quadwords if each vec_ushort8 holds
+ *             eight 16-bit values; confirm.
+ */
+static inline void vmathV4StoreHalfFloats( const VmathVector4 *vec0, const VmathVector4 *vec1, const VmathVector4 *vec2, const VmathVector4 *vec3, vec_ushort8 *twoQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 4-D vector
+ *
+ * vec: 4-D vector to print (read-only)
+ *
+ * NOTE:
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathV4Print( const VmathVector4 *vec );
+
+/*
+ * Print a 4-D vector and an associated string identifier
+ *
+ * vec:  4-D vector to print (read-only)
+ * name: string identifier printed along with the vector
+ *
+ * NOTE:
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathV4Prints( const VmathVector4 *vec, const char *name );
+
+#endif /* _VECTORMATH_DEBUG */
+
+/*
+ * Copy a 3-D point
+ */
+static inline void vmathP3Copy( VmathPoint3 *result, const VmathPoint3 *pnt );
+
+/*
+ * Construct a 3-D point from x, y, and z elements
+ */
+static inline void vmathP3MakeFromElems( VmathPoint3 *result, float x, float y, float z );
+
+/*
+ * Copy elements from a 3-D vector into a 3-D point
+ */
+static inline void vmathP3MakeFromV3( VmathPoint3 *result, const VmathVector3 *vec );
+
+/*
+ * Set all elements of a 3-D point to the same scalar value
+ */
+static inline void vmathP3MakeFromScalar( VmathPoint3 *result, float scalar );
+
+/*
+ * Set vector float data in a 3-D point
+ */
+static inline void vmathP3MakeFrom128( VmathPoint3 *result, vec_float4 vf4 );
+
+/*
+ * Get vector float data from a 3-D point
+ */
+static inline vec_float4 vmathP3Get128( const VmathPoint3 *pnt );
+
+/*
+ * Set the x element of a 3-D point
+ */
+static inline void vmathP3SetX( VmathPoint3 *result, float x );
+
+/*
+ * Set the y element of a 3-D point
+ */
+static inline void vmathP3SetY( VmathPoint3 *result, float y );
+
+/*
+ * Set the z element of a 3-D point
+ */
+static inline void vmathP3SetZ( VmathPoint3 *result, float z );
+
+/*
+ * Get the x element of a 3-D point
+ */
+static inline float vmathP3GetX( const VmathPoint3 *pnt );
+
+/*
+ * Get the y element of a 3-D point
+ */
+static inline float vmathP3GetY( const VmathPoint3 *pnt );
+
+/*
+ * Get the z element of a 3-D point
+ */
+static inline float vmathP3GetZ( const VmathPoint3 *pnt );
+
+/*
+ * Set an x, y, or z element of a 3-D point by index
+ */
+static inline void vmathP3SetElem( VmathPoint3 *result, int idx, float value );
+
+/*
+ * Get an x, y, or z element of a 3-D point by index
+ */
+static inline float vmathP3GetElem( const VmathPoint3 *pnt, int idx );
+
+/*
+ * Subtract a 3-D point from another 3-D point
+ *
+ * result:     receives the difference, which is typed as a 3-D vector
+ *             rather than a point
+ * pnt0, pnt1: the two points (read-only)
+ *             NOTE(review): presumably computes pnt0 - pnt1; confirm.
+ */
+static inline void vmathP3Sub( VmathVector3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
+
+/*
+ * Add a 3-D vector to a 3-D point
+ *
+ * result: receives the resulting 3-D point
+ * pnt:    input point (read-only)
+ * vec:    vector added to the point (read-only)
+ */
+static inline void vmathP3AddV3( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *vec );
+
+/*
+ * Subtract a 3-D vector from a 3-D point
+ *
+ * result: receives the resulting 3-D point
+ * pnt:    input point (read-only)
+ * vec:    vector subtracted from the point (read-only)
+ */
+static inline void vmathP3SubV3( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *vec );
+
+/*
+ * Multiply two 3-D points per element
+ */
+static inline void vmathP3MulPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
+
+/*
+ * Divide two 3-D points per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function divf4.
+ */
+static inline void vmathP3DivPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
+
+/*
+ * Compute the reciprocal of a 3-D point per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function recipf4.
+ */
+static inline void vmathP3RecipPerElem( VmathPoint3 *result, const VmathPoint3 *pnt );
+
+/*
+ * Compute the square root of a 3-D point per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function sqrtf4.
+ */
+static inline void vmathP3SqrtPerElem( VmathPoint3 *result, const VmathPoint3 *pnt );
+
+/*
+ * Compute the reciprocal square root of a 3-D point per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function rsqrtf4.
+ */
+static inline void vmathP3RsqrtPerElem( VmathPoint3 *result, const VmathPoint3 *pnt );
+
+/*
+ * Compute the absolute value of a 3-D point per element
+ */
+static inline void vmathP3AbsPerElem( VmathPoint3 *result, const VmathPoint3 *pnt );
+
+/*
+ * Copy sign from one 3-D point to another, per element
+ */
+static inline void vmathP3CopySignPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
+
+/*
+ * Maximum of two 3-D points per element
+ */
+static inline void vmathP3MaxPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
+
+/*
+ * Minimum of two 3-D points per element
+ */
+static inline void vmathP3MinPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
+
+/*
+ * Maximum element of a 3-D point
+ */
+static inline float vmathP3MaxElem( const VmathPoint3 *pnt );
+
+/*
+ * Minimum element of a 3-D point
+ */
+static inline float vmathP3MinElem( const VmathPoint3 *pnt );
+
+/*
+ * Compute the sum of all elements of a 3-D point
+ */
+static inline float vmathP3Sum( const VmathPoint3 *pnt );
+
+/*
+ * Apply uniform scale to a 3-D point
+ */
+static inline void vmathP3Scale( VmathPoint3 *result, const VmathPoint3 *pnt, float scaleVal );
+
+/*
+ * Apply non-uniform scale to a 3-D point
+ */
+static inline void vmathP3NonUniformScale( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *scaleVec );
+
+/*
+ * Scalar projection of a 3-D point on a unit-length 3-D vector
+ *
+ * pnt:     point to project (read-only)
+ * unitVec: vector projected onto (read-only); expected to be unit-length
+ *
+ * Returns the scalar projection.
+ */
+static inline float vmathP3Projection( const VmathPoint3 *pnt, const VmathVector3 *unitVec );
+
+/*
+ * Compute the square of the distance of a 3-D point from the coordinate-system origin
+ */
+static inline float vmathP3DistSqrFromOrigin( const VmathPoint3 *pnt );
+
+/*
+ * Compute the distance of a 3-D point from the coordinate-system origin
+ */
+static inline float vmathP3DistFromOrigin( const VmathPoint3 *pnt );
+
+/*
+ * Compute the square of the distance between two 3-D points
+ */
+static inline float vmathP3DistSqr( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
+
+/*
+ * Compute the distance between two 3-D points
+ */
+static inline float vmathP3Dist( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
+
+/*
+ * Linear interpolation between two 3-D points
+ *
+ * result:     receives the interpolated 3-D point
+ * t:          interpolation parameter
+ * pnt0, pnt1: the two points interpolated between (read-only)
+ *
+ * NOTE:
+ * Does not clamp t between 0 and 1.
+ * NOTE(review): presumably t = 0 yields pnt0 and t = 1 yields pnt1;
+ * confirm against the implementation.
+ */
+static inline void vmathP3Lerp( VmathPoint3 *result, float t, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
+
+/*
+ * Conditionally select between two 3-D points
+ *
+ * result:     receives the selected 3-D point
+ * pnt0, pnt1: the two candidate points (read-only)
+ * select1:    selector value
+ *             NOTE(review): presumably zero selects pnt0 and non-zero
+ *             selects pnt1; confirm against the implementation.
+ *
+ * NOTE:
+ * This function uses a conditional select instruction to avoid a branch.
+ * However, the transfer of select1 to a VMX register may use more processing time than a branch.
+ */
+static inline void vmathP3Select( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, unsigned int select1 );
+
+/*
+ * Store x, y, and z elements of a 3-D point in the first three words of a quadword.
+ * The value of the fourth word (the word with the highest address) remains unchanged.
+ *
+ * pnt:  source 3-D point (read-only)
+ * quad: destination quadword; only its first three words are written
+ */
+static inline void vmathP3StoreXYZ( const VmathPoint3 *pnt, vec_float4 *quad );
+
+/*
+ * Load four three-float 3-D points, stored in three quadwords
+ *
+ * pnt0..pnt3: receive the four loaded 3-D points
+ * threeQuads: source data (read-only); points to the three quadwords
+ *             holding the four x, y, z triples
+ *             NOTE(review): presumably packed contiguously as
+ *             x0 y0 z0 x1 | y1 z1 x2 y2 | z2 x3 y3 z3; confirm.
+ */
+static inline void vmathP3LoadXYZArray( VmathPoint3 *pnt0, VmathPoint3 *pnt1, VmathPoint3 *pnt2, VmathPoint3 *pnt3, const vec_float4 *threeQuads );
+
+/*
+ * Store four 3-D points in three quadwords
+ *
+ * pnt0..pnt3: the four source 3-D points (read-only)
+ * threeQuads: destination; points to the three quadwords that receive
+ *             the four x, y, z triples
+ *             NOTE(review): presumably packed contiguously as
+ *             x0 y0 z0 x1 | y1 z1 x2 y2 | z2 x3 y3 z3; confirm.
+ */
+static inline void vmathP3StoreXYZArray( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, const VmathPoint3 *pnt2, const VmathPoint3 *pnt3, vec_float4 *threeQuads );
+
+/*
+ * Store eight 3-D points as half-floats
+ *
+ * pnt0..pnt7: the eight source 3-D points (read-only)
+ * threeQuads: destination; the parameter name indicates that three
+ *             quadwords are written
+ *             NOTE(review): 8 points x 3 elements = 24 half-floats,
+ *             which fills three quadwords if each vec_ushort8 holds
+ *             eight 16-bit values; confirm.
+ */
+static inline void vmathP3StoreHalfFloats( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, const VmathPoint3 *pnt2, const VmathPoint3 *pnt3, const VmathPoint3 *pnt4, const VmathPoint3 *pnt5, const VmathPoint3 *pnt6, const VmathPoint3 *pnt7, vec_ushort8 *threeQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3-D point
+ *
+ * pnt: 3-D point to print (read-only)
+ *
+ * NOTE:
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathP3Print( const VmathPoint3 *pnt );
+
+/*
+ * Print a 3-D point and an associated string identifier
+ *
+ * pnt:  3-D point to print (read-only)
+ * name: string identifier printed along with the point
+ *
+ * NOTE:
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathP3Prints( const VmathPoint3 *pnt, const char *name );
+
+#endif /* _VECTORMATH_DEBUG */
+
+/*
+ * Copy a quaternion
+ */
+static inline void vmathQCopy( VmathQuat *result, const VmathQuat *quat );
+
+/*
+ * Construct a quaternion from x, y, z, and w elements
+ */
+static inline void vmathQMakeFromElems( VmathQuat *result, float x, float y, float z, float w );
+
+/*
+ * Construct a quaternion from a 3-D vector and a scalar
+ */
+static inline void vmathQMakeFromV3Scalar( VmathQuat *result, const VmathVector3 *xyz, float w );
+
+/*
+ * Copy elements from a 4-D vector into a quaternion
+ */
+static inline void vmathQMakeFromV4( VmathQuat *result, const VmathVector4 *vec );
+
+/*
+ * Convert a rotation matrix to a unit-length quaternion
+ *
+ * result: receives the unit-length quaternion
+ * rotMat: input 3x3 matrix (read-only); per the description and the
+ *         parameter name it is expected to be a rotation matrix
+ */
+static inline void vmathQMakeFromM3( VmathQuat *result, const VmathMatrix3 *rotMat );
+
+/*
+ * Set all elements of a quaternion to the same scalar value
+ */
+static inline void vmathQMakeFromScalar( VmathQuat *result, float scalar );
+
+/*
+ * Set vector float data in a quaternion
+ */
+static inline void vmathQMakeFrom128( VmathQuat *result, vec_float4 vf4 );
+
+/*
+ * Get vector float data from a quaternion
+ */
+static inline vec_float4 vmathQGet128( const VmathQuat *quat );
+
+/*
+ * Set the x, y, and z elements of a quaternion
+ * NOTE: 
+ * This function does not change the w element.
+ */
+static inline void vmathQSetXYZ( VmathQuat *result, const VmathVector3 *vec );
+
+/*
+ * Get the x, y, and z elements of a quaternion
+ */
+static inline void vmathQGetXYZ( VmathVector3 *result, const VmathQuat *quat );
+
+/*
+ * Set the x element of a quaternion
+ */
+static inline void vmathQSetX( VmathQuat *result, float x );
+
+/*
+ * Set the y element of a quaternion
+ */
+static inline void vmathQSetY( VmathQuat *result, float y );
+
+/*
+ * Set the z element of a quaternion
+ */
+static inline void vmathQSetZ( VmathQuat *result, float z );
+
+/*
+ * Set the w element of a quaternion
+ */
+static inline void vmathQSetW( VmathQuat *result, float w );
+
+/*
+ * Get the x element of a quaternion
+ */
+static inline float vmathQGetX( const VmathQuat *quat );
+
+/*
+ * Get the y element of a quaternion
+ */
+static inline float vmathQGetY( const VmathQuat *quat );
+
+/*
+ * Get the z element of a quaternion
+ */
+static inline float vmathQGetZ( const VmathQuat *quat );
+
+/*
+ * Get the w element of a quaternion
+ */
+static inline float vmathQGetW( const VmathQuat *quat );
+
+/*
+ * Set an x, y, z, or w element of a quaternion by index
+ */
+static inline void vmathQSetElem( VmathQuat *result, int idx, float value );
+
+/*
+ * Get an x, y, z, or w element of a quaternion by index
+ */
+static inline float vmathQGetElem( const VmathQuat *quat, int idx );
+
+/*
+ * Add two quaternions
+ */
+static inline void vmathQAdd( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1 );
+
+/*
+ * Subtract a quaternion from another quaternion
+ */
+static inline void vmathQSub( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1 );
+
+/*
+ * Multiply two quaternions
+ */
+static inline void vmathQMul( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1 );
+
+/*
+ * Multiply a quaternion by a scalar
+ */
+static inline void vmathQScalarMul( VmathQuat *result, const VmathQuat *quat, float scalar );
+
+/*
+ * Divide a quaternion by a scalar
+ */
+static inline void vmathQScalarDiv( VmathQuat *result, const VmathQuat *quat, float scalar );
+
+/*
+ * Negate all elements of a quaternion
+ */
+static inline void vmathQNeg( VmathQuat *result, const VmathQuat *quat );
+
+/*
+ * Construct an identity quaternion
+ */
+static inline void vmathQMakeIdentity( VmathQuat *result );
+
+/*
+ * Construct a quaternion to rotate between two unit-length 3-D vectors
+ *
+ * result:             receives the rotation quaternion
+ * unitVec0, unitVec1: unit-length vectors defining the rotation (read-only)
+ *                     NOTE(review): presumably the rotation takes
+ *                     unitVec0 onto unitVec1; confirm.
+ *
+ * NOTE:
+ * The result is unpredictable if unitVec0 and unitVec1 point in opposite directions.
+ */
+static inline void vmathQMakeRotationArc( VmathQuat *result, const VmathVector3 *unitVec0, const VmathVector3 *unitVec1 );
+
+/*
+ * Construct a quaternion to rotate around a unit-length 3-D vector
+ *
+ * result:  receives the rotation quaternion
+ * radians: rotation angle, in radians
+ * unitVec: unit-length rotation axis (read-only)
+ */
+static inline void vmathQMakeRotationAxis( VmathQuat *result, float radians, const VmathVector3 *unitVec );
+
+/*
+ * Construct a quaternion to rotate around the x axis
+ */
+static inline void vmathQMakeRotationX( VmathQuat *result, float radians );
+
+/*
+ * Construct a quaternion to rotate around the y axis
+ */
+static inline void vmathQMakeRotationY( VmathQuat *result, float radians );
+
+/*
+ * Construct a quaternion to rotate around the z axis
+ */
+static inline void vmathQMakeRotationZ( VmathQuat *result, float radians );
+
+/*
+ * Compute the conjugate of a quaternion
+ */
+static inline void vmathQConj( VmathQuat *result, const VmathQuat *quat );
+
+/*
+ * Use a unit-length quaternion to rotate a 3-D vector
+ *
+ * result:   receives the rotated 3-D vector
+ * unitQuat: unit-length quaternion describing the rotation (read-only)
+ * vec:      vector to rotate (read-only)
+ */
+static inline void vmathQRotate( VmathVector3 *result, const VmathQuat *unitQuat, const VmathVector3 *vec );
+
+/*
+ * Compute the dot product of two quaternions
+ */
+static inline float vmathQDot( const VmathQuat *quat0, const VmathQuat *quat1 );
+
+/*
+ * Compute the norm of a quaternion
+ *
+ * quat: quaternion to measure (read-only)
+ *
+ * NOTE(review): "norm" is ambiguous here. Because vmathQLength is declared
+ * separately, this may return the squared length (sum of squared
+ * elements) rather than the length; confirm against the implementation.
+ */
+static inline float vmathQNorm( const VmathQuat *quat );
+
+/*
+ * Compute the length of a quaternion
+ *
+ * quat: quaternion to measure (read-only)
+ *
+ * Returns the length.
+ */
+static inline float vmathQLength( const VmathQuat *quat );
+
+/*
+ * Normalize a quaternion
+ * NOTE: 
+ * The result is unpredictable when all elements of quat are at or near zero.
+ */
+static inline void vmathQNormalize( VmathQuat *result, const VmathQuat *quat );
+
+/*
+ * Linear interpolation between two quaternions
+ *
+ * result:       receives the interpolated quaternion
+ * t:            interpolation parameter
+ * quat0, quat1: the two quaternions interpolated between (read-only)
+ *
+ * NOTE:
+ * Does not clamp t between 0 and 1.
+ * NOTE(review): presumably t = 0 yields quat0 and t = 1 yields quat1;
+ * confirm against the implementation.
+ */
+static inline void vmathQLerp( VmathQuat *result, float t, const VmathQuat *quat0, const VmathQuat *quat1 );
+
+/*
+ * Spherical linear interpolation between two quaternions
+ *
+ * result:               receives the interpolated quaternion
+ * t:                    interpolation parameter
+ * unitQuat0, unitQuat1: the two quaternions interpolated between
+ *                       (read-only); the parameter names indicate they
+ *                       are expected to be unit-length
+ *
+ * NOTE:
+ * Interpolates along the shortest path between orientations.
+ * Does not clamp t between 0 and 1.
+ */
+static inline void vmathQSlerp( VmathQuat *result, float t, const VmathQuat *unitQuat0, const VmathQuat *unitQuat1 );
+
+/*
+ * Spherical quadrangle interpolation
+ *
+ * result:               receives the interpolated quaternion
+ * t:                    interpolation parameter
+ * unitQuat0..unitQuat3: the four input quaternions (read-only); the
+ *                       parameter names indicate they are expected to
+ *                       be unit-length
+ *
+ * NOTE(review): the role of each of the four quaternions (end points
+ * versus inner control points) is not visible from this declaration;
+ * confirm against the implementation before relying on the order.
+ */
+static inline void vmathQSquad( VmathQuat *result, float t, const VmathQuat *unitQuat0, const VmathQuat *unitQuat1, const VmathQuat *unitQuat2, const VmathQuat *unitQuat3 );
+
+/*
+ * Conditionally select between two quaternions
+ *
+ * result:       receives the selected quaternion
+ * quat0, quat1: the two candidate quaternions (read-only)
+ * select1:      selector value
+ *               NOTE(review): presumably zero selects quat0 and non-zero
+ *               selects quat1; confirm against the implementation.
+ *
+ * NOTE:
+ * This function uses a conditional select instruction to avoid a branch.
+ * However, the transfer of select1 to a VMX register may use more processing time than a branch.
+ */
+static inline void vmathQSelect( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1, unsigned int select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a quaternion
+ *
+ * quat: quaternion to print (read-only)
+ *
+ * NOTE:
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathQPrint( const VmathQuat *quat );
+
+/*
+ * Print a quaternion and an associated string identifier
+ *
+ * quat: quaternion to print (read-only)
+ * name: string identifier printed along with the quaternion
+ *
+ * NOTE:
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathQPrints( const VmathQuat *quat, const char *name );
+
+#endif /* _VECTORMATH_DEBUG */
+
+/*
+ * Copy a 3x3 matrix
+ */
+static inline void vmathM3Copy( VmathMatrix3 *result, const VmathMatrix3 *mat );
+
+/*
+ * Construct a 3x3 matrix containing the specified columns
+ */
+static inline void vmathM3MakeFromCols( VmathMatrix3 *result, const VmathVector3 *col0, const VmathVector3 *col1, const VmathVector3 *col2 );
+
+/*
+ * Construct a 3x3 rotation matrix from a unit-length quaternion
+ */
+static inline void vmathM3MakeFromQ( VmathMatrix3 *result, const VmathQuat *unitQuat );
+
+/*
+ * Set all elements of a 3x3 matrix to the same scalar value
+ */
+static inline void vmathM3MakeFromScalar( VmathMatrix3 *result, float scalar );
+
+/*
+ * Set column 0 of a 3x3 matrix
+ */
+static inline void vmathM3SetCol0( VmathMatrix3 *result, const VmathVector3 *col0 );
+
+/*
+ * Set column 1 of a 3x3 matrix
+ */
+static inline void vmathM3SetCol1( VmathMatrix3 *result, const VmathVector3 *col1 );
+
+/*
+ * Set column 2 of a 3x3 matrix
+ */
+static inline void vmathM3SetCol2( VmathMatrix3 *result, const VmathVector3 *col2 );
+
+/*
+ * Get column 0 of a 3x3 matrix
+ */
+static inline void vmathM3GetCol0( VmathVector3 *result, const VmathMatrix3 *mat );
+
+/*
+ * Get column 1 of a 3x3 matrix
+ */
+static inline void vmathM3GetCol1( VmathVector3 *result, const VmathMatrix3 *mat );
+
+/*
+ * Get column 2 of a 3x3 matrix
+ */
+static inline void vmathM3GetCol2( VmathVector3 *result, const VmathMatrix3 *mat );
+
+/*
+ * Set the column of a 3x3 matrix referred to by the specified index
+ *
+ * result: 3x3 matrix whose column is set
+ * col:    index of the column to set
+ *         NOTE(review): presumably 0, 1, or 2; confirm.
+ * vec:    new contents of that column (read-only)
+ */
+static inline void vmathM3SetCol( VmathMatrix3 *result, int col, const VmathVector3 *vec );
+
+/*
+ * Set the row of a 3x3 matrix referred to by the specified index
+ *
+ * result: 3x3 matrix whose row is set
+ * row:    index of the row to set
+ *         NOTE(review): presumably 0, 1, or 2; confirm.
+ * vec:    new contents of that row (read-only)
+ */
+static inline void vmathM3SetRow( VmathMatrix3 *result, int row, const VmathVector3 *vec );
+
+/*
+ * Get the column of a 3x3 matrix referred to by the specified index
+ *
+ * result: receives the selected column as a 3-D vector
+ * mat:    3x3 matrix to read (read-only)
+ * col:    index of the column to get
+ *         NOTE(review): presumably 0, 1, or 2; confirm.
+ */
+static inline void vmathM3GetCol( VmathVector3 *result, const VmathMatrix3 *mat, int col );
+
+/*
+ * Get the row of a 3x3 matrix referred to by the specified index
+ *
+ * result: receives the selected row as a 3-D vector
+ * mat:    3x3 matrix to read (read-only)
+ * row:    index of the row to get
+ *         NOTE(review): presumably 0, 1, or 2; confirm.
+ */
+static inline void vmathM3GetRow( VmathVector3 *result, const VmathMatrix3 *mat, int row );
+
+/*
+ * Set the element of a 3x3 matrix referred to by column and row indices
+ *
+ * result: 3x3 matrix whose element is set
+ * col:    column index of the element
+ * row:    row index of the element
+ * val:    new value for the element
+ *
+ * Note the argument order: the column index precedes the row index.
+ */
+static inline void vmathM3SetElem( VmathMatrix3 *result, int col, int row, float val );
+
+/*
+ * Get the element of a 3x3 matrix referred to by column and row indices
+ *
+ * mat: 3x3 matrix to read (read-only)
+ * col: column index of the element
+ * row: row index of the element
+ *
+ * Returns the selected element.
+ * Note the argument order: the column index precedes the row index.
+ */
+static inline float vmathM3GetElem( const VmathMatrix3 *mat, int col, int row );
+
+/*
+ * Add two 3x3 matrices
+ */
+static inline void vmathM3Add( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 );
+
+/*
+ * Subtract a 3x3 matrix from another 3x3 matrix
+ */
+static inline void vmathM3Sub( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 );
+
+/*
+ * Negate all elements of a 3x3 matrix
+ */
+static inline void vmathM3Neg( VmathMatrix3 *result, const VmathMatrix3 *mat );
+
+/*
+ * Multiply a 3x3 matrix by a scalar
+ */
+static inline void vmathM3ScalarMul( VmathMatrix3 *result, const VmathMatrix3 *mat, float scalar );
+
+/*
+ * Multiply a 3x3 matrix by a 3-D vector
+ *
+ * result: receives the resulting 3-D vector
+ * mat:    input 3x3 matrix (read-only)
+ * vec:    input vector (read-only)
+ *
+ * NOTE(review): presumably computes mat * vec with vec treated as a
+ * column vector; confirm against the implementation.
+ */
+static inline void vmathM3MulV3( VmathVector3 *result, const VmathMatrix3 *mat, const VmathVector3 *vec );
+
+/*
+ * Multiply two 3x3 matrices
+ *
+ * result:     receives the 3x3 matrix product
+ * mat0, mat1: the two input matrices (read-only)
+ *
+ * NOTE(review): presumably computes mat0 * mat1, in that order; confirm
+ * against the implementation.
+ */
+static inline void vmathM3Mul( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 );
+
+/*
+ * Construct an identity 3x3 matrix
+ */
+static inline void vmathM3MakeIdentity( VmathMatrix3 *result );
+
+/*
+ * Construct a 3x3 matrix to rotate around the x axis
+ */
+static inline void vmathM3MakeRotationX( VmathMatrix3 *result, float radians );
+
+/*
+ * Construct a 3x3 matrix to rotate around the y axis
+ */
+static inline void vmathM3MakeRotationY( VmathMatrix3 *result, float radians );
+
+/*
+ * Construct a 3x3 matrix to rotate around the z axis
+ */
+static inline void vmathM3MakeRotationZ( VmathMatrix3 *result, float radians );
+
+/*
+ * Construct a 3x3 matrix to rotate around the x, y, and z axes
+ */
+static inline void vmathM3MakeRotationZYX( VmathMatrix3 *result, const VmathVector3 *radiansXYZ );
+
+/*
+ * Construct a 3x3 matrix to rotate around a unit-length 3-D vector
+ */
+static inline void vmathM3MakeRotationAxis( VmathMatrix3 *result, float radians, const VmathVector3 *unitVec );
+
+/*
+ * Construct a 3x3 rotation matrix from a unit-length quaternion
+ */
+static inline void vmathM3MakeRotationQ( VmathMatrix3 *result, const VmathQuat *unitQuat );
+
+/*
+ * Construct a 3x3 matrix to perform scaling
+ */
+static inline void vmathM3MakeScale( VmathMatrix3 *result, const VmathVector3 *scaleVec );
+
+/*
+ * Append (post-multiply) a scale transformation to a 3x3 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline void vmathM3AppendScale( VmathMatrix3 *result, const VmathMatrix3 *mat, const VmathVector3 *scaleVec );
+
+/*
+ * Prepend (pre-multiply) a scale transformation to a 3x3 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline void vmathM3PrependScale( VmathMatrix3 *result, const VmathVector3 *scaleVec, const VmathMatrix3 *mat );
+
+/*
+ * Multiply two 3x3 matrices per element
+ */
+static inline void vmathM3MulPerElem( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 );
+
+/*
+ * Compute the absolute value of a 3x3 matrix per element
+ */
+static inline void vmathM3AbsPerElem( VmathMatrix3 *result, const VmathMatrix3 *mat );
+
+/*
+ * Transpose of a 3x3 matrix
+ */
+static inline void vmathM3Transpose( VmathMatrix3 *result, const VmathMatrix3 *mat );
+
+/*
+ * Compute the inverse of a 3x3 matrix
+ * NOTE: 
+ * Result is unpredictable when the determinant of mat is equal to or near 0.
+ */
+static inline void vmathM3Inverse( VmathMatrix3 *result, const VmathMatrix3 *mat );
+
+/*
+ * Determinant of a 3x3 matrix
+ */
+static inline float vmathM3Determinant( const VmathMatrix3 *mat );
+
+/*
+ * Conditionally select between two 3x3 matrices
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ * However, the transfer of select1 to a VMX register may use more processing time than a branch.
+ */
+static inline void vmathM3Select( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1, unsigned int select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3x3 matrix
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathM3Print( const VmathMatrix3 *mat );
+
+/*
+ * Print a 3x3 matrix and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathM3Prints( const VmathMatrix3 *mat, const char *name );
+
+#endif
+
+/*
+ * Copy a 4x4 matrix
+ */
+static inline void vmathM4Copy( VmathMatrix4 *result, const VmathMatrix4 *mat );
+
+/*
+ * Construct a 4x4 matrix containing the specified columns
+ */
+static inline void vmathM4MakeFromCols( VmathMatrix4 *result, const VmathVector4 *col0, const VmathVector4 *col1, const VmathVector4 *col2, const VmathVector4 *col3 );
+
+/*
+ * Construct a 4x4 matrix from a 3x4 transformation matrix
+ */
+static inline void vmathM4MakeFromT3( VmathMatrix4 *result, const VmathTransform3 *mat );
+
+/*
+ * Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector
+ */
+static inline void vmathM4MakeFromM3V3( VmathMatrix4 *result, const VmathMatrix3 *mat, const VmathVector3 *translateVec );
+
+/*
+ * Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector
+ */
+static inline void vmathM4MakeFromQV3( VmathMatrix4 *result, const VmathQuat *unitQuat, const VmathVector3 *translateVec );
+
+/*
+ * Set all elements of a 4x4 matrix to the same scalar value
+ */
+static inline void vmathM4MakeFromScalar( VmathMatrix4 *result, float scalar );
+
+/*
+ * Set the upper-left 3x3 submatrix of a 4x4 matrix
+ * NOTE: 
+ * This function does not change the bottom row elements.
+ */
+static inline void vmathM4SetUpper3x3( VmathMatrix4 *result, const VmathMatrix3 *mat3 );
+
+/*
+ * Get the upper-left 3x3 submatrix of a 4x4 matrix
+ */
+static inline void vmathM4GetUpper3x3( VmathMatrix3 *result, const VmathMatrix4 *mat );
+
+/*
+ * Set the translation component of a 4x4 matrix
+ * NOTE: 
+ * This function does not change the bottom row elements.
+ */
+static inline void vmathM4SetTranslation( VmathMatrix4 *result, const VmathVector3 *translateVec );
+
+/*
+ * Get the translation component of a 4x4 matrix
+ */
+static inline void vmathM4GetTranslation( VmathVector3 *result, const VmathMatrix4 *mat );
+
+/*
+ * Set column 0 of a 4x4 matrix
+ */
+static inline void vmathM4SetCol0( VmathMatrix4 *result, const VmathVector4 *col0 );
+
+/*
+ * Set column 1 of a 4x4 matrix
+ */
+static inline void vmathM4SetCol1( VmathMatrix4 *result, const VmathVector4 *col1 );
+
+/*
+ * Set column 2 of a 4x4 matrix
+ */
+static inline void vmathM4SetCol2( VmathMatrix4 *result, const VmathVector4 *col2 );
+
+/*
+ * Set column 3 of a 4x4 matrix
+ */
+static inline void vmathM4SetCol3( VmathMatrix4 *result, const VmathVector4 *col3 );
+
+/*
+ * Get column 0 of a 4x4 matrix
+ */
+static inline void vmathM4GetCol0( VmathVector4 *result, const VmathMatrix4 *mat );
+
+/*
+ * Get column 1 of a 4x4 matrix
+ */
+static inline void vmathM4GetCol1( VmathVector4 *result, const VmathMatrix4 *mat );
+
+/*
+ * Get column 2 of a 4x4 matrix
+ */
+static inline void vmathM4GetCol2( VmathVector4 *result, const VmathMatrix4 *mat );
+
+/*
+ * Get column 3 of a 4x4 matrix
+ */
+static inline void vmathM4GetCol3( VmathVector4 *result, const VmathMatrix4 *mat );
+
+/*
+ * Set the column of a 4x4 matrix referred to by the specified index
+ */
+static inline void vmathM4SetCol( VmathMatrix4 *result, int col, const VmathVector4 *vec );
+
+/*
+ * Set the row of a 4x4 matrix referred to by the specified index
+ */
+static inline void vmathM4SetRow( VmathMatrix4 *result, int row, const VmathVector4 *vec );
+
+/*
+ * Get the column of a 4x4 matrix referred to by the specified index
+ */
+static inline void vmathM4GetCol( VmathVector4 *result, const VmathMatrix4 *mat, int col );
+
+/*
+ * Get the row of a 4x4 matrix referred to by the specified index
+ */
+static inline void vmathM4GetRow( VmathVector4 *result, const VmathMatrix4 *mat, int row );
+
+/*
+ * Set the element of a 4x4 matrix referred to by column and row indices
+ */
+static inline void vmathM4SetElem( VmathMatrix4 *result, int col, int row, float val );
+
+/*
+ * Get the element of a 4x4 matrix referred to by column and row indices
+ */
+static inline float vmathM4GetElem( const VmathMatrix4 *mat, int col, int row );
+
+/*
+ * Add two 4x4 matrices
+ */
+static inline void vmathM4Add( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 );
+
+/*
+ * Subtract a 4x4 matrix from another 4x4 matrix
+ */
+static inline void vmathM4Sub( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 );
+
+/*
+ * Negate all elements of a 4x4 matrix
+ */
+static inline void vmathM4Neg( VmathMatrix4 *result, const VmathMatrix4 *mat );
+
+/*
+ * Multiply a 4x4 matrix by a scalar
+ */
+static inline void vmathM4ScalarMul( VmathMatrix4 *result, const VmathMatrix4 *mat, float scalar );
+
+/*
+ * Multiply a 4x4 matrix by a 4-D vector
+ */
+static inline void vmathM4MulV4( VmathVector4 *result, const VmathMatrix4 *mat, const VmathVector4 *vec );
+
+/*
+ * Multiply a 4x4 matrix by a 3-D vector
+ */
+static inline void vmathM4MulV3( VmathVector4 *result, const VmathMatrix4 *mat, const VmathVector3 *vec );
+
+/*
+ * Multiply a 4x4 matrix by a 3-D point
+ */
+static inline void vmathM4MulP3( VmathVector4 *result, const VmathMatrix4 *mat, const VmathPoint3 *pnt );
+
+/*
+ * Multiply two 4x4 matrices
+ */
+static inline void vmathM4Mul( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 );
+
+/*
+ * Multiply a 4x4 matrix by a 3x4 transformation matrix
+ */
+static inline void vmathM4MulT3( VmathMatrix4 *result, const VmathMatrix4 *mat, const VmathTransform3 *tfrm );
+
+/*
+ * Construct an identity 4x4 matrix
+ */
+static inline void vmathM4MakeIdentity( VmathMatrix4 *result );
+
+/*
+ * Construct a 4x4 matrix to rotate around the x axis
+ */
+static inline void vmathM4MakeRotationX( VmathMatrix4 *result, float radians );
+
+/*
+ * Construct a 4x4 matrix to rotate around the y axis
+ */
+static inline void vmathM4MakeRotationY( VmathMatrix4 *result, float radians );
+
+/*
+ * Construct a 4x4 matrix to rotate around the z axis
+ */
+static inline void vmathM4MakeRotationZ( VmathMatrix4 *result, float radians );
+
+/*
+ * Construct a 4x4 matrix to rotate around the x, y, and z axes
+ */
+static inline void vmathM4MakeRotationZYX( VmathMatrix4 *result, const VmathVector3 *radiansXYZ );
+
+/*
+ * Construct a 4x4 matrix to rotate around a unit-length 3-D vector
+ */
+static inline void vmathM4MakeRotationAxis( VmathMatrix4 *result, float radians, const VmathVector3 *unitVec );
+
+/*
+ * Construct a 4x4 rotation matrix from a unit-length quaternion
+ */
+static inline void vmathM4MakeRotationQ( VmathMatrix4 *result, const VmathQuat *unitQuat );
+
+/*
+ * Construct a 4x4 matrix to perform scaling
+ */
+static inline void vmathM4MakeScale( VmathMatrix4 *result, const VmathVector3 *scaleVec );
+
+/*
+ * Construct a 4x4 matrix to perform translation
+ */
+static inline void vmathM4MakeTranslation( VmathMatrix4 *result, const VmathVector3 *translateVec );
+
+/*
+ * Construct viewing matrix based on eye position, position looked at, and up direction
+ */
+static inline void vmathM4MakeLookAt( VmathMatrix4 *result, const VmathPoint3 *eyePos, const VmathPoint3 *lookAtPos, const VmathVector3 *upVec );
+
+/*
+ * Construct a perspective projection matrix
+ */
+static inline void vmathM4MakePerspective( VmathMatrix4 *result, float fovyRadians, float aspect, float zNear, float zFar );
+
+/*
+ * Construct a perspective projection matrix based on frustum
+ */
+static inline void vmathM4MakeFrustum( VmathMatrix4 *result, float left, float right, float bottom, float top, float zNear, float zFar );
+
+/*
+ * Construct an orthographic projection matrix
+ */
+static inline void vmathM4MakeOrthographic( VmathMatrix4 *result, float left, float right, float bottom, float top, float zNear, float zFar );
+
+/*
+ * Append (post-multiply) a scale transformation to a 4x4 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline void vmathM4AppendScale( VmathMatrix4 *result, const VmathMatrix4 *mat, const VmathVector3 *scaleVec );
+
+/*
+ * Prepend (pre-multiply) a scale transformation to a 4x4 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline void vmathM4PrependScale( VmathMatrix4 *result, const VmathVector3 *scaleVec, const VmathMatrix4 *mat );
+
+/*
+ * Multiply two 4x4 matrices per element
+ */
+static inline void vmathM4MulPerElem( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 );
+
+/*
+ * Compute the absolute value of a 4x4 matrix per element
+ */
+static inline void vmathM4AbsPerElem( VmathMatrix4 *result, const VmathMatrix4 *mat );
+
+/*
+ * Transpose of a 4x4 matrix
+ */
+static inline void vmathM4Transpose( VmathMatrix4 *result, const VmathMatrix4 *mat );
+
+/*
+ * Compute the inverse of a 4x4 matrix
+ * NOTE: 
+ * Result is unpredictable when the determinant of mat is equal to or near 0.
+ */
+static inline void vmathM4Inverse( VmathMatrix4 *result, const VmathMatrix4 *mat );
+
+/*
+ * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix
+ * NOTE: 
+ * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.  The result is unpredictable when the determinant of mat is equal to or near 0.
+ */
+static inline void vmathM4AffineInverse( VmathMatrix4 *result, const VmathMatrix4 *mat );
+
+/*
+ * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix
+ * NOTE: 
+ * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.
+ */
+static inline void vmathM4OrthoInverse( VmathMatrix4 *result, const VmathMatrix4 *mat );
+
+/*
+ * Determinant of a 4x4 matrix
+ */
+static inline float vmathM4Determinant( const VmathMatrix4 *mat );
+
+/*
+ * Conditionally select between two 4x4 matrices
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ * However, the transfer of select1 to a VMX register may use more processing time than a branch.
+ */
+static inline void vmathM4Select( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1, unsigned int select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 4x4 matrix
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathM4Print( const VmathMatrix4 *mat );
+
+/*
+ * Print a 4x4 matrix and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathM4Prints( const VmathMatrix4 *mat, const char *name );
+
+#endif
+
+/*
+ * Copy a 3x4 transformation matrix
+ */
+static inline void vmathT3Copy( VmathTransform3 *result, const VmathTransform3 *tfrm );
+
+/*
+ * Construct a 3x4 transformation matrix containing the specified columns
+ */
+static inline void vmathT3MakeFromCols( VmathTransform3 *result, const VmathVector3 *col0, const VmathVector3 *col1, const VmathVector3 *col2, const VmathVector3 *col3 );
+
+/*
+ * Construct a 3x4 transformation matrix from a 3x3 matrix (mat) and a 3-D translation vector (translateVec)
+ */
+static inline void vmathT3MakeFromM3V3( VmathTransform3 *result, const VmathMatrix3 *mat, const VmathVector3 *translateVec );
+
+/*
+ * Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector
+ */
+static inline void vmathT3MakeFromQV3( VmathTransform3 *result, const VmathQuat *unitQuat, const VmathVector3 *translateVec );
+
+/*
+ * Set all elements of a 3x4 transformation matrix to the same scalar value
+ */
+static inline void vmathT3MakeFromScalar( VmathTransform3 *result, float scalar );
+
+/*
+ * Set the upper-left 3x3 submatrix of a 3x4 transformation matrix
+ */
+static inline void vmathT3SetUpper3x3( VmathTransform3 *result, const VmathMatrix3 *mat3 );
+
+/*
+ * Get the upper-left 3x3 submatrix of a 3x4 transformation matrix
+ */
+static inline void vmathT3GetUpper3x3( VmathMatrix3 *result, const VmathTransform3 *tfrm );
+
+/*
+ * Set the translation component of a 3x4 transformation matrix
+ */
+static inline void vmathT3SetTranslation( VmathTransform3 *result, const VmathVector3 *translateVec );
+
+/*
+ * Get the translation component of a 3x4 transformation matrix
+ */
+static inline void vmathT3GetTranslation( VmathVector3 *result, const VmathTransform3 *tfrm );
+
+/*
+ * Set column 0 of a 3x4 transformation matrix
+ */
+static inline void vmathT3SetCol0( VmathTransform3 *result, const VmathVector3 *col0 );
+
+/*
+ * Set column 1 of a 3x4 transformation matrix
+ */
+static inline void vmathT3SetCol1( VmathTransform3 *result, const VmathVector3 *col1 );
+
+/*
+ * Set column 2 of a 3x4 transformation matrix
+ */
+static inline void vmathT3SetCol2( VmathTransform3 *result, const VmathVector3 *col2 );
+
+/*
+ * Set column 3 of a 3x4 transformation matrix
+ */
+static inline void vmathT3SetCol3( VmathTransform3 *result, const VmathVector3 *col3 );
+
+/*
+ * Get column 0 of a 3x4 transformation matrix
+ */
+static inline void vmathT3GetCol0( VmathVector3 *result, const VmathTransform3 *tfrm );
+
+/*
+ * Get column 1 of a 3x4 transformation matrix
+ */
+static inline void vmathT3GetCol1( VmathVector3 *result, const VmathTransform3 *tfrm );
+
+/*
+ * Get column 2 of a 3x4 transformation matrix
+ */
+static inline void vmathT3GetCol2( VmathVector3 *result, const VmathTransform3 *tfrm );
+
+/*
+ * Get column 3 of a 3x4 transformation matrix
+ */
+static inline void vmathT3GetCol3( VmathVector3 *result, const VmathTransform3 *tfrm );
+
+/*
+ * Set the column of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline void vmathT3SetCol( VmathTransform3 *result, int col, const VmathVector3 *vec );
+
+/*
+ * Set the row of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline void vmathT3SetRow( VmathTransform3 *result, int row, const VmathVector4 *vec );
+
+/*
+ * Get the column of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline void vmathT3GetCol( VmathVector3 *result, const VmathTransform3 *tfrm, int col );
+
+/*
+ * Get the row of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline void vmathT3GetRow( VmathVector4 *result, const VmathTransform3 *tfrm, int row );
+
+/*
+ * Set the element of a 3x4 transformation matrix referred to by column and row indices
+ */
+static inline void vmathT3SetElem( VmathTransform3 *result, int col, int row, float val );
+
+/*
+ * Get the element of a 3x4 transformation matrix referred to by column and row indices
+ */
+static inline float vmathT3GetElem( const VmathTransform3 *tfrm, int col, int row );
+
+/*
+ * Multiply a 3x4 transformation matrix by a 3-D vector
+ */
+static inline void vmathT3MulV3( VmathVector3 *result, const VmathTransform3 *tfrm, const VmathVector3 *vec );
+
+/*
+ * Multiply a 3x4 transformation matrix by a 3-D point
+ */
+static inline void vmathT3MulP3( VmathPoint3 *result, const VmathTransform3 *tfrm, const VmathPoint3 *pnt );
+
+/*
+ * Multiply two 3x4 transformation matrices
+ */
+static inline void vmathT3Mul( VmathTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1 );
+
+/*
+ * Construct an identity 3x4 transformation matrix
+ */
+static inline void vmathT3MakeIdentity( VmathTransform3 *result );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the x axis
+ */
+static inline void vmathT3MakeRotationX( VmathTransform3 *result, float radians );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the y axis
+ */
+static inline void vmathT3MakeRotationY( VmathTransform3 *result, float radians );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the z axis
+ */
+static inline void vmathT3MakeRotationZ( VmathTransform3 *result, float radians );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the x, y, and z axes
+ */
+static inline void vmathT3MakeRotationZYX( VmathTransform3 *result, const VmathVector3 *radiansXYZ );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector
+ */
+static inline void vmathT3MakeRotationAxis( VmathTransform3 *result, float radians, const VmathVector3 *unitVec );
+
+/*
+ * Construct a 3x4 rotation transformation matrix from a unit-length quaternion
+ */
+static inline void vmathT3MakeRotationQ( VmathTransform3 *result, const VmathQuat *unitQuat );
+
+/*
+ * Construct a 3x4 transformation matrix to perform scaling
+ */
+static inline void vmathT3MakeScale( VmathTransform3 *result, const VmathVector3 *scaleVec );
+
+/*
+ * Construct a 3x4 transformation matrix to perform translation
+ */
+static inline void vmathT3MakeTranslation( VmathTransform3 *result, const VmathVector3 *translateVec );
+
+/*
+ * Append (post-multiply) a scale transformation to a 3x4 transformation matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline void vmathT3AppendScale( VmathTransform3 *result, const VmathTransform3 *tfrm, const VmathVector3 *scaleVec );
+
+/*
+ * Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline void vmathT3PrependScale( VmathTransform3 *result, const VmathVector3 *scaleVec, const VmathTransform3 *tfrm );
+
+/*
+ * Multiply two 3x4 transformation matrices per element
+ */
+static inline void vmathT3MulPerElem( VmathTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1 );
+
+/*
+ * Compute the absolute value of a 3x4 transformation matrix per element
+ */
+static inline void vmathT3AbsPerElem( VmathTransform3 *result, const VmathTransform3 *tfrm );
+
+/*
+ * Compute the inverse of a 3x4 transformation matrix
+ * NOTE: 
+ * Result is unpredictable when the determinant of the upper-left 3x3 submatrix is equal to or near 0.
+ */
+static inline void vmathT3Inverse( VmathTransform3 *result, const VmathTransform3 *tfrm );
+
+/*
+ * Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix
+ * NOTE: 
+ * This can be used to achieve better performance than a general inverse when the specified 3x4 transformation matrix meets the given restrictions.
+ */
+static inline void vmathT3OrthoInverse( VmathTransform3 *result, const VmathTransform3 *tfrm );
+
+/*
+ * Conditionally select between two 3x4 transformation matrices
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ * However, the transfer of select1 to a VMX register may use more processing time than a branch.
+ */
+static inline void vmathT3Select( VmathTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1, unsigned int select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3x4 transformation matrix
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathT3Print( const VmathTransform3 *tfrm );
+
+/*
+ * Print a 3x4 transformation matrix and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathT3Prints( const VmathTransform3 *tfrm, const char *name );
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#include "vec_aos.h"
+#include "quat_aos.h"
+#include "mat_aos.h"
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/ppu/c/vectormath_aos_v.h b/Extras/vectormathlibrary/include/vectormath/ppu/c/vectormath_aos_v.h
index 066a39311..e92ce9101 100644
--- a/Extras/vectormathlibrary/include/vectormath/ppu/c/vectormath_aos_v.h
+++ b/Extras/vectormathlibrary/include/vectormath/ppu/c/vectormath_aos_v.h
@@ -1,1924 +1,1925 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_AOS_C_V_H
-#define _VECTORMATH_AOS_C_V_H
-
-#include <math.h>
-#include <altivec.h>
-
-#ifdef _VECTORMATH_DEBUG
-#include <stdio.h>
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-#ifndef _VECTORMATH_AOS_C_TYPES_H
-#define _VECTORMATH_AOS_C_TYPES_H
-
-/* A 3-D vector in array-of-structures format
- */
-typedef struct _VmathVector3
-{
-    vec_float4 vec128;
-} VmathVector3;
-
-/* A 4-D vector in array-of-structures format
- */
-typedef struct _VmathVector4
-{
-    vec_float4 vec128;
-} VmathVector4;
-
-/* A 3-D point in array-of-structures format
- */
-typedef struct _VmathPoint3
-{
-    vec_float4 vec128;
-} VmathPoint3;
-
-/* A quaternion in array-of-structures format
- */
-typedef struct _VmathQuat
-{
-    vec_float4 vec128;
-} VmathQuat;
-
-/* A 3x3 matrix in array-of-structures format
- */
-typedef struct _VmathMatrix3
-{
-    VmathVector3 col0;
-    VmathVector3 col1;
-    VmathVector3 col2;
-} VmathMatrix3;
-
-/* A 4x4 matrix in array-of-structures format
- */
-typedef struct _VmathMatrix4
-{
-    VmathVector4 col0;
-    VmathVector4 col1;
-    VmathVector4 col2;
-    VmathVector4 col3;
-} VmathMatrix4;
-
-/* A 3x4 transformation matrix in array-of-structures format
- */
-typedef struct _VmathTransform3
-{
-    VmathVector3 col0;
-    VmathVector3 col1;
-    VmathVector3 col2;
-    VmathVector3 col3;
-} VmathTransform3;
-
-#endif
-
-/*
- * Construct a 3-D vector from x, y, and z elements
- */
-static inline VmathVector3 vmathV3MakeFromElems_V( float x, float y, float z );
-
-/*
- * Copy elements from a 3-D point into a 3-D vector
- */
-static inline VmathVector3 vmathV3MakeFromP3_V( VmathPoint3 pnt );
-
-/*
- * Set all elements of a 3-D vector to the same scalar value
- */
-static inline VmathVector3 vmathV3MakeFromScalar_V( float scalar );
-
-/*
- * Set vector float data in a 3-D vector
- */
-static inline VmathVector3 vmathV3MakeFrom128_V( vec_float4 vf4 );
-
-/*
- * Get vector float data from a 3-D vector
- */
-static inline vec_float4 vmathV3Get128_V( VmathVector3 vec );
-
-/*
- * Set the x element of a 3-D vector
- */
-static inline void vmathV3SetX_V( VmathVector3 *result, float x );
-
-/*
- * Set the y element of a 3-D vector
- */
-static inline void vmathV3SetY_V( VmathVector3 *result, float y );
-
-/*
- * Set the z element of a 3-D vector
- */
-static inline void vmathV3SetZ_V( VmathVector3 *result, float z );
-
-/*
- * Get the x element of a 3-D vector
- */
-static inline float vmathV3GetX_V( VmathVector3 vec );
-
-/*
- * Get the y element of a 3-D vector
- */
-static inline float vmathV3GetY_V( VmathVector3 vec );
-
-/*
- * Get the z element of a 3-D vector
- */
-static inline float vmathV3GetZ_V( VmathVector3 vec );
-
-/*
- * Set an x, y, or z element of a 3-D vector by index
- */
-static inline void vmathV3SetElem_V( VmathVector3 *result, int idx, float value );
-
-/*
- * Get an x, y, or z element of a 3-D vector by index
- */
-static inline float vmathV3GetElem_V( VmathVector3 vec, int idx );
-
-/*
- * Add two 3-D vectors
- */
-static inline VmathVector3 vmathV3Add_V( VmathVector3 vec0, VmathVector3 vec1 );
-
-/*
- * Subtract a 3-D vector from another 3-D vector
- */
-static inline VmathVector3 vmathV3Sub_V( VmathVector3 vec0, VmathVector3 vec1 );
-
-/*
- * Add a 3-D vector to a 3-D point
- */
-static inline VmathPoint3 vmathV3AddP3_V( VmathVector3 vec, VmathPoint3 pnt );
-
-/*
- * Multiply a 3-D vector by a scalar
- */
-static inline VmathVector3 vmathV3ScalarMul_V( VmathVector3 vec, float scalar );
-
-/*
- * Divide a 3-D vector by a scalar
- */
-static inline VmathVector3 vmathV3ScalarDiv_V( VmathVector3 vec, float scalar );
-
-/*
- * Negate all elements of a 3-D vector
- */
-static inline VmathVector3 vmathV3Neg_V( VmathVector3 vec );
-
-/*
- * Construct x axis
- */
-static inline VmathVector3 vmathV3MakeXAxis_V( );
-
-/*
- * Construct y axis
- */
-static inline VmathVector3 vmathV3MakeYAxis_V( );
-
-/*
- * Construct z axis
- */
-static inline VmathVector3 vmathV3MakeZAxis_V( );
-
-/*
- * Multiply two 3-D vectors per element
- */
-static inline VmathVector3 vmathV3MulPerElem_V( VmathVector3 vec0, VmathVector3 vec1 );
-
-/*
- * Divide two 3-D vectors per element
- * NOTE: 
- * Floating-point behavior matches standard library function divf4.
- */
-static inline VmathVector3 vmathV3DivPerElem_V( VmathVector3 vec0, VmathVector3 vec1 );
-
-/*
- * Compute the reciprocal of a 3-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function recipf4.
- */
-static inline VmathVector3 vmathV3RecipPerElem_V( VmathVector3 vec );
-
-/*
- * Compute the square root of a 3-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function sqrtf4.
- */
-static inline VmathVector3 vmathV3SqrtPerElem_V( VmathVector3 vec );
-
-/*
- * Compute the reciprocal square root of a 3-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function rsqrtf4.
- */
-static inline VmathVector3 vmathV3RsqrtPerElem_V( VmathVector3 vec );
-
-/*
- * Compute the absolute value of a 3-D vector per element
- */
-static inline VmathVector3 vmathV3AbsPerElem_V( VmathVector3 vec );
-
-/*
- * Copy sign from one 3-D vector to another, per element
- */
-static inline VmathVector3 vmathV3CopySignPerElem_V( VmathVector3 vec0, VmathVector3 vec1 );
-
-/*
- * Maximum of two 3-D vectors per element
- */
-static inline VmathVector3 vmathV3MaxPerElem_V( VmathVector3 vec0, VmathVector3 vec1 );
-
-/*
- * Minimum of two 3-D vectors per element
- */
-static inline VmathVector3 vmathV3MinPerElem_V( VmathVector3 vec0, VmathVector3 vec1 );
-
-/*
- * Maximum element of a 3-D vector
- */
-static inline float vmathV3MaxElem_V( VmathVector3 vec );
-
-/*
- * Minimum element of a 3-D vector
- */
-static inline float vmathV3MinElem_V( VmathVector3 vec );
-
-/*
- * Compute the sum of all elements of a 3-D vector
- */
-static inline float vmathV3Sum_V( VmathVector3 vec );
-
-/*
- * Compute the dot product of two 3-D vectors
- */
-static inline float vmathV3Dot_V( VmathVector3 vec0, VmathVector3 vec1 );
-
-/*
- * Compute the square of the length of a 3-D vector
- */
-static inline float vmathV3LengthSqr_V( VmathVector3 vec );
-
-/*
- * Compute the length of a 3-D vector
- */
-static inline float vmathV3Length_V( VmathVector3 vec );
-
-/*
- * Normalize a 3-D vector
- * NOTE: 
- * The result is unpredictable when all elements of vec are at or near zero.
- */
-static inline VmathVector3 vmathV3Normalize_V( VmathVector3 vec );
-
-/*
- * Compute cross product of two 3-D vectors
- */
-static inline VmathVector3 vmathV3Cross_V( VmathVector3 vec0, VmathVector3 vec1 );
-
-/*
- * Outer product of two 3-D vectors
- */
-static inline VmathMatrix3 vmathV3Outer_V( VmathVector3 vec0, VmathVector3 vec1 );
-
-/*
- * Pre-multiply a row vector by a 3x3 matrix
- * NOTE: 
- * Slower than column post-multiply.
- */
-static inline VmathVector3 vmathV3RowMul_V( VmathVector3 vec, VmathMatrix3 mat );
-
-/*
- * Cross-product matrix of a 3-D vector
- */
-static inline VmathMatrix3 vmathV3CrossMatrix_V( VmathVector3 vec );
-
-/*
- * Create cross-product matrix and multiply
- * NOTE: 
- * Faster than separately creating a cross-product matrix and multiplying.
- */
-static inline VmathMatrix3 vmathV3CrossMatrixMul_V( VmathVector3 vec, VmathMatrix3 mat );
-
-/*
- * Linear interpolation between two 3-D vectors
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline VmathVector3 vmathV3Lerp_V( float t, VmathVector3 vec0, VmathVector3 vec1 );
-
-/*
- * Spherical linear interpolation between two 3-D vectors
- * NOTE: 
- * The result is unpredictable if the vectors point in opposite directions.
- * Does not clamp t between 0 and 1.
- */
-static inline VmathVector3 vmathV3Slerp_V( float t, VmathVector3 unitVec0, VmathVector3 unitVec1 );
-
-/*
- * Conditionally select between two 3-D vectors
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- * However, the transfer of select1 to a VMX register may use more processing time than a branch.
- */
-static inline VmathVector3 vmathV3Select_V( VmathVector3 vec0, VmathVector3 vec1, unsigned int select1 );
-
-/*
- * Store x, y, and z elements of a 3-D vector in the first three words of a quadword.
- * The value of the fourth word (the word with the highest address) remains unchanged
- */
-static inline void vmathV3StoreXYZ_V( VmathVector3 vec, vec_float4 *quad );
-
-/*
- * Load four three-float 3-D vectors, stored in three quadwords
- */
-static inline void vmathV3LoadXYZArray_V( VmathVector3 *vec0, VmathVector3 *vec1, VmathVector3 *vec2, VmathVector3 *vec3, const vec_float4 *threeQuads );
-
-/*
- * Store four 3-D vectors in three quadwords
- */
-static inline void vmathV3StoreXYZArray_V( VmathVector3 vec0, VmathVector3 vec1, VmathVector3 vec2, VmathVector3 vec3, vec_float4 *threeQuads );
-
-/*
- * Store eight 3-D vectors as half-floats
- */
-static inline void vmathV3StoreHalfFloats_V( VmathVector3 vec0, VmathVector3 vec1, VmathVector3 vec2, VmathVector3 vec3, VmathVector3 vec4, VmathVector3 vec5, VmathVector3 vec6, VmathVector3 vec7, vec_ushort8 *threeQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3-D vector
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathV3Print_V( VmathVector3 vec );
-
-/*
- * Print a 3-D vector and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathV3Prints_V( VmathVector3 vec, const char *name );
-
-#endif
-
-/*
- * Construct a 4-D vector from x, y, z, and w elements
- */
-static inline VmathVector4 vmathV4MakeFromElems_V( float x, float y, float z, float w );
-
-/*
- * Construct a 4-D vector from a 3-D vector and a scalar
- */
-static inline VmathVector4 vmathV4MakeFromV3Scalar_V( VmathVector3 xyz, float w );
-
-/*
- * Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0
- */
-static inline VmathVector4 vmathV4MakeFromV3_V( VmathVector3 vec );
-
-/*
- * Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1
- */
-static inline VmathVector4 vmathV4MakeFromP3_V( VmathPoint3 pnt );
-
-/*
- * Copy elements from a quaternion into a 4-D vector
- */
-static inline VmathVector4 vmathV4MakeFromQ_V( VmathQuat quat );
-
-/*
- * Set all elements of a 4-D vector to the same scalar value
- */
-static inline VmathVector4 vmathV4MakeFromScalar_V( float scalar );
-
-/*
- * Set vector float data in a 4-D vector
- */
-static inline VmathVector4 vmathV4MakeFrom128_V( vec_float4 vf4 );
-
-/*
- * Get vector float data from a 4-D vector
- */
-static inline vec_float4 vmathV4Get128_V( VmathVector4 vec );
-
-/*
- * Set the x, y, and z elements of a 4-D vector
- * NOTE: 
- * This function does not change the w element.
- */
-static inline void vmathV4SetXYZ_V( VmathVector4 *result, VmathVector3 vec );
-
-/*
- * Get the x, y, and z elements of a 4-D vector
- */
-static inline VmathVector3 vmathV4GetXYZ_V( VmathVector4 vec );
-
-/*
- * Set the x element of a 4-D vector
- */
-static inline void vmathV4SetX_V( VmathVector4 *result, float x );
-
-/*
- * Set the y element of a 4-D vector
- */
-static inline void vmathV4SetY_V( VmathVector4 *result, float y );
-
-/*
- * Set the z element of a 4-D vector
- */
-static inline void vmathV4SetZ_V( VmathVector4 *result, float z );
-
-/*
- * Set the w element of a 4-D vector
- */
-static inline void vmathV4SetW_V( VmathVector4 *result, float w );
-
-/*
- * Get the x element of a 4-D vector
- */
-static inline float vmathV4GetX_V( VmathVector4 vec );
-
-/*
- * Get the y element of a 4-D vector
- */
-static inline float vmathV4GetY_V( VmathVector4 vec );
-
-/*
- * Get the z element of a 4-D vector
- */
-static inline float vmathV4GetZ_V( VmathVector4 vec );
-
-/*
- * Get the w element of a 4-D vector
- */
-static inline float vmathV4GetW_V( VmathVector4 vec );
-
-/*
- * Set an x, y, z, or w element of a 4-D vector by index
- */
-static inline void vmathV4SetElem_V( VmathVector4 *result, int idx, float value );
-
-/*
- * Get an x, y, z, or w element of a 4-D vector by index
- */
-static inline float vmathV4GetElem_V( VmathVector4 vec, int idx );
-
-/*
- * Add two 4-D vectors
- */
-static inline VmathVector4 vmathV4Add_V( VmathVector4 vec0, VmathVector4 vec1 );
-
-/*
- * Subtract a 4-D vector from another 4-D vector
- */
-static inline VmathVector4 vmathV4Sub_V( VmathVector4 vec0, VmathVector4 vec1 );
-
-/*
- * Multiply a 4-D vector by a scalar
- */
-static inline VmathVector4 vmathV4ScalarMul_V( VmathVector4 vec, float scalar );
-
-/*
- * Divide a 4-D vector by a scalar
- */
-static inline VmathVector4 vmathV4ScalarDiv_V( VmathVector4 vec, float scalar );
-
-/*
- * Negate all elements of a 4-D vector
- */
-static inline VmathVector4 vmathV4Neg_V( VmathVector4 vec );
-
-/*
- * Construct x axis
- */
-static inline VmathVector4 vmathV4MakeXAxis_V( );
-
-/*
- * Construct y axis
- */
-static inline VmathVector4 vmathV4MakeYAxis_V( );
-
-/*
- * Construct z axis
- */
-static inline VmathVector4 vmathV4MakeZAxis_V( );
-
-/*
- * Construct w axis
- */
-static inline VmathVector4 vmathV4MakeWAxis_V( );
-
-/*
- * Multiply two 4-D vectors per element
- */
-static inline VmathVector4 vmathV4MulPerElem_V( VmathVector4 vec0, VmathVector4 vec1 );
-
-/*
- * Divide two 4-D vectors per element
- * NOTE: 
- * Floating-point behavior matches standard library function divf4.
- */
-static inline VmathVector4 vmathV4DivPerElem_V( VmathVector4 vec0, VmathVector4 vec1 );
-
-/*
- * Compute the reciprocal of a 4-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function recipf4.
- */
-static inline VmathVector4 vmathV4RecipPerElem_V( VmathVector4 vec );
-
-/*
- * Compute the square root of a 4-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function sqrtf4.
- */
-static inline VmathVector4 vmathV4SqrtPerElem_V( VmathVector4 vec );
-
-/*
- * Compute the reciprocal square root of a 4-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function rsqrtf4.
- */
-static inline VmathVector4 vmathV4RsqrtPerElem_V( VmathVector4 vec );
-
-/*
- * Compute the absolute value of a 4-D vector per element
- */
-static inline VmathVector4 vmathV4AbsPerElem_V( VmathVector4 vec );
-
-/*
- * Copy sign from one 4-D vector to another, per element
- */
-static inline VmathVector4 vmathV4CopySignPerElem_V( VmathVector4 vec0, VmathVector4 vec1 );
-
-/*
- * Maximum of two 4-D vectors per element
- */
-static inline VmathVector4 vmathV4MaxPerElem_V( VmathVector4 vec0, VmathVector4 vec1 );
-
-/*
- * Minimum of two 4-D vectors per element
- */
-static inline VmathVector4 vmathV4MinPerElem_V( VmathVector4 vec0, VmathVector4 vec1 );
-
-/*
- * Maximum element of a 4-D vector
- */
-static inline float vmathV4MaxElem_V( VmathVector4 vec );
-
-/*
- * Minimum element of a 4-D vector
- */
-static inline float vmathV4MinElem_V( VmathVector4 vec );
-
-/*
- * Compute the sum of all elements of a 4-D vector
- */
-static inline float vmathV4Sum_V( VmathVector4 vec );
-
-/*
- * Compute the dot product of two 4-D vectors
- */
-static inline float vmathV4Dot_V( VmathVector4 vec0, VmathVector4 vec1 );
-
-/*
- * Compute the square of the length of a 4-D vector
- */
-static inline float vmathV4LengthSqr_V( VmathVector4 vec );
-
-/*
- * Compute the length of a 4-D vector
- */
-static inline float vmathV4Length_V( VmathVector4 vec );
-
-/*
- * Normalize a 4-D vector
- * NOTE: 
- * The result is unpredictable when all elements of vec are at or near zero.
- */
-static inline VmathVector4 vmathV4Normalize_V( VmathVector4 vec );
-
-/*
- * Outer product of two 4-D vectors
- */
-static inline VmathMatrix4 vmathV4Outer_V( VmathVector4 vec0, VmathVector4 vec1 );
-
-/*
- * Linear interpolation between two 4-D vectors
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline VmathVector4 vmathV4Lerp_V( float t, VmathVector4 vec0, VmathVector4 vec1 );
-
-/*
- * Spherical linear interpolation between two 4-D vectors
- * NOTE: 
- * The result is unpredictable if the vectors point in opposite directions.
- * Does not clamp t between 0 and 1.
- */
-static inline VmathVector4 vmathV4Slerp_V( float t, VmathVector4 unitVec0, VmathVector4 unitVec1 );
-
-/*
- * Conditionally select between two 4-D vectors
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- * However, the transfer of select1 to a VMX register may use more processing time than a branch.
- */
-static inline VmathVector4 vmathV4Select_V( VmathVector4 vec0, VmathVector4 vec1, unsigned int select1 );
-
-/*
- * Store four 4-D vectors as half-floats
- */
-static inline void vmathV4StoreHalfFloats_V( VmathVector4 vec0, VmathVector4 vec1, VmathVector4 vec2, VmathVector4 vec3, vec_ushort8 *twoQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 4-D vector
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathV4Print_V( VmathVector4 vec );
-
-/*
- * Print a 4-D vector and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathV4Prints_V( VmathVector4 vec, const char *name );
-
-#endif
-
-/*
- * Construct a 3-D point from x, y, and z elements
- */
-static inline VmathPoint3 vmathP3MakeFromElems_V( float x, float y, float z );
-
-/*
- * Copy elements from a 3-D vector into a 3-D point
- */
-static inline VmathPoint3 vmathP3MakeFromV3_V( VmathVector3 vec );
-
-/*
- * Set all elements of a 3-D point to the same scalar value
- */
-static inline VmathPoint3 vmathP3MakeFromScalar_V( float scalar );
-
-/*
- * Set vector float data in a 3-D point
- */
-static inline VmathPoint3 vmathP3MakeFrom128_V( vec_float4 vf4 );
-
-/*
- * Get vector float data from a 3-D point
- */
-static inline vec_float4 vmathP3Get128_V( VmathPoint3 pnt );
-
-/*
- * Set the x element of a 3-D point
- */
-static inline void vmathP3SetX_V( VmathPoint3 *result, float x );
-
-/*
- * Set the y element of a 3-D point
- */
-static inline void vmathP3SetY_V( VmathPoint3 *result, float y );
-
-/*
- * Set the z element of a 3-D point
- */
-static inline void vmathP3SetZ_V( VmathPoint3 *result, float z );
-
-/*
- * Get the x element of a 3-D point
- */
-static inline float vmathP3GetX_V( VmathPoint3 pnt );
-
-/*
- * Get the y element of a 3-D point
- */
-static inline float vmathP3GetY_V( VmathPoint3 pnt );
-
-/*
- * Get the z element of a 3-D point
- */
-static inline float vmathP3GetZ_V( VmathPoint3 pnt );
-
-/*
- * Set an x, y, or z element of a 3-D point by index
- */
-static inline void vmathP3SetElem_V( VmathPoint3 *result, int idx, float value );
-
-/*
- * Get an x, y, or z element of a 3-D point by index
- */
-static inline float vmathP3GetElem_V( VmathPoint3 pnt, int idx );
-
-/*
- * Subtract a 3-D point from another 3-D point
- */
-static inline VmathVector3 vmathP3Sub_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
-
-/*
- * Add a 3-D point to a 3-D vector
- */
-static inline VmathPoint3 vmathP3AddV3_V( VmathPoint3 pnt, VmathVector3 vec );
-
-/*
- * Subtract a 3-D vector from a 3-D point
- */
-static inline VmathPoint3 vmathP3SubV3_V( VmathPoint3 pnt, VmathVector3 vec );
-
-/*
- * Multiply two 3-D points per element
- */
-static inline VmathPoint3 vmathP3MulPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
-
-/*
- * Divide two 3-D points per element
- * NOTE: 
- * Floating-point behavior matches standard library function divf4.
- */
-static inline VmathPoint3 vmathP3DivPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
-
-/*
- * Compute the reciprocal of a 3-D point per element
- * NOTE: 
- * Floating-point behavior matches standard library function recipf4.
- */
-static inline VmathPoint3 vmathP3RecipPerElem_V( VmathPoint3 pnt );
-
-/*
- * Compute the square root of a 3-D point per element
- * NOTE: 
- * Floating-point behavior matches standard library function sqrtf4.
- */
-static inline VmathPoint3 vmathP3SqrtPerElem_V( VmathPoint3 pnt );
-
-/*
- * Compute the reciprocal square root of a 3-D point per element
- * NOTE: 
- * Floating-point behavior matches standard library function rsqrtf4.
- */
-static inline VmathPoint3 vmathP3RsqrtPerElem_V( VmathPoint3 pnt );
-
-/*
- * Compute the absolute value of a 3-D point per element
- */
-static inline VmathPoint3 vmathP3AbsPerElem_V( VmathPoint3 pnt );
-
-/*
- * Copy sign from one 3-D point to another, per element
- */
-static inline VmathPoint3 vmathP3CopySignPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
-
-/*
- * Maximum of two 3-D points per element
- */
-static inline VmathPoint3 vmathP3MaxPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
-
-/*
- * Minimum of two 3-D points per element
- */
-static inline VmathPoint3 vmathP3MinPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
-
-/*
- * Maximum element of a 3-D point
- */
-static inline float vmathP3MaxElem_V( VmathPoint3 pnt );
-
-/*
- * Minimum element of a 3-D point
- */
-static inline float vmathP3MinElem_V( VmathPoint3 pnt );
-
-/*
- * Compute the sum of all elements of a 3-D point
- */
-static inline float vmathP3Sum_V( VmathPoint3 pnt );
-
-/*
- * Apply uniform scale to a 3-D point
- */
-static inline VmathPoint3 vmathP3Scale_V( VmathPoint3 pnt, float scaleVal );
-
-/*
- * Apply non-uniform scale to a 3-D point
- */
-static inline VmathPoint3 vmathP3NonUniformScale_V( VmathPoint3 pnt, VmathVector3 scaleVec );
-
-/*
- * Scalar projection of a 3-D point on a unit-length 3-D vector
- */
-static inline float vmathP3Projection_V( VmathPoint3 pnt, VmathVector3 unitVec );
-
-/*
- * Compute the square of the distance of a 3-D point from the coordinate-system origin
- */
-static inline float vmathP3DistSqrFromOrigin_V( VmathPoint3 pnt );
-
-/*
- * Compute the distance of a 3-D point from the coordinate-system origin
- */
-static inline float vmathP3DistFromOrigin_V( VmathPoint3 pnt );
-
-/*
- * Compute the square of the distance between two 3-D points
- */
-static inline float vmathP3DistSqr_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
-
-/*
- * Compute the distance between two 3-D points
- */
-static inline float vmathP3Dist_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
-
-/*
- * Linear interpolation between two 3-D points
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline VmathPoint3 vmathP3Lerp_V( float t, VmathPoint3 pnt0, VmathPoint3 pnt1 );
-
-/*
- * Conditionally select between two 3-D points
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- * However, the transfer of select1 to a VMX register may use more processing time than a branch.
- */
-static inline VmathPoint3 vmathP3Select_V( VmathPoint3 pnt0, VmathPoint3 pnt1, unsigned int select1 );
-
-/*
- * Store x, y, and z elements of a 3-D point in the first three words of a quadword.
- * The value of the fourth word (the word with the highest address) remains unchanged
- */
-static inline void vmathP3StoreXYZ_V( VmathPoint3 pnt, vec_float4 *quad );
-
-/*
- * Load four three-float 3-D points, stored in three quadwords
- */
-static inline void vmathP3LoadXYZArray_V( VmathPoint3 *pnt0, VmathPoint3 *pnt1, VmathPoint3 *pnt2, VmathPoint3 *pnt3, const vec_float4 *threeQuads );
-
-/*
- * Store four 3-D points in three quadwords
- */
-static inline void vmathP3StoreXYZArray_V( VmathPoint3 pnt0, VmathPoint3 pnt1, VmathPoint3 pnt2, VmathPoint3 pnt3, vec_float4 *threeQuads );
-
-/*
- * Store eight 3-D points as half-floats
- */
-static inline void vmathP3StoreHalfFloats_V( VmathPoint3 pnt0, VmathPoint3 pnt1, VmathPoint3 pnt2, VmathPoint3 pnt3, VmathPoint3 pnt4, VmathPoint3 pnt5, VmathPoint3 pnt6, VmathPoint3 pnt7, vec_ushort8 *threeQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3-D point
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathP3Print_V( VmathPoint3 pnt );
-
-/*
- * Print a 3-D point and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathP3Prints_V( VmathPoint3 pnt, const char *name );
-
-#endif
-
-/*
- * Construct a quaternion from x, y, z, and w elements
- */
-static inline VmathQuat vmathQMakeFromElems_V( float x, float y, float z, float w );
-
-/*
- * Construct a quaternion from a 3-D vector and a scalar
- */
-static inline VmathQuat vmathQMakeFromV3Scalar_V( VmathVector3 xyz, float w );
-
-/*
- * Copy elements from a 4-D vector into a quaternion
- */
-static inline VmathQuat vmathQMakeFromV4_V( VmathVector4 vec );
-
-/*
- * Convert a rotation matrix to a unit-length quaternion
- */
-static inline VmathQuat vmathQMakeFromM3_V( VmathMatrix3 rotMat );
-
-/*
- * Set all elements of a quaternion to the same scalar value
- */
-static inline VmathQuat vmathQMakeFromScalar_V( float scalar );
-
-/*
- * Set vector float data in a quaternion
- */
-static inline VmathQuat vmathQMakeFrom128_V( vec_float4 vf4 );
-
-/*
- * Get vector float data from a quaternion
- */
-static inline vec_float4 vmathQGet128_V( VmathQuat quat );
-
-/*
- * Set the x, y, and z elements of a quaternion
- * NOTE: 
- * This function does not change the w element.
- */
-static inline void vmathQSetXYZ_V( VmathQuat *result, VmathVector3 vec );
-
-/*
- * Get the x, y, and z elements of a quaternion
- */
-static inline VmathVector3 vmathQGetXYZ_V( VmathQuat quat );
-
-/*
- * Set the x element of a quaternion
- */
-static inline void vmathQSetX_V( VmathQuat *result, float x );
-
-/*
- * Set the y element of a quaternion
- */
-static inline void vmathQSetY_V( VmathQuat *result, float y );
-
-/*
- * Set the z element of a quaternion
- */
-static inline void vmathQSetZ_V( VmathQuat *result, float z );
-
-/*
- * Set the w element of a quaternion
- */
-static inline void vmathQSetW_V( VmathQuat *result, float w );
-
-/*
- * Get the x element of a quaternion
- */
-static inline float vmathQGetX_V( VmathQuat quat );
-
-/*
- * Get the y element of a quaternion
- */
-static inline float vmathQGetY_V( VmathQuat quat );
-
-/*
- * Get the z element of a quaternion
- */
-static inline float vmathQGetZ_V( VmathQuat quat );
-
-/*
- * Get the w element of a quaternion
- */
-static inline float vmathQGetW_V( VmathQuat quat );
-
-/*
- * Set an x, y, z, or w element of a quaternion by index
- */
-static inline void vmathQSetElem_V( VmathQuat *result, int idx, float value );
-
-/*
- * Get an x, y, z, or w element of a quaternion by index
- */
-static inline float vmathQGetElem_V( VmathQuat quat, int idx );
-
-/*
- * Add two quaternions
- */
-static inline VmathQuat vmathQAdd_V( VmathQuat quat0, VmathQuat quat1 );
-
-/*
- * Subtract a quaternion from another quaternion
- */
-static inline VmathQuat vmathQSub_V( VmathQuat quat0, VmathQuat quat1 );
-
-/*
- * Multiply two quaternions
- */
-static inline VmathQuat vmathQMul_V( VmathQuat quat0, VmathQuat quat1 );
-
-/*
- * Multiply a quaternion by a scalar
- */
-static inline VmathQuat vmathQScalarMul_V( VmathQuat quat, float scalar );
-
-/*
- * Divide a quaternion by a scalar
- */
-static inline VmathQuat vmathQScalarDiv_V( VmathQuat quat, float scalar );
-
-/*
- * Negate all elements of a quaternion
- */
-static inline VmathQuat vmathQNeg_V( VmathQuat quat );
-
-/*
- * Construct an identity quaternion
- */
-static inline VmathQuat vmathQMakeIdentity_V( );
-
-/*
- * Construct a quaternion to rotate between two unit-length 3-D vectors
- * NOTE: 
- * The result is unpredictable if unitVec0 and unitVec1 point in opposite directions.
- */
-static inline VmathQuat vmathQMakeRotationArc_V( VmathVector3 unitVec0, VmathVector3 unitVec1 );
-
-/*
- * Construct a quaternion to rotate around a unit-length 3-D vector
- */
-static inline VmathQuat vmathQMakeRotationAxis_V( float radians, VmathVector3 unitVec );
-
-/*
- * Construct a quaternion to rotate around the x axis
- */
-static inline VmathQuat vmathQMakeRotationX_V( float radians );
-
-/*
- * Construct a quaternion to rotate around the y axis
- */
-static inline VmathQuat vmathQMakeRotationY_V( float radians );
-
-/*
- * Construct a quaternion to rotate around the z axis
- */
-static inline VmathQuat vmathQMakeRotationZ_V( float radians );
-
-/*
- * Compute the conjugate of a quaternion
- */
-static inline VmathQuat vmathQConj_V( VmathQuat quat );
-
-/*
- * Use a unit-length quaternion to rotate a 3-D vector
- */
-static inline VmathVector3 vmathQRotate_V( VmathQuat unitQuat, VmathVector3 vec );
-
-/*
- * Compute the dot product of two quaternions
- */
-static inline float vmathQDot_V( VmathQuat quat0, VmathQuat quat1 );
-
-/*
- * Compute the norm of a quaternion
- */
-static inline float vmathQNorm_V( VmathQuat quat );
-
-/*
- * Compute the length of a quaternion
- */
-static inline float vmathQLength_V( VmathQuat quat );
-
-/*
- * Normalize a quaternion
- * NOTE: 
- * The result is unpredictable when all elements of quat are at or near zero.
- */
-static inline VmathQuat vmathQNormalize_V( VmathQuat quat );
-
-/*
- * Linear interpolation between two quaternions
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline VmathQuat vmathQLerp_V( float t, VmathQuat quat0, VmathQuat quat1 );
-
-/*
- * Spherical linear interpolation between two quaternions
- * NOTE: 
- * Interpolates along the shortest path between orientations.
- * Does not clamp t between 0 and 1.
- */
-static inline VmathQuat vmathQSlerp_V( float t, VmathQuat unitQuat0, VmathQuat unitQuat1 );
-
-/*
- * Spherical quadrangle interpolation
- */
-static inline VmathQuat vmathQSquad_V( float t, VmathQuat unitQuat0, VmathQuat unitQuat1, VmathQuat unitQuat2, VmathQuat unitQuat3 );
-
-/*
- * Conditionally select between two quaternions
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- * However, the transfer of select1 to a VMX register may use more processing time than a branch.
- */
-static inline VmathQuat vmathQSelect_V( VmathQuat quat0, VmathQuat quat1, unsigned int select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a quaternion
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathQPrint_V( VmathQuat quat );
-
-/*
- * Print a quaternion and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathQPrints_V( VmathQuat quat, const char *name );
-
-#endif
-
-/*
- * Construct a 3x3 matrix containing the specified columns
- */
-static inline VmathMatrix3 vmathM3MakeFromCols_V( VmathVector3 col0, VmathVector3 col1, VmathVector3 col2 );
-
-/*
- * Construct a 3x3 rotation matrix from a unit-length quaternion
- */
-static inline VmathMatrix3 vmathM3MakeFromQ_V( VmathQuat unitQuat );
-
-/*
- * Set all elements of a 3x3 matrix to the same scalar value
- */
-static inline VmathMatrix3 vmathM3MakeFromScalar_V( float scalar );
-
-/*
- * Set column 0 of a 3x3 matrix
- */
-static inline void vmathM3SetCol0_V( VmathMatrix3 *result, VmathVector3 col0 );
-
-/*
- * Set column 1 of a 3x3 matrix
- */
-static inline void vmathM3SetCol1_V( VmathMatrix3 *result, VmathVector3 col1 );
-
-/*
- * Set column 2 of a 3x3 matrix
- */
-static inline void vmathM3SetCol2_V( VmathMatrix3 *result, VmathVector3 col2 );
-
-/*
- * Get column 0 of a 3x3 matrix
- */
-static inline VmathVector3 vmathM3GetCol0_V( VmathMatrix3 mat );
-
-/*
- * Get column 1 of a 3x3 matrix
- */
-static inline VmathVector3 vmathM3GetCol1_V( VmathMatrix3 mat );
-
-/*
- * Get column 2 of a 3x3 matrix
- */
-static inline VmathVector3 vmathM3GetCol2_V( VmathMatrix3 mat );
-
-/*
- * Set the column of a 3x3 matrix referred to by the specified index
- */
-static inline void vmathM3SetCol_V( VmathMatrix3 *result, int col, VmathVector3 vec );
-
-/*
- * Set the row of a 3x3 matrix referred to by the specified index
- */
-static inline void vmathM3SetRow_V( VmathMatrix3 *result, int row, VmathVector3 vec );
-
-/*
- * Get the column of a 3x3 matrix referred to by the specified index
- */
-static inline VmathVector3 vmathM3GetCol_V( VmathMatrix3 mat, int col );
-
-/*
- * Get the row of a 3x3 matrix referred to by the specified index
- */
-static inline VmathVector3 vmathM3GetRow_V( VmathMatrix3 mat, int row );
-
-/*
- * Set the element of a 3x3 matrix referred to by column and row indices
- */
-static inline void vmathM3SetElem_V( VmathMatrix3 *result, int col, int row, float val );
-
-/*
- * Get the element of a 3x3 matrix referred to by column and row indices
- */
-static inline float vmathM3GetElem_V( VmathMatrix3 mat, int col, int row );
-
-/*
- * Add two 3x3 matrices
- */
-static inline VmathMatrix3 vmathM3Add_V( VmathMatrix3 mat0, VmathMatrix3 mat1 );
-
-/*
- * Subtract a 3x3 matrix from another 3x3 matrix
- */
-static inline VmathMatrix3 vmathM3Sub_V( VmathMatrix3 mat0, VmathMatrix3 mat1 );
-
-/*
- * Negate all elements of a 3x3 matrix
- */
-static inline VmathMatrix3 vmathM3Neg_V( VmathMatrix3 mat );
-
-/*
- * Multiply a 3x3 matrix by a scalar
- */
-static inline VmathMatrix3 vmathM3ScalarMul_V( VmathMatrix3 mat, float scalar );
-
-/*
- * Multiply a 3x3 matrix by a 3-D vector
- */
-static inline VmathVector3 vmathM3MulV3_V( VmathMatrix3 mat, VmathVector3 vec );
-
-/*
- * Multiply two 3x3 matrices
- */
-static inline VmathMatrix3 vmathM3Mul_V( VmathMatrix3 mat0, VmathMatrix3 mat1 );
-
-/*
- * Construct an identity 3x3 matrix
- */
-static inline VmathMatrix3 vmathM3MakeIdentity_V( );
-
-/*
- * Construct a 3x3 matrix to rotate around the x axis
- */
-static inline VmathMatrix3 vmathM3MakeRotationX_V( float radians );
-
-/*
- * Construct a 3x3 matrix to rotate around the y axis
- */
-static inline VmathMatrix3 vmathM3MakeRotationY_V( float radians );
-
-/*
- * Construct a 3x3 matrix to rotate around the z axis
- */
-static inline VmathMatrix3 vmathM3MakeRotationZ_V( float radians );
-
-/*
- * Construct a 3x3 matrix to rotate around the x, y, and z axes
- */
-static inline VmathMatrix3 vmathM3MakeRotationZYX_V( VmathVector3 radiansXYZ );
-
-/*
- * Construct a 3x3 matrix to rotate around a unit-length 3-D vector
- */
-static inline VmathMatrix3 vmathM3MakeRotationAxis_V( float radians, VmathVector3 unitVec );
-
-/*
- * Construct a rotation matrix from a unit-length quaternion
- */
-static inline VmathMatrix3 vmathM3MakeRotationQ_V( VmathQuat unitQuat );
-
-/*
- * Construct a 3x3 matrix to perform scaling
- */
-static inline VmathMatrix3 vmathM3MakeScale_V( VmathVector3 scaleVec );
-
-/*
- * Append (post-multiply) a scale transformation to a 3x3 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline VmathMatrix3 vmathM3AppendScale_V( VmathMatrix3 mat, VmathVector3 scaleVec );
-
-/*
- * Prepend (pre-multiply) a scale transformation to a 3x3 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline VmathMatrix3 vmathM3PrependScale_V( VmathVector3 scaleVec, VmathMatrix3 mat );
-
-/*
- * Multiply two 3x3 matrices per element
- */
-static inline VmathMatrix3 vmathM3MulPerElem_V( VmathMatrix3 mat0, VmathMatrix3 mat1 );
-
-/*
- * Compute the absolute value of a 3x3 matrix per element
- */
-static inline VmathMatrix3 vmathM3AbsPerElem_V( VmathMatrix3 mat );
-
-/*
- * Transpose of a 3x3 matrix
- */
-static inline VmathMatrix3 vmathM3Transpose_V( VmathMatrix3 mat );
-
-/*
- * Compute the inverse of a 3x3 matrix
- * NOTE: 
- * Result is unpredictable when the determinant of mat is equal to or near 0.
- */
-static inline VmathMatrix3 vmathM3Inverse_V( VmathMatrix3 mat );
-
-/*
- * Determinant of a 3x3 matrix
- */
-static inline float vmathM3Determinant_V( VmathMatrix3 mat );
-
-/*
- * Conditionally select between two 3x3 matrices
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- * However, the transfer of select1 to a VMX register may use more processing time than a branch.
- */
-static inline VmathMatrix3 vmathM3Select_V( VmathMatrix3 mat0, VmathMatrix3 mat1, unsigned int select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3x3 matrix
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathM3Print_V( VmathMatrix3 mat );
-
-/*
- * Print a 3x3 matrix and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathM3Prints_V( VmathMatrix3 mat, const char *name );
-
-#endif
-
-/*
- * Construct a 4x4 matrix containing the specified columns
- */
-static inline VmathMatrix4 vmathM4MakeFromCols_V( VmathVector4 col0, VmathVector4 col1, VmathVector4 col2, VmathVector4 col3 );
-
-/*
- * Construct a 4x4 matrix from a 3x4 transformation matrix
- */
-static inline VmathMatrix4 vmathM4MakeFromT3_V( VmathTransform3 mat );
-
-/*
- * Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector
- */
-static inline VmathMatrix4 vmathM4MakeFromM3V3_V( VmathMatrix3 mat, VmathVector3 translateVec );
-
-/*
- * Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector
- */
-static inline VmathMatrix4 vmathM4MakeFromQV3_V( VmathQuat unitQuat, VmathVector3 translateVec );
-
-/*
- * Set all elements of a 4x4 matrix to the same scalar value
- */
-static inline VmathMatrix4 vmathM4MakeFromScalar_V( float scalar );
-
-/*
- * Set the upper-left 3x3 submatrix
- * NOTE: 
- * This function does not change the bottom row elements.
- */
-static inline void vmathM4SetUpper3x3_V( VmathMatrix4 *result, VmathMatrix3 mat3 );
-
-/*
- * Get the upper-left 3x3 submatrix of a 4x4 matrix
- */
-static inline VmathMatrix3 vmathM4GetUpper3x3_V( VmathMatrix4 mat );
-
-/*
- * Set translation component
- * NOTE: 
- * This function does not change the bottom row elements.
- */
-static inline void vmathM4SetTranslation_V( VmathMatrix4 *result, VmathVector3 translateVec );
-
-/*
- * Get the translation component of a 4x4 matrix
- */
-static inline VmathVector3 vmathM4GetTranslation_V( VmathMatrix4 mat );
-
-/*
- * Set column 0 of a 4x4 matrix
- */
-static inline void vmathM4SetCol0_V( VmathMatrix4 *result, VmathVector4 col0 );
-
-/*
- * Set column 1 of a 4x4 matrix
- */
-static inline void vmathM4SetCol1_V( VmathMatrix4 *result, VmathVector4 col1 );
-
-/*
- * Set column 2 of a 4x4 matrix
- */
-static inline void vmathM4SetCol2_V( VmathMatrix4 *result, VmathVector4 col2 );
-
-/*
- * Set column 3 of a 4x4 matrix
- */
-static inline void vmathM4SetCol3_V( VmathMatrix4 *result, VmathVector4 col3 );
-
-/*
- * Get column 0 of a 4x4 matrix
- */
-static inline VmathVector4 vmathM4GetCol0_V( VmathMatrix4 mat );
-
-/*
- * Get column 1 of a 4x4 matrix
- */
-static inline VmathVector4 vmathM4GetCol1_V( VmathMatrix4 mat );
-
-/*
- * Get column 2 of a 4x4 matrix
- */
-static inline VmathVector4 vmathM4GetCol2_V( VmathMatrix4 mat );
-
-/*
- * Get column 3 of a 4x4 matrix
- */
-static inline VmathVector4 vmathM4GetCol3_V( VmathMatrix4 mat );
-
-/*
- * Set the column of a 4x4 matrix referred to by the specified index
- */
-static inline void vmathM4SetCol_V( VmathMatrix4 *result, int col, VmathVector4 vec );
-
-/*
- * Set the row of a 4x4 matrix referred to by the specified index
- */
-static inline void vmathM4SetRow_V( VmathMatrix4 *result, int row, VmathVector4 vec );
-
-/*
- * Get the column of a 4x4 matrix referred to by the specified index
- */
-static inline VmathVector4 vmathM4GetCol_V( VmathMatrix4 mat, int col );
-
-/*
- * Get the row of a 4x4 matrix referred to by the specified index
- */
-static inline VmathVector4 vmathM4GetRow_V( VmathMatrix4 mat, int row );
-
-/*
- * Set the element of a 4x4 matrix referred to by column and row indices
- */
-static inline void vmathM4SetElem_V( VmathMatrix4 *result, int col, int row, float val );
-
-/*
- * Get the element of a 4x4 matrix referred to by column and row indices
- */
-static inline float vmathM4GetElem_V( VmathMatrix4 mat, int col, int row );
-
-/*
- * Add two 4x4 matrices
- */
-static inline VmathMatrix4 vmathM4Add_V( VmathMatrix4 mat0, VmathMatrix4 mat1 );
-
-/*
- * Subtract a 4x4 matrix from another 4x4 matrix
- */
-static inline VmathMatrix4 vmathM4Sub_V( VmathMatrix4 mat0, VmathMatrix4 mat1 );
-
-/*
- * Negate all elements of a 4x4 matrix
- */
-static inline VmathMatrix4 vmathM4Neg_V( VmathMatrix4 mat );
-
-/*
- * Multiply a 4x4 matrix by a scalar
- */
-static inline VmathMatrix4 vmathM4ScalarMul_V( VmathMatrix4 mat, float scalar );
-
-/*
- * Multiply a 4x4 matrix by a 4-D vector
- */
-static inline VmathVector4 vmathM4MulV4_V( VmathMatrix4 mat, VmathVector4 vec );
-
-/*
- * Multiply a 4x4 matrix by a 3-D vector
- */
-static inline VmathVector4 vmathM4MulV3_V( VmathMatrix4 mat, VmathVector3 vec );
-
-/*
- * Multiply a 4x4 matrix by a 3-D point
- */
-static inline VmathVector4 vmathM4MulP3_V( VmathMatrix4 mat, VmathPoint3 pnt );
-
-/*
- * Multiply two 4x4 matrices
- */
-static inline VmathMatrix4 vmathM4Mul_V( VmathMatrix4 mat0, VmathMatrix4 mat1 );
-
-/*
- * Multiply a 4x4 matrix by a 3x4 transformation matrix
- */
-static inline VmathMatrix4 vmathM4MulT3_V( VmathMatrix4 mat, VmathTransform3 tfrm );
-
-/*
- * Construct an identity 4x4 matrix
- */
-static inline VmathMatrix4 vmathM4MakeIdentity_V( );
-
-/*
- * Construct a 4x4 matrix to rotate around the x axis
- */
-static inline VmathMatrix4 vmathM4MakeRotationX_V( float radians );
-
-/*
- * Construct a 4x4 matrix to rotate around the y axis
- */
-static inline VmathMatrix4 vmathM4MakeRotationY_V( float radians );
-
-/*
- * Construct a 4x4 matrix to rotate around the z axis
- */
-static inline VmathMatrix4 vmathM4MakeRotationZ_V( float radians );
-
-/*
- * Construct a 4x4 matrix to rotate around the x, y, and z axes
- */
-static inline VmathMatrix4 vmathM4MakeRotationZYX_V( VmathVector3 radiansXYZ );
-
-/*
- * Construct a 4x4 matrix to rotate around a unit-length 3-D vector
- */
-static inline VmathMatrix4 vmathM4MakeRotationAxis_V( float radians, VmathVector3 unitVec );
-
-/*
- * Construct a rotation matrix from a unit-length quaternion
- */
-static inline VmathMatrix4 vmathM4MakeRotationQ_V( VmathQuat unitQuat );
-
-/*
- * Construct a 4x4 matrix to perform scaling
- */
-static inline VmathMatrix4 vmathM4MakeScale_V( VmathVector3 scaleVec );
-
-/*
- * Construct a 4x4 matrix to perform translation
- */
-static inline VmathMatrix4 vmathM4MakeTranslation_V( VmathVector3 translateVec );
-
-/*
- * Construct viewing matrix based on eye position, position looked at, and up direction
- */
-static inline VmathMatrix4 vmathM4MakeLookAt_V( VmathPoint3 eyePos, VmathPoint3 lookAtPos, VmathVector3 upVec );
-
-/*
- * Construct a perspective projection matrix
- */
-static inline VmathMatrix4 vmathM4MakePerspective_V( float fovyRadians, float aspect, float zNear, float zFar );
-
-/*
- * Construct a perspective projection matrix based on frustum
- */
-static inline VmathMatrix4 vmathM4MakeFrustum_V( float left, float right, float bottom, float top, float zNear, float zFar );
-
-/*
- * Construct an orthographic projection matrix
- */
-static inline VmathMatrix4 vmathM4MakeOrthographic_V( float left, float right, float bottom, float top, float zNear, float zFar );
-
-/*
- * Append (post-multiply) a scale transformation to a 4x4 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline VmathMatrix4 vmathM4AppendScale_V( VmathMatrix4 mat, VmathVector3 scaleVec );
-
-/*
- * Prepend (pre-multiply) a scale transformation to a 4x4 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline VmathMatrix4 vmathM4PrependScale_V( VmathVector3 scaleVec, VmathMatrix4 mat );
-
-/*
- * Multiply two 4x4 matrices per element
- */
-static inline VmathMatrix4 vmathM4MulPerElem_V( VmathMatrix4 mat0, VmathMatrix4 mat1 );
-
-/*
- * Compute the absolute value of a 4x4 matrix per element
- */
-static inline VmathMatrix4 vmathM4AbsPerElem_V( VmathMatrix4 mat );
-
-/*
- * Transpose of a 4x4 matrix
- */
-static inline VmathMatrix4 vmathM4Transpose_V( VmathMatrix4 mat );
-
-/*
- * Compute the inverse of a 4x4 matrix
- * NOTE: 
- * Result is unpredictable when the determinant of mat is equal to or near 0.
- */
-static inline VmathMatrix4 vmathM4Inverse_V( VmathMatrix4 mat );
-
-/*
- * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix
- * NOTE: 
- * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.  The result is unpredictable when the determinant of mat is equal to or near 0.
- */
-static inline VmathMatrix4 vmathM4AffineInverse_V( VmathMatrix4 mat );
-
-/*
- * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix
- * NOTE: 
- * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.
- */
-static inline VmathMatrix4 vmathM4OrthoInverse_V( VmathMatrix4 mat );
-
-/*
- * Determinant of a 4x4 matrix
- */
-static inline float vmathM4Determinant_V( VmathMatrix4 mat );
-
-/*
- * Conditionally select between two 4x4 matrices
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- * However, the transfer of select1 to a VMX register may use more processing time than a branch.
- */
-static inline VmathMatrix4 vmathM4Select_V( VmathMatrix4 mat0, VmathMatrix4 mat1, unsigned int select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 4x4 matrix
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathM4Print_V( VmathMatrix4 mat );
-
-/*
- * Print a 4x4 matrix and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathM4Prints_V( VmathMatrix4 mat, const char *name );
-
-#endif
-
-/*
- * Construct a 3x4 transformation matrix containing the specified columns
- */
-static inline VmathTransform3 vmathT3MakeFromCols_V( VmathVector3 col0, VmathVector3 col1, VmathVector3 col2, VmathVector3 col3 );
-
-/*
- * Construct a 3x4 transformation matrix from a 3x3 matrix and a 3-D vector
- */
-static inline VmathTransform3 vmathT3MakeFromM3V3_V( VmathMatrix3 tfrm, VmathVector3 translateVec );
-
-/*
- * Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector
- */
-static inline VmathTransform3 vmathT3MakeFromQV3_V( VmathQuat unitQuat, VmathVector3 translateVec );
-
-/*
- * Set all elements of a 3x4 transformation matrix to the same scalar value
- */
-static inline VmathTransform3 vmathT3MakeFromScalar_V( float scalar );
-
-/*
- * Set the upper-left 3x3 submatrix
- */
-static inline void vmathT3SetUpper3x3_V( VmathTransform3 *result, VmathMatrix3 mat3 );
-
-/*
- * Get the upper-left 3x3 submatrix of a 3x4 transformation matrix
- */
-static inline VmathMatrix3 vmathT3GetUpper3x3_V( VmathTransform3 tfrm );
-
-/*
- * Set translation component
- */
-static inline void vmathT3SetTranslation_V( VmathTransform3 *result, VmathVector3 translateVec );
-
-/*
- * Get the translation component of a 3x4 transformation matrix
- */
-static inline VmathVector3 vmathT3GetTranslation_V( VmathTransform3 tfrm );
-
-/*
- * Set column 0 of a 3x4 transformation matrix
- */
-static inline void vmathT3SetCol0_V( VmathTransform3 *result, VmathVector3 col0 );
-
-/*
- * Set column 1 of a 3x4 transformation matrix
- */
-static inline void vmathT3SetCol1_V( VmathTransform3 *result, VmathVector3 col1 );
-
-/*
- * Set column 2 of a 3x4 transformation matrix
- */
-static inline void vmathT3SetCol2_V( VmathTransform3 *result, VmathVector3 col2 );
-
-/*
- * Set column 3 of a 3x4 transformation matrix
- */
-static inline void vmathT3SetCol3_V( VmathTransform3 *result, VmathVector3 col3 );
-
-/*
- * Get column 0 of a 3x4 transformation matrix
- */
-static inline VmathVector3 vmathT3GetCol0_V( VmathTransform3 tfrm );
-
-/*
- * Get column 1 of a 3x4 transformation matrix
- */
-static inline VmathVector3 vmathT3GetCol1_V( VmathTransform3 tfrm );
-
-/*
- * Get column 2 of a 3x4 transformation matrix
- */
-static inline VmathVector3 vmathT3GetCol2_V( VmathTransform3 tfrm );
-
-/*
- * Get column 3 of a 3x4 transformation matrix
- */
-static inline VmathVector3 vmathT3GetCol3_V( VmathTransform3 tfrm );
-
-/*
- * Set the column of a 3x4 transformation matrix referred to by the specified index
- */
-static inline void vmathT3SetCol_V( VmathTransform3 *result, int col, VmathVector3 vec );
-
-/*
- * Set the row of a 3x4 transformation matrix referred to by the specified index
- */
-static inline void vmathT3SetRow_V( VmathTransform3 *result, int row, VmathVector4 vec );
-
-/*
- * Get the column of a 3x4 transformation matrix referred to by the specified index
- */
-static inline VmathVector3 vmathT3GetCol_V( VmathTransform3 tfrm, int col );
-
-/*
- * Get the row of a 3x4 transformation matrix referred to by the specified index
- */
-static inline VmathVector4 vmathT3GetRow_V( VmathTransform3 tfrm, int row );
-
-/*
- * Set the element of a 3x4 transformation matrix referred to by column and row indices
- */
-static inline void vmathT3SetElem_V( VmathTransform3 *result, int col, int row, float val );
-
-/*
- * Get the element of a 3x4 transformation matrix referred to by column and row indices
- */
-static inline float vmathT3GetElem_V( VmathTransform3 tfrm, int col, int row );
-
-/*
- * Multiply a 3x4 transformation matrix by a 3-D vector
- */
-static inline VmathVector3 vmathT3MulV3_V( VmathTransform3 tfrm, VmathVector3 vec );
-
-/*
- * Multiply a 3x4 transformation matrix by a 3-D point
- */
-static inline VmathPoint3 vmathT3MulP3_V( VmathTransform3 tfrm, VmathPoint3 pnt );
-
-/*
- * Multiply two 3x4 transformation matrices
- */
-static inline VmathTransform3 vmathT3Mul_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1 );
-
-/*
- * Construct an identity 3x4 transformation matrix
- */
-static inline VmathTransform3 vmathT3MakeIdentity_V( );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the x axis
- */
-static inline VmathTransform3 vmathT3MakeRotationX_V( float radians );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the y axis
- */
-static inline VmathTransform3 vmathT3MakeRotationY_V( float radians );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the z axis
- */
-static inline VmathTransform3 vmathT3MakeRotationZ_V( float radians );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the x, y, and z axes
- */
-static inline VmathTransform3 vmathT3MakeRotationZYX_V( VmathVector3 radiansXYZ );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector
- */
-static inline VmathTransform3 vmathT3MakeRotationAxis_V( float radians, VmathVector3 unitVec );
-
-/*
- * Construct a rotation matrix from a unit-length quaternion
- */
-static inline VmathTransform3 vmathT3MakeRotationQ_V( VmathQuat unitQuat );
-
-/*
- * Construct a 3x4 transformation matrix to perform scaling
- */
-static inline VmathTransform3 vmathT3MakeScale_V( VmathVector3 scaleVec );
-
-/*
- * Construct a 3x4 transformation matrix to perform translation
- */
-static inline VmathTransform3 vmathT3MakeTranslation_V( VmathVector3 translateVec );
-
-/*
- * Append (post-multiply) a scale transformation to a 3x4 transformation matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline VmathTransform3 vmathT3AppendScale_V( VmathTransform3 tfrm, VmathVector3 scaleVec );
-
-/*
- * Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline VmathTransform3 vmathT3PrependScale_V( VmathVector3 scaleVec, VmathTransform3 tfrm );
-
-/*
- * Multiply two 3x4 transformation matrices per element
- */
-static inline VmathTransform3 vmathT3MulPerElem_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1 );
-
-/*
- * Compute the absolute value of a 3x4 transformation matrix per element
- */
-static inline VmathTransform3 vmathT3AbsPerElem_V( VmathTransform3 tfrm );
-
-/*
- * Inverse of a 3x4 transformation matrix
- * NOTE: 
- * Result is unpredictable when the determinant of the left 3x3 submatrix is equal to or near 0.
- */
-static inline VmathTransform3 vmathT3Inverse_V( VmathTransform3 tfrm );
-
-/*
- * Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix
- * NOTE: 
- * This can be used to achieve better performance than a general inverse when the specified 3x4 transformation matrix meets the given restrictions.
- */
-static inline VmathTransform3 vmathT3OrthoInverse_V( VmathTransform3 tfrm );
-
-/*
- * Conditionally select between two 3x4 transformation matrices
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- * However, the transfer of select1 to a VMX register may use more processing time than a branch.
- */
-static inline VmathTransform3 vmathT3Select_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1, unsigned int select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3x4 transformation matrix
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathT3Print_V( VmathTransform3 tfrm );
-
-/*
- * Print a 3x4 transformation matrix and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathT3Prints_V( VmathTransform3 tfrm, const char *name );
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#include "vectormath_aos.h"
-#include "vec_aos_v.h"
-#include "quat_aos_v.h"
-#include "mat_aos_v.h"
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_AOS_C_V_PPU_H
+#define _VECTORMATH_AOS_C_V_PPU_H
+
+#include <math.h>
+#include <altivec.h>
+#include "vec_types.h"
+
+#ifdef _VECTORMATH_DEBUG
+#include <stdio.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+#ifndef _VECTORMATH_AOS_C_TYPES_H
+#define _VECTORMATH_AOS_C_TYPES_H
+
+/* A 3-D vector in array-of-structures format (wraps one vec_float4, member vec128)
+ */
+typedef struct _VmathVector3
+{
+    vec_float4 vec128;
+} VmathVector3;
+
+/* A 4-D vector in array-of-structures format (wraps one vec_float4, member vec128)
+ */
+typedef struct _VmathVector4
+{
+    vec_float4 vec128;
+} VmathVector4;
+
+/* A 3-D point in array-of-structures format (wraps one vec_float4, member vec128)
+ */
+typedef struct _VmathPoint3
+{
+    vec_float4 vec128;
+} VmathPoint3;
+
+/* A quaternion in array-of-structures format (wraps one vec_float4, member vec128)
+ */
+typedef struct _VmathQuat
+{
+    vec_float4 vec128;
+} VmathQuat;
+
+/* A 3x3 matrix in array-of-structures format (three VmathVector3 columns: col0, col1, col2)
+ */
+typedef struct _VmathMatrix3
+{
+    VmathVector3 col0;
+    VmathVector3 col1;
+    VmathVector3 col2;
+} VmathMatrix3;
+
+/* A 4x4 matrix in array-of-structures format (four VmathVector4 columns: col0 through col3)
+ */
+typedef struct _VmathMatrix4
+{
+    VmathVector4 col0;
+    VmathVector4 col1;
+    VmathVector4 col2;
+    VmathVector4 col3;
+} VmathMatrix4;
+
+/* A 3x4 transformation matrix in array-of-structures format (four VmathVector3 columns: col0 through col3)
+ */
+typedef struct _VmathTransform3
+{
+    VmathVector3 col0;
+    VmathVector3 col1;
+    VmathVector3 col2;
+    VmathVector3 col3;
+} VmathTransform3;
+
+#endif
+
+/*
+ * Construct a 3-D vector from x, y, and z elements
+ */
+static inline VmathVector3 vmathV3MakeFromElems_V( float x, float y, float z );
+
+/*
+ * Copy elements from a 3-D point into a 3-D vector
+ */
+static inline VmathVector3 vmathV3MakeFromP3_V( VmathPoint3 pnt );
+
+/*
+ * Set all elements of a 3-D vector to the same scalar value
+ */
+static inline VmathVector3 vmathV3MakeFromScalar_V( float scalar );
+
+/*
+ * Set vector float data in a 3-D vector
+ */
+static inline VmathVector3 vmathV3MakeFrom128_V( vec_float4 vf4 );
+
+/*
+ * Get vector float data from a 3-D vector
+ */
+static inline vec_float4 vmathV3Get128_V( VmathVector3 vec );
+
+/*
+ * Set the x element of a 3-D vector
+ */
+static inline void vmathV3SetX_V( VmathVector3 *result, float x );
+
+/*
+ * Set the y element of a 3-D vector
+ */
+static inline void vmathV3SetY_V( VmathVector3 *result, float y );
+
+/*
+ * Set the z element of a 3-D vector
+ */
+static inline void vmathV3SetZ_V( VmathVector3 *result, float z );
+
+/*
+ * Get the x element of a 3-D vector
+ */
+static inline float vmathV3GetX_V( VmathVector3 vec );
+
+/*
+ * Get the y element of a 3-D vector
+ */
+static inline float vmathV3GetY_V( VmathVector3 vec );
+
+/*
+ * Get the z element of a 3-D vector
+ */
+static inline float vmathV3GetZ_V( VmathVector3 vec );
+
+/*
+ * Set an x, y, or z element of a 3-D vector by index
+ */
+static inline void vmathV3SetElem_V( VmathVector3 *result, int idx, float value );
+
+/*
+ * Get an x, y, or z element of a 3-D vector by index
+ */
+static inline float vmathV3GetElem_V( VmathVector3 vec, int idx );
+
+/*
+ * Add two 3-D vectors
+ */
+static inline VmathVector3 vmathV3Add_V( VmathVector3 vec0, VmathVector3 vec1 );
+
+/*
+ * Subtract a 3-D vector from another 3-D vector
+ */
+static inline VmathVector3 vmathV3Sub_V( VmathVector3 vec0, VmathVector3 vec1 );
+
+/*
+ * Add a 3-D vector to a 3-D point
+ */
+static inline VmathPoint3 vmathV3AddP3_V( VmathVector3 vec, VmathPoint3 pnt );
+
+/*
+ * Multiply a 3-D vector by a scalar
+ */
+static inline VmathVector3 vmathV3ScalarMul_V( VmathVector3 vec, float scalar );
+
+/*
+ * Divide a 3-D vector by a scalar
+ */
+static inline VmathVector3 vmathV3ScalarDiv_V( VmathVector3 vec, float scalar );
+
+/*
+ * Negate all elements of a 3-D vector
+ */
+static inline VmathVector3 vmathV3Neg_V( VmathVector3 vec );
+
+/*
+ * Construct x axis as a 3-D vector (takes no arguments; declared with a C prototype)
+ */
+static inline VmathVector3 vmathV3MakeXAxis_V( void );
+
+/*
+ * Construct y axis as a 3-D vector (takes no arguments; declared with a C prototype)
+ */
+static inline VmathVector3 vmathV3MakeYAxis_V( void );
+
+/*
+ * Construct z axis as a 3-D vector (takes no arguments; declared with a C prototype)
+ */
+static inline VmathVector3 vmathV3MakeZAxis_V( void );
+
+/*
+ * Multiply two 3-D vectors per element
+ */
+static inline VmathVector3 vmathV3MulPerElem_V( VmathVector3 vec0, VmathVector3 vec1 );
+
+/*
+ * Divide two 3-D vectors per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function divf4.
+ */
+static inline VmathVector3 vmathV3DivPerElem_V( VmathVector3 vec0, VmathVector3 vec1 );
+
+/*
+ * Compute the reciprocal of a 3-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function recipf4.
+ */
+static inline VmathVector3 vmathV3RecipPerElem_V( VmathVector3 vec );
+
+/*
+ * Compute the square root of a 3-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function sqrtf4.
+ */
+static inline VmathVector3 vmathV3SqrtPerElem_V( VmathVector3 vec );
+
+/*
+ * Compute the reciprocal square root of a 3-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function rsqrtf4.
+ */
+static inline VmathVector3 vmathV3RsqrtPerElem_V( VmathVector3 vec );
+
+/*
+ * Compute the absolute value of a 3-D vector per element
+ */
+static inline VmathVector3 vmathV3AbsPerElem_V( VmathVector3 vec );
+
+/*
+ * Copy sign from one 3-D vector to another, per element
+ */
+static inline VmathVector3 vmathV3CopySignPerElem_V( VmathVector3 vec0, VmathVector3 vec1 );
+
+/*
+ * Maximum of two 3-D vectors per element
+ */
+static inline VmathVector3 vmathV3MaxPerElem_V( VmathVector3 vec0, VmathVector3 vec1 );
+
+/*
+ * Minimum of two 3-D vectors per element
+ */
+static inline VmathVector3 vmathV3MinPerElem_V( VmathVector3 vec0, VmathVector3 vec1 );
+
+/*
+ * Maximum element of a 3-D vector
+ */
+static inline float vmathV3MaxElem_V( VmathVector3 vec );
+
+/*
+ * Minimum element of a 3-D vector
+ */
+static inline float vmathV3MinElem_V( VmathVector3 vec );
+
+/*
+ * Compute the sum of all elements of a 3-D vector
+ */
+static inline float vmathV3Sum_V( VmathVector3 vec );
+
+/*
+ * Compute the dot product of two 3-D vectors
+ */
+static inline float vmathV3Dot_V( VmathVector3 vec0, VmathVector3 vec1 );
+
+/*
+ * Compute the square of the length of a 3-D vector
+ */
+static inline float vmathV3LengthSqr_V( VmathVector3 vec );
+
+/*
+ * Compute the length of a 3-D vector
+ */
+static inline float vmathV3Length_V( VmathVector3 vec );
+
+/*
+ * Normalize a 3-D vector
+ * NOTE: 
+ * The result is unpredictable when all elements of vec are at or near zero.
+ */
+static inline VmathVector3 vmathV3Normalize_V( VmathVector3 vec );
+
+/*
+ * Compute cross product of two 3-D vectors
+ */
+static inline VmathVector3 vmathV3Cross_V( VmathVector3 vec0, VmathVector3 vec1 );
+
+/*
+ * Outer product of two 3-D vectors
+ */
+static inline VmathMatrix3 vmathV3Outer_V( VmathVector3 vec0, VmathVector3 vec1 );
+
+/*
+ * Pre-multiply a row vector by a 3x3 matrix
+ * NOTE: 
+ * Slower than column post-multiply.
+ */
+static inline VmathVector3 vmathV3RowMul_V( VmathVector3 vec, VmathMatrix3 mat );
+
+/*
+ * Cross-product matrix of a 3-D vector
+ */
+static inline VmathMatrix3 vmathV3CrossMatrix_V( VmathVector3 vec );
+
+/*
+ * Create cross-product matrix and multiply
+ * NOTE: 
+ * Faster than separately creating a cross-product matrix and multiplying.
+ */
+static inline VmathMatrix3 vmathV3CrossMatrixMul_V( VmathVector3 vec, VmathMatrix3 mat );
+
+/*
+ * Linear interpolation between two 3-D vectors
+ * NOTE: 
+ * Does not clamp t between 0 and 1.
+ */
+static inline VmathVector3 vmathV3Lerp_V( float t, VmathVector3 vec0, VmathVector3 vec1 );
+
+/*
+ * Spherical linear interpolation between two 3-D vectors
+ * NOTE: 
+ * The result is unpredictable if the vectors point in opposite directions.
+ * Does not clamp t between 0 and 1.
+ */
+static inline VmathVector3 vmathV3Slerp_V( float t, VmathVector3 unitVec0, VmathVector3 unitVec1 );
+
+/*
+ * Conditionally select between two 3-D vectors
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ * However, the transfer of select1 to a VMX register may use more processing time than a branch.
+ */
+static inline VmathVector3 vmathV3Select_V( VmathVector3 vec0, VmathVector3 vec1, unsigned int select1 );
+
+/*
+ * Store x, y, and z elements of a 3-D vector in the first three words of a quadword.
+ * The value of the fourth word (the word with the highest address) remains unchanged
+ */
+static inline void vmathV3StoreXYZ_V( VmathVector3 vec, vec_float4 *quad );
+
+/*
+ * Load four three-float 3-D vectors, stored in three quadwords
+ */
+static inline void vmathV3LoadXYZArray_V( VmathVector3 *vec0, VmathVector3 *vec1, VmathVector3 *vec2, VmathVector3 *vec3, const vec_float4 *threeQuads );
+
+/*
+ * Store four 3-D vectors in three quadwords
+ */
+static inline void vmathV3StoreXYZArray_V( VmathVector3 vec0, VmathVector3 vec1, VmathVector3 vec2, VmathVector3 vec3, vec_float4 *threeQuads );
+
+/*
+ * Store eight 3-D vectors as half-floats
+ */
+static inline void vmathV3StoreHalfFloats_V( VmathVector3 vec0, VmathVector3 vec1, VmathVector3 vec2, VmathVector3 vec3, VmathVector3 vec4, VmathVector3 vec5, VmathVector3 vec6, VmathVector3 vec7, vec_ushort8 *threeQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3-D vector
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathV3Print_V( VmathVector3 vec );
+
+/*
+ * Print a 3-D vector and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathV3Prints_V( VmathVector3 vec, const char *name );
+
+#endif
+
+/*
+ * Construct a 4-D vector from x, y, z, and w elements
+ */
+static inline VmathVector4 vmathV4MakeFromElems_V( float x, float y, float z, float w );
+
+/*
+ * Construct a 4-D vector from a 3-D vector and a scalar
+ */
+static inline VmathVector4 vmathV4MakeFromV3Scalar_V( VmathVector3 xyz, float w );
+
+/*
+ * Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0
+ */
+static inline VmathVector4 vmathV4MakeFromV3_V( VmathVector3 vec );
+
+/*
+ * Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1
+ */
+static inline VmathVector4 vmathV4MakeFromP3_V( VmathPoint3 pnt );
+
+/*
+ * Copy elements from a quaternion into a 4-D vector
+ */
+static inline VmathVector4 vmathV4MakeFromQ_V( VmathQuat quat );
+
+/*
+ * Set all elements of a 4-D vector to the same scalar value
+ */
+static inline VmathVector4 vmathV4MakeFromScalar_V( float scalar );
+
+/*
+ * Set vector float data in a 4-D vector
+ */
+static inline VmathVector4 vmathV4MakeFrom128_V( vec_float4 vf4 );
+
+/*
+ * Get vector float data from a 4-D vector
+ */
+static inline vec_float4 vmathV4Get128_V( VmathVector4 vec );
+
+/*
+ * Set the x, y, and z elements of a 4-D vector
+ * NOTE: 
+ * This function does not change the w element.
+ */
+static inline void vmathV4SetXYZ_V( VmathVector4 *result, VmathVector3 vec );
+
+/*
+ * Get the x, y, and z elements of a 4-D vector
+ */
+static inline VmathVector3 vmathV4GetXYZ_V( VmathVector4 vec );
+
+/*
+ * Set the x element of a 4-D vector
+ */
+static inline void vmathV4SetX_V( VmathVector4 *result, float x );
+
+/*
+ * Set the y element of a 4-D vector
+ */
+static inline void vmathV4SetY_V( VmathVector4 *result, float y );
+
+/*
+ * Set the z element of a 4-D vector
+ */
+static inline void vmathV4SetZ_V( VmathVector4 *result, float z );
+
+/*
+ * Set the w element of a 4-D vector
+ */
+static inline void vmathV4SetW_V( VmathVector4 *result, float w );
+
+/*
+ * Get the x element of a 4-D vector
+ */
+static inline float vmathV4GetX_V( VmathVector4 vec );
+
+/*
+ * Get the y element of a 4-D vector
+ */
+static inline float vmathV4GetY_V( VmathVector4 vec );
+
+/*
+ * Get the z element of a 4-D vector
+ */
+static inline float vmathV4GetZ_V( VmathVector4 vec );
+
+/*
+ * Get the w element of a 4-D vector
+ */
+static inline float vmathV4GetW_V( VmathVector4 vec );
+
+/*
+ * Set an x, y, z, or w element of a 4-D vector by index
+ */
+static inline void vmathV4SetElem_V( VmathVector4 *result, int idx, float value );
+
+/*
+ * Get an x, y, z, or w element of a 4-D vector by index
+ */
+static inline float vmathV4GetElem_V( VmathVector4 vec, int idx );
+
+/*
+ * Add two 4-D vectors
+ */
+static inline VmathVector4 vmathV4Add_V( VmathVector4 vec0, VmathVector4 vec1 );
+
+/*
+ * Subtract a 4-D vector from another 4-D vector
+ */
+static inline VmathVector4 vmathV4Sub_V( VmathVector4 vec0, VmathVector4 vec1 );
+
+/*
+ * Multiply a 4-D vector by a scalar
+ */
+static inline VmathVector4 vmathV4ScalarMul_V( VmathVector4 vec, float scalar );
+
+/*
+ * Divide a 4-D vector by a scalar
+ */
+static inline VmathVector4 vmathV4ScalarDiv_V( VmathVector4 vec, float scalar );
+
+/*
+ * Negate all elements of a 4-D vector
+ */
+static inline VmathVector4 vmathV4Neg_V( VmathVector4 vec );
+
+/*
+ * Construct x axis as a 4-D vector (takes no arguments; declared with a C prototype)
+ */
+static inline VmathVector4 vmathV4MakeXAxis_V( void );
+
+/*
+ * Construct y axis as a 4-D vector (takes no arguments; declared with a C prototype)
+ */
+static inline VmathVector4 vmathV4MakeYAxis_V( void );
+
+/*
+ * Construct z axis as a 4-D vector (takes no arguments; declared with a C prototype)
+ */
+static inline VmathVector4 vmathV4MakeZAxis_V( void );
+
+/*
+ * Construct w axis as a 4-D vector (takes no arguments; declared with a C prototype)
+ */
+static inline VmathVector4 vmathV4MakeWAxis_V( void );
+
+/*
+ * Multiply two 4-D vectors per element
+ */
+static inline VmathVector4 vmathV4MulPerElem_V( VmathVector4 vec0, VmathVector4 vec1 );
+
+/*
+ * Divide two 4-D vectors per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function divf4.
+ */
+static inline VmathVector4 vmathV4DivPerElem_V( VmathVector4 vec0, VmathVector4 vec1 );
+
+/*
+ * Compute the reciprocal of a 4-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function recipf4.
+ */
+static inline VmathVector4 vmathV4RecipPerElem_V( VmathVector4 vec );
+
+/*
+ * Compute the square root of a 4-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function sqrtf4.
+ */
+static inline VmathVector4 vmathV4SqrtPerElem_V( VmathVector4 vec );
+
+/*
+ * Compute the reciprocal square root of a 4-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function rsqrtf4.
+ */
+static inline VmathVector4 vmathV4RsqrtPerElem_V( VmathVector4 vec );
+
+/*
+ * Compute the absolute value of a 4-D vector per element
+ */
+static inline VmathVector4 vmathV4AbsPerElem_V( VmathVector4 vec );
+
+/*
+ * Copy sign from one 4-D vector to another, per element
+ */
+static inline VmathVector4 vmathV4CopySignPerElem_V( VmathVector4 vec0, VmathVector4 vec1 );
+
+/*
+ * Maximum of two 4-D vectors per element
+ */
+static inline VmathVector4 vmathV4MaxPerElem_V( VmathVector4 vec0, VmathVector4 vec1 );
+
+/*
+ * Minimum of two 4-D vectors per element
+ */
+static inline VmathVector4 vmathV4MinPerElem_V( VmathVector4 vec0, VmathVector4 vec1 );
+
+/*
+ * Maximum element of a 4-D vector
+ */
+static inline float vmathV4MaxElem_V( VmathVector4 vec );
+
+/*
+ * Minimum element of a 4-D vector
+ */
+static inline float vmathV4MinElem_V( VmathVector4 vec );
+
+/*
+ * Compute the sum of all elements of a 4-D vector
+ */
+static inline float vmathV4Sum_V( VmathVector4 vec );
+
+/*
+ * Compute the dot product of two 4-D vectors
+ */
+static inline float vmathV4Dot_V( VmathVector4 vec0, VmathVector4 vec1 );
+
+/*
+ * Compute the square of the length of a 4-D vector
+ */
+static inline float vmathV4LengthSqr_V( VmathVector4 vec );
+
+/*
+ * Compute the length of a 4-D vector
+ */
+static inline float vmathV4Length_V( VmathVector4 vec );
+
+/*
+ * Normalize a 4-D vector
+ * NOTE: 
+ * The result is unpredictable when all elements of vec are at or near zero.
+ */
+static inline VmathVector4 vmathV4Normalize_V( VmathVector4 vec );
+
+/*
+ * Outer product of two 4-D vectors
+ */
+static inline VmathMatrix4 vmathV4Outer_V( VmathVector4 vec0, VmathVector4 vec1 );
+
+/*
+ * Linear interpolation between two 4-D vectors
+ * NOTE: 
+ * Does not clamp t between 0 and 1.
+ */
+static inline VmathVector4 vmathV4Lerp_V( float t, VmathVector4 vec0, VmathVector4 vec1 );
+
+/*
+ * Spherical linear interpolation between two 4-D vectors
+ * NOTE: 
+ * The result is unpredictable if the vectors point in opposite directions.
+ * Does not clamp t between 0 and 1.
+ */
+static inline VmathVector4 vmathV4Slerp_V( float t, VmathVector4 unitVec0, VmathVector4 unitVec1 );
+
+/*
+ * Conditionally select between two 4-D vectors
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ * However, the transfer of select1 to a VMX register may use more processing time than a branch.
+ */
+static inline VmathVector4 vmathV4Select_V( VmathVector4 vec0, VmathVector4 vec1, unsigned int select1 );
+
+/*
+ * Store four 4-D vectors as half-floats
+ */
+static inline void vmathV4StoreHalfFloats_V( VmathVector4 vec0, VmathVector4 vec1, VmathVector4 vec2, VmathVector4 vec3, vec_ushort8 *twoQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 4-D vector
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathV4Print_V( VmathVector4 vec );
+
+/*
+ * Print a 4-D vector and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathV4Prints_V( VmathVector4 vec, const char *name );
+
+#endif
+
+/*
+ * Construct a 3-D point from x, y, and z elements
+ */
+static inline VmathPoint3 vmathP3MakeFromElems_V( float x, float y, float z );
+
+/*
+ * Copy elements from a 3-D vector into a 3-D point
+ */
+static inline VmathPoint3 vmathP3MakeFromV3_V( VmathVector3 vec );
+
+/*
+ * Set all elements of a 3-D point to the same scalar value
+ */
+static inline VmathPoint3 vmathP3MakeFromScalar_V( float scalar );
+
+/*
+ * Set vector float data in a 3-D point
+ */
+static inline VmathPoint3 vmathP3MakeFrom128_V( vec_float4 vf4 );
+
+/*
+ * Get vector float data from a 3-D point
+ */
+static inline vec_float4 vmathP3Get128_V( VmathPoint3 pnt );
+
+/*
+ * Set the x element of a 3-D point
+ */
+static inline void vmathP3SetX_V( VmathPoint3 *result, float x );
+
+/*
+ * Set the y element of a 3-D point
+ */
+static inline void vmathP3SetY_V( VmathPoint3 *result, float y );
+
+/*
+ * Set the z element of a 3-D point
+ */
+static inline void vmathP3SetZ_V( VmathPoint3 *result, float z );
+
+/*
+ * Get the x element of a 3-D point
+ */
+static inline float vmathP3GetX_V( VmathPoint3 pnt );
+
+/*
+ * Get the y element of a 3-D point
+ */
+static inline float vmathP3GetY_V( VmathPoint3 pnt );
+
+/*
+ * Get the z element of a 3-D point
+ */
+static inline float vmathP3GetZ_V( VmathPoint3 pnt );
+
+/*
+ * Set an x, y, or z element of a 3-D point by index
+ */
+static inline void vmathP3SetElem_V( VmathPoint3 *result, int idx, float value );
+
+/*
+ * Get an x, y, or z element of a 3-D point by index
+ */
+static inline float vmathP3GetElem_V( VmathPoint3 pnt, int idx );
+
+/*
+ * Subtract a 3-D point from another 3-D point
+ */
+static inline VmathVector3 vmathP3Sub_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
+
+/*
+ * Add a 3-D point to a 3-D vector
+ */
+static inline VmathPoint3 vmathP3AddV3_V( VmathPoint3 pnt, VmathVector3 vec );
+
+/*
+ * Subtract a 3-D vector from a 3-D point
+ */
+static inline VmathPoint3 vmathP3SubV3_V( VmathPoint3 pnt, VmathVector3 vec );
+
+/*
+ * Multiply two 3-D points per element
+ */
+static inline VmathPoint3 vmathP3MulPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
+
+/*
+ * Divide two 3-D points per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function divf4.
+ */
+static inline VmathPoint3 vmathP3DivPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
+
+/*
+ * Compute the reciprocal of a 3-D point per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function recipf4.
+ */
+static inline VmathPoint3 vmathP3RecipPerElem_V( VmathPoint3 pnt );
+
+/*
+ * Compute the square root of a 3-D point per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function sqrtf4.
+ */
+static inline VmathPoint3 vmathP3SqrtPerElem_V( VmathPoint3 pnt );
+
+/*
+ * Compute the reciprocal square root of a 3-D point per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function rsqrtf4.
+ */
+static inline VmathPoint3 vmathP3RsqrtPerElem_V( VmathPoint3 pnt );
+
+/*
+ * Compute the absolute value of a 3-D point per element
+ */
+static inline VmathPoint3 vmathP3AbsPerElem_V( VmathPoint3 pnt );
+
+/*
+ * Copy sign from one 3-D point to another, per element
+ */
+static inline VmathPoint3 vmathP3CopySignPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
+
+/*
+ * Maximum of two 3-D points per element
+ */
+static inline VmathPoint3 vmathP3MaxPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
+
+/*
+ * Minimum of two 3-D points per element
+ */
+static inline VmathPoint3 vmathP3MinPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
+
+/*
+ * Maximum element of a 3-D point
+ */
+static inline float vmathP3MaxElem_V( VmathPoint3 pnt );
+
+/*
+ * Minimum element of a 3-D point
+ */
+static inline float vmathP3MinElem_V( VmathPoint3 pnt );
+
+/*
+ * Compute the sum of all elements of a 3-D point
+ */
+static inline float vmathP3Sum_V( VmathPoint3 pnt );
+
+/*
+ * Apply uniform scale to a 3-D point
+ */
+static inline VmathPoint3 vmathP3Scale_V( VmathPoint3 pnt, float scaleVal );
+
+/*
+ * Apply non-uniform scale to a 3-D point
+ */
+static inline VmathPoint3 vmathP3NonUniformScale_V( VmathPoint3 pnt, VmathVector3 scaleVec );
+
+/*
+ * Scalar projection of a 3-D point on a unit-length 3-D vector
+ */
+static inline float vmathP3Projection_V( VmathPoint3 pnt, VmathVector3 unitVec );
+
+/*
+ * Compute the square of the distance of a 3-D point from the coordinate-system origin
+ */
+static inline float vmathP3DistSqrFromOrigin_V( VmathPoint3 pnt );
+
+/*
+ * Compute the distance of a 3-D point from the coordinate-system origin
+ */
+static inline float vmathP3DistFromOrigin_V( VmathPoint3 pnt );
+
+/*
+ * Compute the square of the distance between two 3-D points
+ */
+static inline float vmathP3DistSqr_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
+
+/*
+ * Compute the distance between two 3-D points
+ */
+static inline float vmathP3Dist_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
+
+/*
+ * Linear interpolation between two 3-D points
+ * NOTE: 
+ * Does not clamp t between 0 and 1.
+ */
+static inline VmathPoint3 vmathP3Lerp_V( float t, VmathPoint3 pnt0, VmathPoint3 pnt1 );
+
+/*
+ * Conditionally select between two 3-D points
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ * However, the transfer of select1 to a VMX register may use more processing time than a branch.
+ */
+static inline VmathPoint3 vmathP3Select_V( VmathPoint3 pnt0, VmathPoint3 pnt1, unsigned int select1 );
+
+/*
+ * Store x, y, and z elements of a 3-D point in the first three words of a quadword.
+ * The value of the fourth word (the word with the highest address) remains unchanged.
+ */
+static inline void vmathP3StoreXYZ_V( VmathPoint3 pnt, vec_float4 *quad );
+
+/*
+ * Load four three-float 3-D points, stored in three quadwords
+ */
+static inline void vmathP3LoadXYZArray_V( VmathPoint3 *pnt0, VmathPoint3 *pnt1, VmathPoint3 *pnt2, VmathPoint3 *pnt3, const vec_float4 *threeQuads );
+
+/*
+ * Store four 3-D points in three quadwords
+ */
+static inline void vmathP3StoreXYZArray_V( VmathPoint3 pnt0, VmathPoint3 pnt1, VmathPoint3 pnt2, VmathPoint3 pnt3, vec_float4 *threeQuads );
+
+/*
+ * Store eight 3-D points as half-floats
+ */
+static inline void vmathP3StoreHalfFloats_V( VmathPoint3 pnt0, VmathPoint3 pnt1, VmathPoint3 pnt2, VmathPoint3 pnt3, VmathPoint3 pnt4, VmathPoint3 pnt5, VmathPoint3 pnt6, VmathPoint3 pnt7, vec_ushort8 *threeQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3-D point
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathP3Print_V( VmathPoint3 pnt );
+
+/*
+ * Print a 3-D point and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathP3Prints_V( VmathPoint3 pnt, const char *name );
+
+#endif
+
+/*
+ * Construct a quaternion from x, y, z, and w elements
+ */
+static inline VmathQuat vmathQMakeFromElems_V( float x, float y, float z, float w );
+
+/*
+ * Construct a quaternion from a 3-D vector and a scalar
+ */
+static inline VmathQuat vmathQMakeFromV3Scalar_V( VmathVector3 xyz, float w );
+
+/*
+ * Copy elements from a 4-D vector into a quaternion
+ */
+static inline VmathQuat vmathQMakeFromV4_V( VmathVector4 vec );
+
+/*
+ * Convert a 3x3 rotation matrix to a unit-length quaternion
+ */
+static inline VmathQuat vmathQMakeFromM3_V( VmathMatrix3 rotMat );
+
+/*
+ * Set all elements of a quaternion to the same scalar value
+ */
+static inline VmathQuat vmathQMakeFromScalar_V( float scalar );
+
+/*
+ * Set vector float data in a quaternion
+ */
+static inline VmathQuat vmathQMakeFrom128_V( vec_float4 vf4 );
+
+/*
+ * Get vector float data from a quaternion
+ */
+static inline vec_float4 vmathQGet128_V( VmathQuat quat );
+
+/*
+ * Set the x, y, and z elements of a quaternion
+ * NOTE: 
+ * This function does not change the w element.
+ */
+static inline void vmathQSetXYZ_V( VmathQuat *result, VmathVector3 vec );
+
+/*
+ * Get the x, y, and z elements of a quaternion
+ */
+static inline VmathVector3 vmathQGetXYZ_V( VmathQuat quat );
+
+/*
+ * Set the x element of a quaternion
+ */
+static inline void vmathQSetX_V( VmathQuat *result, float x );
+
+/*
+ * Set the y element of a quaternion
+ */
+static inline void vmathQSetY_V( VmathQuat *result, float y );
+
+/*
+ * Set the z element of a quaternion
+ */
+static inline void vmathQSetZ_V( VmathQuat *result, float z );
+
+/*
+ * Set the w element of a quaternion
+ */
+static inline void vmathQSetW_V( VmathQuat *result, float w );
+
+/*
+ * Get the x element of a quaternion
+ */
+static inline float vmathQGetX_V( VmathQuat quat );
+
+/*
+ * Get the y element of a quaternion
+ */
+static inline float vmathQGetY_V( VmathQuat quat );
+
+/*
+ * Get the z element of a quaternion
+ */
+static inline float vmathQGetZ_V( VmathQuat quat );
+
+/*
+ * Get the w element of a quaternion
+ */
+static inline float vmathQGetW_V( VmathQuat quat );
+
+/*
+ * Set an x, y, z, or w element of a quaternion by index
+ */
+static inline void vmathQSetElem_V( VmathQuat *result, int idx, float value );
+
+/*
+ * Get an x, y, z, or w element of a quaternion by index
+ */
+static inline float vmathQGetElem_V( VmathQuat quat, int idx );
+
+/*
+ * Add two quaternions
+ */
+static inline VmathQuat vmathQAdd_V( VmathQuat quat0, VmathQuat quat1 );
+
+/*
+ * Subtract a quaternion from another quaternion
+ */
+static inline VmathQuat vmathQSub_V( VmathQuat quat0, VmathQuat quat1 );
+
+/*
+ * Multiply two quaternions
+ */
+static inline VmathQuat vmathQMul_V( VmathQuat quat0, VmathQuat quat1 );
+
+/*
+ * Multiply a quaternion by a scalar
+ */
+static inline VmathQuat vmathQScalarMul_V( VmathQuat quat, float scalar );
+
+/*
+ * Divide a quaternion by a scalar
+ */
+static inline VmathQuat vmathQScalarDiv_V( VmathQuat quat, float scalar );
+
+/*
+ * Negate all elements of a quaternion
+ */
+static inline VmathQuat vmathQNeg_V( VmathQuat quat );
+
+/*
+ * Construct an identity quaternion (takes no arguments)
+ */
+static inline VmathQuat vmathQMakeIdentity_V( void );
+
+/*
+ * Construct a quaternion to rotate between two unit-length 3-D vectors
+ * NOTE: 
+ * The result is unpredictable if unitVec0 and unitVec1 point in opposite directions.
+ */
+static inline VmathQuat vmathQMakeRotationArc_V( VmathVector3 unitVec0, VmathVector3 unitVec1 );
+
+/*
+ * Construct a quaternion to rotate around a unit-length 3-D vector
+ */
+static inline VmathQuat vmathQMakeRotationAxis_V( float radians, VmathVector3 unitVec );
+
+/*
+ * Construct a quaternion to rotate around the x axis
+ */
+static inline VmathQuat vmathQMakeRotationX_V( float radians );
+
+/*
+ * Construct a quaternion to rotate around the y axis
+ */
+static inline VmathQuat vmathQMakeRotationY_V( float radians );
+
+/*
+ * Construct a quaternion to rotate around the z axis
+ */
+static inline VmathQuat vmathQMakeRotationZ_V( float radians );
+
+/*
+ * Compute the conjugate of a quaternion
+ */
+static inline VmathQuat vmathQConj_V( VmathQuat quat );
+
+/*
+ * Use a unit-length quaternion to rotate a 3-D vector
+ */
+static inline VmathVector3 vmathQRotate_V( VmathQuat unitQuat, VmathVector3 vec );
+
+/*
+ * Compute the dot product of two quaternions
+ */
+static inline float vmathQDot_V( VmathQuat quat0, VmathQuat quat1 );
+
+/*
+ * Compute the norm of a quaternion (presumably the squared length, since vmathQLength_V is separate; confirm against the definition)
+ */
+static inline float vmathQNorm_V( VmathQuat quat );
+
+/*
+ * Compute the length of a quaternion
+ */
+static inline float vmathQLength_V( VmathQuat quat );
+
+/*
+ * Normalize a quaternion
+ * NOTE: 
+ * The result is unpredictable when all elements of quat are at or near zero.
+ */
+static inline VmathQuat vmathQNormalize_V( VmathQuat quat );
+
+/*
+ * Linear interpolation between two quaternions
+ * NOTE: 
+ * Does not clamp t between 0 and 1.
+ */
+static inline VmathQuat vmathQLerp_V( float t, VmathQuat quat0, VmathQuat quat1 );
+
+/*
+ * Spherical linear interpolation between two quaternions
+ * NOTE: 
+ * Interpolates along the shortest path between orientations.
+ * Does not clamp t between 0 and 1.
+ */
+static inline VmathQuat vmathQSlerp_V( float t, VmathQuat unitQuat0, VmathQuat unitQuat1 );
+
+/*
+ * Spherical quadrangle interpolation
+ */
+static inline VmathQuat vmathQSquad_V( float t, VmathQuat unitQuat0, VmathQuat unitQuat1, VmathQuat unitQuat2, VmathQuat unitQuat3 );
+
+/*
+ * Conditionally select between two quaternions
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ * However, the transfer of select1 to a VMX register may use more processing time than a branch.
+ */
+static inline VmathQuat vmathQSelect_V( VmathQuat quat0, VmathQuat quat1, unsigned int select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a quaternion
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathQPrint_V( VmathQuat quat );
+
+/*
+ * Print a quaternion and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathQPrints_V( VmathQuat quat, const char *name );
+
+#endif
+
+/*
+ * Construct a 3x3 matrix containing the specified columns
+ */
+static inline VmathMatrix3 vmathM3MakeFromCols_V( VmathVector3 col0, VmathVector3 col1, VmathVector3 col2 );
+
+/*
+ * Construct a 3x3 rotation matrix from a unit-length quaternion
+ */
+static inline VmathMatrix3 vmathM3MakeFromQ_V( VmathQuat unitQuat );
+
+/*
+ * Set all elements of a 3x3 matrix to the same scalar value
+ */
+static inline VmathMatrix3 vmathM3MakeFromScalar_V( float scalar );
+
+/*
+ * Set column 0 of a 3x3 matrix
+ */
+static inline void vmathM3SetCol0_V( VmathMatrix3 *result, VmathVector3 col0 );
+
+/*
+ * Set column 1 of a 3x3 matrix
+ */
+static inline void vmathM3SetCol1_V( VmathMatrix3 *result, VmathVector3 col1 );
+
+/*
+ * Set column 2 of a 3x3 matrix
+ */
+static inline void vmathM3SetCol2_V( VmathMatrix3 *result, VmathVector3 col2 );
+
+/*
+ * Get column 0 of a 3x3 matrix
+ */
+static inline VmathVector3 vmathM3GetCol0_V( VmathMatrix3 mat );
+
+/*
+ * Get column 1 of a 3x3 matrix
+ */
+static inline VmathVector3 vmathM3GetCol1_V( VmathMatrix3 mat );
+
+/*
+ * Get column 2 of a 3x3 matrix
+ */
+static inline VmathVector3 vmathM3GetCol2_V( VmathMatrix3 mat );
+
+/*
+ * Set the column of a 3x3 matrix referred to by the specified index
+ */
+static inline void vmathM3SetCol_V( VmathMatrix3 *result, int col, VmathVector3 vec );
+
+/*
+ * Set the row of a 3x3 matrix referred to by the specified index
+ */
+static inline void vmathM3SetRow_V( VmathMatrix3 *result, int row, VmathVector3 vec );
+
+/*
+ * Get the column of a 3x3 matrix referred to by the specified index
+ */
+static inline VmathVector3 vmathM3GetCol_V( VmathMatrix3 mat, int col );
+
+/*
+ * Get the row of a 3x3 matrix referred to by the specified index
+ */
+static inline VmathVector3 vmathM3GetRow_V( VmathMatrix3 mat, int row );
+
+/*
+ * Set the element of a 3x3 matrix referred to by column and row indices
+ */
+static inline void vmathM3SetElem_V( VmathMatrix3 *result, int col, int row, float val );
+
+/*
+ * Get the element of a 3x3 matrix referred to by column and row indices
+ */
+static inline float vmathM3GetElem_V( VmathMatrix3 mat, int col, int row );
+
+/*
+ * Add two 3x3 matrices
+ */
+static inline VmathMatrix3 vmathM3Add_V( VmathMatrix3 mat0, VmathMatrix3 mat1 );
+
+/*
+ * Subtract a 3x3 matrix from another 3x3 matrix
+ */
+static inline VmathMatrix3 vmathM3Sub_V( VmathMatrix3 mat0, VmathMatrix3 mat1 );
+
+/*
+ * Negate all elements of a 3x3 matrix
+ */
+static inline VmathMatrix3 vmathM3Neg_V( VmathMatrix3 mat );
+
+/*
+ * Multiply a 3x3 matrix by a scalar
+ */
+static inline VmathMatrix3 vmathM3ScalarMul_V( VmathMatrix3 mat, float scalar );
+
+/*
+ * Multiply a 3x3 matrix by a 3-D vector
+ */
+static inline VmathVector3 vmathM3MulV3_V( VmathMatrix3 mat, VmathVector3 vec );
+
+/*
+ * Multiply two 3x3 matrices
+ */
+static inline VmathMatrix3 vmathM3Mul_V( VmathMatrix3 mat0, VmathMatrix3 mat1 );
+
+/*
+ * Construct an identity 3x3 matrix (takes no arguments)
+ */
+static inline VmathMatrix3 vmathM3MakeIdentity_V( void );
+
+/*
+ * Construct a 3x3 matrix to rotate around the x axis
+ */
+static inline VmathMatrix3 vmathM3MakeRotationX_V( float radians );
+
+/*
+ * Construct a 3x3 matrix to rotate around the y axis
+ */
+static inline VmathMatrix3 vmathM3MakeRotationY_V( float radians );
+
+/*
+ * Construct a 3x3 matrix to rotate around the z axis
+ */
+static inline VmathMatrix3 vmathM3MakeRotationZ_V( float radians );
+
+/*
+ * Construct a 3x3 matrix to rotate around the x, y, and z axes
+ */
+static inline VmathMatrix3 vmathM3MakeRotationZYX_V( VmathVector3 radiansXYZ );
+
+/*
+ * Construct a 3x3 matrix to rotate around a unit-length 3-D vector
+ */
+static inline VmathMatrix3 vmathM3MakeRotationAxis_V( float radians, VmathVector3 unitVec );
+
+/*
+ * Construct a 3x3 rotation matrix from a unit-length quaternion
+ */
+static inline VmathMatrix3 vmathM3MakeRotationQ_V( VmathQuat unitQuat );
+
+/*
+ * Construct a 3x3 matrix to perform scaling
+ */
+static inline VmathMatrix3 vmathM3MakeScale_V( VmathVector3 scaleVec );
+
+/*
+ * Append (post-multiply) a scale transformation to a 3x3 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline VmathMatrix3 vmathM3AppendScale_V( VmathMatrix3 mat, VmathVector3 scaleVec );
+
+/*
+ * Prepend (pre-multiply) a scale transformation to a 3x3 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline VmathMatrix3 vmathM3PrependScale_V( VmathVector3 scaleVec, VmathMatrix3 mat );
+
+/*
+ * Multiply two 3x3 matrices per element
+ */
+static inline VmathMatrix3 vmathM3MulPerElem_V( VmathMatrix3 mat0, VmathMatrix3 mat1 );
+
+/*
+ * Compute the absolute value of a 3x3 matrix per element
+ */
+static inline VmathMatrix3 vmathM3AbsPerElem_V( VmathMatrix3 mat );
+
+/*
+ * Transpose of a 3x3 matrix
+ */
+static inline VmathMatrix3 vmathM3Transpose_V( VmathMatrix3 mat );
+
+/*
+ * Compute the inverse of a 3x3 matrix
+ * NOTE: 
+ * Result is unpredictable when the determinant of mat is equal to or near 0.
+ */
+static inline VmathMatrix3 vmathM3Inverse_V( VmathMatrix3 mat );
+
+/*
+ * Determinant of a 3x3 matrix
+ */
+static inline float vmathM3Determinant_V( VmathMatrix3 mat );
+
+/*
+ * Conditionally select between two 3x3 matrices
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ * However, the transfer of select1 to a VMX register may use more processing time than a branch.
+ */
+static inline VmathMatrix3 vmathM3Select_V( VmathMatrix3 mat0, VmathMatrix3 mat1, unsigned int select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3x3 matrix
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathM3Print_V( VmathMatrix3 mat );
+
+/*
+ * Print a 3x3 matrix and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathM3Prints_V( VmathMatrix3 mat, const char *name );
+
+#endif
+
+/*
+ * Construct a 4x4 matrix containing the specified columns
+ */
+static inline VmathMatrix4 vmathM4MakeFromCols_V( VmathVector4 col0, VmathVector4 col1, VmathVector4 col2, VmathVector4 col3 );
+
+/*
+ * Construct a 4x4 matrix from a 3x4 transformation matrix
+ */
+static inline VmathMatrix4 vmathM4MakeFromT3_V( VmathTransform3 mat );
+
+/*
+ * Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector
+ */
+static inline VmathMatrix4 vmathM4MakeFromM3V3_V( VmathMatrix3 mat, VmathVector3 translateVec );
+
+/*
+ * Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector
+ */
+static inline VmathMatrix4 vmathM4MakeFromQV3_V( VmathQuat unitQuat, VmathVector3 translateVec );
+
+/*
+ * Set all elements of a 4x4 matrix to the same scalar value
+ */
+static inline VmathMatrix4 vmathM4MakeFromScalar_V( float scalar );
+
+/*
+ * Set the upper-left 3x3 submatrix of a 4x4 matrix
+ * NOTE: 
+ * This function does not change the bottom row elements.
+ */
+static inline void vmathM4SetUpper3x3_V( VmathMatrix4 *result, VmathMatrix3 mat3 );
+
+/*
+ * Get the upper-left 3x3 submatrix of a 4x4 matrix
+ */
+static inline VmathMatrix3 vmathM4GetUpper3x3_V( VmathMatrix4 mat );
+
+/*
+ * Set the translation component of a 4x4 matrix
+ * NOTE: 
+ * This function does not change the bottom row elements.
+ */
+static inline void vmathM4SetTranslation_V( VmathMatrix4 *result, VmathVector3 translateVec );
+
+/*
+ * Get the translation component of a 4x4 matrix
+ */
+static inline VmathVector3 vmathM4GetTranslation_V( VmathMatrix4 mat );
+
+/*
+ * Set column 0 of a 4x4 matrix
+ */
+static inline void vmathM4SetCol0_V( VmathMatrix4 *result, VmathVector4 col0 );
+
+/*
+ * Set column 1 of a 4x4 matrix
+ */
+static inline void vmathM4SetCol1_V( VmathMatrix4 *result, VmathVector4 col1 );
+
+/*
+ * Set column 2 of a 4x4 matrix
+ */
+static inline void vmathM4SetCol2_V( VmathMatrix4 *result, VmathVector4 col2 );
+
+/*
+ * Set column 3 of a 4x4 matrix
+ */
+static inline void vmathM4SetCol3_V( VmathMatrix4 *result, VmathVector4 col3 );
+
+/*
+ * Get column 0 of a 4x4 matrix
+ */
+static inline VmathVector4 vmathM4GetCol0_V( VmathMatrix4 mat );
+
+/*
+ * Get column 1 of a 4x4 matrix
+ */
+static inline VmathVector4 vmathM4GetCol1_V( VmathMatrix4 mat );
+
+/*
+ * Get column 2 of a 4x4 matrix
+ */
+static inline VmathVector4 vmathM4GetCol2_V( VmathMatrix4 mat );
+
+/*
+ * Get column 3 of a 4x4 matrix
+ */
+static inline VmathVector4 vmathM4GetCol3_V( VmathMatrix4 mat );
+
+/*
+ * Set the column of a 4x4 matrix referred to by the specified index
+ */
+static inline void vmathM4SetCol_V( VmathMatrix4 *result, int col, VmathVector4 vec );
+
+/*
+ * Set the row of a 4x4 matrix referred to by the specified index
+ */
+static inline void vmathM4SetRow_V( VmathMatrix4 *result, int row, VmathVector4 vec );
+
+/*
+ * Get the column of a 4x4 matrix referred to by the specified index
+ */
+static inline VmathVector4 vmathM4GetCol_V( VmathMatrix4 mat, int col );
+
+/*
+ * Get the row of a 4x4 matrix referred to by the specified index
+ */
+static inline VmathVector4 vmathM4GetRow_V( VmathMatrix4 mat, int row );
+
+/*
+ * Set the element of a 4x4 matrix referred to by column and row indices
+ */
+static inline void vmathM4SetElem_V( VmathMatrix4 *result, int col, int row, float val );
+
+/*
+ * Get the element of a 4x4 matrix referred to by column and row indices
+ */
+static inline float vmathM4GetElem_V( VmathMatrix4 mat, int col, int row );
+
+/*
+ * Add two 4x4 matrices
+ */
+static inline VmathMatrix4 vmathM4Add_V( VmathMatrix4 mat0, VmathMatrix4 mat1 );
+
+/*
+ * Subtract a 4x4 matrix from another 4x4 matrix
+ */
+static inline VmathMatrix4 vmathM4Sub_V( VmathMatrix4 mat0, VmathMatrix4 mat1 );
+
+/*
+ * Negate all elements of a 4x4 matrix
+ */
+static inline VmathMatrix4 vmathM4Neg_V( VmathMatrix4 mat );
+
+/*
+ * Multiply a 4x4 matrix by a scalar
+ */
+static inline VmathMatrix4 vmathM4ScalarMul_V( VmathMatrix4 mat, float scalar );
+
+/*
+ * Multiply a 4x4 matrix by a 4-D vector
+ */
+static inline VmathVector4 vmathM4MulV4_V( VmathMatrix4 mat, VmathVector4 vec );
+
+/*
+ * Multiply a 4x4 matrix by a 3-D vector
+ */
+static inline VmathVector4 vmathM4MulV3_V( VmathMatrix4 mat, VmathVector3 vec );
+
+/*
+ * Multiply a 4x4 matrix by a 3-D point
+ */
+static inline VmathVector4 vmathM4MulP3_V( VmathMatrix4 mat, VmathPoint3 pnt );
+
+/*
+ * Multiply two 4x4 matrices
+ */
+static inline VmathMatrix4 vmathM4Mul_V( VmathMatrix4 mat0, VmathMatrix4 mat1 );
+
+/*
+ * Multiply a 4x4 matrix by a 3x4 transformation matrix
+ */
+static inline VmathMatrix4 vmathM4MulT3_V( VmathMatrix4 mat, VmathTransform3 tfrm );
+
+/*
+ * Construct an identity 4x4 matrix (takes no arguments)
+ */
+static inline VmathMatrix4 vmathM4MakeIdentity_V( void );
+
+/*
+ * Construct a 4x4 matrix to rotate around the x axis
+ */
+static inline VmathMatrix4 vmathM4MakeRotationX_V( float radians );
+
+/*
+ * Construct a 4x4 matrix to rotate around the y axis
+ */
+static inline VmathMatrix4 vmathM4MakeRotationY_V( float radians );
+
+/*
+ * Construct a 4x4 matrix to rotate around the z axis
+ */
+static inline VmathMatrix4 vmathM4MakeRotationZ_V( float radians );
+
+/*
+ * Construct a 4x4 matrix to rotate around the x, y, and z axes
+ */
+static inline VmathMatrix4 vmathM4MakeRotationZYX_V( VmathVector3 radiansXYZ );
+
+/*
+ * Construct a 4x4 matrix to rotate around a unit-length 3-D vector
+ */
+static inline VmathMatrix4 vmathM4MakeRotationAxis_V( float radians, VmathVector3 unitVec );
+
+/*
+ * Construct a 4x4 rotation matrix from a unit-length quaternion
+ */
+static inline VmathMatrix4 vmathM4MakeRotationQ_V( VmathQuat unitQuat );
+
+/*
+ * Construct a 4x4 matrix to perform scaling
+ */
+static inline VmathMatrix4 vmathM4MakeScale_V( VmathVector3 scaleVec );
+
+/*
+ * Construct a 4x4 matrix to perform translation
+ */
+static inline VmathMatrix4 vmathM4MakeTranslation_V( VmathVector3 translateVec );
+
+/*
+ * Construct a viewing matrix based on eye position, position looked at, and up direction
+ */
+static inline VmathMatrix4 vmathM4MakeLookAt_V( VmathPoint3 eyePos, VmathPoint3 lookAtPos, VmathVector3 upVec );
+
+/*
+ * Construct a perspective projection matrix
+ */
+static inline VmathMatrix4 vmathM4MakePerspective_V( float fovyRadians, float aspect, float zNear, float zFar );
+
+/*
+ * Construct a perspective projection matrix based on a frustum
+ */
+static inline VmathMatrix4 vmathM4MakeFrustum_V( float left, float right, float bottom, float top, float zNear, float zFar );
+
+/*
+ * Construct an orthographic projection matrix
+ */
+static inline VmathMatrix4 vmathM4MakeOrthographic_V( float left, float right, float bottom, float top, float zNear, float zFar );
+
+/*
+ * Append (post-multiply) a scale transformation to a 4x4 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline VmathMatrix4 vmathM4AppendScale_V( VmathMatrix4 mat, VmathVector3 scaleVec );
+
+/*
+ * Prepend (pre-multiply) a scale transformation to a 4x4 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline VmathMatrix4 vmathM4PrependScale_V( VmathVector3 scaleVec, VmathMatrix4 mat );
+
+/*
+ * Multiply two 4x4 matrices per element
+ */
+static inline VmathMatrix4 vmathM4MulPerElem_V( VmathMatrix4 mat0, VmathMatrix4 mat1 );
+
+/*
+ * Compute the absolute value of a 4x4 matrix per element
+ */
+static inline VmathMatrix4 vmathM4AbsPerElem_V( VmathMatrix4 mat );
+
+/*
+ * Transpose of a 4x4 matrix
+ */
+static inline VmathMatrix4 vmathM4Transpose_V( VmathMatrix4 mat );
+
+/*
+ * Compute the inverse of a 4x4 matrix
+ * NOTE: 
+ * Result is unpredictable when the determinant of mat is equal to or near 0.
+ */
+static inline VmathMatrix4 vmathM4Inverse_V( VmathMatrix4 mat );
+
+/*
+ * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix
+ * NOTE: 
+ * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.  The result is unpredictable when the determinant of mat is equal to or near 0.
+ */
+static inline VmathMatrix4 vmathM4AffineInverse_V( VmathMatrix4 mat );
+
+/*
+ * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix
+ * NOTE: 
+ * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.
+ */
+static inline VmathMatrix4 vmathM4OrthoInverse_V( VmathMatrix4 mat );
+
+/*
+ * Determinant of a 4x4 matrix
+ */
+static inline float vmathM4Determinant_V( VmathMatrix4 mat );
+
+/*
+ * Conditionally select between two 4x4 matrices
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ * However, the transfer of select1 to a VMX register may use more processing time than a branch.
+ */
+static inline VmathMatrix4 vmathM4Select_V( VmathMatrix4 mat0, VmathMatrix4 mat1, unsigned int select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 4x4 matrix
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathM4Print_V( VmathMatrix4 mat );
+
+/*
+ * Print a 4x4 matrix and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathM4Prints_V( VmathMatrix4 mat, const char *name );
+
+#endif
+
+/*
+ * Construct a 3x4 transformation matrix containing the specified columns
+ */
+static inline VmathTransform3 vmathT3MakeFromCols_V( VmathVector3 col0, VmathVector3 col1, VmathVector3 col2, VmathVector3 col3 );
+
+/*
+ * Construct a 3x4 transformation matrix from a 3x3 matrix and a 3-D vector
+ */
+static inline VmathTransform3 vmathT3MakeFromM3V3_V( VmathMatrix3 tfrm, VmathVector3 translateVec );
+
+/*
+ * Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector
+ */
+static inline VmathTransform3 vmathT3MakeFromQV3_V( VmathQuat unitQuat, VmathVector3 translateVec );
+
+/*
+ * Set all elements of a 3x4 transformation matrix to the same scalar value
+ */
+static inline VmathTransform3 vmathT3MakeFromScalar_V( float scalar );
+
+/*
+ * Set the upper-left 3x3 submatrix of a 3x4 transformation matrix
+ */
+static inline void vmathT3SetUpper3x3_V( VmathTransform3 *result, VmathMatrix3 mat3 );
+
+/*
+ * Get the upper-left 3x3 submatrix of a 3x4 transformation matrix
+ */
+static inline VmathMatrix3 vmathT3GetUpper3x3_V( VmathTransform3 tfrm );
+
+/*
+ * Set the translation component of a 3x4 transformation matrix
+ */
+static inline void vmathT3SetTranslation_V( VmathTransform3 *result, VmathVector3 translateVec );
+
+/*
+ * Get the translation component of a 3x4 transformation matrix
+ */
+static inline VmathVector3 vmathT3GetTranslation_V( VmathTransform3 tfrm );
+
+/*
+ * Set column 0 of a 3x4 transformation matrix
+ */
+static inline void vmathT3SetCol0_V( VmathTransform3 *result, VmathVector3 col0 );
+
+/*
+ * Set column 1 of a 3x4 transformation matrix
+ */
+static inline void vmathT3SetCol1_V( VmathTransform3 *result, VmathVector3 col1 );
+
+/*
+ * Set column 2 of a 3x4 transformation matrix
+ */
+static inline void vmathT3SetCol2_V( VmathTransform3 *result, VmathVector3 col2 );
+
+/*
+ * Set column 3 of a 3x4 transformation matrix
+ */
+static inline void vmathT3SetCol3_V( VmathTransform3 *result, VmathVector3 col3 );
+
+/*
+ * Get column 0 of a 3x4 transformation matrix
+ */
+static inline VmathVector3 vmathT3GetCol0_V( VmathTransform3 tfrm );
+
+/*
+ * Get column 1 of a 3x4 transformation matrix
+ */
+static inline VmathVector3 vmathT3GetCol1_V( VmathTransform3 tfrm );
+
+/*
+ * Get column 2 of a 3x4 transformation matrix
+ */
+static inline VmathVector3 vmathT3GetCol2_V( VmathTransform3 tfrm );
+
+/*
+ * Get column 3 of a 3x4 transformation matrix
+ */
+static inline VmathVector3 vmathT3GetCol3_V( VmathTransform3 tfrm );
+
+/*
+ * Set the column of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline void vmathT3SetCol_V( VmathTransform3 *result, int col, VmathVector3 vec );
+
+/*
+ * Set the row of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline void vmathT3SetRow_V( VmathTransform3 *result, int row, VmathVector4 vec );
+
+/*
+ * Get the column of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline VmathVector3 vmathT3GetCol_V( VmathTransform3 tfrm, int col );
+
+/*
+ * Get the row of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline VmathVector4 vmathT3GetRow_V( VmathTransform3 tfrm, int row );
+
+/*
+ * Set the element of a 3x4 transformation matrix referred to by column and row indices
+ */
+static inline void vmathT3SetElem_V( VmathTransform3 *result, int col, int row, float val );
+
+/*
+ * Get the element of a 3x4 transformation matrix referred to by column and row indices
+ */
+static inline float vmathT3GetElem_V( VmathTransform3 tfrm, int col, int row );
+
+/*
+ * Multiply a 3x4 transformation matrix by a 3-D vector
+ */
+static inline VmathVector3 vmathT3MulV3_V( VmathTransform3 tfrm, VmathVector3 vec );
+
+/*
+ * Multiply a 3x4 transformation matrix by a 3-D point
+ */
+static inline VmathPoint3 vmathT3MulP3_V( VmathTransform3 tfrm, VmathPoint3 pnt );
+
+/*
+ * Multiply two 3x4 transformation matrices
+ */
+static inline VmathTransform3 vmathT3Mul_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1 );
+
+/*
+ * Construct an identity 3x4 transformation matrix
+ */
+static inline VmathTransform3 vmathT3MakeIdentity_V( );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the x axis
+ */
+static inline VmathTransform3 vmathT3MakeRotationX_V( float radians );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the y axis
+ */
+static inline VmathTransform3 vmathT3MakeRotationY_V( float radians );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the z axis
+ */
+static inline VmathTransform3 vmathT3MakeRotationZ_V( float radians );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the x, y, and z axes
+ */
+static inline VmathTransform3 vmathT3MakeRotationZYX_V( VmathVector3 radiansXYZ );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector
+ */
+static inline VmathTransform3 vmathT3MakeRotationAxis_V( float radians, VmathVector3 unitVec );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate using a unit-length quaternion
+ */
+static inline VmathTransform3 vmathT3MakeRotationQ_V( VmathQuat unitQuat );
+
+/*
+ * Construct a 3x4 transformation matrix to perform scaling
+ */
+static inline VmathTransform3 vmathT3MakeScale_V( VmathVector3 scaleVec );
+
+/*
+ * Construct a 3x4 transformation matrix to perform translation
+ */
+static inline VmathTransform3 vmathT3MakeTranslation_V( VmathVector3 translateVec );
+
+/*
+ * Append (post-multiply) a scale transformation to a 3x4 transformation matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline VmathTransform3 vmathT3AppendScale_V( VmathTransform3 tfrm, VmathVector3 scaleVec );
+
+/*
+ * Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline VmathTransform3 vmathT3PrependScale_V( VmathVector3 scaleVec, VmathTransform3 tfrm );
+
+/*
+ * Multiply two 3x4 transformation matrices per element
+ */
+static inline VmathTransform3 vmathT3MulPerElem_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1 );
+
+/*
+ * Compute the absolute value of a 3x4 transformation matrix per element
+ */
+static inline VmathTransform3 vmathT3AbsPerElem_V( VmathTransform3 tfrm );
+
+/*
+ * Inverse of a 3x4 transformation matrix
+ * NOTE: 
+ * Result is unpredictable when the determinant of the upper-left 3x3 submatrix is equal to or near 0.
+ */
+static inline VmathTransform3 vmathT3Inverse_V( VmathTransform3 tfrm );
+
+/*
+ * Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix
+ * NOTE: 
+ * This can be used to achieve better performance than a general inverse when the specified 3x4 transformation matrix meets the given restrictions.
+ */
+static inline VmathTransform3 vmathT3OrthoInverse_V( VmathTransform3 tfrm );
+
+/*
+ * Conditionally select between two 3x4 transformation matrices
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ * However, the transfer of select1 to a VMX register may use more processing time than a branch.
+ */
+static inline VmathTransform3 vmathT3Select_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1, unsigned int select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3x4 transformation matrix
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathT3Print_V( VmathTransform3 tfrm );
+
+/*
+ * Print a 3x4 transformation matrix and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathT3Prints_V( VmathTransform3 tfrm, const char *name );
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#include "vectormath_aos.h"
+#include "vec_aos_v.h"
+#include "quat_aos_v.h"
+#include "mat_aos_v.h"
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/ppu/c/vectormath_soa.h b/Extras/vectormathlibrary/include/vectormath/ppu/c/vectormath_soa.h
index 1b8b8c6bf..f52b5aa9d 100644
--- a/Extras/vectormathlibrary/include/vectormath/ppu/c/vectormath_soa.h
+++ b/Extras/vectormathlibrary/include/vectormath/ppu/c/vectormath_soa.h
@@ -1,2013 +1,2013 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_SOA_C_H
-#define _VECTORMATH_SOA_C_H
-
-#include <math.h>
-#include <altivec.h>
-#include "vectormath_aos.h"
-
-#ifdef _VECTORMATH_DEBUG
-#include <stdio.h>
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-#ifndef _VECTORMATH_SOA_C_TYPES_H
-#define _VECTORMATH_SOA_C_TYPES_H
-
-/* A set of four 3-D vectors in structure-of-arrays format
- */
-typedef struct _VmathSoaVector3
-{
-    vec_float4 x;
-    vec_float4 y;
-    vec_float4 z;
-} VmathSoaVector3;
-
-/* A set of four 4-D vectors in structure-of-arrays format
- */
-typedef struct _VmathSoaVector4
-{
-    vec_float4 x;
-    vec_float4 y;
-    vec_float4 z;
-    vec_float4 w;
-} VmathSoaVector4;
-
-/* A set of four 3-D points in structure-of-arrays format
- */
-typedef struct _VmathSoaPoint3
-{
-    vec_float4 x;
-    vec_float4 y;
-    vec_float4 z;
-} VmathSoaPoint3;
-
-/* A set of four quaternions in structure-of-arrays format
- */
-typedef struct _VmathSoaQuat
-{
-    vec_float4 x;
-    vec_float4 y;
-    vec_float4 z;
-    vec_float4 w;
-} VmathSoaQuat;
-
-/* A set of four 3x3 matrices in structure-of-arrays format
- */
-typedef struct _VmathSoaMatrix3
-{
-    VmathSoaVector3 col0;
-    VmathSoaVector3 col1;
-    VmathSoaVector3 col2;
-} VmathSoaMatrix3;
-
-/* A set of four 4x4 matrices in structure-of-arrays format
- */
-typedef struct _VmathSoaMatrix4
-{
-    VmathSoaVector4 col0;
-    VmathSoaVector4 col1;
-    VmathSoaVector4 col2;
-    VmathSoaVector4 col3;
-} VmathSoaMatrix4;
-
-/* A set of four 3x4 transformation matrices in structure-of-arrays format
- */
-typedef struct _VmathSoaTransform3
-{
-    VmathSoaVector3 col0;
-    VmathSoaVector3 col1;
-    VmathSoaVector3 col2;
-    VmathSoaVector3 col3;
-} VmathSoaTransform3;
-
-#endif
-
-/*
- * Copy a 3-D vector
- */
-static inline void vmathSoaV3Copy( VmathSoaVector3 *result, const VmathSoaVector3 *vec );
-
-/*
- * Construct a 3-D vector from x, y, and z elements
- */
-static inline void vmathSoaV3MakeFromElems( VmathSoaVector3 *result, vec_float4 x, vec_float4 y, vec_float4 z );
-
-/*
- * Copy elements from a 3-D point into a 3-D vector
- */
-static inline void vmathSoaV3MakeFromP3( VmathSoaVector3 *result, const VmathSoaPoint3 *pnt );
-
-/*
- * Set all elements of a 3-D vector to the same scalar value
- */
-static inline void vmathSoaV3MakeFromScalar( VmathSoaVector3 *result, vec_float4 scalar );
-
-/*
- * Replicate an AoS 3-D vector
- */
-static inline void vmathSoaV3MakeFromAos( VmathSoaVector3 *result, const VmathVector3 *vec );
-
-/*
- * Insert four AoS 3-D vectors
- */
-static inline void vmathSoaV3MakeFrom4Aos( VmathSoaVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1, const VmathVector3 *vec2, const VmathVector3 *vec3 );
-
-/*
- * Extract four AoS 3-D vectors
- */
-static inline void vmathSoaV3Get4Aos( const VmathSoaVector3 *vec, VmathVector3 *result0, VmathVector3 *result1, VmathVector3 *result2, VmathVector3 *result3 );
-
-/*
- * Set the x element of a 3-D vector
- */
-static inline void vmathSoaV3SetX( VmathSoaVector3 *result, vec_float4 x );
-
-/*
- * Set the y element of a 3-D vector
- */
-static inline void vmathSoaV3SetY( VmathSoaVector3 *result, vec_float4 y );
-
-/*
- * Set the z element of a 3-D vector
- */
-static inline void vmathSoaV3SetZ( VmathSoaVector3 *result, vec_float4 z );
-
-/*
- * Get the x element of a 3-D vector
- */
-static inline vec_float4 vmathSoaV3GetX( const VmathSoaVector3 *vec );
-
-/*
- * Get the y element of a 3-D vector
- */
-static inline vec_float4 vmathSoaV3GetY( const VmathSoaVector3 *vec );
-
-/*
- * Get the z element of a 3-D vector
- */
-static inline vec_float4 vmathSoaV3GetZ( const VmathSoaVector3 *vec );
-
-/*
- * Set an x, y, or z element of a 3-D vector by index
- */
-static inline void vmathSoaV3SetElem( VmathSoaVector3 *result, int idx, vec_float4 value );
-
-/*
- * Get an x, y, or z element of a 3-D vector by index
- */
-static inline vec_float4 vmathSoaV3GetElem( const VmathSoaVector3 *vec, int idx );
-
-/*
- * Add two 3-D vectors
- */
-static inline void vmathSoaV3Add( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
-
-/*
- * Subtract a 3-D vector from another 3-D vector
- */
-static inline void vmathSoaV3Sub( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
-
-/*
- * Add a 3-D vector to a 3-D point
- */
-static inline void vmathSoaV3AddP3( VmathSoaPoint3 *result, const VmathSoaVector3 *vec, const VmathSoaPoint3 *pnt );
-
-/*
- * Multiply a 3-D vector by a scalar
- */
-static inline void vmathSoaV3ScalarMul( VmathSoaVector3 *result, const VmathSoaVector3 *vec, vec_float4 scalar );
-
-/*
- * Divide a 3-D vector by a scalar
- */
-static inline void vmathSoaV3ScalarDiv( VmathSoaVector3 *result, const VmathSoaVector3 *vec, vec_float4 scalar );
-
-/*
- * Negate all elements of a 3-D vector
- */
-static inline void vmathSoaV3Neg( VmathSoaVector3 *result, const VmathSoaVector3 *vec );
-
-/*
- * Construct x axis
- */
-static inline void vmathSoaV3MakeXAxis( VmathSoaVector3 *result );
-
-/*
- * Construct y axis
- */
-static inline void vmathSoaV3MakeYAxis( VmathSoaVector3 *result );
-
-/*
- * Construct z axis
- */
-static inline void vmathSoaV3MakeZAxis( VmathSoaVector3 *result );
-
-/*
- * Multiply two 3-D vectors per element
- */
-static inline void vmathSoaV3MulPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
-
-/*
- * Divide two 3-D vectors per element
- * NOTE: 
- * Floating-point behavior matches standard library function divf4.
- */
-static inline void vmathSoaV3DivPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
-
-/*
- * Compute the reciprocal of a 3-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function recipf4.
- */
-static inline void vmathSoaV3RecipPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec );
-
-/*
- * Compute the square root of a 3-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function sqrtf4.
- */
-static inline void vmathSoaV3SqrtPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec );
-
-/*
- * Compute the reciprocal square root of a 3-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function rsqrtf4.
- */
-static inline void vmathSoaV3RsqrtPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec );
-
-/*
- * Compute the absolute value of a 3-D vector per element
- */
-static inline void vmathSoaV3AbsPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec );
-
-/*
- * Copy sign from one 3-D vector to another, per element
- */
-static inline void vmathSoaV3CopySignPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
-
-/*
- * Maximum of two 3-D vectors per element
- */
-static inline void vmathSoaV3MaxPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
-
-/*
- * Minimum of two 3-D vectors per element
- */
-static inline void vmathSoaV3MinPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
-
-/*
- * Maximum element of a 3-D vector
- */
-static inline vec_float4 vmathSoaV3MaxElem( const VmathSoaVector3 *vec );
-
-/*
- * Minimum element of a 3-D vector
- */
-static inline vec_float4 vmathSoaV3MinElem( const VmathSoaVector3 *vec );
-
-/*
- * Compute the sum of all elements of a 3-D vector
- */
-static inline vec_float4 vmathSoaV3Sum( const VmathSoaVector3 *vec );
-
-/*
- * Compute the dot product of two 3-D vectors
- */
-static inline vec_float4 vmathSoaV3Dot( const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
-
-/*
- * Compute the square of the length of a 3-D vector
- */
-static inline vec_float4 vmathSoaV3LengthSqr( const VmathSoaVector3 *vec );
-
-/*
- * Compute the length of a 3-D vector
- */
-static inline vec_float4 vmathSoaV3Length( const VmathSoaVector3 *vec );
-
-/*
- * Normalize a 3-D vector
- * NOTE: 
- * The result is unpredictable when all elements of vec are at or near zero.
- */
-static inline void vmathSoaV3Normalize( VmathSoaVector3 *result, const VmathSoaVector3 *vec );
-
-/*
- * Compute cross product of two 3-D vectors
- */
-static inline void vmathSoaV3Cross( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
-
-/*
- * Outer product of two 3-D vectors
- */
-static inline void vmathSoaV3Outer( VmathSoaMatrix3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
-
-/*
- * Pre-multiply a row vector by a 3x3 matrix
- */
-static inline void vmathSoaV3RowMul( VmathSoaVector3 *result, const VmathSoaVector3 *vec, const VmathSoaMatrix3 *mat );
-
-/*
- * Cross-product matrix of a 3-D vector
- */
-static inline void vmathSoaV3CrossMatrix( VmathSoaMatrix3 *result, const VmathSoaVector3 *vec );
-
-/*
- * Create cross-product matrix and multiply
- * NOTE: 
- * Faster than separately creating a cross-product matrix and multiplying.
- */
-static inline void vmathSoaV3CrossMatrixMul( VmathSoaMatrix3 *result, const VmathSoaVector3 *vec, const VmathSoaMatrix3 *mat );
-
-/*
- * Linear interpolation between two 3-D vectors
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline void vmathSoaV3Lerp( VmathSoaVector3 *result, vec_float4 t, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
-
-/*
- * Spherical linear interpolation between two 3-D vectors
- * NOTE: 
- * The result is unpredictable if the vectors point in opposite directions.
- * Does not clamp t between 0 and 1.
- */
-static inline void vmathSoaV3Slerp( VmathSoaVector3 *result, vec_float4 t, const VmathSoaVector3 *unitVec0, const VmathSoaVector3 *unitVec1 );
-
-/*
- * Conditionally select between two 3-D vectors
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline void vmathSoaV3Select( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1, vec_uint4 select1 );
-
-/*
- * Load four three-float 3-D vectors, stored in three quadwords
- */
-static inline void vmathSoaV3LoadXYZArray( VmathSoaVector3 *vec, const vec_float4 *threeQuads );
-
-/*
- * Store four slots of an SoA 3-D vector in three quadwords
- */
-static inline void vmathSoaV3StoreXYZArray( const VmathSoaVector3 *vec, vec_float4 *threeQuads );
-
-/*
- * Store eight slots of two SoA 3-D vectors as half-floats
- */
-static inline void vmathSoaV3StoreHalfFloats( const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1, vec_ushort8 *threeQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3-D vector
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaV3Print( const VmathSoaVector3 *vec );
-
-/*
- * Print a 3-D vector and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaV3Prints( const VmathSoaVector3 *vec, const char *name );
-
-#endif
-
-/*
- * Copy a 4-D vector
- */
-static inline void vmathSoaV4Copy( VmathSoaVector4 *result, const VmathSoaVector4 *vec );
-
-/*
- * Construct a 4-D vector from x, y, z, and w elements
- */
-static inline void vmathSoaV4MakeFromElems( VmathSoaVector4 *result, vec_float4 x, vec_float4 y, vec_float4 z, vec_float4 w );
-
-/*
- * Construct a 4-D vector from a 3-D vector and a scalar
- */
-static inline void vmathSoaV4MakeFromV3Scalar( VmathSoaVector4 *result, const VmathSoaVector3 *xyz, vec_float4 w );
-
-/*
- * Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0
- */
-static inline void vmathSoaV4MakeFromV3( VmathSoaVector4 *result, const VmathSoaVector3 *vec );
-
-/*
- * Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1
- */
-static inline void vmathSoaV4MakeFromP3( VmathSoaVector4 *result, const VmathSoaPoint3 *pnt );
-
-/*
- * Copy elements from a quaternion into a 4-D vector
- */
-static inline void vmathSoaV4MakeFromQ( VmathSoaVector4 *result, const VmathSoaQuat *quat );
-
-/*
- * Set all elements of a 4-D vector to the same scalar value
- */
-static inline void vmathSoaV4MakeFromScalar( VmathSoaVector4 *result, vec_float4 scalar );
-
-/*
- * Replicate an AoS 4-D vector
- */
-static inline void vmathSoaV4MakeFromAos( VmathSoaVector4 *result, const VmathVector4 *vec );
-
-/*
- * Insert four AoS 4-D vectors
- */
-static inline void vmathSoaV4MakeFrom4Aos( VmathSoaVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1, const VmathVector4 *vec2, const VmathVector4 *vec3 );
-
-/*
- * Extract four AoS 4-D vectors
- */
-static inline void vmathSoaV4Get4Aos( const VmathSoaVector4 *vec, VmathVector4 *result0, VmathVector4 *result1, VmathVector4 *result2, VmathVector4 *result3 );
-
-/*
- * Set the x, y, and z elements of a 4-D vector
- * NOTE: 
- * This function does not change the w element.
- */
-static inline void vmathSoaV4SetXYZ( VmathSoaVector4 *result, const VmathSoaVector3 *vec );
-
-/*
- * Get the x, y, and z elements of a 4-D vector
- */
-static inline void vmathSoaV4GetXYZ( VmathSoaVector3 *result, const VmathSoaVector4 *vec );
-
-/*
- * Set the x element of a 4-D vector
- */
-static inline void vmathSoaV4SetX( VmathSoaVector4 *result, vec_float4 x );
-
-/*
- * Set the y element of a 4-D vector
- */
-static inline void vmathSoaV4SetY( VmathSoaVector4 *result, vec_float4 y );
-
-/*
- * Set the z element of a 4-D vector
- */
-static inline void vmathSoaV4SetZ( VmathSoaVector4 *result, vec_float4 z );
-
-/*
- * Set the w element of a 4-D vector
- */
-static inline void vmathSoaV4SetW( VmathSoaVector4 *result, vec_float4 w );
-
-/*
- * Get the x element of a 4-D vector
- */
-static inline vec_float4 vmathSoaV4GetX( const VmathSoaVector4 *vec );
-
-/*
- * Get the y element of a 4-D vector
- */
-static inline vec_float4 vmathSoaV4GetY( const VmathSoaVector4 *vec );
-
-/*
- * Get the z element of a 4-D vector
- */
-static inline vec_float4 vmathSoaV4GetZ( const VmathSoaVector4 *vec );
-
-/*
- * Get the w element of a 4-D vector
- */
-static inline vec_float4 vmathSoaV4GetW( const VmathSoaVector4 *vec );
-
-/*
- * Set an x, y, z, or w element of a 4-D vector by index
- */
-static inline void vmathSoaV4SetElem( VmathSoaVector4 *result, int idx, vec_float4 value );
-
-/*
- * Get an x, y, z, or w element of a 4-D vector by index
- */
-static inline vec_float4 vmathSoaV4GetElem( const VmathSoaVector4 *vec, int idx );
-
-/*
- * Add two 4-D vectors
- */
-static inline void vmathSoaV4Add( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
-
-/*
- * Subtract a 4-D vector from another 4-D vector
- */
-static inline void vmathSoaV4Sub( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
-
-/*
- * Multiply a 4-D vector by a scalar
- */
-static inline void vmathSoaV4ScalarMul( VmathSoaVector4 *result, const VmathSoaVector4 *vec, vec_float4 scalar );
-
-/*
- * Divide a 4-D vector by a scalar
- */
-static inline void vmathSoaV4ScalarDiv( VmathSoaVector4 *result, const VmathSoaVector4 *vec, vec_float4 scalar );
-
-/*
- * Negate all elements of a 4-D vector
- */
-static inline void vmathSoaV4Neg( VmathSoaVector4 *result, const VmathSoaVector4 *vec );
-
-/*
- * Construct x axis
- */
-static inline void vmathSoaV4MakeXAxis( VmathSoaVector4 *result );
-
-/*
- * Construct y axis
- */
-static inline void vmathSoaV4MakeYAxis( VmathSoaVector4 *result );
-
-/*
- * Construct z axis
- */
-static inline void vmathSoaV4MakeZAxis( VmathSoaVector4 *result );
-
-/*
- * Construct w axis
- */
-static inline void vmathSoaV4MakeWAxis( VmathSoaVector4 *result );
-
-/*
- * Multiply two 4-D vectors per element
- */
-static inline void vmathSoaV4MulPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
-
-/*
- * Divide two 4-D vectors per element
- * NOTE: 
- * Floating-point behavior matches standard library function divf4.
- */
-static inline void vmathSoaV4DivPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
-
-/*
- * Compute the reciprocal of a 4-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function recipf4.
- */
-static inline void vmathSoaV4RecipPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec );
-
-/*
- * Compute the square root of a 4-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function sqrtf4.
- */
-static inline void vmathSoaV4SqrtPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec );
-
-/*
- * Compute the reciprocal square root of a 4-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function rsqrtf4.
- */
-static inline void vmathSoaV4RsqrtPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec );
-
-/*
- * Compute the absolute value of a 4-D vector per element
- */
-static inline void vmathSoaV4AbsPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec );
-
-/*
- * Copy sign from one 4-D vector to another, per element
- */
-static inline void vmathSoaV4CopySignPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
-
-/*
- * Maximum of two 4-D vectors per element
- */
-static inline void vmathSoaV4MaxPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
-
-/*
- * Minimum of two 4-D vectors per element
- */
-static inline void vmathSoaV4MinPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
-
-/*
- * Maximum element of a 4-D vector
- */
-static inline vec_float4 vmathSoaV4MaxElem( const VmathSoaVector4 *vec );
-
-/*
- * Minimum element of a 4-D vector
- */
-static inline vec_float4 vmathSoaV4MinElem( const VmathSoaVector4 *vec );
-
-/*
- * Compute the sum of all elements of a 4-D vector
- */
-static inline vec_float4 vmathSoaV4Sum( const VmathSoaVector4 *vec );
-
-/*
- * Compute the dot product of two 4-D vectors
- */
-static inline vec_float4 vmathSoaV4Dot( const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
-
-/*
- * Compute the square of the length of a 4-D vector
- */
-static inline vec_float4 vmathSoaV4LengthSqr( const VmathSoaVector4 *vec );
-
-/*
- * Compute the length of a 4-D vector
- */
-static inline vec_float4 vmathSoaV4Length( const VmathSoaVector4 *vec );
-
-/*
- * Normalize a 4-D vector
- * NOTE: 
- * The result is unpredictable when all elements of vec are at or near zero.
- */
-static inline void vmathSoaV4Normalize( VmathSoaVector4 *result, const VmathSoaVector4 *vec );
-
-/*
- * Outer product of two 4-D vectors
- */
-static inline void vmathSoaV4Outer( VmathSoaMatrix4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
-
-/*
- * Linear interpolation between two 4-D vectors
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline void vmathSoaV4Lerp( VmathSoaVector4 *result, vec_float4 t, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
-
-/*
- * Spherical linear interpolation between two 4-D vectors
- * NOTE: 
- * The result is unpredictable if the vectors point in opposite directions.
- * Does not clamp t between 0 and 1.
- */
-static inline void vmathSoaV4Slerp( VmathSoaVector4 *result, vec_float4 t, const VmathSoaVector4 *unitVec0, const VmathSoaVector4 *unitVec1 );
-
-/*
- * Conditionally select between two 4-D vectors
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline void vmathSoaV4Select( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1, vec_uint4 select1 );
-
-/*
- * Store four slots of an SoA 4-D vector as half-floats
- */
-static inline void vmathSoaV4StoreHalfFloats( const VmathSoaVector4 *vec, vec_ushort8 *twoQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 4-D vector
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaV4Print( const VmathSoaVector4 *vec );
-
-/*
- * Print a 4-D vector and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaV4Prints( const VmathSoaVector4 *vec, const char *name );
-
-#endif
-
-/*
- * Copy a 3-D point
- */
-static inline void vmathSoaP3Copy( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt );
-
-/*
- * Construct a 3-D point from x, y, and z elements
- */
-static inline void vmathSoaP3MakeFromElems( VmathSoaPoint3 *result, vec_float4 x, vec_float4 y, vec_float4 z );
-
-/*
- * Copy elements from a 3-D vector into a 3-D point
- */
-static inline void vmathSoaP3MakeFromV3( VmathSoaPoint3 *result, const VmathSoaVector3 *vec );
-
-/*
- * Set all elements of a 3-D point to the same scalar value
- */
-static inline void vmathSoaP3MakeFromScalar( VmathSoaPoint3 *result, vec_float4 scalar );
-
-/*
- * Replicate an AoS 3-D point
- */
-static inline void vmathSoaP3MakeFromAos( VmathSoaPoint3 *result, const VmathPoint3 *pnt );
-
-/*
- * Insert four AoS 3-D points
- */
-static inline void vmathSoaP3MakeFrom4Aos( VmathSoaPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, const VmathPoint3 *pnt2, const VmathPoint3 *pnt3 );
-
-/*
- * Extract four AoS 3-D points
- */
-static inline void vmathSoaP3Get4Aos( const VmathSoaPoint3 *pnt, VmathPoint3 *result0, VmathPoint3 *result1, VmathPoint3 *result2, VmathPoint3 *result3 );
-
-/*
- * Set the x element of a 3-D point
- */
-static inline void vmathSoaP3SetX( VmathSoaPoint3 *result, vec_float4 x );
-
-/*
- * Set the y element of a 3-D point
- */
-static inline void vmathSoaP3SetY( VmathSoaPoint3 *result, vec_float4 y );
-
-/*
- * Set the z element of a 3-D point
- */
-static inline void vmathSoaP3SetZ( VmathSoaPoint3 *result, vec_float4 z );
-
-/*
- * Get the x element of a 3-D point
- */
-static inline vec_float4 vmathSoaP3GetX( const VmathSoaPoint3 *pnt );
-
-/*
- * Get the y element of a 3-D point
- */
-static inline vec_float4 vmathSoaP3GetY( const VmathSoaPoint3 *pnt );
-
-/*
- * Get the z element of a 3-D point
- */
-static inline vec_float4 vmathSoaP3GetZ( const VmathSoaPoint3 *pnt );
-
-/*
- * Set an x, y, or z element of a 3-D point by index
- */
-static inline void vmathSoaP3SetElem( VmathSoaPoint3 *result, int idx, vec_float4 value );
-
-/*
- * Get an x, y, or z element of a 3-D point by index
- */
-static inline vec_float4 vmathSoaP3GetElem( const VmathSoaPoint3 *pnt, int idx );
-
-/*
- * Subtract a 3-D point from another 3-D point
- */
-static inline void vmathSoaP3Sub( VmathSoaVector3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 );
-
-/*
- * Add a 3-D point to a 3-D vector
- */
-static inline void vmathSoaP3AddV3( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, const VmathSoaVector3 *vec );
-
-/*
- * Subtract a 3-D vector from a 3-D point
- */
-static inline void vmathSoaP3SubV3( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, const VmathSoaVector3 *vec );
-
-/*
- * Multiply two 3-D points per element
- */
-static inline void vmathSoaP3MulPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 );
-
-/*
- * Divide two 3-D points per element
- * NOTE: 
- * Floating-point behavior matches standard library function divf4.
- */
-static inline void vmathSoaP3DivPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 );
-
-/*
- * Compute the reciprocal of a 3-D point per element
- * NOTE: 
- * Floating-point behavior matches standard library function recipf4.
- */
-static inline void vmathSoaP3RecipPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt );
-
-/*
- * Compute the square root of a 3-D point per element
- * NOTE: 
- * Floating-point behavior matches standard library function sqrtf4.
- */
-static inline void vmathSoaP3SqrtPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt );
-
-/*
- * Compute the reciprocal square root of a 3-D point per element
- * NOTE: 
- * Floating-point behavior matches standard library function rsqrtf4.
- */
-static inline void vmathSoaP3RsqrtPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt );
-
-/*
- * Compute the absolute value of a 3-D point per element
- */
-static inline void vmathSoaP3AbsPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt );
-
-/*
- * Copy sign from one 3-D point to another, per element
- */
-static inline void vmathSoaP3CopySignPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 );
-
-/*
- * Maximum of two 3-D points per element
- */
-static inline void vmathSoaP3MaxPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 );
-
-/*
- * Minimum of two 3-D points per element
- */
-static inline void vmathSoaP3MinPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 );
-
-/*
- * Maximum element of a 3-D point
- */
-static inline vec_float4 vmathSoaP3MaxElem( const VmathSoaPoint3 *pnt );
-
-/*
- * Minimum element of a 3-D point
- */
-static inline vec_float4 vmathSoaP3MinElem( const VmathSoaPoint3 *pnt );
-
-/*
- * Compute the sum of all elements of a 3-D point
- */
-static inline vec_float4 vmathSoaP3Sum( const VmathSoaPoint3 *pnt );
-
-/*
- * Apply uniform scale to a 3-D point
- */
-static inline void vmathSoaP3Scale( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, vec_float4 scaleVal );
-
-/*
- * Apply non-uniform scale to a 3-D point
- */
-static inline void vmathSoaP3NonUniformScale( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, const VmathSoaVector3 *scaleVec );
-
-/*
- * Scalar projection of a 3-D point on a unit-length 3-D vector
- */
-static inline vec_float4 vmathSoaP3Projection( const VmathSoaPoint3 *pnt, const VmathSoaVector3 *unitVec );
-
-/*
- * Compute the square of the distance of a 3-D point from the coordinate-system origin
- */
-static inline vec_float4 vmathSoaP3DistSqrFromOrigin( const VmathSoaPoint3 *pnt );
-
-/*
- * Compute the distance of a 3-D point from the coordinate-system origin
- */
-static inline vec_float4 vmathSoaP3DistFromOrigin( const VmathSoaPoint3 *pnt );
-
-/*
- * Compute the square of the distance between two 3-D points
- */
-static inline vec_float4 vmathSoaP3DistSqr( const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 );
-
-/*
- * Compute the distance between two 3-D points
- */
-static inline vec_float4 vmathSoaP3Dist( const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 );
-
-/*
- * Linear interpolation between two 3-D points
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline void vmathSoaP3Lerp( VmathSoaPoint3 *result, vec_float4 t, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 );
-
-/*
- * Conditionally select between two 3-D points
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline void vmathSoaP3Select( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1, vec_uint4 select1 );
-
-/*
- * Load four three-float 3-D points, stored in three quadwords
- */
-static inline void vmathSoaP3LoadXYZArray( VmathSoaPoint3 *pnt, const vec_float4 *threeQuads );
-
-/*
- * Store four slots of an SoA 3-D point in three quadwords
- */
-static inline void vmathSoaP3StoreXYZArray( const VmathSoaPoint3 *pnt, vec_float4 *threeQuads );
-
-/*
- * Store eight slots of two SoA 3-D points as half-floats
- */
-static inline void vmathSoaP3StoreHalfFloats( const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1, vec_ushort8 *threeQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3-D point
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaP3Print( const VmathSoaPoint3 *pnt );
-
-/*
- * Print a 3-D point and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaP3Prints( const VmathSoaPoint3 *pnt, const char *name );
-
-#endif
-
-/*
- * Copy a quaternion
- */
-static inline void vmathSoaQCopy( VmathSoaQuat *result, const VmathSoaQuat *quat );
-
-/*
- * Construct a quaternion from x, y, z, and w elements
- */
-static inline void vmathSoaQMakeFromElems( VmathSoaQuat *result, vec_float4 x, vec_float4 y, vec_float4 z, vec_float4 w );
-
-/*
- * Construct a quaternion from a 3-D vector and a scalar
- */
-static inline void vmathSoaQMakeFromV3Scalar( VmathSoaQuat *result, const VmathSoaVector3 *xyz, vec_float4 w );
-
-/*
- * Copy elements from a 4-D vector into a quaternion
- */
-static inline void vmathSoaQMakeFromV4( VmathSoaQuat *result, const VmathSoaVector4 *vec );
-
-/*
- * Convert a rotation matrix to a unit-length quaternion
- */
-static inline void vmathSoaQMakeFromM3( VmathSoaQuat *result, const VmathSoaMatrix3 *rotMat );
-
-/*
- * Set all elements of a quaternion to the same scalar value
- */
-static inline void vmathSoaQMakeFromScalar( VmathSoaQuat *result, vec_float4 scalar );
-
-/*
- * Replicate an AoS quaternion
- */
-static inline void vmathSoaQMakeFromAos( VmathSoaQuat *result, const VmathQuat *quat );
-
-/*
- * Insert four AoS quaternions
- */
-static inline void vmathSoaQMakeFrom4Aos( VmathSoaQuat *result, const VmathQuat *quat0, const VmathQuat *quat1, const VmathQuat *quat2, const VmathQuat *quat3 );
-
-/*
- * Extract four AoS quaternions
- */
-static inline void vmathSoaQGet4Aos( const VmathSoaQuat *quat, VmathQuat *result0, VmathQuat *result1, VmathQuat *result2, VmathQuat *result3 );
-
-/*
- * Set the x, y, and z elements of a quaternion
- * NOTE: 
- * This function does not change the w element.
- */
-static inline void vmathSoaQSetXYZ( VmathSoaQuat *result, const VmathSoaVector3 *vec );
-
-/*
- * Get the x, y, and z elements of a quaternion
- */
-static inline void vmathSoaQGetXYZ( VmathSoaVector3 *result, const VmathSoaQuat *quat );
-
-/*
- * Set the x element of a quaternion
- */
-static inline void vmathSoaQSetX( VmathSoaQuat *result, vec_float4 x );
-
-/*
- * Set the y element of a quaternion
- */
-static inline void vmathSoaQSetY( VmathSoaQuat *result, vec_float4 y );
-
-/*
- * Set the z element of a quaternion
- */
-static inline void vmathSoaQSetZ( VmathSoaQuat *result, vec_float4 z );
-
-/*
- * Set the w element of a quaternion
- */
-static inline void vmathSoaQSetW( VmathSoaQuat *result, vec_float4 w );
-
-/*
- * Get the x element of a quaternion
- */
-static inline vec_float4 vmathSoaQGetX( const VmathSoaQuat *quat );
-
-/*
- * Get the y element of a quaternion
- */
-static inline vec_float4 vmathSoaQGetY( const VmathSoaQuat *quat );
-
-/*
- * Get the z element of a quaternion
- */
-static inline vec_float4 vmathSoaQGetZ( const VmathSoaQuat *quat );
-
-/*
- * Get the w element of a quaternion
- */
-static inline vec_float4 vmathSoaQGetW( const VmathSoaQuat *quat );
-
-/*
- * Set an x, y, z, or w element of a quaternion by index
- */
-static inline void vmathSoaQSetElem( VmathSoaQuat *result, int idx, vec_float4 value );
-
-/*
- * Get an x, y, z, or w element of a quaternion by index
- */
-static inline vec_float4 vmathSoaQGetElem( const VmathSoaQuat *quat, int idx );
-
-/*
- * Add two quaternions
- */
-static inline void vmathSoaQAdd( VmathSoaQuat *result, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 );
-
-/*
- * Subtract a quaternion from another quaternion
- */
-static inline void vmathSoaQSub( VmathSoaQuat *result, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 );
-
-/*
- * Multiply two quaternions
- */
-static inline void vmathSoaQMul( VmathSoaQuat *result, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 );
-
-/*
- * Multiply a quaternion by a scalar
- */
-static inline void vmathSoaQScalarMul( VmathSoaQuat *result, const VmathSoaQuat *quat, vec_float4 scalar );
-
-/*
- * Divide a quaternion by a scalar
- */
-static inline void vmathSoaQScalarDiv( VmathSoaQuat *result, const VmathSoaQuat *quat, vec_float4 scalar );
-
-/*
- * Negate all elements of a quaternion
- */
-static inline void vmathSoaQNeg( VmathSoaQuat *result, const VmathSoaQuat *quat );
-
-/*
- * Construct an identity quaternion
- */
-static inline void vmathSoaQMakeIdentity( VmathSoaQuat *result );
-
-/*
- * Construct a quaternion to rotate between two unit-length 3-D vectors
- * NOTE: 
- * The result is unpredictable if unitVec0 and unitVec1 point in opposite directions.
- */
-static inline void vmathSoaQMakeRotationArc( VmathSoaQuat *result, const VmathSoaVector3 *unitVec0, const VmathSoaVector3 *unitVec1 );
-
-/*
- * Construct a quaternion to rotate around a unit-length 3-D vector
- */
-static inline void vmathSoaQMakeRotationAxis( VmathSoaQuat *result, vec_float4 radians, const VmathSoaVector3 *unitVec );
-
-/*
- * Construct a quaternion to rotate around the x axis
- */
-static inline void vmathSoaQMakeRotationX( VmathSoaQuat *result, vec_float4 radians );
-
-/*
- * Construct a quaternion to rotate around the y axis
- */
-static inline void vmathSoaQMakeRotationY( VmathSoaQuat *result, vec_float4 radians );
-
-/*
- * Construct a quaternion to rotate around the z axis
- */
-static inline void vmathSoaQMakeRotationZ( VmathSoaQuat *result, vec_float4 radians );
-
-/*
- * Compute the conjugate of a quaternion
- */
-static inline void vmathSoaQConj( VmathSoaQuat *result, const VmathSoaQuat *quat );
-
-/*
- * Use a unit-length quaternion to rotate a 3-D vector
- */
-static inline void vmathSoaQRotate( VmathSoaVector3 *result, const VmathSoaQuat *unitQuat, const VmathSoaVector3 *vec );
-
-/*
- * Compute the dot product of two quaternions
- */
-static inline vec_float4 vmathSoaQDot( const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 );
-
-/*
- * Compute the norm of a quaternion
- */
-static inline vec_float4 vmathSoaQNorm( const VmathSoaQuat *quat );
-
-/*
- * Compute the length of a quaternion
- */
-static inline vec_float4 vmathSoaQLength( const VmathSoaQuat *quat );
-
-/*
- * Normalize a quaternion
- * NOTE: 
- * The result is unpredictable when all elements of quat are at or near zero.
- */
-static inline void vmathSoaQNormalize( VmathSoaQuat *result, const VmathSoaQuat *quat );
-
-/*
- * Linear interpolation between two quaternions
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline void vmathSoaQLerp( VmathSoaQuat *result, vec_float4 t, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 );
-
-/*
- * Spherical linear interpolation between two quaternions
- * NOTE: 
- * Interpolates along the shortest path between orientations.
- * Does not clamp t between 0 and 1.
- */
-static inline void vmathSoaQSlerp( VmathSoaQuat *result, vec_float4 t, const VmathSoaQuat *unitQuat0, const VmathSoaQuat *unitQuat1 );
-
-/*
- * Spherical quadrangle interpolation
- */
-static inline void vmathSoaQSquad( VmathSoaQuat *result, vec_float4 t, const VmathSoaQuat *unitQuat0, const VmathSoaQuat *unitQuat1, const VmathSoaQuat *unitQuat2, const VmathSoaQuat *unitQuat3 );
-
-/*
- * Conditionally select between two quaternions
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline void vmathSoaQSelect( VmathSoaQuat *result, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1, vec_uint4 select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a quaternion
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaQPrint( const VmathSoaQuat *quat );
-
-/*
- * Print a quaternion and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaQPrints( const VmathSoaQuat *quat, const char *name );
-
-#endif
-
-/*
- * Copy a 3x3 matrix
- */
-static inline void vmathSoaM3Copy( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat );
-
-/*
- * Construct a 3x3 matrix containing the specified columns
- */
-static inline void vmathSoaM3MakeFromCols( VmathSoaMatrix3 *result, const VmathSoaVector3 *col0, const VmathSoaVector3 *col1, const VmathSoaVector3 *col2 );
-
-/*
- * Construct a 3x3 rotation matrix from a unit-length quaternion
- */
-static inline void vmathSoaM3MakeFromQ( VmathSoaMatrix3 *result, const VmathSoaQuat *unitQuat );
-
-/*
- * Set all elements of a 3x3 matrix to the same scalar value
- */
-static inline void vmathSoaM3MakeFromScalar( VmathSoaMatrix3 *result, vec_float4 scalar );
-
-/*
- * Replicate an AoS 3x3 matrix
- */
-static inline void vmathSoaM3MakeFromAos( VmathSoaMatrix3 *result, const VmathMatrix3 *mat );
-
-/*
- * Insert four AoS 3x3 matrices
- */
-static inline void vmathSoaM3MakeFrom4Aos( VmathSoaMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1, const VmathMatrix3 *mat2, const VmathMatrix3 *mat3 );
-
-/*
- * Extract four AoS 3x3 matrices
- */
-static inline void vmathSoaM3Get4Aos( const VmathSoaMatrix3 *mat, VmathMatrix3 *result0, VmathMatrix3 *result1, VmathMatrix3 *result2, VmathMatrix3 *result3 );
-
-/*
- * Set column 0 of a 3x3 matrix
- */
-static inline void vmathSoaM3SetCol0( VmathSoaMatrix3 *result, const VmathSoaVector3 *col0 );
-
-/*
- * Set column 1 of a 3x3 matrix
- */
-static inline void vmathSoaM3SetCol1( VmathSoaMatrix3 *result, const VmathSoaVector3 *col1 );
-
-/*
- * Set column 2 of a 3x3 matrix
- */
-static inline void vmathSoaM3SetCol2( VmathSoaMatrix3 *result, const VmathSoaVector3 *col2 );
-
-/*
- * Get column 0 of a 3x3 matrix
- */
-static inline void vmathSoaM3GetCol0( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat );
-
-/*
- * Get column 1 of a 3x3 matrix
- */
-static inline void vmathSoaM3GetCol1( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat );
-
-/*
- * Get column 2 of a 3x3 matrix
- */
-static inline void vmathSoaM3GetCol2( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat );
-
-/*
- * Set the column of a 3x3 matrix referred to by the specified index
- */
-static inline void vmathSoaM3SetCol( VmathSoaMatrix3 *result, int col, const VmathSoaVector3 *vec );
-
-/*
- * Set the row of a 3x3 matrix referred to by the specified index
- */
-static inline void vmathSoaM3SetRow( VmathSoaMatrix3 *result, int row, const VmathSoaVector3 *vec );
-
-/*
- * Get the column of a 3x3 matrix referred to by the specified index
- */
-static inline void vmathSoaM3GetCol( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat, int col );
-
-/*
- * Get the row of a 3x3 matrix referred to by the specified index
- */
-static inline void vmathSoaM3GetRow( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat, int row );
-
-/*
- * Set the element of a 3x3 matrix referred to by column and row indices
- */
-static inline void vmathSoaM3SetElem( VmathSoaMatrix3 *result, int col, int row, vec_float4 val );
-
-/*
- * Get the element of a 3x3 matrix referred to by column and row indices
- */
-static inline vec_float4 vmathSoaM3GetElem( const VmathSoaMatrix3 *mat, int col, int row );
-
-/*
- * Add two 3x3 matrices
- */
-static inline void vmathSoaM3Add( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1 );
-
-/*
- * Subtract a 3x3 matrix from another 3x3 matrix
- */
-static inline void vmathSoaM3Sub( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1 );
-
-/*
- * Negate all elements of a 3x3 matrix
- */
-static inline void vmathSoaM3Neg( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat );
-
-/*
- * Multiply a 3x3 matrix by a scalar
- */
-static inline void vmathSoaM3ScalarMul( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat, vec_float4 scalar );
-
-/*
- * Multiply a 3x3 matrix by a 3-D vector
- */
-static inline void vmathSoaM3MulV3( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat, const VmathSoaVector3 *vec );
-
-/*
- * Multiply two 3x3 matrices
- */
-static inline void vmathSoaM3Mul( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1 );
-
-/*
- * Construct an identity 3x3 matrix
- */
-static inline void vmathSoaM3MakeIdentity( VmathSoaMatrix3 *result );
-
-/*
- * Construct a 3x3 matrix to rotate around the x axis
- */
-static inline void vmathSoaM3MakeRotationX( VmathSoaMatrix3 *result, vec_float4 radians );
-
-/*
- * Construct a 3x3 matrix to rotate around the y axis
- */
-static inline void vmathSoaM3MakeRotationY( VmathSoaMatrix3 *result, vec_float4 radians );
-
-/*
- * Construct a 3x3 matrix to rotate around the z axis
- */
-static inline void vmathSoaM3MakeRotationZ( VmathSoaMatrix3 *result, vec_float4 radians );
-
-/*
- * Construct a 3x3 matrix to rotate around the x, y, and z axes
- */
-static inline void vmathSoaM3MakeRotationZYX( VmathSoaMatrix3 *result, const VmathSoaVector3 *radiansXYZ );
-
-/*
- * Construct a 3x3 matrix to rotate around a unit-length 3-D vector
- */
-static inline void vmathSoaM3MakeRotationAxis( VmathSoaMatrix3 *result, vec_float4 radians, const VmathSoaVector3 *unitVec );
-
-/*
- * Construct a rotation matrix from a unit-length quaternion
- */
-static inline void vmathSoaM3MakeRotationQ( VmathSoaMatrix3 *result, const VmathSoaQuat *unitQuat );
-
-/*
- * Construct a 3x3 matrix to perform scaling
- */
-static inline void vmathSoaM3MakeScale( VmathSoaMatrix3 *result, const VmathSoaVector3 *scaleVec );
-
-/*
- * Append (post-multiply) a scale transformation to a 3x3 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline void vmathSoaM3AppendScale( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat, const VmathSoaVector3 *scaleVec );
-
-/*
- * Prepend (pre-multiply) a scale transformation to a 3x3 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline void vmathSoaM3PrependScale( VmathSoaMatrix3 *result, const VmathSoaVector3 *scaleVec, const VmathSoaMatrix3 *mat );
-
-/*
- * Multiply two 3x3 matrices per element
- */
-static inline void vmathSoaM3MulPerElem( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1 );
-
-/*
- * Compute the absolute value of a 3x3 matrix per element
- */
-static inline void vmathSoaM3AbsPerElem( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat );
-
-/*
- * Transpose of a 3x3 matrix
- */
-static inline void vmathSoaM3Transpose( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat );
-
-/*
- * Compute the inverse of a 3x3 matrix
- * NOTE: 
- * Result is unpredictable when the determinant of mat is equal to or near 0.
- */
-static inline void vmathSoaM3Inverse( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat );
-
-/*
- * Determinant of a 3x3 matrix
- */
-static inline vec_float4 vmathSoaM3Determinant( const VmathSoaMatrix3 *mat );
-
-/*
- * Conditionally select between two 3x3 matrices
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline void vmathSoaM3Select( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1, vec_uint4 select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3x3 matrix
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaM3Print( const VmathSoaMatrix3 *mat );
-
-/*
- * Print a 3x3 matrix and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaM3Prints( const VmathSoaMatrix3 *mat, const char *name );
-
-#endif
-
-/*
- * Copy a 4x4 matrix
- */
-static inline void vmathSoaM4Copy( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat );
-
-/*
- * Construct a 4x4 matrix containing the specified columns
- */
-static inline void vmathSoaM4MakeFromCols( VmathSoaMatrix4 *result, const VmathSoaVector4 *col0, const VmathSoaVector4 *col1, const VmathSoaVector4 *col2, const VmathSoaVector4 *col3 );
-
-/*
- * Construct a 4x4 matrix from a 3x4 transformation matrix
- */
-static inline void vmathSoaM4MakeFromT3( VmathSoaMatrix4 *result, const VmathSoaTransform3 *mat );
-
-/*
- * Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector
- */
-static inline void vmathSoaM4MakeFromM3V3( VmathSoaMatrix4 *result, const VmathSoaMatrix3 *mat, const VmathSoaVector3 *translateVec );
-
-/*
- * Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector
- */
-static inline void vmathSoaM4MakeFromQV3( VmathSoaMatrix4 *result, const VmathSoaQuat *unitQuat, const VmathSoaVector3 *translateVec );
-
-/*
- * Set all elements of a 4x4 matrix to the same scalar value
- */
-static inline void vmathSoaM4MakeFromScalar( VmathSoaMatrix4 *result, vec_float4 scalar );
-
-/*
- * Replicate an AoS 4x4 matrix
- */
-static inline void vmathSoaM4MakeFromAos( VmathSoaMatrix4 *result, const VmathMatrix4 *mat );
-
-/*
- * Insert four AoS 4x4 matrices
- */
-static inline void vmathSoaM4MakeFrom4Aos( VmathSoaMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1, const VmathMatrix4 *mat2, const VmathMatrix4 *mat3 );
-
-/*
- * Extract four AoS 4x4 matrices
- */
-static inline void vmathSoaM4Get4Aos( const VmathSoaMatrix4 *mat, VmathMatrix4 *result0, VmathMatrix4 *result1, VmathMatrix4 *result2, VmathMatrix4 *result3 );
-
-/*
- * Set the upper-left 3x3 submatrix
- * NOTE: 
- * This function does not change the bottom row elements.
- */
-static inline void vmathSoaM4SetUpper3x3( VmathSoaMatrix4 *result, const VmathSoaMatrix3 *mat3 );
-
-/*
- * Get the upper-left 3x3 submatrix of a 4x4 matrix
- */
-static inline void vmathSoaM4GetUpper3x3( VmathSoaMatrix3 *result, const VmathSoaMatrix4 *mat );
-
-/*
- * Set translation component
- * NOTE: 
- * This function does not change the bottom row elements.
- */
-static inline void vmathSoaM4SetTranslation( VmathSoaMatrix4 *result, const VmathSoaVector3 *translateVec );
-
-/*
- * Get the translation component of a 4x4 matrix
- */
-static inline void vmathSoaM4GetTranslation( VmathSoaVector3 *result, const VmathSoaMatrix4 *mat );
-
-/*
- * Set column 0 of a 4x4 matrix
- */
-static inline void vmathSoaM4SetCol0( VmathSoaMatrix4 *result, const VmathSoaVector4 *col0 );
-
-/*
- * Set column 1 of a 4x4 matrix
- */
-static inline void vmathSoaM4SetCol1( VmathSoaMatrix4 *result, const VmathSoaVector4 *col1 );
-
-/*
- * Set column 2 of a 4x4 matrix
- */
-static inline void vmathSoaM4SetCol2( VmathSoaMatrix4 *result, const VmathSoaVector4 *col2 );
-
-/*
- * Set column 3 of a 4x4 matrix
- */
-static inline void vmathSoaM4SetCol3( VmathSoaMatrix4 *result, const VmathSoaVector4 *col3 );
-
-/*
- * Get column 0 of a 4x4 matrix
- */
-static inline void vmathSoaM4GetCol0( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat );
-
-/*
- * Get column 1 of a 4x4 matrix
- */
-static inline void vmathSoaM4GetCol1( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat );
-
-/*
- * Get column 2 of a 4x4 matrix
- */
-static inline void vmathSoaM4GetCol2( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat );
-
-/*
- * Get column 3 of a 4x4 matrix
- */
-static inline void vmathSoaM4GetCol3( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat );
-
-/*
- * Set the column of a 4x4 matrix referred to by the specified index
- */
-static inline void vmathSoaM4SetCol( VmathSoaMatrix4 *result, int col, const VmathSoaVector4 *vec );
-
-/*
- * Set the row of a 4x4 matrix referred to by the specified index
- */
-static inline void vmathSoaM4SetRow( VmathSoaMatrix4 *result, int row, const VmathSoaVector4 *vec );
-
-/*
- * Get the column of a 4x4 matrix referred to by the specified index
- */
-static inline void vmathSoaM4GetCol( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, int col );
-
-/*
- * Get the row of a 4x4 matrix referred to by the specified index
- */
-static inline void vmathSoaM4GetRow( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, int row );
-
-/*
- * Set the element of a 4x4 matrix referred to by column and row indices
- */
-static inline void vmathSoaM4SetElem( VmathSoaMatrix4 *result, int col, int row, vec_float4 val );
-
-/*
- * Get the element of a 4x4 matrix referred to by column and row indices
- */
-static inline vec_float4 vmathSoaM4GetElem( const VmathSoaMatrix4 *mat, int col, int row );
-
-/*
- * Add two 4x4 matrices
- */
-static inline void vmathSoaM4Add( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1 );
-
-/*
- * Subtract a 4x4 matrix from another 4x4 matrix
- */
-static inline void vmathSoaM4Sub( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1 );
-
-/*
- * Negate all elements of a 4x4 matrix
- */
-static inline void vmathSoaM4Neg( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat );
-
-/*
- * Multiply a 4x4 matrix by a scalar
- */
-static inline void vmathSoaM4ScalarMul( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat, vec_float4 scalar );
-
-/*
- * Multiply a 4x4 matrix by a 4-D vector
- */
-static inline void vmathSoaM4MulV4( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, const VmathSoaVector4 *vec );
-
-/*
- * Multiply a 4x4 matrix by a 3-D vector
- */
-static inline void vmathSoaM4MulV3( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, const VmathSoaVector3 *vec );
-
-/*
- * Multiply a 4x4 matrix by a 3-D point
- */
-static inline void vmathSoaM4MulP3( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, const VmathSoaPoint3 *pnt );
-
-/*
- * Multiply two 4x4 matrices
- */
-static inline void vmathSoaM4Mul( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1 );
-
-/*
- * Multiply a 4x4 matrix by a 3x4 transformation matrix
- */
-static inline void vmathSoaM4MulT3( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat, const VmathSoaTransform3 *tfrm );
-
-/*
- * Construct an identity 4x4 matrix
- */
-static inline void vmathSoaM4MakeIdentity( VmathSoaMatrix4 *result );
-
-/*
- * Construct a 4x4 matrix to rotate around the x axis
- */
-static inline void vmathSoaM4MakeRotationX( VmathSoaMatrix4 *result, vec_float4 radians );
-
-/*
- * Construct a 4x4 matrix to rotate around the y axis
- */
-static inline void vmathSoaM4MakeRotationY( VmathSoaMatrix4 *result, vec_float4 radians );
-
-/*
- * Construct a 4x4 matrix to rotate around the z axis
- */
-static inline void vmathSoaM4MakeRotationZ( VmathSoaMatrix4 *result, vec_float4 radians );
-
-/*
- * Construct a 4x4 matrix to rotate around the x, y, and z axes
- */
-static inline void vmathSoaM4MakeRotationZYX( VmathSoaMatrix4 *result, const VmathSoaVector3 *radiansXYZ );
-
-/*
- * Construct a 4x4 matrix to rotate around a unit-length 3-D vector
- */
-static inline void vmathSoaM4MakeRotationAxis( VmathSoaMatrix4 *result, vec_float4 radians, const VmathSoaVector3 *unitVec );
-
-/*
- * Construct a rotation matrix from a unit-length quaternion
- */
-static inline void vmathSoaM4MakeRotationQ( VmathSoaMatrix4 *result, const VmathSoaQuat *unitQuat );
-
-/*
- * Construct a 4x4 matrix to perform scaling
- */
-static inline void vmathSoaM4MakeScale( VmathSoaMatrix4 *result, const VmathSoaVector3 *scaleVec );
-
-/*
- * Construct a 4x4 matrix to perform translation
- */
-static inline void vmathSoaM4MakeTranslation( VmathSoaMatrix4 *result, const VmathSoaVector3 *translateVec );
-
-/*
- * Construct viewing matrix based on eye position, position looked at, and up direction
- */
-static inline void vmathSoaM4MakeLookAt( VmathSoaMatrix4 *result, const VmathSoaPoint3 *eyePos, const VmathSoaPoint3 *lookAtPos, const VmathSoaVector3 *upVec );
-
-/*
- * Construct a perspective projection matrix
- */
-static inline void vmathSoaM4MakePerspective( VmathSoaMatrix4 *result, vec_float4 fovyRadians, vec_float4 aspect, vec_float4 zNear, vec_float4 zFar );
-
-/*
- * Construct a perspective projection matrix based on frustum
- */
-static inline void vmathSoaM4MakeFrustum( VmathSoaMatrix4 *result, vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar );
-
-/*
- * Construct an orthographic projection matrix
- */
-static inline void vmathSoaM4MakeOrthographic( VmathSoaMatrix4 *result, vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar );
-
-/*
- * Append (post-multiply) a scale transformation to a 4x4 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline void vmathSoaM4AppendScale( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat, const VmathSoaVector3 *scaleVec );
-
-/*
- * Prepend (pre-multiply) a scale transformation to a 4x4 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline void vmathSoaM4PrependScale( VmathSoaMatrix4 *result, const VmathSoaVector3 *scaleVec, const VmathSoaMatrix4 *mat );
-
-/*
- * Multiply two 4x4 matrices per element
- */
-static inline void vmathSoaM4MulPerElem( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1 );
-
-/*
- * Compute the absolute value of a 4x4 matrix per element
- */
-static inline void vmathSoaM4AbsPerElem( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat );
-
-/*
- * Transpose of a 4x4 matrix
- */
-static inline void vmathSoaM4Transpose( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat );
-
-/*
- * Compute the inverse of a 4x4 matrix
- * NOTE: 
- * Result is unpredictable when the determinant of mat is equal to or near 0.
- */
-static inline void vmathSoaM4Inverse( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat );
-
-/*
- * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix
- * NOTE: 
- * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.  The result is unpredictable when the determinant of mat is equal to or near 0.
- */
-static inline void vmathSoaM4AffineInverse( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat );
-
-/*
- * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix
- * NOTE: 
- * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.
- */
-static inline void vmathSoaM4OrthoInverse( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat );
-
-/*
- * Determinant of a 4x4 matrix
- */
-static inline vec_float4 vmathSoaM4Determinant( const VmathSoaMatrix4 *mat );
-
-/*
- * Conditionally select between two 4x4 matrices
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline void vmathSoaM4Select( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1, vec_uint4 select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 4x4 matrix
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaM4Print( const VmathSoaMatrix4 *mat );
-
-/*
- * Print a 4x4 matrix and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaM4Prints( const VmathSoaMatrix4 *mat, const char *name );
-
-#endif
-
-/*
- * Copy a 3x4 transformation matrix
- */
-static inline void vmathSoaT3Copy( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm );
-
-/*
- * Construct a 3x4 transformation matrix containing the specified columns
- */
-static inline void vmathSoaT3MakeFromCols( VmathSoaTransform3 *result, const VmathSoaVector3 *col0, const VmathSoaVector3 *col1, const VmathSoaVector3 *col2, const VmathSoaVector3 *col3 );
-
-/*
- * Construct a 3x4 transformation matrix from a 3x3 matrix and a 3-D vector
- */
-static inline void vmathSoaT3MakeFromM3V3( VmathSoaTransform3 *result, const VmathSoaMatrix3 *tfrm, const VmathSoaVector3 *translateVec );
-
-/*
- * Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector
- */
-static inline void vmathSoaT3MakeFromQV3( VmathSoaTransform3 *result, const VmathSoaQuat *unitQuat, const VmathSoaVector3 *translateVec );
-
-/*
- * Set all elements of a 3x4 transformation matrix to the same scalar value
- */
-static inline void vmathSoaT3MakeFromScalar( VmathSoaTransform3 *result, vec_float4 scalar );
-
-/*
- * Replicate an AoS 3x4 transformation matrix
- */
-static inline void vmathSoaT3MakeFromAos( VmathSoaTransform3 *result, const VmathTransform3 *tfrm );
-
-/*
- * Insert four AoS 3x4 transformation matrices
- */
-static inline void vmathSoaT3MakeFrom4Aos( VmathSoaTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1, const VmathTransform3 *tfrm2, const VmathTransform3 *tfrm3 );
-
-/*
- * Extract four AoS 3x4 transformation matrices
- */
-static inline void vmathSoaT3Get4Aos( const VmathSoaTransform3 *tfrm, VmathTransform3 *result0, VmathTransform3 *result1, VmathTransform3 *result2, VmathTransform3 *result3 );
-
-/*
- * Set the upper-left 3x3 submatrix
- */
-static inline void vmathSoaT3SetUpper3x3( VmathSoaTransform3 *result, const VmathSoaMatrix3 *mat3 );
-
-/*
- * Get the upper-left 3x3 submatrix of a 3x4 transformation matrix
- */
-static inline void vmathSoaT3GetUpper3x3( VmathSoaMatrix3 *result, const VmathSoaTransform3 *tfrm );
-
-/*
- * Set translation component
- */
-static inline void vmathSoaT3SetTranslation( VmathSoaTransform3 *result, const VmathSoaVector3 *translateVec );
-
-/*
- * Get the translation component of a 3x4 transformation matrix
- */
-static inline void vmathSoaT3GetTranslation( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm );
-
-/*
- * Set column 0 of a 3x4 transformation matrix
- */
-static inline void vmathSoaT3SetCol0( VmathSoaTransform3 *result, const VmathSoaVector3 *col0 );
-
-/*
- * Set column 1 of a 3x4 transformation matrix
- */
-static inline void vmathSoaT3SetCol1( VmathSoaTransform3 *result, const VmathSoaVector3 *col1 );
-
-/*
- * Set column 2 of a 3x4 transformation matrix
- */
-static inline void vmathSoaT3SetCol2( VmathSoaTransform3 *result, const VmathSoaVector3 *col2 );
-
-/*
- * Set column 3 of a 3x4 transformation matrix
- */
-static inline void vmathSoaT3SetCol3( VmathSoaTransform3 *result, const VmathSoaVector3 *col3 );
-
-/*
- * Get column 0 of a 3x4 transformation matrix
- */
-static inline void vmathSoaT3GetCol0( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm );
-
-/*
- * Get column 1 of a 3x4 transformation matrix
- */
-static inline void vmathSoaT3GetCol1( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm );
-
-/*
- * Get column 2 of a 3x4 transformation matrix
- */
-static inline void vmathSoaT3GetCol2( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm );
-
-/*
- * Get column 3 of a 3x4 transformation matrix
- */
-static inline void vmathSoaT3GetCol3( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm );
-
-/*
- * Set the column of a 3x4 transformation matrix referred to by the specified index
- */
-static inline void vmathSoaT3SetCol( VmathSoaTransform3 *result, int col, const VmathSoaVector3 *vec );
-
-/*
- * Set the row of a 3x4 transformation matrix referred to by the specified index
- */
-static inline void vmathSoaT3SetRow( VmathSoaTransform3 *result, int row, const VmathSoaVector4 *vec );
-
-/*
- * Get the column of a 3x4 transformation matrix referred to by the specified index
- */
-static inline void vmathSoaT3GetCol( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm, int col );
-
-/*
- * Get the row of a 3x4 transformation matrix referred to by the specified index
- */
-static inline void vmathSoaT3GetRow( VmathSoaVector4 *result, const VmathSoaTransform3 *tfrm, int row );
-
-/*
- * Set the element of a 3x4 transformation matrix referred to by column and row indices
- */
-static inline void vmathSoaT3SetElem( VmathSoaTransform3 *result, int col, int row, vec_float4 val );
-
-/*
- * Get the element of a 3x4 transformation matrix referred to by column and row indices
- */
-static inline vec_float4 vmathSoaT3GetElem( const VmathSoaTransform3 *tfrm, int col, int row );
-
-/*
- * Multiply a 3x4 transformation matrix by a 3-D vector
- */
-static inline void vmathSoaT3MulV3( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm, const VmathSoaVector3 *vec );
-
-/*
- * Multiply a 3x4 transformation matrix by a 3-D point
- */
-static inline void vmathSoaT3MulP3( VmathSoaPoint3 *result, const VmathSoaTransform3 *tfrm, const VmathSoaPoint3 *pnt );
-
-/*
- * Multiply two 3x4 transformation matrices
- */
-static inline void vmathSoaT3Mul( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm0, const VmathSoaTransform3 *tfrm1 );
-
-/*
- * Construct an identity 3x4 transformation matrix
- */
-static inline void vmathSoaT3MakeIdentity( VmathSoaTransform3 *result );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the x axis
- */
-static inline void vmathSoaT3MakeRotationX( VmathSoaTransform3 *result, vec_float4 radians );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the y axis
- */
-static inline void vmathSoaT3MakeRotationY( VmathSoaTransform3 *result, vec_float4 radians );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the z axis
- */
-static inline void vmathSoaT3MakeRotationZ( VmathSoaTransform3 *result, vec_float4 radians );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the x, y, and z axes
- */
-static inline void vmathSoaT3MakeRotationZYX( VmathSoaTransform3 *result, const VmathSoaVector3 *radiansXYZ );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector
- */
-static inline void vmathSoaT3MakeRotationAxis( VmathSoaTransform3 *result, vec_float4 radians, const VmathSoaVector3 *unitVec );
-
-/*
- * Construct a rotation matrix from a unit-length quaternion
- */
-static inline void vmathSoaT3MakeRotationQ( VmathSoaTransform3 *result, const VmathSoaQuat *unitQuat );
-
-/*
- * Construct a 3x4 transformation matrix to perform scaling
- */
-static inline void vmathSoaT3MakeScale( VmathSoaTransform3 *result, const VmathSoaVector3 *scaleVec );
-
-/*
- * Construct a 3x4 transformation matrix to perform translation
- */
-static inline void vmathSoaT3MakeTranslation( VmathSoaTransform3 *result, const VmathSoaVector3 *translateVec );
-
-/*
- * Append (post-multiply) a scale transformation to a 3x4 transformation matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline void vmathSoaT3AppendScale( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm, const VmathSoaVector3 *scaleVec );
-
-/*
- * Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline void vmathSoaT3PrependScale( VmathSoaTransform3 *result, const VmathSoaVector3 *scaleVec, const VmathSoaTransform3 *tfrm );
-
-/*
- * Multiply two 3x4 transformation matrices per element
- */
-static inline void vmathSoaT3MulPerElem( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm0, const VmathSoaTransform3 *tfrm1 );
-
-/*
- * Compute the absolute value of a 3x4 transformation matrix per element
- */
-static inline void vmathSoaT3AbsPerElem( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm );
-
-/*
- * Inverse of a 3x4 transformation matrix
- * NOTE: 
- * Result is unpredictable when the determinant of the left 3x3 submatrix is equal to or near 0.
- */
-static inline void vmathSoaT3Inverse( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm );
-
-/*
- * Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix
- * NOTE: 
- * This can be used to achieve better performance than a general inverse when the specified 3x4 transformation matrix meets the given restrictions.
- */
-static inline void vmathSoaT3OrthoInverse( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm );
-
-/*
- * Conditionally select between two 3x4 transformation matrices
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline void vmathSoaT3Select( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm0, const VmathSoaTransform3 *tfrm1, vec_uint4 select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3x4 transformation matrix
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaT3Print( const VmathSoaTransform3 *tfrm );
-
-/*
- * Print a 3x4 transformation matrix and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaT3Prints( const VmathSoaTransform3 *tfrm, const char *name );
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#include "vec_soa.h"
-#include "quat_soa.h"
-#include "mat_soa.h"
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_SOA_C_PPU_H
+#define _VECTORMATH_SOA_C_PPU_H
+
+#include <math.h>
+#include <altivec.h>
+#include "vectormath_aos.h"
+
+#ifdef _VECTORMATH_DEBUG
+#include <stdio.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+#ifndef _VECTORMATH_SOA_C_TYPES_H
+#define _VECTORMATH_SOA_C_TYPES_H
+
+/* A set of four 3-D vectors in structure-of-arrays format
+ */
+typedef struct _VmathSoaVector3
+{
+    vec_float4 x;
+    vec_float4 y;
+    vec_float4 z;
+} VmathSoaVector3;
+
+/* A set of four 4-D vectors in structure-of-arrays format
+ */
+typedef struct _VmathSoaVector4
+{
+    vec_float4 x;
+    vec_float4 y;
+    vec_float4 z;
+    vec_float4 w;
+} VmathSoaVector4;
+
+/* A set of four 3-D points in structure-of-arrays format
+ */
+typedef struct _VmathSoaPoint3
+{
+    vec_float4 x;
+    vec_float4 y;
+    vec_float4 z;
+} VmathSoaPoint3;
+
+/* A set of four quaternions in structure-of-arrays format
+ */
+typedef struct _VmathSoaQuat
+{
+    vec_float4 x;
+    vec_float4 y;
+    vec_float4 z;
+    vec_float4 w;
+} VmathSoaQuat;
+
+/* A set of four 3x3 matrices in structure-of-arrays format
+ */
+typedef struct _VmathSoaMatrix3
+{
+    VmathSoaVector3 col0;
+    VmathSoaVector3 col1;
+    VmathSoaVector3 col2;
+} VmathSoaMatrix3;
+
+/* A set of four 4x4 matrices in structure-of-arrays format
+ */
+typedef struct _VmathSoaMatrix4
+{
+    VmathSoaVector4 col0;
+    VmathSoaVector4 col1;
+    VmathSoaVector4 col2;
+    VmathSoaVector4 col3;
+} VmathSoaMatrix4;
+
+/* A set of four 3x4 transformation matrices in structure-of-arrays format
+ */
+typedef struct _VmathSoaTransform3
+{
+    VmathSoaVector3 col0;
+    VmathSoaVector3 col1;
+    VmathSoaVector3 col2;
+    VmathSoaVector3 col3;
+} VmathSoaTransform3;
+
+#endif
+
+/*
+ * Copy a 3-D vector
+ */
+static inline void vmathSoaV3Copy( VmathSoaVector3 *result, const VmathSoaVector3 *vec );
+
+/*
+ * Construct a 3-D vector from x, y, and z elements
+ */
+static inline void vmathSoaV3MakeFromElems( VmathSoaVector3 *result, vec_float4 x, vec_float4 y, vec_float4 z );
+
+/*
+ * Copy elements from a 3-D point into a 3-D vector
+ */
+static inline void vmathSoaV3MakeFromP3( VmathSoaVector3 *result, const VmathSoaPoint3 *pnt );
+
+/*
+ * Set all elements of a 3-D vector to the same scalar value
+ */
+static inline void vmathSoaV3MakeFromScalar( VmathSoaVector3 *result, vec_float4 scalar );
+
+/*
+ * Replicate an AoS 3-D vector
+ */
+static inline void vmathSoaV3MakeFromAos( VmathSoaVector3 *result, const VmathVector3 *vec );
+
+/*
+ * Insert four AoS 3-D vectors
+ */
+static inline void vmathSoaV3MakeFrom4Aos( VmathSoaVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1, const VmathVector3 *vec2, const VmathVector3 *vec3 );
+
+/*
+ * Extract four AoS 3-D vectors
+ */
+static inline void vmathSoaV3Get4Aos( const VmathSoaVector3 *vec, VmathVector3 *result0, VmathVector3 *result1, VmathVector3 *result2, VmathVector3 *result3 );
+
+/*
+ * Set the x element of a 3-D vector
+ */
+static inline void vmathSoaV3SetX( VmathSoaVector3 *result, vec_float4 x );
+
+/*
+ * Set the y element of a 3-D vector
+ */
+static inline void vmathSoaV3SetY( VmathSoaVector3 *result, vec_float4 y );
+
+/*
+ * Set the z element of a 3-D vector
+ */
+static inline void vmathSoaV3SetZ( VmathSoaVector3 *result, vec_float4 z );
+
+/*
+ * Get the x element of a 3-D vector
+ */
+static inline vec_float4 vmathSoaV3GetX( const VmathSoaVector3 *vec );
+
+/*
+ * Get the y element of a 3-D vector
+ */
+static inline vec_float4 vmathSoaV3GetY( const VmathSoaVector3 *vec );
+
+/*
+ * Get the z element of a 3-D vector
+ */
+static inline vec_float4 vmathSoaV3GetZ( const VmathSoaVector3 *vec );
+
+/*
+ * Set an x, y, or z element of a 3-D vector by index
+ */
+static inline void vmathSoaV3SetElem( VmathSoaVector3 *result, int idx, vec_float4 value );
+
+/*
+ * Get an x, y, or z element of a 3-D vector by index
+ */
+static inline vec_float4 vmathSoaV3GetElem( const VmathSoaVector3 *vec, int idx );
+
+/*
+ * Add two 3-D vectors
+ */
+static inline void vmathSoaV3Add( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
+
+/*
+ * Subtract a 3-D vector from another 3-D vector
+ */
+static inline void vmathSoaV3Sub( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
+
+/*
+ * Add a 3-D vector to a 3-D point
+ */
+static inline void vmathSoaV3AddP3( VmathSoaPoint3 *result, const VmathSoaVector3 *vec, const VmathSoaPoint3 *pnt );
+
+/*
+ * Multiply a 3-D vector by a scalar
+ */
+static inline void vmathSoaV3ScalarMul( VmathSoaVector3 *result, const VmathSoaVector3 *vec, vec_float4 scalar );
+
+/*
+ * Divide a 3-D vector by a scalar
+ */
+static inline void vmathSoaV3ScalarDiv( VmathSoaVector3 *result, const VmathSoaVector3 *vec, vec_float4 scalar );
+
+/*
+ * Negate all elements of a 3-D vector
+ */
+static inline void vmathSoaV3Neg( VmathSoaVector3 *result, const VmathSoaVector3 *vec );
+
+/*
+ * Construct x axis
+ */
+static inline void vmathSoaV3MakeXAxis( VmathSoaVector3 *result );
+
+/*
+ * Construct y axis
+ */
+static inline void vmathSoaV3MakeYAxis( VmathSoaVector3 *result );
+
+/*
+ * Construct z axis
+ */
+static inline void vmathSoaV3MakeZAxis( VmathSoaVector3 *result );
+
+/*
+ * Multiply two 3-D vectors per element
+ */
+static inline void vmathSoaV3MulPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
+
+/*
+ * Divide two 3-D vectors per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function divf4.
+ */
+static inline void vmathSoaV3DivPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
+
+/*
+ * Compute the reciprocal of a 3-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function recipf4.
+ */
+static inline void vmathSoaV3RecipPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec );
+
+/*
+ * Compute the square root of a 3-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function sqrtf4.
+ */
+static inline void vmathSoaV3SqrtPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec );
+
+/*
+ * Compute the reciprocal square root of a 3-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function rsqrtf4.
+ */
+static inline void vmathSoaV3RsqrtPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec );
+
+/*
+ * Compute the absolute value of a 3-D vector per element
+ */
+static inline void vmathSoaV3AbsPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec );
+
+/*
+ * Copy sign from one 3-D vector to another, per element
+ */
+static inline void vmathSoaV3CopySignPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
+
+/*
+ * Maximum of two 3-D vectors per element
+ */
+static inline void vmathSoaV3MaxPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
+
+/*
+ * Minimum of two 3-D vectors per element
+ */
+static inline void vmathSoaV3MinPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
+
+/*
+ * Maximum element of a 3-D vector
+ */
+static inline vec_float4 vmathSoaV3MaxElem( const VmathSoaVector3 *vec );
+
+/*
+ * Minimum element of a 3-D vector
+ */
+static inline vec_float4 vmathSoaV3MinElem( const VmathSoaVector3 *vec );
+
+/*
+ * Compute the sum of all elements of a 3-D vector
+ */
+static inline vec_float4 vmathSoaV3Sum( const VmathSoaVector3 *vec );
+
+/*
+ * Compute the dot product of two 3-D vectors
+ */
+static inline vec_float4 vmathSoaV3Dot( const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
+
+/*
+ * Compute the square of the length of a 3-D vector
+ */
+static inline vec_float4 vmathSoaV3LengthSqr( const VmathSoaVector3 *vec );
+
+/*
+ * Compute the length of a 3-D vector
+ */
+static inline vec_float4 vmathSoaV3Length( const VmathSoaVector3 *vec );
+
+/*
+ * Normalize a 3-D vector
+ * NOTE: 
+ * The result is unpredictable when all elements of vec are at or near zero.
+ */
+static inline void vmathSoaV3Normalize( VmathSoaVector3 *result, const VmathSoaVector3 *vec );
+
+/*
+ * Compute cross product of two 3-D vectors
+ */
+static inline void vmathSoaV3Cross( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
+
+/*
+ * Outer product of two 3-D vectors
+ */
+static inline void vmathSoaV3Outer( VmathSoaMatrix3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
+
+/*
+ * Pre-multiply a row vector by a 3x3 matrix
+ */
+static inline void vmathSoaV3RowMul( VmathSoaVector3 *result, const VmathSoaVector3 *vec, const VmathSoaMatrix3 *mat );
+
+/*
+ * Cross-product matrix of a 3-D vector
+ */
+static inline void vmathSoaV3CrossMatrix( VmathSoaMatrix3 *result, const VmathSoaVector3 *vec );
+
+/*
+ * Create cross-product matrix and multiply
+ * NOTE: 
+ * Faster than separately creating a cross-product matrix and multiplying.
+ */
+static inline void vmathSoaV3CrossMatrixMul( VmathSoaMatrix3 *result, const VmathSoaVector3 *vec, const VmathSoaMatrix3 *mat );
+
+/*
+ * Linear interpolation between two 3-D vectors
+ * NOTE: 
+ * Does not clamp t between 0 and 1.
+ */
+static inline void vmathSoaV3Lerp( VmathSoaVector3 *result, vec_float4 t, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
+
+/*
+ * Spherical linear interpolation between two 3-D vectors
+ * NOTE: 
+ * The result is unpredictable if the vectors point in opposite directions.
+ * Does not clamp t between 0 and 1.
+ */
+static inline void vmathSoaV3Slerp( VmathSoaVector3 *result, vec_float4 t, const VmathSoaVector3 *unitVec0, const VmathSoaVector3 *unitVec1 );
+
+/*
+ * Conditionally select between two 3-D vectors
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline void vmathSoaV3Select( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1, vec_uint4 select1 );
+
+/*
+ * Load four three-float 3-D vectors, stored in three quadwords
+ */
+static inline void vmathSoaV3LoadXYZArray( VmathSoaVector3 *vec, const vec_float4 *threeQuads );
+
+/*
+ * Store four slots of an SoA 3-D vector in three quadwords
+ */
+static inline void vmathSoaV3StoreXYZArray( const VmathSoaVector3 *vec, vec_float4 *threeQuads );
+
+/*
+ * Store eight slots of two SoA 3-D vectors as half-floats
+ */
+static inline void vmathSoaV3StoreHalfFloats( const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1, vec_ushort8 *threeQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3-D vector
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaV3Print( const VmathSoaVector3 *vec );
+
+/*
+ * Print a 3-D vector and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaV3Prints( const VmathSoaVector3 *vec, const char *name );
+
+#endif
+
+/*
+ * Copy a 4-D vector
+ */
+static inline void vmathSoaV4Copy( VmathSoaVector4 *result, const VmathSoaVector4 *vec );
+
+/*
+ * Construct a 4-D vector from x, y, z, and w elements
+ */
+static inline void vmathSoaV4MakeFromElems( VmathSoaVector4 *result, vec_float4 x, vec_float4 y, vec_float4 z, vec_float4 w );
+
+/*
+ * Construct a 4-D vector from a 3-D vector and a scalar
+ */
+static inline void vmathSoaV4MakeFromV3Scalar( VmathSoaVector4 *result, const VmathSoaVector3 *xyz, vec_float4 w );
+
+/*
+ * Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0
+ */
+static inline void vmathSoaV4MakeFromV3( VmathSoaVector4 *result, const VmathSoaVector3 *vec );
+
+/*
+ * Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1
+ */
+static inline void vmathSoaV4MakeFromP3( VmathSoaVector4 *result, const VmathSoaPoint3 *pnt );
+
+/*
+ * Copy elements from a quaternion into a 4-D vector
+ */
+static inline void vmathSoaV4MakeFromQ( VmathSoaVector4 *result, const VmathSoaQuat *quat );
+
+/*
+ * Set all elements of a 4-D vector to the same scalar value
+ */
+static inline void vmathSoaV4MakeFromScalar( VmathSoaVector4 *result, vec_float4 scalar );
+
+/*
+ * Replicate an AoS 4-D vector
+ */
+static inline void vmathSoaV4MakeFromAos( VmathSoaVector4 *result, const VmathVector4 *vec );
+
+/*
+ * Insert four AoS 4-D vectors
+ */
+static inline void vmathSoaV4MakeFrom4Aos( VmathSoaVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1, const VmathVector4 *vec2, const VmathVector4 *vec3 );
+
+/*
+ * Extract four AoS 4-D vectors
+ */
+static inline void vmathSoaV4Get4Aos( const VmathSoaVector4 *vec, VmathVector4 *result0, VmathVector4 *result1, VmathVector4 *result2, VmathVector4 *result3 );
+
+/*
+ * Set the x, y, and z elements of a 4-D vector
+ * NOTE: 
+ * This function does not change the w element.
+ */
+static inline void vmathSoaV4SetXYZ( VmathSoaVector4 *result, const VmathSoaVector3 *vec );
+
+/*
+ * Get the x, y, and z elements of a 4-D vector
+ */
+static inline void vmathSoaV4GetXYZ( VmathSoaVector3 *result, const VmathSoaVector4 *vec );
+
+/*
+ * Set the x element of a 4-D vector
+ */
+static inline void vmathSoaV4SetX( VmathSoaVector4 *result, vec_float4 x );
+
+/*
+ * Set the y element of a 4-D vector
+ */
+static inline void vmathSoaV4SetY( VmathSoaVector4 *result, vec_float4 y );
+
+/*
+ * Set the z element of a 4-D vector
+ */
+static inline void vmathSoaV4SetZ( VmathSoaVector4 *result, vec_float4 z );
+
+/*
+ * Set the w element of a 4-D vector
+ */
+static inline void vmathSoaV4SetW( VmathSoaVector4 *result, vec_float4 w );
+
+/*
+ * Get the x element of a 4-D vector
+ */
+static inline vec_float4 vmathSoaV4GetX( const VmathSoaVector4 *vec );
+
+/*
+ * Get the y element of a 4-D vector
+ */
+static inline vec_float4 vmathSoaV4GetY( const VmathSoaVector4 *vec );
+
+/*
+ * Get the z element of a 4-D vector
+ */
+static inline vec_float4 vmathSoaV4GetZ( const VmathSoaVector4 *vec );
+
+/*
+ * Get the w element of a 4-D vector
+ */
+static inline vec_float4 vmathSoaV4GetW( const VmathSoaVector4 *vec );
+
+/*
+ * Set an x, y, z, or w element of a 4-D vector by index
+ */
+static inline void vmathSoaV4SetElem( VmathSoaVector4 *result, int idx, vec_float4 value );
+
+/*
+ * Get an x, y, z, or w element of a 4-D vector by index
+ */
+static inline vec_float4 vmathSoaV4GetElem( const VmathSoaVector4 *vec, int idx );
+
+/*
+ * Add two 4-D vectors
+ */
+static inline void vmathSoaV4Add( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
+
+/*
+ * Subtract a 4-D vector from another 4-D vector
+ */
+static inline void vmathSoaV4Sub( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
+
+/*
+ * Multiply a 4-D vector by a scalar
+ */
+static inline void vmathSoaV4ScalarMul( VmathSoaVector4 *result, const VmathSoaVector4 *vec, vec_float4 scalar );
+
+/*
+ * Divide a 4-D vector by a scalar
+ */
+static inline void vmathSoaV4ScalarDiv( VmathSoaVector4 *result, const VmathSoaVector4 *vec, vec_float4 scalar );
+
+/*
+ * Negate all elements of a 4-D vector
+ */
+static inline void vmathSoaV4Neg( VmathSoaVector4 *result, const VmathSoaVector4 *vec );
+
+/*
+ * Construct x axis
+ */
+static inline void vmathSoaV4MakeXAxis( VmathSoaVector4 *result );
+
+/*
+ * Construct y axis
+ */
+static inline void vmathSoaV4MakeYAxis( VmathSoaVector4 *result );
+
+/*
+ * Construct z axis
+ */
+static inline void vmathSoaV4MakeZAxis( VmathSoaVector4 *result );
+
+/*
+ * Construct w axis
+ */
+static inline void vmathSoaV4MakeWAxis( VmathSoaVector4 *result );
+
+/*
+ * Multiply two 4-D vectors per element
+ */
+static inline void vmathSoaV4MulPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
+
+/*
+ * Divide two 4-D vectors per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function divf4.
+ */
+static inline void vmathSoaV4DivPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
+
+/*
+ * Compute the reciprocal of a 4-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function recipf4.
+ */
+static inline void vmathSoaV4RecipPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec );
+
+/*
+ * Compute the square root of a 4-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function sqrtf4.
+ */
+static inline void vmathSoaV4SqrtPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec );
+
+/*
+ * Compute the reciprocal square root of a 4-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function rsqrtf4.
+ */
+static inline void vmathSoaV4RsqrtPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec );
+
+/*
+ * Compute the absolute value of a 4-D vector per element
+ */
+static inline void vmathSoaV4AbsPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec );
+
+/*
+ * Copy sign from one 4-D vector to another, per element
+ */
+static inline void vmathSoaV4CopySignPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
+
+/*
+ * Maximum of two 4-D vectors per element
+ */
+static inline void vmathSoaV4MaxPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
+
+/*
+ * Minimum of two 4-D vectors per element
+ */
+static inline void vmathSoaV4MinPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
+
+/*
+ * Maximum element of a 4-D vector
+ */
+static inline vec_float4 vmathSoaV4MaxElem( const VmathSoaVector4 *vec );
+
+/*
+ * Minimum element of a 4-D vector
+ */
+static inline vec_float4 vmathSoaV4MinElem( const VmathSoaVector4 *vec );
+
+/*
+ * Compute the sum of all elements of a 4-D vector
+ */
+static inline vec_float4 vmathSoaV4Sum( const VmathSoaVector4 *vec );
+
+/*
+ * Compute the dot product of two 4-D vectors
+ */
+static inline vec_float4 vmathSoaV4Dot( const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
+
+/*
+ * Compute the square of the length of a 4-D vector
+ */
+static inline vec_float4 vmathSoaV4LengthSqr( const VmathSoaVector4 *vec );
+
+/*
+ * Compute the length of a 4-D vector
+ */
+static inline vec_float4 vmathSoaV4Length( const VmathSoaVector4 *vec );
+
+/*
+ * Normalize a 4-D vector
+ * NOTE: 
+ * The result is unpredictable when all elements of vec are at or near zero.
+ */
+static inline void vmathSoaV4Normalize( VmathSoaVector4 *result, const VmathSoaVector4 *vec );
+
+/*
+ * Outer product of two 4-D vectors
+ */
+static inline void vmathSoaV4Outer( VmathSoaMatrix4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
+
+/*
+ * Linear interpolation between two 4-D vectors
+ * NOTE: 
+ * Does not clamp t between 0 and 1.
+ */
+static inline void vmathSoaV4Lerp( VmathSoaVector4 *result, vec_float4 t, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
+
+/*
+ * Spherical linear interpolation between two 4-D vectors
+ * NOTE: 
+ * The result is unpredictable if the vectors point in opposite directions.
+ * Does not clamp t between 0 and 1.
+ */
+static inline void vmathSoaV4Slerp( VmathSoaVector4 *result, vec_float4 t, const VmathSoaVector4 *unitVec0, const VmathSoaVector4 *unitVec1 );
+
+/*
+ * Conditionally select between two 4-D vectors
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline void vmathSoaV4Select( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1, vec_uint4 select1 );
+
+/*
+ * Store four slots of an SoA 4-D vector as half-floats
+ */
+static inline void vmathSoaV4StoreHalfFloats( const VmathSoaVector4 *vec, vec_ushort8 *twoQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 4-D vector
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaV4Print( const VmathSoaVector4 *vec );
+
+/*
+ * Print a 4-D vector and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaV4Prints( const VmathSoaVector4 *vec, const char *name );
+
+#endif
+
+/*
+ * Copy a 3-D point
+ */
+static inline void vmathSoaP3Copy( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt );
+
+/*
+ * Construct a 3-D point from x, y, and z elements
+ */
+static inline void vmathSoaP3MakeFromElems( VmathSoaPoint3 *result, vec_float4 x, vec_float4 y, vec_float4 z );
+
+/*
+ * Copy elements from a 3-D vector into a 3-D point
+ */
+static inline void vmathSoaP3MakeFromV3( VmathSoaPoint3 *result, const VmathSoaVector3 *vec );
+
+/*
+ * Set all elements of a 3-D point to the same scalar value
+ */
+static inline void vmathSoaP3MakeFromScalar( VmathSoaPoint3 *result, vec_float4 scalar );
+
+/*
+ * Replicate an AoS 3-D point
+ */
+static inline void vmathSoaP3MakeFromAos( VmathSoaPoint3 *result, const VmathPoint3 *pnt );
+
+/*
+ * Insert four AoS 3-D points
+ */
+static inline void vmathSoaP3MakeFrom4Aos( VmathSoaPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, const VmathPoint3 *pnt2, const VmathPoint3 *pnt3 );
+
+/*
+ * Extract four AoS 3-D points
+ */
+static inline void vmathSoaP3Get4Aos( const VmathSoaPoint3 *pnt, VmathPoint3 *result0, VmathPoint3 *result1, VmathPoint3 *result2, VmathPoint3 *result3 );
+
+/*
+ * Set the x element of a 3-D point
+ */
+static inline void vmathSoaP3SetX( VmathSoaPoint3 *result, vec_float4 x );
+
+/*
+ * Set the y element of a 3-D point
+ */
+static inline void vmathSoaP3SetY( VmathSoaPoint3 *result, vec_float4 y );
+
+/*
+ * Set the z element of a 3-D point
+ */
+static inline void vmathSoaP3SetZ( VmathSoaPoint3 *result, vec_float4 z );
+
+/*
+ * Get the x element of a 3-D point
+ */
+static inline vec_float4 vmathSoaP3GetX( const VmathSoaPoint3 *pnt );
+
+/*
+ * Get the y element of a 3-D point
+ */
+static inline vec_float4 vmathSoaP3GetY( const VmathSoaPoint3 *pnt );
+
+/*
+ * Get the z element of a 3-D point
+ */
+static inline vec_float4 vmathSoaP3GetZ( const VmathSoaPoint3 *pnt );
+
+/*
+ * Set an x, y, or z element of a 3-D point by index
+ */
+static inline void vmathSoaP3SetElem( VmathSoaPoint3 *result, int idx, vec_float4 value );
+
+/*
+ * Get an x, y, or z element of a 3-D point by index
+ */
+static inline vec_float4 vmathSoaP3GetElem( const VmathSoaPoint3 *pnt, int idx );
+
+/*
+ * Subtract a 3-D point from another 3-D point
+ */
+static inline void vmathSoaP3Sub( VmathSoaVector3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 );
+
+/*
+ * Add a 3-D vector to a 3-D point
+ */
+static inline void vmathSoaP3AddV3( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, const VmathSoaVector3 *vec );
+
+/*
+ * Subtract a 3-D vector from a 3-D point
+ */
+static inline void vmathSoaP3SubV3( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, const VmathSoaVector3 *vec );
+
+/*
+ * Multiply two 3-D points per element
+ */
+static inline void vmathSoaP3MulPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 );
+
+/*
+ * Divide two 3-D points per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function divf4.
+ */
+static inline void vmathSoaP3DivPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 );
+
+/*
+ * Compute the reciprocal of a 3-D point per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function recipf4.
+ */
+static inline void vmathSoaP3RecipPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt );
+
+/*
+ * Compute the square root of a 3-D point per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function sqrtf4.
+ */
+static inline void vmathSoaP3SqrtPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt );
+
+/*
+ * Compute the reciprocal square root of a 3-D point per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function rsqrtf4.
+ */
+static inline void vmathSoaP3RsqrtPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt );
+
+/*
+ * Compute the absolute value of a 3-D point per element
+ */
+static inline void vmathSoaP3AbsPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt );
+
+/*
+ * Copy sign from one 3-D point to another, per element
+ */
+static inline void vmathSoaP3CopySignPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 );
+
+/*
+ * Maximum of two 3-D points per element
+ */
+static inline void vmathSoaP3MaxPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 );
+
+/*
+ * Minimum of two 3-D points per element
+ */
+static inline void vmathSoaP3MinPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 );
+
+/*
+ * Maximum element of a 3-D point
+ */
+static inline vec_float4 vmathSoaP3MaxElem( const VmathSoaPoint3 *pnt );
+
+/*
+ * Minimum element of a 3-D point
+ */
+static inline vec_float4 vmathSoaP3MinElem( const VmathSoaPoint3 *pnt );
+
+/*
+ * Compute the sum of all elements of a 3-D point
+ */
+static inline vec_float4 vmathSoaP3Sum( const VmathSoaPoint3 *pnt );
+
+/*
+ * Apply uniform scale to a 3-D point
+ */
+static inline void vmathSoaP3Scale( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, vec_float4 scaleVal );
+
+/*
+ * Apply non-uniform scale to a 3-D point
+ */
+static inline void vmathSoaP3NonUniformScale( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, const VmathSoaVector3 *scaleVec );
+
+/*
+ * Scalar projection of a 3-D point on a unit-length 3-D vector
+ */
+static inline vec_float4 vmathSoaP3Projection( const VmathSoaPoint3 *pnt, const VmathSoaVector3 *unitVec );
+
+/*
+ * Compute the square of the distance of a 3-D point from the coordinate-system origin
+ */
+static inline vec_float4 vmathSoaP3DistSqrFromOrigin( const VmathSoaPoint3 *pnt );
+
+/*
+ * Compute the distance of a 3-D point from the coordinate-system origin
+ */
+static inline vec_float4 vmathSoaP3DistFromOrigin( const VmathSoaPoint3 *pnt );
+
+/*
+ * Compute the square of the distance between two 3-D points
+ */
+static inline vec_float4 vmathSoaP3DistSqr( const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 );
+
+/*
+ * Compute the distance between two 3-D points
+ */
+static inline vec_float4 vmathSoaP3Dist( const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 );
+
+/*
+ * Linear interpolation between two 3-D points
+ * NOTE: 
+ * Does not clamp t between 0 and 1.
+ */
+static inline void vmathSoaP3Lerp( VmathSoaPoint3 *result, vec_float4 t, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 );
+
+/*
+ * Conditionally select between two 3-D points
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline void vmathSoaP3Select( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1, vec_uint4 select1 );
+
+/*
+ * Load four three-float 3-D points, stored in three quadwords
+ */
+static inline void vmathSoaP3LoadXYZArray( VmathSoaPoint3 *pnt, const vec_float4 *threeQuads );
+
+/*
+ * Store four slots of an SoA 3-D point in three quadwords
+ */
+static inline void vmathSoaP3StoreXYZArray( const VmathSoaPoint3 *pnt, vec_float4 *threeQuads );
+
+/*
+ * Store eight slots of two SoA 3-D points as half-floats
+ */
+static inline void vmathSoaP3StoreHalfFloats( const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1, vec_ushort8 *threeQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3-D point
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaP3Print( const VmathSoaPoint3 *pnt );
+
+/*
+ * Print a 3-D point and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaP3Prints( const VmathSoaPoint3 *pnt, const char *name );
+
+#endif
+
+/*
+ * Copy a quaternion
+ */
+static inline void vmathSoaQCopy( VmathSoaQuat *result, const VmathSoaQuat *quat );
+
+/*
+ * Construct a quaternion from x, y, z, and w elements
+ */
+static inline void vmathSoaQMakeFromElems( VmathSoaQuat *result, vec_float4 x, vec_float4 y, vec_float4 z, vec_float4 w );
+
+/*
+ * Construct a quaternion from a 3-D vector and a scalar
+ */
+static inline void vmathSoaQMakeFromV3Scalar( VmathSoaQuat *result, const VmathSoaVector3 *xyz, vec_float4 w );
+
+/*
+ * Copy elements from a 4-D vector into a quaternion
+ */
+static inline void vmathSoaQMakeFromV4( VmathSoaQuat *result, const VmathSoaVector4 *vec );
+
+/*
+ * Convert a 3x3 rotation matrix to a unit-length quaternion
+ */
+static inline void vmathSoaQMakeFromM3( VmathSoaQuat *result, const VmathSoaMatrix3 *rotMat );
+
+/*
+ * Set all elements of a quaternion to the same scalar value
+ */
+static inline void vmathSoaQMakeFromScalar( VmathSoaQuat *result, vec_float4 scalar );
+
+/*
+ * Replicate an AoS quaternion
+ */
+static inline void vmathSoaQMakeFromAos( VmathSoaQuat *result, const VmathQuat *quat );
+
+/*
+ * Insert four AoS quaternions
+ */
+static inline void vmathSoaQMakeFrom4Aos( VmathSoaQuat *result, const VmathQuat *quat0, const VmathQuat *quat1, const VmathQuat *quat2, const VmathQuat *quat3 );
+
+/*
+ * Extract four AoS quaternions
+ */
+static inline void vmathSoaQGet4Aos( const VmathSoaQuat *quat, VmathQuat *result0, VmathQuat *result1, VmathQuat *result2, VmathQuat *result3 );
+
+/*
+ * Set the x, y, and z elements of a quaternion
+ * NOTE: 
+ * This function does not change the w element.
+ */
+static inline void vmathSoaQSetXYZ( VmathSoaQuat *result, const VmathSoaVector3 *vec );
+
+/*
+ * Get the x, y, and z elements of a quaternion
+ */
+static inline void vmathSoaQGetXYZ( VmathSoaVector3 *result, const VmathSoaQuat *quat );
+
+/*
+ * Set the x element of a quaternion
+ */
+static inline void vmathSoaQSetX( VmathSoaQuat *result, vec_float4 x );
+
+/*
+ * Set the y element of a quaternion
+ */
+static inline void vmathSoaQSetY( VmathSoaQuat *result, vec_float4 y );
+
+/*
+ * Set the z element of a quaternion
+ */
+static inline void vmathSoaQSetZ( VmathSoaQuat *result, vec_float4 z );
+
+/*
+ * Set the w element of a quaternion
+ */
+static inline void vmathSoaQSetW( VmathSoaQuat *result, vec_float4 w );
+
+/*
+ * Get the x element of a quaternion
+ */
+static inline vec_float4 vmathSoaQGetX( const VmathSoaQuat *quat );
+
+/*
+ * Get the y element of a quaternion
+ */
+static inline vec_float4 vmathSoaQGetY( const VmathSoaQuat *quat );
+
+/*
+ * Get the z element of a quaternion
+ */
+static inline vec_float4 vmathSoaQGetZ( const VmathSoaQuat *quat );
+
+/*
+ * Get the w element of a quaternion
+ */
+static inline vec_float4 vmathSoaQGetW( const VmathSoaQuat *quat );
+
+/*
+ * Set an x, y, z, or w element of a quaternion by index
+ */
+static inline void vmathSoaQSetElem( VmathSoaQuat *result, int idx, vec_float4 value );
+
+/*
+ * Get an x, y, z, or w element of a quaternion by index
+ */
+static inline vec_float4 vmathSoaQGetElem( const VmathSoaQuat *quat, int idx );
+
+/*
+ * Add two quaternions
+ */
+static inline void vmathSoaQAdd( VmathSoaQuat *result, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 );
+
+/*
+ * Subtract a quaternion from another quaternion
+ */
+static inline void vmathSoaQSub( VmathSoaQuat *result, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 );
+
+/*
+ * Multiply two quaternions
+ */
+static inline void vmathSoaQMul( VmathSoaQuat *result, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 );
+
+/*
+ * Multiply a quaternion by a scalar
+ */
+static inline void vmathSoaQScalarMul( VmathSoaQuat *result, const VmathSoaQuat *quat, vec_float4 scalar );
+
+/*
+ * Divide a quaternion by a scalar
+ */
+static inline void vmathSoaQScalarDiv( VmathSoaQuat *result, const VmathSoaQuat *quat, vec_float4 scalar );
+
+/*
+ * Negate all elements of a quaternion
+ */
+static inline void vmathSoaQNeg( VmathSoaQuat *result, const VmathSoaQuat *quat );
+
+/*
+ * Construct an identity quaternion
+ */
+static inline void vmathSoaQMakeIdentity( VmathSoaQuat *result );
+
+/*
+ * Construct a quaternion to rotate between two unit-length 3-D vectors
+ * NOTE: 
+ * The result is unpredictable if unitVec0 and unitVec1 point in opposite directions.
+ */
+static inline void vmathSoaQMakeRotationArc( VmathSoaQuat *result, const VmathSoaVector3 *unitVec0, const VmathSoaVector3 *unitVec1 );
+
+/*
+ * Construct a quaternion to rotate around a unit-length 3-D vector
+ */
+static inline void vmathSoaQMakeRotationAxis( VmathSoaQuat *result, vec_float4 radians, const VmathSoaVector3 *unitVec );
+
+/*
+ * Construct a quaternion to rotate around the x axis
+ */
+static inline void vmathSoaQMakeRotationX( VmathSoaQuat *result, vec_float4 radians );
+
+/*
+ * Construct a quaternion to rotate around the y axis
+ */
+static inline void vmathSoaQMakeRotationY( VmathSoaQuat *result, vec_float4 radians );
+
+/*
+ * Construct a quaternion to rotate around the z axis
+ */
+static inline void vmathSoaQMakeRotationZ( VmathSoaQuat *result, vec_float4 radians );
+
+/*
+ * Compute the conjugate of a quaternion
+ */
+static inline void vmathSoaQConj( VmathSoaQuat *result, const VmathSoaQuat *quat );
+
+/*
+ * Use a unit-length quaternion to rotate a 3-D vector
+ */
+static inline void vmathSoaQRotate( VmathSoaVector3 *result, const VmathSoaQuat *unitQuat, const VmathSoaVector3 *vec );
+
+/*
+ * Compute the dot product of two quaternions
+ */
+static inline vec_float4 vmathSoaQDot( const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 );
+
+/*
+ * Compute the norm of a quaternion (NOTE(review): presumably the squared length, since vmathSoaQLength is separate -- confirm)
+ */
+static inline vec_float4 vmathSoaQNorm( const VmathSoaQuat *quat );
+
+/*
+ * Compute the length of a quaternion
+ */
+static inline vec_float4 vmathSoaQLength( const VmathSoaQuat *quat );
+
+/*
+ * Normalize a quaternion
+ * NOTE: 
+ * The result is unpredictable when all elements of quat are at or near zero.
+ */
+static inline void vmathSoaQNormalize( VmathSoaQuat *result, const VmathSoaQuat *quat );
+
+/*
+ * Linear interpolation between two quaternions
+ * NOTE: 
+ * Does not clamp t between 0 and 1.
+ */
+static inline void vmathSoaQLerp( VmathSoaQuat *result, vec_float4 t, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 );
+
+/*
+ * Spherical linear interpolation between two quaternions
+ * NOTE: 
+ * Interpolates along the shortest path between orientations.
+ * Does not clamp t between 0 and 1.
+ */
+static inline void vmathSoaQSlerp( VmathSoaQuat *result, vec_float4 t, const VmathSoaQuat *unitQuat0, const VmathSoaQuat *unitQuat1 );
+
+/*
+ * Spherical quadrangle interpolation between unit-length quaternions
+ */
+static inline void vmathSoaQSquad( VmathSoaQuat *result, vec_float4 t, const VmathSoaQuat *unitQuat0, const VmathSoaQuat *unitQuat1, const VmathSoaQuat *unitQuat2, const VmathSoaQuat *unitQuat3 );
+
+/*
+ * Conditionally select between two quaternions
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline void vmathSoaQSelect( VmathSoaQuat *result, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1, vec_uint4 select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a quaternion
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaQPrint( const VmathSoaQuat *quat );
+
+/*
+ * Print a quaternion and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaQPrints( const VmathSoaQuat *quat, const char *name );
+
+#endif
+
+/*
+ * Copy a 3x3 matrix
+ */
+static inline void vmathSoaM3Copy( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat );
+
+/*
+ * Construct a 3x3 matrix containing the specified columns
+ */
+static inline void vmathSoaM3MakeFromCols( VmathSoaMatrix3 *result, const VmathSoaVector3 *col0, const VmathSoaVector3 *col1, const VmathSoaVector3 *col2 );
+
+/*
+ * Construct a 3x3 rotation matrix from a unit-length quaternion
+ */
+static inline void vmathSoaM3MakeFromQ( VmathSoaMatrix3 *result, const VmathSoaQuat *unitQuat );
+
+/*
+ * Set all elements of a 3x3 matrix to the same scalar value
+ */
+static inline void vmathSoaM3MakeFromScalar( VmathSoaMatrix3 *result, vec_float4 scalar );
+
+/*
+ * Replicate an AoS 3x3 matrix
+ */
+static inline void vmathSoaM3MakeFromAos( VmathSoaMatrix3 *result, const VmathMatrix3 *mat );
+
+/*
+ * Insert four AoS 3x3 matrices
+ */
+static inline void vmathSoaM3MakeFrom4Aos( VmathSoaMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1, const VmathMatrix3 *mat2, const VmathMatrix3 *mat3 );
+
+/*
+ * Extract four AoS 3x3 matrices
+ */
+static inline void vmathSoaM3Get4Aos( const VmathSoaMatrix3 *mat, VmathMatrix3 *result0, VmathMatrix3 *result1, VmathMatrix3 *result2, VmathMatrix3 *result3 );
+
+/*
+ * Set column 0 of a 3x3 matrix
+ */
+static inline void vmathSoaM3SetCol0( VmathSoaMatrix3 *result, const VmathSoaVector3 *col0 );
+
+/*
+ * Set column 1 of a 3x3 matrix
+ */
+static inline void vmathSoaM3SetCol1( VmathSoaMatrix3 *result, const VmathSoaVector3 *col1 );
+
+/*
+ * Set column 2 of a 3x3 matrix
+ */
+static inline void vmathSoaM3SetCol2( VmathSoaMatrix3 *result, const VmathSoaVector3 *col2 );
+
+/*
+ * Get column 0 of a 3x3 matrix
+ */
+static inline void vmathSoaM3GetCol0( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat );
+
+/*
+ * Get column 1 of a 3x3 matrix
+ */
+static inline void vmathSoaM3GetCol1( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat );
+
+/*
+ * Get column 2 of a 3x3 matrix
+ */
+static inline void vmathSoaM3GetCol2( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat );
+
+/*
+ * Set the column of a 3x3 matrix referred to by the specified index
+ */
+static inline void vmathSoaM3SetCol( VmathSoaMatrix3 *result, int col, const VmathSoaVector3 *vec );
+
+/*
+ * Set the row of a 3x3 matrix referred to by the specified index
+ */
+static inline void vmathSoaM3SetRow( VmathSoaMatrix3 *result, int row, const VmathSoaVector3 *vec );
+
+/*
+ * Get the column of a 3x3 matrix referred to by the specified index
+ */
+static inline void vmathSoaM3GetCol( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat, int col );
+
+/*
+ * Get the row of a 3x3 matrix referred to by the specified index
+ */
+static inline void vmathSoaM3GetRow( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat, int row );
+
+/*
+ * Set the element of a 3x3 matrix referred to by column and row indices
+ */
+static inline void vmathSoaM3SetElem( VmathSoaMatrix3 *result, int col, int row, vec_float4 val );
+
+/*
+ * Get the element of a 3x3 matrix referred to by column and row indices
+ */
+static inline vec_float4 vmathSoaM3GetElem( const VmathSoaMatrix3 *mat, int col, int row );
+
+/*
+ * Add two 3x3 matrices
+ */
+static inline void vmathSoaM3Add( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1 );
+
+/*
+ * Subtract a 3x3 matrix from another 3x3 matrix
+ */
+static inline void vmathSoaM3Sub( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1 );
+
+/*
+ * Negate all elements of a 3x3 matrix
+ */
+static inline void vmathSoaM3Neg( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat );
+
+/*
+ * Multiply a 3x3 matrix by a scalar
+ */
+static inline void vmathSoaM3ScalarMul( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat, vec_float4 scalar );
+
+/*
+ * Multiply a 3x3 matrix by a 3-D vector
+ */
+static inline void vmathSoaM3MulV3( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat, const VmathSoaVector3 *vec );
+
+/*
+ * Multiply two 3x3 matrices
+ */
+static inline void vmathSoaM3Mul( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1 );
+
+/*
+ * Construct an identity 3x3 matrix
+ */
+static inline void vmathSoaM3MakeIdentity( VmathSoaMatrix3 *result );
+
+/*
+ * Construct a 3x3 matrix to rotate around the x axis
+ */
+static inline void vmathSoaM3MakeRotationX( VmathSoaMatrix3 *result, vec_float4 radians );
+
+/*
+ * Construct a 3x3 matrix to rotate around the y axis
+ */
+static inline void vmathSoaM3MakeRotationY( VmathSoaMatrix3 *result, vec_float4 radians );
+
+/*
+ * Construct a 3x3 matrix to rotate around the z axis
+ */
+static inline void vmathSoaM3MakeRotationZ( VmathSoaMatrix3 *result, vec_float4 radians );
+
+/*
+ * Construct a 3x3 matrix to rotate around the x, y, and z axes
+ */
+static inline void vmathSoaM3MakeRotationZYX( VmathSoaMatrix3 *result, const VmathSoaVector3 *radiansXYZ );
+
+/*
+ * Construct a 3x3 matrix to rotate around a unit-length 3-D vector
+ */
+static inline void vmathSoaM3MakeRotationAxis( VmathSoaMatrix3 *result, vec_float4 radians, const VmathSoaVector3 *unitVec );
+
+/*
+ * Construct a 3x3 rotation matrix from a unit-length quaternion
+ */
+static inline void vmathSoaM3MakeRotationQ( VmathSoaMatrix3 *result, const VmathSoaQuat *unitQuat );
+
+/*
+ * Construct a 3x3 matrix to perform scaling
+ */
+static inline void vmathSoaM3MakeScale( VmathSoaMatrix3 *result, const VmathSoaVector3 *scaleVec );
+
+/*
+ * Append (post-multiply) a scale transformation to a 3x3 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline void vmathSoaM3AppendScale( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat, const VmathSoaVector3 *scaleVec );
+
+/*
+ * Prepend (pre-multiply) a scale transformation to a 3x3 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline void vmathSoaM3PrependScale( VmathSoaMatrix3 *result, const VmathSoaVector3 *scaleVec, const VmathSoaMatrix3 *mat );
+
+/*
+ * Multiply two 3x3 matrices per element
+ */
+static inline void vmathSoaM3MulPerElem( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1 );
+
+/*
+ * Compute the absolute value of a 3x3 matrix per element
+ */
+static inline void vmathSoaM3AbsPerElem( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat );
+
+/*
+ * Transpose of a 3x3 matrix
+ */
+static inline void vmathSoaM3Transpose( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat );
+
+/*
+ * Compute the inverse of a 3x3 matrix
+ * NOTE: 
+ * Result is unpredictable when the determinant of mat is equal to or near 0.
+ */
+static inline void vmathSoaM3Inverse( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat );
+
+/*
+ * Determinant of a 3x3 matrix
+ */
+static inline vec_float4 vmathSoaM3Determinant( const VmathSoaMatrix3 *mat );
+
+/*
+ * Conditionally select between two 3x3 matrices
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline void vmathSoaM3Select( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1, vec_uint4 select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3x3 matrix
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaM3Print( const VmathSoaMatrix3 *mat );
+
+/*
+ * Print a 3x3 matrix and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaM3Prints( const VmathSoaMatrix3 *mat, const char *name );
+
+#endif
+
+/*
+ * Copy a 4x4 matrix
+ */
+static inline void vmathSoaM4Copy( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat );
+
+/*
+ * Construct a 4x4 matrix containing the specified columns
+ */
+static inline void vmathSoaM4MakeFromCols( VmathSoaMatrix4 *result, const VmathSoaVector4 *col0, const VmathSoaVector4 *col1, const VmathSoaVector4 *col2, const VmathSoaVector4 *col3 );
+
+/*
+ * Construct a 4x4 matrix from a 3x4 transformation matrix
+ */
+static inline void vmathSoaM4MakeFromT3( VmathSoaMatrix4 *result, const VmathSoaTransform3 *mat );
+
+/*
+ * Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector
+ */
+static inline void vmathSoaM4MakeFromM3V3( VmathSoaMatrix4 *result, const VmathSoaMatrix3 *mat, const VmathSoaVector3 *translateVec );
+
+/*
+ * Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector
+ */
+static inline void vmathSoaM4MakeFromQV3( VmathSoaMatrix4 *result, const VmathSoaQuat *unitQuat, const VmathSoaVector3 *translateVec );
+
+/*
+ * Set all elements of a 4x4 matrix to the same scalar value
+ */
+static inline void vmathSoaM4MakeFromScalar( VmathSoaMatrix4 *result, vec_float4 scalar );
+
+/*
+ * Replicate an AoS 4x4 matrix
+ */
+static inline void vmathSoaM4MakeFromAos( VmathSoaMatrix4 *result, const VmathMatrix4 *mat );
+
+/*
+ * Insert four AoS 4x4 matrices
+ */
+static inline void vmathSoaM4MakeFrom4Aos( VmathSoaMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1, const VmathMatrix4 *mat2, const VmathMatrix4 *mat3 );
+
+/*
+ * Extract four AoS 4x4 matrices
+ */
+static inline void vmathSoaM4Get4Aos( const VmathSoaMatrix4 *mat, VmathMatrix4 *result0, VmathMatrix4 *result1, VmathMatrix4 *result2, VmathMatrix4 *result3 );
+
+/*
+ * Set the upper-left 3x3 submatrix of a 4x4 matrix
+ * NOTE: 
+ * This function does not change the bottom row elements.
+ */
+static inline void vmathSoaM4SetUpper3x3( VmathSoaMatrix4 *result, const VmathSoaMatrix3 *mat3 );
+
+/*
+ * Get the upper-left 3x3 submatrix of a 4x4 matrix
+ */
+static inline void vmathSoaM4GetUpper3x3( VmathSoaMatrix3 *result, const VmathSoaMatrix4 *mat );
+
+/*
+ * Set the translation component of a 4x4 matrix
+ * NOTE: 
+ * This function does not change the bottom row elements.
+ */
+static inline void vmathSoaM4SetTranslation( VmathSoaMatrix4 *result, const VmathSoaVector3 *translateVec );
+
+/*
+ * Get the translation component of a 4x4 matrix
+ */
+static inline void vmathSoaM4GetTranslation( VmathSoaVector3 *result, const VmathSoaMatrix4 *mat );
+
+/*
+ * Set column 0 of a 4x4 matrix
+ */
+static inline void vmathSoaM4SetCol0( VmathSoaMatrix4 *result, const VmathSoaVector4 *col0 );
+
+/*
+ * Set column 1 of a 4x4 matrix
+ */
+static inline void vmathSoaM4SetCol1( VmathSoaMatrix4 *result, const VmathSoaVector4 *col1 );
+
+/*
+ * Set column 2 of a 4x4 matrix
+ */
+static inline void vmathSoaM4SetCol2( VmathSoaMatrix4 *result, const VmathSoaVector4 *col2 );
+
+/*
+ * Set column 3 of a 4x4 matrix
+ */
+static inline void vmathSoaM4SetCol3( VmathSoaMatrix4 *result, const VmathSoaVector4 *col3 );
+
+/*
+ * Get column 0 of a 4x4 matrix
+ */
+static inline void vmathSoaM4GetCol0( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat );
+
+/*
+ * Get column 1 of a 4x4 matrix
+ */
+static inline void vmathSoaM4GetCol1( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat );
+
+/*
+ * Get column 2 of a 4x4 matrix
+ */
+static inline void vmathSoaM4GetCol2( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat );
+
+/*
+ * Get column 3 of a 4x4 matrix
+ */
+static inline void vmathSoaM4GetCol3( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat );
+
+/*
+ * Set the column of a 4x4 matrix referred to by the specified index
+ */
+static inline void vmathSoaM4SetCol( VmathSoaMatrix4 *result, int col, const VmathSoaVector4 *vec );
+
+/*
+ * Set the row of a 4x4 matrix referred to by the specified index
+ */
+static inline void vmathSoaM4SetRow( VmathSoaMatrix4 *result, int row, const VmathSoaVector4 *vec );
+
+/*
+ * Get the column of a 4x4 matrix referred to by the specified index
+ */
+static inline void vmathSoaM4GetCol( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, int col );
+
+/*
+ * Get the row of a 4x4 matrix referred to by the specified index
+ */
+static inline void vmathSoaM4GetRow( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, int row );
+
+/*
+ * Set the element of a 4x4 matrix referred to by column and row indices
+ */
+static inline void vmathSoaM4SetElem( VmathSoaMatrix4 *result, int col, int row, vec_float4 val );
+
+/*
+ * Get the element of a 4x4 matrix referred to by column and row indices
+ */
+static inline vec_float4 vmathSoaM4GetElem( const VmathSoaMatrix4 *mat, int col, int row );
+
+/*
+ * Add two 4x4 matrices
+ */
+static inline void vmathSoaM4Add( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1 );
+
+/*
+ * Subtract a 4x4 matrix from another 4x4 matrix
+ */
+static inline void vmathSoaM4Sub( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1 );
+
+/*
+ * Negate all elements of a 4x4 matrix
+ */
+static inline void vmathSoaM4Neg( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat );
+
+/*
+ * Multiply a 4x4 matrix by a scalar
+ */
+static inline void vmathSoaM4ScalarMul( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat, vec_float4 scalar );
+
+/*
+ * Multiply a 4x4 matrix by a 4-D vector
+ */
+static inline void vmathSoaM4MulV4( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, const VmathSoaVector4 *vec );
+
+/*
+ * Multiply a 4x4 matrix by a 3-D vector
+ */
+static inline void vmathSoaM4MulV3( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, const VmathSoaVector3 *vec );
+
+/*
+ * Multiply a 4x4 matrix by a 3-D point
+ */
+static inline void vmathSoaM4MulP3( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, const VmathSoaPoint3 *pnt );
+
+/*
+ * Multiply two 4x4 matrices
+ */
+static inline void vmathSoaM4Mul( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1 );
+
+/*
+ * Multiply a 4x4 matrix by a 3x4 transformation matrix
+ */
+static inline void vmathSoaM4MulT3( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat, const VmathSoaTransform3 *tfrm );
+
+/*
+ * Construct an identity 4x4 matrix
+ */
+static inline void vmathSoaM4MakeIdentity( VmathSoaMatrix4 *result );
+
+/*
+ * Construct a 4x4 matrix to rotate around the x axis
+ */
+static inline void vmathSoaM4MakeRotationX( VmathSoaMatrix4 *result, vec_float4 radians );
+
+/*
+ * Construct a 4x4 matrix to rotate around the y axis
+ */
+static inline void vmathSoaM4MakeRotationY( VmathSoaMatrix4 *result, vec_float4 radians );
+
+/*
+ * Construct a 4x4 matrix to rotate around the z axis
+ */
+static inline void vmathSoaM4MakeRotationZ( VmathSoaMatrix4 *result, vec_float4 radians );
+
+/*
+ * Construct a 4x4 matrix to rotate around the x, y, and z axes
+ */
+static inline void vmathSoaM4MakeRotationZYX( VmathSoaMatrix4 *result, const VmathSoaVector3 *radiansXYZ );
+
+/*
+ * Construct a 4x4 matrix to rotate around a unit-length 3-D vector
+ */
+static inline void vmathSoaM4MakeRotationAxis( VmathSoaMatrix4 *result, vec_float4 radians, const VmathSoaVector3 *unitVec );
+
+/*
+ * Construct a 4x4 rotation matrix from a unit-length quaternion
+ */
+static inline void vmathSoaM4MakeRotationQ( VmathSoaMatrix4 *result, const VmathSoaQuat *unitQuat );
+
+/*
+ * Construct a 4x4 matrix to perform scaling
+ */
+static inline void vmathSoaM4MakeScale( VmathSoaMatrix4 *result, const VmathSoaVector3 *scaleVec );
+
+/*
+ * Construct a 4x4 matrix to perform translation
+ */
+static inline void vmathSoaM4MakeTranslation( VmathSoaMatrix4 *result, const VmathSoaVector3 *translateVec );
+
+/*
+ * Construct a viewing matrix based on eye position, position looked at, and up direction
+ */
+static inline void vmathSoaM4MakeLookAt( VmathSoaMatrix4 *result, const VmathSoaPoint3 *eyePos, const VmathSoaPoint3 *lookAtPos, const VmathSoaVector3 *upVec );
+
+/*
+ * Construct a perspective projection matrix
+ */
+static inline void vmathSoaM4MakePerspective( VmathSoaMatrix4 *result, vec_float4 fovyRadians, vec_float4 aspect, vec_float4 zNear, vec_float4 zFar );
+
+/*
+ * Construct a perspective projection matrix based on frustum
+ */
+static inline void vmathSoaM4MakeFrustum( VmathSoaMatrix4 *result, vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar );
+
+/*
+ * Construct an orthographic projection matrix
+ */
+static inline void vmathSoaM4MakeOrthographic( VmathSoaMatrix4 *result, vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar );
+
+/*
+ * Append (post-multiply) a scale transformation to a 4x4 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline void vmathSoaM4AppendScale( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat, const VmathSoaVector3 *scaleVec );
+
+/*
+ * Prepend (pre-multiply) a scale transformation to a 4x4 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline void vmathSoaM4PrependScale( VmathSoaMatrix4 *result, const VmathSoaVector3 *scaleVec, const VmathSoaMatrix4 *mat );
+
+/*
+ * Multiply two 4x4 matrices per element
+ */
+static inline void vmathSoaM4MulPerElem( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1 );
+
+/*
+ * Compute the absolute value of a 4x4 matrix per element
+ */
+static inline void vmathSoaM4AbsPerElem( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat );
+
+/*
+ * Transpose of a 4x4 matrix
+ */
+static inline void vmathSoaM4Transpose( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat );
+
+/*
+ * Compute the inverse of a 4x4 matrix
+ * NOTE: 
+ * Result is unpredictable when the determinant of mat is equal to or near 0.
+ */
+static inline void vmathSoaM4Inverse( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat );
+
+/*
+ * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix
+ * NOTE: 
+ * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.  The result is unpredictable when the determinant of mat is equal to or near 0.
+ */
+static inline void vmathSoaM4AffineInverse( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat );
+
+/*
+ * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix
+ * NOTE: 
+ * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.
+ */
+static inline void vmathSoaM4OrthoInverse( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat );
+
+/*
+ * Determinant of a 4x4 matrix
+ */
+static inline vec_float4 vmathSoaM4Determinant( const VmathSoaMatrix4 *mat );
+
+/*
+ * Conditionally select between two 4x4 matrices
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline void vmathSoaM4Select( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1, vec_uint4 select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 4x4 matrix
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaM4Print( const VmathSoaMatrix4 *mat );
+
+/*
+ * Print a 4x4 matrix and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaM4Prints( const VmathSoaMatrix4 *mat, const char *name );
+
+#endif
+
+/*
+ * Copy a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3Copy( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm );
+
+/*
+ * Construct a 3x4 transformation matrix containing the specified columns
+ */
+static inline void vmathSoaT3MakeFromCols( VmathSoaTransform3 *result, const VmathSoaVector3 *col0, const VmathSoaVector3 *col1, const VmathSoaVector3 *col2, const VmathSoaVector3 *col3 );
+
+/*
+ * Construct a 3x4 transformation matrix from a 3x3 matrix and a 3-D vector
+ */
+static inline void vmathSoaT3MakeFromM3V3( VmathSoaTransform3 *result, const VmathSoaMatrix3 *tfrm, const VmathSoaVector3 *translateVec );
+
+/*
+ * Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector
+ */
+static inline void vmathSoaT3MakeFromQV3( VmathSoaTransform3 *result, const VmathSoaQuat *unitQuat, const VmathSoaVector3 *translateVec );
+
+/*
+ * Set all elements of a 3x4 transformation matrix to the same scalar value
+ */
+static inline void vmathSoaT3MakeFromScalar( VmathSoaTransform3 *result, vec_float4 scalar );
+
+/*
+ * Replicate an AoS 3x4 transformation matrix
+ */
+static inline void vmathSoaT3MakeFromAos( VmathSoaTransform3 *result, const VmathTransform3 *tfrm );
+
+/*
+ * Insert four AoS 3x4 transformation matrices
+ */
+static inline void vmathSoaT3MakeFrom4Aos( VmathSoaTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1, const VmathTransform3 *tfrm2, const VmathTransform3 *tfrm3 );
+
+/*
+ * Extract four AoS 3x4 transformation matrices
+ */
+static inline void vmathSoaT3Get4Aos( const VmathSoaTransform3 *tfrm, VmathTransform3 *result0, VmathTransform3 *result1, VmathTransform3 *result2, VmathTransform3 *result3 );
+
+/*
+ * Set the upper-left 3x3 submatrix of a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3SetUpper3x3( VmathSoaTransform3 *result, const VmathSoaMatrix3 *mat3 );
+
+/*
+ * Get the upper-left 3x3 submatrix of a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3GetUpper3x3( VmathSoaMatrix3 *result, const VmathSoaTransform3 *tfrm );
+
+/*
+ * Set the translation component of a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3SetTranslation( VmathSoaTransform3 *result, const VmathSoaVector3 *translateVec );
+
+/*
+ * Get the translation component of a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3GetTranslation( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm );
+
+/*
+ * Set column 0 of a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3SetCol0( VmathSoaTransform3 *result, const VmathSoaVector3 *col0 );
+
+/*
+ * Set column 1 of a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3SetCol1( VmathSoaTransform3 *result, const VmathSoaVector3 *col1 );
+
+/*
+ * Set column 2 of a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3SetCol2( VmathSoaTransform3 *result, const VmathSoaVector3 *col2 );
+
+/*
+ * Set column 3 of a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3SetCol3( VmathSoaTransform3 *result, const VmathSoaVector3 *col3 );
+
+/*
+ * Get column 0 of a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3GetCol0( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm );
+
+/*
+ * Get column 1 of a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3GetCol1( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm );
+
+/*
+ * Get column 2 of a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3GetCol2( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm );
+
+/*
+ * Get column 3 of a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3GetCol3( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm );
+
+/*
+ * Set the column of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline void vmathSoaT3SetCol( VmathSoaTransform3 *result, int col, const VmathSoaVector3 *vec );
+
+/*
+ * Set the row of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline void vmathSoaT3SetRow( VmathSoaTransform3 *result, int row, const VmathSoaVector4 *vec );
+
+/*
+ * Get the column of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline void vmathSoaT3GetCol( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm, int col );
+
+/*
+ * Get the row of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline void vmathSoaT3GetRow( VmathSoaVector4 *result, const VmathSoaTransform3 *tfrm, int row );
+
+/*
+ * Set the element of a 3x4 transformation matrix referred to by column and row indices
+ */
+static inline void vmathSoaT3SetElem( VmathSoaTransform3 *result, int col, int row, vec_float4 val );
+
+/*
+ * Get the element of a 3x4 transformation matrix referred to by column and row indices
+ */
+static inline vec_float4 vmathSoaT3GetElem( const VmathSoaTransform3 *tfrm, int col, int row );
+
+/*
+ * Multiply a 3x4 transformation matrix by a 3-D vector
+ */
+static inline void vmathSoaT3MulV3( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm, const VmathSoaVector3 *vec );
+
+/*
+ * Multiply a 3x4 transformation matrix by a 3-D point
+ */
+static inline void vmathSoaT3MulP3( VmathSoaPoint3 *result, const VmathSoaTransform3 *tfrm, const VmathSoaPoint3 *pnt );
+
+/*
+ * Multiply two 3x4 transformation matrices
+ */
+static inline void vmathSoaT3Mul( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm0, const VmathSoaTransform3 *tfrm1 );
+
+/*
+ * Construct an identity 3x4 transformation matrix
+ */
+static inline void vmathSoaT3MakeIdentity( VmathSoaTransform3 *result );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the x axis
+ */
+static inline void vmathSoaT3MakeRotationX( VmathSoaTransform3 *result, vec_float4 radians );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the y axis
+ */
+static inline void vmathSoaT3MakeRotationY( VmathSoaTransform3 *result, vec_float4 radians );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the z axis
+ */
+static inline void vmathSoaT3MakeRotationZ( VmathSoaTransform3 *result, vec_float4 radians );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the x, y, and z axes
+ */
+static inline void vmathSoaT3MakeRotationZYX( VmathSoaTransform3 *result, const VmathSoaVector3 *radiansXYZ );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector
+ */
+static inline void vmathSoaT3MakeRotationAxis( VmathSoaTransform3 *result, vec_float4 radians, const VmathSoaVector3 *unitVec );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate by a unit-length quaternion
+ */
+static inline void vmathSoaT3MakeRotationQ( VmathSoaTransform3 *result, const VmathSoaQuat *unitQuat );
+
+/*
+ * Construct a 3x4 transformation matrix to perform scaling
+ */
+static inline void vmathSoaT3MakeScale( VmathSoaTransform3 *result, const VmathSoaVector3 *scaleVec );
+
+/*
+ * Construct a 3x4 transformation matrix to perform translation
+ */
+static inline void vmathSoaT3MakeTranslation( VmathSoaTransform3 *result, const VmathSoaVector3 *translateVec );
+
+/*
+ * Append (post-multiply) a scale transformation to a 3x4 transformation matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline void vmathSoaT3AppendScale( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm, const VmathSoaVector3 *scaleVec );
+
+/*
+ * Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline void vmathSoaT3PrependScale( VmathSoaTransform3 *result, const VmathSoaVector3 *scaleVec, const VmathSoaTransform3 *tfrm );
+
+/*
+ * Multiply two 3x4 transformation matrices per element
+ */
+static inline void vmathSoaT3MulPerElem( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm0, const VmathSoaTransform3 *tfrm1 );
+
+/*
+ * Compute the absolute value of a 3x4 transformation matrix per element
+ */
+static inline void vmathSoaT3AbsPerElem( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm );
+
+/*
+ * Inverse of a 3x4 transformation matrix
+ * NOTE: 
+ * Result is unpredictable when the determinant of the upper-left 3x3 submatrix is equal to or near 0.
+ */
+static inline void vmathSoaT3Inverse( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm );
+
+/*
+ * Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix
+ * NOTE: 
+ * This can be used to achieve better performance than a general inverse when the specified 3x4 transformation matrix meets the given restrictions.
+ */
+static inline void vmathSoaT3OrthoInverse( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm );
+
+/*
+ * Conditionally select between two 3x4 transformation matrices
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline void vmathSoaT3Select( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm0, const VmathSoaTransform3 *tfrm1, vec_uint4 select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3x4 transformation matrix
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaT3Print( const VmathSoaTransform3 *tfrm );
+
+/*
+ * Print a 3x4 transformation matrix and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaT3Prints( const VmathSoaTransform3 *tfrm, const char *name );
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#include "vec_soa.h"
+#include "quat_soa.h"
+#include "mat_soa.h"
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/ppu/c/vectormath_soa_v.h b/Extras/vectormathlibrary/include/vectormath/ppu/c/vectormath_soa_v.h
index 6865cef2f..03fd011e9 100644
--- a/Extras/vectormathlibrary/include/vectormath/ppu/c/vectormath_soa_v.h
+++ b/Extras/vectormathlibrary/include/vectormath/ppu/c/vectormath_soa_v.h
@@ -1,1979 +1,1979 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_SOA_C_V_H
-#define _VECTORMATH_SOA_C_V_H
-
-#include <math.h>
-#include <altivec.h>
-#include "vectormath_aos_v.h"
-
-#ifdef _VECTORMATH_DEBUG
-#include <stdio.h>
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-#ifndef _VECTORMATH_SOA_C_TYPES_H
-#define _VECTORMATH_SOA_C_TYPES_H
-
-/* A set of four 3-D vectors in structure-of-arrays format
- */
-typedef struct _VmathSoaVector3
-{
-    vec_float4 x;
-    vec_float4 y;
-    vec_float4 z;
-} VmathSoaVector3;
-
-/* A set of four 4-D vectors in structure-of-arrays format
- */
-typedef struct _VmathSoaVector4
-{
-    vec_float4 x;
-    vec_float4 y;
-    vec_float4 z;
-    vec_float4 w;
-} VmathSoaVector4;
-
-/* A set of four 3-D points in structure-of-arrays format
- */
-typedef struct _VmathSoaPoint3
-{
-    vec_float4 x;
-    vec_float4 y;
-    vec_float4 z;
-} VmathSoaPoint3;
-
-/* A set of four quaternions in structure-of-arrays format
- */
-typedef struct _VmathSoaQuat
-{
-    vec_float4 x;
-    vec_float4 y;
-    vec_float4 z;
-    vec_float4 w;
-} VmathSoaQuat;
-
-/* A set of four 3x3 matrices in structure-of-arrays format
- */
-typedef struct _VmathSoaMatrix3
-{
-    VmathSoaVector3 col0;
-    VmathSoaVector3 col1;
-    VmathSoaVector3 col2;
-} VmathSoaMatrix3;
-
-/* A set of four 4x4 matrices in structure-of-arrays format
- */
-typedef struct _VmathSoaMatrix4
-{
-    VmathSoaVector4 col0;
-    VmathSoaVector4 col1;
-    VmathSoaVector4 col2;
-    VmathSoaVector4 col3;
-} VmathSoaMatrix4;
-
-/* A set of four 3x4 transformation matrices in structure-of-arrays format
- */
-typedef struct _VmathSoaTransform3
-{
-    VmathSoaVector3 col0;
-    VmathSoaVector3 col1;
-    VmathSoaVector3 col2;
-    VmathSoaVector3 col3;
-} VmathSoaTransform3;
-
-#endif
-
-/*
- * Construct a 3-D vector from x, y, and z elements
- */
-static inline VmathSoaVector3 vmathSoaV3MakeFromElems_V( vec_float4 x, vec_float4 y, vec_float4 z );
-
-/*
- * Copy elements from a 3-D point into a 3-D vector
- */
-static inline VmathSoaVector3 vmathSoaV3MakeFromP3_V( VmathSoaPoint3 pnt );
-
-/*
- * Set all elements of a 3-D vector to the same scalar value
- */
-static inline VmathSoaVector3 vmathSoaV3MakeFromScalar_V( vec_float4 scalar );
-
-/*
- * Replicate an AoS 3-D vector
- */
-static inline VmathSoaVector3 vmathSoaV3MakeFromAos_V( VmathVector3 vec );
-
-/*
- * Insert four AoS 3-D vectors
- */
-static inline VmathSoaVector3 vmathSoaV3MakeFrom4Aos_V( VmathVector3 vec0, VmathVector3 vec1, VmathVector3 vec2, VmathVector3 vec3 );
-
-/*
- * Extract four AoS 3-D vectors
- */
-static inline void vmathSoaV3Get4Aos_V( VmathSoaVector3 vec, VmathVector3 *result0, VmathVector3 *result1, VmathVector3 *result2, VmathVector3 *result3 );
-
-/*
- * Set the x element of a 3-D vector
- */
-static inline void vmathSoaV3SetX_V( VmathSoaVector3 *result, vec_float4 x );
-
-/*
- * Set the y element of a 3-D vector
- */
-static inline void vmathSoaV3SetY_V( VmathSoaVector3 *result, vec_float4 y );
-
-/*
- * Set the z element of a 3-D vector
- */
-static inline void vmathSoaV3SetZ_V( VmathSoaVector3 *result, vec_float4 z );
-
-/*
- * Get the x element of a 3-D vector
- */
-static inline vec_float4 vmathSoaV3GetX_V( VmathSoaVector3 vec );
-
-/*
- * Get the y element of a 3-D vector
- */
-static inline vec_float4 vmathSoaV3GetY_V( VmathSoaVector3 vec );
-
-/*
- * Get the z element of a 3-D vector
- */
-static inline vec_float4 vmathSoaV3GetZ_V( VmathSoaVector3 vec );
-
-/*
- * Set an x, y, or z element of a 3-D vector by index
- */
-static inline void vmathSoaV3SetElem_V( VmathSoaVector3 *result, int idx, vec_float4 value );
-
-/*
- * Get an x, y, or z element of a 3-D vector by index
- */
-static inline vec_float4 vmathSoaV3GetElem_V( VmathSoaVector3 vec, int idx );
-
-/*
- * Add two 3-D vectors
- */
-static inline VmathSoaVector3 vmathSoaV3Add_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
-
-/*
- * Subtract a 3-D vector from another 3-D vector
- */
-static inline VmathSoaVector3 vmathSoaV3Sub_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
-
-/*
- * Add a 3-D vector to a 3-D point
- */
-static inline VmathSoaPoint3 vmathSoaV3AddP3_V( VmathSoaVector3 vec, VmathSoaPoint3 pnt );
-
-/*
- * Multiply a 3-D vector by a scalar
- */
-static inline VmathSoaVector3 vmathSoaV3ScalarMul_V( VmathSoaVector3 vec, vec_float4 scalar );
-
-/*
- * Divide a 3-D vector by a scalar
- */
-static inline VmathSoaVector3 vmathSoaV3ScalarDiv_V( VmathSoaVector3 vec, vec_float4 scalar );
-
-/*
- * Negate all elements of a 3-D vector
- */
-static inline VmathSoaVector3 vmathSoaV3Neg_V( VmathSoaVector3 vec );
-
-/*
- * Construct x axis
- */
-static inline VmathSoaVector3 vmathSoaV3MakeXAxis_V( );
-
-/*
- * Construct y axis
- */
-static inline VmathSoaVector3 vmathSoaV3MakeYAxis_V( );
-
-/*
- * Construct z axis
- */
-static inline VmathSoaVector3 vmathSoaV3MakeZAxis_V( );
-
-/*
- * Multiply two 3-D vectors per element
- */
-static inline VmathSoaVector3 vmathSoaV3MulPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
-
-/*
- * Divide two 3-D vectors per element
- * NOTE: 
- * Floating-point behavior matches standard library function divf4.
- */
-static inline VmathSoaVector3 vmathSoaV3DivPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
-
-/*
- * Compute the reciprocal of a 3-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function recipf4.
- */
-static inline VmathSoaVector3 vmathSoaV3RecipPerElem_V( VmathSoaVector3 vec );
-
-/*
- * Compute the square root of a 3-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function sqrtf4.
- */
-static inline VmathSoaVector3 vmathSoaV3SqrtPerElem_V( VmathSoaVector3 vec );
-
-/*
- * Compute the reciprocal square root of a 3-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function rsqrtf4.
- */
-static inline VmathSoaVector3 vmathSoaV3RsqrtPerElem_V( VmathSoaVector3 vec );
-
-/*
- * Compute the absolute value of a 3-D vector per element
- */
-static inline VmathSoaVector3 vmathSoaV3AbsPerElem_V( VmathSoaVector3 vec );
-
-/*
- * Copy sign from one 3-D vector to another, per element
- */
-static inline VmathSoaVector3 vmathSoaV3CopySignPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
-
-/*
- * Maximum of two 3-D vectors per element
- */
-static inline VmathSoaVector3 vmathSoaV3MaxPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
-
-/*
- * Minimum of two 3-D vectors per element
- */
-static inline VmathSoaVector3 vmathSoaV3MinPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
-
-/*
- * Maximum element of a 3-D vector
- */
-static inline vec_float4 vmathSoaV3MaxElem_V( VmathSoaVector3 vec );
-
-/*
- * Minimum element of a 3-D vector
- */
-static inline vec_float4 vmathSoaV3MinElem_V( VmathSoaVector3 vec );
-
-/*
- * Compute the sum of all elements of a 3-D vector
- */
-static inline vec_float4 vmathSoaV3Sum_V( VmathSoaVector3 vec );
-
-/*
- * Compute the dot product of two 3-D vectors
- */
-static inline vec_float4 vmathSoaV3Dot_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
-
-/*
- * Compute the square of the length of a 3-D vector
- */
-static inline vec_float4 vmathSoaV3LengthSqr_V( VmathSoaVector3 vec );
-
-/*
- * Compute the length of a 3-D vector
- */
-static inline vec_float4 vmathSoaV3Length_V( VmathSoaVector3 vec );
-
-/*
- * Normalize a 3-D vector
- * NOTE: 
- * The result is unpredictable when all elements of vec are at or near zero.
- */
-static inline VmathSoaVector3 vmathSoaV3Normalize_V( VmathSoaVector3 vec );
-
-/*
- * Compute cross product of two 3-D vectors
- */
-static inline VmathSoaVector3 vmathSoaV3Cross_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
-
-/*
- * Outer product of two 3-D vectors
- */
-static inline VmathSoaMatrix3 vmathSoaV3Outer_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
-
-/*
- * Pre-multiply a row vector by a 3x3 matrix
- */
-static inline VmathSoaVector3 vmathSoaV3RowMul_V( VmathSoaVector3 vec, VmathSoaMatrix3 mat );
-
-/*
- * Cross-product matrix of a 3-D vector
- */
-static inline VmathSoaMatrix3 vmathSoaV3CrossMatrix_V( VmathSoaVector3 vec );
-
-/*
- * Create cross-product matrix and multiply
- * NOTE: 
- * Faster than separately creating a cross-product matrix and multiplying.
- */
-static inline VmathSoaMatrix3 vmathSoaV3CrossMatrixMul_V( VmathSoaVector3 vec, VmathSoaMatrix3 mat );
-
-/*
- * Linear interpolation between two 3-D vectors
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline VmathSoaVector3 vmathSoaV3Lerp_V( vec_float4 t, VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
-
-/*
- * Spherical linear interpolation between two 3-D vectors
- * NOTE: 
- * The result is unpredictable if the vectors point in opposite directions.
- * Does not clamp t between 0 and 1.
- */
-static inline VmathSoaVector3 vmathSoaV3Slerp_V( vec_float4 t, VmathSoaVector3 unitVec0, VmathSoaVector3 unitVec1 );
-
-/*
- * Conditionally select between two 3-D vectors
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline VmathSoaVector3 vmathSoaV3Select_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1, vec_uint4 select1 );
-
-/*
- * Load four three-float 3-D vectors, stored in three quadwords
- */
-static inline void vmathSoaV3LoadXYZArray_V( VmathSoaVector3 *vec, const vec_float4 *threeQuads );
-
-/*
- * Store four slots of an SoA 3-D vector in three quadwords
- */
-static inline void vmathSoaV3StoreXYZArray_V( VmathSoaVector3 vec, vec_float4 *threeQuads );
-
-/*
- * Store eight slots of two SoA 3-D vectors as half-floats
- */
-static inline void vmathSoaV3StoreHalfFloats_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1, vec_ushort8 *threeQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3-D vector
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaV3Print_V( VmathSoaVector3 vec );
-
-/*
- * Print a 3-D vector and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaV3Prints_V( VmathSoaVector3 vec, const char *name );
-
-#endif
-
-/*
- * Construct a 4-D vector from x, y, z, and w elements
- */
-static inline VmathSoaVector4 vmathSoaV4MakeFromElems_V( vec_float4 x, vec_float4 y, vec_float4 z, vec_float4 w );
-
-/*
- * Construct a 4-D vector from a 3-D vector and a scalar
- */
-static inline VmathSoaVector4 vmathSoaV4MakeFromV3Scalar_V( VmathSoaVector3 xyz, vec_float4 w );
-
-/*
- * Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0
- */
-static inline VmathSoaVector4 vmathSoaV4MakeFromV3_V( VmathSoaVector3 vec );
-
-/*
- * Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1
- */
-static inline VmathSoaVector4 vmathSoaV4MakeFromP3_V( VmathSoaPoint3 pnt );
-
-/*
- * Copy elements from a quaternion into a 4-D vector
- */
-static inline VmathSoaVector4 vmathSoaV4MakeFromQ_V( VmathSoaQuat quat );
-
-/*
- * Set all elements of a 4-D vector to the same scalar value
- */
-static inline VmathSoaVector4 vmathSoaV4MakeFromScalar_V( vec_float4 scalar );
-
-/*
- * Replicate an AoS 4-D vector
- */
-static inline VmathSoaVector4 vmathSoaV4MakeFromAos_V( VmathVector4 vec );
-
-/*
- * Insert four AoS 4-D vectors
- */
-static inline VmathSoaVector4 vmathSoaV4MakeFrom4Aos_V( VmathVector4 vec0, VmathVector4 vec1, VmathVector4 vec2, VmathVector4 vec3 );
-
-/*
- * Extract four AoS 4-D vectors
- */
-static inline void vmathSoaV4Get4Aos_V( VmathSoaVector4 vec, VmathVector4 *result0, VmathVector4 *result1, VmathVector4 *result2, VmathVector4 *result3 );
-
-/*
- * Set the x, y, and z elements of a 4-D vector
- * NOTE: 
- * This function does not change the w element.
- */
-static inline void vmathSoaV4SetXYZ_V( VmathSoaVector4 *result, VmathSoaVector3 vec );
-
-/*
- * Get the x, y, and z elements of a 4-D vector
- */
-static inline VmathSoaVector3 vmathSoaV4GetXYZ_V( VmathSoaVector4 vec );
-
-/*
- * Set the x element of a 4-D vector
- */
-static inline void vmathSoaV4SetX_V( VmathSoaVector4 *result, vec_float4 x );
-
-/*
- * Set the y element of a 4-D vector
- */
-static inline void vmathSoaV4SetY_V( VmathSoaVector4 *result, vec_float4 y );
-
-/*
- * Set the z element of a 4-D vector
- */
-static inline void vmathSoaV4SetZ_V( VmathSoaVector4 *result, vec_float4 z );
-
-/*
- * Set the w element of a 4-D vector
- */
-static inline void vmathSoaV4SetW_V( VmathSoaVector4 *result, vec_float4 w );
-
-/*
- * Get the x element of a 4-D vector
- */
-static inline vec_float4 vmathSoaV4GetX_V( VmathSoaVector4 vec );
-
-/*
- * Get the y element of a 4-D vector
- */
-static inline vec_float4 vmathSoaV4GetY_V( VmathSoaVector4 vec );
-
-/*
- * Get the z element of a 4-D vector
- */
-static inline vec_float4 vmathSoaV4GetZ_V( VmathSoaVector4 vec );
-
-/*
- * Get the w element of a 4-D vector
- */
-static inline vec_float4 vmathSoaV4GetW_V( VmathSoaVector4 vec );
-
-/*
- * Set an x, y, z, or w element of a 4-D vector by index
- */
-static inline void vmathSoaV4SetElem_V( VmathSoaVector4 *result, int idx, vec_float4 value );
-
-/*
- * Get an x, y, z, or w element of a 4-D vector by index
- */
-static inline vec_float4 vmathSoaV4GetElem_V( VmathSoaVector4 vec, int idx );
-
-/*
- * Add two 4-D vectors
- */
-static inline VmathSoaVector4 vmathSoaV4Add_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
-
-/*
- * Subtract a 4-D vector from another 4-D vector
- */
-static inline VmathSoaVector4 vmathSoaV4Sub_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
-
-/*
- * Multiply a 4-D vector by a scalar
- */
-static inline VmathSoaVector4 vmathSoaV4ScalarMul_V( VmathSoaVector4 vec, vec_float4 scalar );
-
-/*
- * Divide a 4-D vector by a scalar
- */
-static inline VmathSoaVector4 vmathSoaV4ScalarDiv_V( VmathSoaVector4 vec, vec_float4 scalar );
-
-/*
- * Negate all elements of a 4-D vector
- */
-static inline VmathSoaVector4 vmathSoaV4Neg_V( VmathSoaVector4 vec );
-
-/*
- * Construct x axis
- */
-static inline VmathSoaVector4 vmathSoaV4MakeXAxis_V( );
-
-/*
- * Construct y axis
- */
-static inline VmathSoaVector4 vmathSoaV4MakeYAxis_V( );
-
-/*
- * Construct z axis
- */
-static inline VmathSoaVector4 vmathSoaV4MakeZAxis_V( );
-
-/*
- * Construct w axis
- */
-static inline VmathSoaVector4 vmathSoaV4MakeWAxis_V( );
-
-/*
- * Multiply two 4-D vectors per element
- */
-static inline VmathSoaVector4 vmathSoaV4MulPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
-
-/*
- * Divide two 4-D vectors per element
- * NOTE: 
- * Floating-point behavior matches standard library function divf4.
- */
-static inline VmathSoaVector4 vmathSoaV4DivPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
-
-/*
- * Compute the reciprocal of a 4-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function recipf4.
- */
-static inline VmathSoaVector4 vmathSoaV4RecipPerElem_V( VmathSoaVector4 vec );
-
-/*
- * Compute the square root of a 4-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function sqrtf4.
- */
-static inline VmathSoaVector4 vmathSoaV4SqrtPerElem_V( VmathSoaVector4 vec );
-
-/*
- * Compute the reciprocal square root of a 4-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function rsqrtf4.
- */
-static inline VmathSoaVector4 vmathSoaV4RsqrtPerElem_V( VmathSoaVector4 vec );
-
-/*
- * Compute the absolute value of a 4-D vector per element
- */
-static inline VmathSoaVector4 vmathSoaV4AbsPerElem_V( VmathSoaVector4 vec );
-
-/*
- * Copy sign from one 4-D vector to another, per element
- */
-static inline VmathSoaVector4 vmathSoaV4CopySignPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
-
-/*
- * Maximum of two 4-D vectors per element
- */
-static inline VmathSoaVector4 vmathSoaV4MaxPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
-
-/*
- * Minimum of two 4-D vectors per element
- */
-static inline VmathSoaVector4 vmathSoaV4MinPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
-
-/*
- * Maximum element of a 4-D vector
- */
-static inline vec_float4 vmathSoaV4MaxElem_V( VmathSoaVector4 vec );
-
-/*
- * Minimum element of a 4-D vector
- */
-static inline vec_float4 vmathSoaV4MinElem_V( VmathSoaVector4 vec );
-
-/*
- * Compute the sum of all elements of a 4-D vector
- */
-static inline vec_float4 vmathSoaV4Sum_V( VmathSoaVector4 vec );
-
-/*
- * Compute the dot product of two 4-D vectors
- */
-static inline vec_float4 vmathSoaV4Dot_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
-
-/*
- * Compute the square of the length of a 4-D vector
- */
-static inline vec_float4 vmathSoaV4LengthSqr_V( VmathSoaVector4 vec );
-
-/*
- * Compute the length of a 4-D vector
- */
-static inline vec_float4 vmathSoaV4Length_V( VmathSoaVector4 vec );
-
-/*
- * Normalize a 4-D vector
- * NOTE: 
- * The result is unpredictable when all elements of vec are at or near zero.
- */
-static inline VmathSoaVector4 vmathSoaV4Normalize_V( VmathSoaVector4 vec );
-
-/*
- * Outer product of two 4-D vectors
- */
-static inline VmathSoaMatrix4 vmathSoaV4Outer_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
-
-/*
- * Linear interpolation between two 4-D vectors
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline VmathSoaVector4 vmathSoaV4Lerp_V( vec_float4 t, VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
-
-/*
- * Spherical linear interpolation between two 4-D vectors
- * NOTE: 
- * The result is unpredictable if the vectors point in opposite directions.
- * Does not clamp t between 0 and 1.
- */
-static inline VmathSoaVector4 vmathSoaV4Slerp_V( vec_float4 t, VmathSoaVector4 unitVec0, VmathSoaVector4 unitVec1 );
-
-/*
- * Conditionally select between two 4-D vectors
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline VmathSoaVector4 vmathSoaV4Select_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1, vec_uint4 select1 );
-
-/*
- * Store four slots of an SoA 4-D vector as half-floats
- */
-static inline void vmathSoaV4StoreHalfFloats_V( VmathSoaVector4 vec, vec_ushort8 *twoQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 4-D vector
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaV4Print_V( VmathSoaVector4 vec );
-
-/*
- * Print a 4-D vector and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaV4Prints_V( VmathSoaVector4 vec, const char *name );
-
-#endif
-
-/*
- * Construct a 3-D point from x, y, and z elements
- */
-static inline VmathSoaPoint3 vmathSoaP3MakeFromElems_V( vec_float4 x, vec_float4 y, vec_float4 z );
-
-/*
- * Copy elements from a 3-D vector into a 3-D point
- */
-static inline VmathSoaPoint3 vmathSoaP3MakeFromV3_V( VmathSoaVector3 vec );
-
-/*
- * Set all elements of a 3-D point to the same scalar value
- */
-static inline VmathSoaPoint3 vmathSoaP3MakeFromScalar_V( vec_float4 scalar );
-
-/*
- * Replicate an AoS 3-D point
- */
-static inline VmathSoaPoint3 vmathSoaP3MakeFromAos_V( VmathPoint3 pnt );
-
-/*
- * Insert four AoS 3-D points
- */
-static inline VmathSoaPoint3 vmathSoaP3MakeFrom4Aos_V( VmathPoint3 pnt0, VmathPoint3 pnt1, VmathPoint3 pnt2, VmathPoint3 pnt3 );
-
-/*
- * Extract four AoS 3-D points
- */
-static inline void vmathSoaP3Get4Aos_V( VmathSoaPoint3 pnt, VmathPoint3 *result0, VmathPoint3 *result1, VmathPoint3 *result2, VmathPoint3 *result3 );
-
-/*
- * Set the x element of a 3-D point
- */
-static inline void vmathSoaP3SetX_V( VmathSoaPoint3 *result, vec_float4 x );
-
-/*
- * Set the y element of a 3-D point
- */
-static inline void vmathSoaP3SetY_V( VmathSoaPoint3 *result, vec_float4 y );
-
-/*
- * Set the z element of a 3-D point
- */
-static inline void vmathSoaP3SetZ_V( VmathSoaPoint3 *result, vec_float4 z );
-
-/*
- * Get the x element of a 3-D point
- */
-static inline vec_float4 vmathSoaP3GetX_V( VmathSoaPoint3 pnt );
-
-/*
- * Get the y element of a 3-D point
- */
-static inline vec_float4 vmathSoaP3GetY_V( VmathSoaPoint3 pnt );
-
-/*
- * Get the z element of a 3-D point
- */
-static inline vec_float4 vmathSoaP3GetZ_V( VmathSoaPoint3 pnt );
-
-/*
- * Set an x, y, or z element of a 3-D point by index
- */
-static inline void vmathSoaP3SetElem_V( VmathSoaPoint3 *result, int idx, vec_float4 value );
-
-/*
- * Get an x, y, or z element of a 3-D point by index
- */
-static inline vec_float4 vmathSoaP3GetElem_V( VmathSoaPoint3 pnt, int idx );
-
-/*
- * Subtract a 3-D point from another 3-D point
- */
-static inline VmathSoaVector3 vmathSoaP3Sub_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 );
-
-/*
- * Add a 3-D point to a 3-D vector
- */
-static inline VmathSoaPoint3 vmathSoaP3AddV3_V( VmathSoaPoint3 pnt, VmathSoaVector3 vec );
-
-/*
- * Subtract a 3-D vector from a 3-D point
- */
-static inline VmathSoaPoint3 vmathSoaP3SubV3_V( VmathSoaPoint3 pnt, VmathSoaVector3 vec );
-
-/*
- * Multiply two 3-D points per element
- */
-static inline VmathSoaPoint3 vmathSoaP3MulPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 );
-
-/*
- * Divide two 3-D points per element
- * NOTE: 
- * Floating-point behavior matches standard library function divf4.
- */
-static inline VmathSoaPoint3 vmathSoaP3DivPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 );
-
-/*
- * Compute the reciprocal of a 3-D point per element
- * NOTE: 
- * Floating-point behavior matches standard library function recipf4.
- */
-static inline VmathSoaPoint3 vmathSoaP3RecipPerElem_V( VmathSoaPoint3 pnt );
-
-/*
- * Compute the square root of a 3-D point per element
- * NOTE: 
- * Floating-point behavior matches standard library function sqrtf4.
- */
-static inline VmathSoaPoint3 vmathSoaP3SqrtPerElem_V( VmathSoaPoint3 pnt );
-
-/*
- * Compute the reciprocal square root of a 3-D point per element
- * NOTE: 
- * Floating-point behavior matches standard library function rsqrtf4.
- */
-static inline VmathSoaPoint3 vmathSoaP3RsqrtPerElem_V( VmathSoaPoint3 pnt );
-
-/*
- * Compute the absolute value of a 3-D point per element
- */
-static inline VmathSoaPoint3 vmathSoaP3AbsPerElem_V( VmathSoaPoint3 pnt );
-
-/*
- * Copy sign from one 3-D point to another, per element
- */
-static inline VmathSoaPoint3 vmathSoaP3CopySignPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 );
-
-/*
- * Maximum of two 3-D points per element
- */
-static inline VmathSoaPoint3 vmathSoaP3MaxPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 );
-
-/*
- * Minimum of two 3-D points per element
- */
-static inline VmathSoaPoint3 vmathSoaP3MinPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 );
-
-/*
- * Maximum element of a 3-D point
- */
-static inline vec_float4 vmathSoaP3MaxElem_V( VmathSoaPoint3 pnt );
-
-/*
- * Minimum element of a 3-D point
- */
-static inline vec_float4 vmathSoaP3MinElem_V( VmathSoaPoint3 pnt );
-
-/*
- * Compute the sum of all elements of a 3-D point
- */
-static inline vec_float4 vmathSoaP3Sum_V( VmathSoaPoint3 pnt );
-
-/*
- * Apply uniform scale to a 3-D point
- */
-static inline VmathSoaPoint3 vmathSoaP3Scale_V( VmathSoaPoint3 pnt, vec_float4 scaleVal );
-
-/*
- * Apply non-uniform scale to a 3-D point
- */
-static inline VmathSoaPoint3 vmathSoaP3NonUniformScale_V( VmathSoaPoint3 pnt, VmathSoaVector3 scaleVec );
-
-/*
- * Scalar projection of a 3-D point on a unit-length 3-D vector
- */
-static inline vec_float4 vmathSoaP3Projection_V( VmathSoaPoint3 pnt, VmathSoaVector3 unitVec );
-
-/*
- * Compute the square of the distance of a 3-D point from the coordinate-system origin
- */
-static inline vec_float4 vmathSoaP3DistSqrFromOrigin_V( VmathSoaPoint3 pnt );
-
-/*
- * Compute the distance of a 3-D point from the coordinate-system origin
- */
-static inline vec_float4 vmathSoaP3DistFromOrigin_V( VmathSoaPoint3 pnt );
-
-/*
- * Compute the square of the distance between two 3-D points
- */
-static inline vec_float4 vmathSoaP3DistSqr_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 );
-
-/*
- * Compute the distance between two 3-D points
- */
-static inline vec_float4 vmathSoaP3Dist_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 );
-
-/*
- * Linear interpolation between two 3-D points
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline VmathSoaPoint3 vmathSoaP3Lerp_V( vec_float4 t, VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 );
-
-/*
- * Conditionally select between two 3-D points
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline VmathSoaPoint3 vmathSoaP3Select_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1, vec_uint4 select1 );
-
-/*
- * Load four three-float 3-D points, stored in three quadwords
- */
-static inline void vmathSoaP3LoadXYZArray_V( VmathSoaPoint3 *pnt, const vec_float4 *threeQuads );
-
-/*
- * Store four slots of an SoA 3-D point in three quadwords
- */
-static inline void vmathSoaP3StoreXYZArray_V( VmathSoaPoint3 pnt, vec_float4 *threeQuads );
-
-/*
- * Store eight slots of two SoA 3-D points as half-floats
- */
-static inline void vmathSoaP3StoreHalfFloats_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1, vec_ushort8 *threeQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3-D point
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaP3Print_V( VmathSoaPoint3 pnt );
-
-/*
- * Print a 3-D point and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaP3Prints_V( VmathSoaPoint3 pnt, const char *name );
-
-#endif
-
-/*
- * Construct a quaternion from x, y, z, and w elements
- */
-static inline VmathSoaQuat vmathSoaQMakeFromElems_V( vec_float4 x, vec_float4 y, vec_float4 z, vec_float4 w );
-
-/*
- * Construct a quaternion from a 3-D vector and a scalar
- */
-static inline VmathSoaQuat vmathSoaQMakeFromV3Scalar_V( VmathSoaVector3 xyz, vec_float4 w );
-
-/*
- * Copy elements from a 4-D vector into a quaternion
- */
-static inline VmathSoaQuat vmathSoaQMakeFromV4_V( VmathSoaVector4 vec );
-
-/*
- * Convert a rotation matrix to a unit-length quaternion
- */
-static inline VmathSoaQuat vmathSoaQMakeFromM3_V( VmathSoaMatrix3 rotMat );
-
-/*
- * Set all elements of a quaternion to the same scalar value
- */
-static inline VmathSoaQuat vmathSoaQMakeFromScalar_V( vec_float4 scalar );
-
-/*
- * Replicate an AoS quaternion
- */
-static inline VmathSoaQuat vmathSoaQMakeFromAos_V( VmathQuat quat );
-
-/*
- * Insert four AoS quaternions
- */
-static inline VmathSoaQuat vmathSoaQMakeFrom4Aos_V( VmathQuat quat0, VmathQuat quat1, VmathQuat quat2, VmathQuat quat3 );
-
-/*
- * Extract four AoS quaternions
- */
-static inline void vmathSoaQGet4Aos_V( VmathSoaQuat quat, VmathQuat *result0, VmathQuat *result1, VmathQuat *result2, VmathQuat *result3 );
-
-/*
- * Set the x, y, and z elements of a quaternion
- * NOTE: 
- * This function does not change the w element.
- */
-static inline void vmathSoaQSetXYZ_V( VmathSoaQuat *result, VmathSoaVector3 vec );
-
-/*
- * Get the x, y, and z elements of a quaternion
- */
-static inline VmathSoaVector3 vmathSoaQGetXYZ_V( VmathSoaQuat quat );
-
-/*
- * Set the x element of a quaternion
- */
-static inline void vmathSoaQSetX_V( VmathSoaQuat *result, vec_float4 x );
-
-/*
- * Set the y element of a quaternion
- */
-static inline void vmathSoaQSetY_V( VmathSoaQuat *result, vec_float4 y );
-
-/*
- * Set the z element of a quaternion
- */
-static inline void vmathSoaQSetZ_V( VmathSoaQuat *result, vec_float4 z );
-
-/*
- * Set the w element of a quaternion
- */
-static inline void vmathSoaQSetW_V( VmathSoaQuat *result, vec_float4 w );
-
-/*
- * Get the x element of a quaternion
- */
-static inline vec_float4 vmathSoaQGetX_V( VmathSoaQuat quat );
-
-/*
- * Get the y element of a quaternion
- */
-static inline vec_float4 vmathSoaQGetY_V( VmathSoaQuat quat );
-
-/*
- * Get the z element of a quaternion
- */
-static inline vec_float4 vmathSoaQGetZ_V( VmathSoaQuat quat );
-
-/*
- * Get the w element of a quaternion
- */
-static inline vec_float4 vmathSoaQGetW_V( VmathSoaQuat quat );
-
-/*
- * Set an x, y, z, or w element of a quaternion by index
- */
-static inline void vmathSoaQSetElem_V( VmathSoaQuat *result, int idx, vec_float4 value );
-
-/*
- * Get an x, y, z, or w element of a quaternion by index
- */
-static inline vec_float4 vmathSoaQGetElem_V( VmathSoaQuat quat, int idx );
-
-/*
- * Add two quaternions
- */
-static inline VmathSoaQuat vmathSoaQAdd_V( VmathSoaQuat quat0, VmathSoaQuat quat1 );
-
-/*
- * Subtract a quaternion from another quaternion
- */
-static inline VmathSoaQuat vmathSoaQSub_V( VmathSoaQuat quat0, VmathSoaQuat quat1 );
-
-/*
- * Multiply two quaternions
- */
-static inline VmathSoaQuat vmathSoaQMul_V( VmathSoaQuat quat0, VmathSoaQuat quat1 );
-
-/*
- * Multiply a quaternion by a scalar
- */
-static inline VmathSoaQuat vmathSoaQScalarMul_V( VmathSoaQuat quat, vec_float4 scalar );
-
-/*
- * Divide a quaternion by a scalar
- */
-static inline VmathSoaQuat vmathSoaQScalarDiv_V( VmathSoaQuat quat, vec_float4 scalar );
-
-/*
- * Negate all elements of a quaternion
- */
-static inline VmathSoaQuat vmathSoaQNeg_V( VmathSoaQuat quat );
-
-/*
- * Construct an identity quaternion
- */
-static inline VmathSoaQuat vmathSoaQMakeIdentity_V( );
-
-/*
- * Construct a quaternion to rotate between two unit-length 3-D vectors
- * NOTE: 
- * The result is unpredictable if unitVec0 and unitVec1 point in opposite directions.
- */
-static inline VmathSoaQuat vmathSoaQMakeRotationArc_V( VmathSoaVector3 unitVec0, VmathSoaVector3 unitVec1 );
-
-/*
- * Construct a quaternion to rotate around a unit-length 3-D vector
- */
-static inline VmathSoaQuat vmathSoaQMakeRotationAxis_V( vec_float4 radians, VmathSoaVector3 unitVec );
-
-/*
- * Construct a quaternion to rotate around the x axis
- */
-static inline VmathSoaQuat vmathSoaQMakeRotationX_V( vec_float4 radians );
-
-/*
- * Construct a quaternion to rotate around the y axis
- */
-static inline VmathSoaQuat vmathSoaQMakeRotationY_V( vec_float4 radians );
-
-/*
- * Construct a quaternion to rotate around the z axis
- */
-static inline VmathSoaQuat vmathSoaQMakeRotationZ_V( vec_float4 radians );
-
-/*
- * Compute the conjugate of a quaternion
- */
-static inline VmathSoaQuat vmathSoaQConj_V( VmathSoaQuat quat );
-
-/*
- * Use a unit-length quaternion to rotate a 3-D vector
- */
-static inline VmathSoaVector3 vmathSoaQRotate_V( VmathSoaQuat unitQuat, VmathSoaVector3 vec );
-
-/*
- * Compute the dot product of two quaternions
- */
-static inline vec_float4 vmathSoaQDot_V( VmathSoaQuat quat0, VmathSoaQuat quat1 );
-
-/*
- * Compute the norm of a quaternion
- */
-static inline vec_float4 vmathSoaQNorm_V( VmathSoaQuat quat );
-
-/*
- * Compute the length of a quaternion
- */
-static inline vec_float4 vmathSoaQLength_V( VmathSoaQuat quat );
-
-/*
- * Normalize a quaternion
- * NOTE: 
- * The result is unpredictable when all elements of quat are at or near zero.
- */
-static inline VmathSoaQuat vmathSoaQNormalize_V( VmathSoaQuat quat );
-
-/*
- * Linear interpolation between two quaternions
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline VmathSoaQuat vmathSoaQLerp_V( vec_float4 t, VmathSoaQuat quat0, VmathSoaQuat quat1 );
-
-/*
- * Spherical linear interpolation between two quaternions
- * NOTE: 
- * Interpolates along the shortest path between orientations.
- * Does not clamp t between 0 and 1.
- */
-static inline VmathSoaQuat vmathSoaQSlerp_V( vec_float4 t, VmathSoaQuat unitQuat0, VmathSoaQuat unitQuat1 );
-
-/*
- * Spherical quadrangle interpolation
- */
-static inline VmathSoaQuat vmathSoaQSquad_V( vec_float4 t, VmathSoaQuat unitQuat0, VmathSoaQuat unitQuat1, VmathSoaQuat unitQuat2, VmathSoaQuat unitQuat3 );
-
-/*
- * Conditionally select between two quaternions
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline VmathSoaQuat vmathSoaQSelect_V( VmathSoaQuat quat0, VmathSoaQuat quat1, vec_uint4 select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a quaternion
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaQPrint_V( VmathSoaQuat quat );
-
-/*
- * Print a quaternion and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaQPrints_V( VmathSoaQuat quat, const char *name );
-
-#endif
-
-/*
- * Construct a 3x3 matrix containing the specified columns
- */
-static inline VmathSoaMatrix3 vmathSoaM3MakeFromCols_V( VmathSoaVector3 col0, VmathSoaVector3 col1, VmathSoaVector3 col2 );
-
-/*
- * Construct a 3x3 rotation matrix from a unit-length quaternion
- */
-static inline VmathSoaMatrix3 vmathSoaM3MakeFromQ_V( VmathSoaQuat unitQuat );
-
-/*
- * Set all elements of a 3x3 matrix to the same scalar value
- */
-static inline VmathSoaMatrix3 vmathSoaM3MakeFromScalar_V( vec_float4 scalar );
-
-/*
- * Replicate an AoS 3x3 matrix
- */
-static inline VmathSoaMatrix3 vmathSoaM3MakeFromAos_V( VmathMatrix3 mat );
-
-/*
- * Insert four AoS 3x3 matrices
- */
-static inline VmathSoaMatrix3 vmathSoaM3MakeFrom4Aos_V( VmathMatrix3 mat0, VmathMatrix3 mat1, VmathMatrix3 mat2, VmathMatrix3 mat3 );
-
-/*
- * Extract four AoS 3x3 matrices
- */
-static inline void vmathSoaM3Get4Aos_V( VmathSoaMatrix3 mat, VmathMatrix3 *result0, VmathMatrix3 *result1, VmathMatrix3 *result2, VmathMatrix3 *result3 );
-
-/*
- * Set column 0 of a 3x3 matrix
- */
-static inline void vmathSoaM3SetCol0_V( VmathSoaMatrix3 *result, VmathSoaVector3 col0 );
-
-/*
- * Set column 1 of a 3x3 matrix
- */
-static inline void vmathSoaM3SetCol1_V( VmathSoaMatrix3 *result, VmathSoaVector3 col1 );
-
-/*
- * Set column 2 of a 3x3 matrix
- */
-static inline void vmathSoaM3SetCol2_V( VmathSoaMatrix3 *result, VmathSoaVector3 col2 );
-
-/*
- * Get column 0 of a 3x3 matrix
- */
-static inline VmathSoaVector3 vmathSoaM3GetCol0_V( VmathSoaMatrix3 mat );
-
-/*
- * Get column 1 of a 3x3 matrix
- */
-static inline VmathSoaVector3 vmathSoaM3GetCol1_V( VmathSoaMatrix3 mat );
-
-/*
- * Get column 2 of a 3x3 matrix
- */
-static inline VmathSoaVector3 vmathSoaM3GetCol2_V( VmathSoaMatrix3 mat );
-
-/*
- * Set the column of a 3x3 matrix referred to by the specified index
- */
-static inline void vmathSoaM3SetCol_V( VmathSoaMatrix3 *result, int col, VmathSoaVector3 vec );
-
-/*
- * Set the row of a 3x3 matrix referred to by the specified index
- */
-static inline void vmathSoaM3SetRow_V( VmathSoaMatrix3 *result, int row, VmathSoaVector3 vec );
-
-/*
- * Get the column of a 3x3 matrix referred to by the specified index
- */
-static inline VmathSoaVector3 vmathSoaM3GetCol_V( VmathSoaMatrix3 mat, int col );
-
-/*
- * Get the row of a 3x3 matrix referred to by the specified index
- */
-static inline VmathSoaVector3 vmathSoaM3GetRow_V( VmathSoaMatrix3 mat, int row );
-
-/*
- * Set the element of a 3x3 matrix referred to by column and row indices
- */
-static inline void vmathSoaM3SetElem_V( VmathSoaMatrix3 *result, int col, int row, vec_float4 val );
-
-/*
- * Get the element of a 3x3 matrix referred to by column and row indices
- */
-static inline vec_float4 vmathSoaM3GetElem_V( VmathSoaMatrix3 mat, int col, int row );
-
-/*
- * Add two 3x3 matrices
- */
-static inline VmathSoaMatrix3 vmathSoaM3Add_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1 );
-
-/*
- * Subtract a 3x3 matrix from another 3x3 matrix
- */
-static inline VmathSoaMatrix3 vmathSoaM3Sub_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1 );
-
-/*
- * Negate all elements of a 3x3 matrix
- */
-static inline VmathSoaMatrix3 vmathSoaM3Neg_V( VmathSoaMatrix3 mat );
-
-/*
- * Multiply a 3x3 matrix by a scalar
- */
-static inline VmathSoaMatrix3 vmathSoaM3ScalarMul_V( VmathSoaMatrix3 mat, vec_float4 scalar );
-
-/*
- * Multiply a 3x3 matrix by a 3-D vector
- */
-static inline VmathSoaVector3 vmathSoaM3MulV3_V( VmathSoaMatrix3 mat, VmathSoaVector3 vec );
-
-/*
- * Multiply two 3x3 matrices
- */
-static inline VmathSoaMatrix3 vmathSoaM3Mul_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1 );
-
-/*
- * Construct an identity 3x3 matrix
- */
-static inline VmathSoaMatrix3 vmathSoaM3MakeIdentity_V( );
-
-/*
- * Construct a 3x3 matrix to rotate around the x axis
- */
-static inline VmathSoaMatrix3 vmathSoaM3MakeRotationX_V( vec_float4 radians );
-
-/*
- * Construct a 3x3 matrix to rotate around the y axis
- */
-static inline VmathSoaMatrix3 vmathSoaM3MakeRotationY_V( vec_float4 radians );
-
-/*
- * Construct a 3x3 matrix to rotate around the z axis
- */
-static inline VmathSoaMatrix3 vmathSoaM3MakeRotationZ_V( vec_float4 radians );
-
-/*
- * Construct a 3x3 matrix to rotate around the x, y, and z axes
- */
-static inline VmathSoaMatrix3 vmathSoaM3MakeRotationZYX_V( VmathSoaVector3 radiansXYZ );
-
-/*
- * Construct a 3x3 matrix to rotate around a unit-length 3-D vector
- */
-static inline VmathSoaMatrix3 vmathSoaM3MakeRotationAxis_V( vec_float4 radians, VmathSoaVector3 unitVec );
-
-/*
- * Construct a rotation matrix from a unit-length quaternion
- */
-static inline VmathSoaMatrix3 vmathSoaM3MakeRotationQ_V( VmathSoaQuat unitQuat );
-
-/*
- * Construct a 3x3 matrix to perform scaling
- */
-static inline VmathSoaMatrix3 vmathSoaM3MakeScale_V( VmathSoaVector3 scaleVec );
-
-/*
- * Append (post-multiply) a scale transformation to a 3x3 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline VmathSoaMatrix3 vmathSoaM3AppendScale_V( VmathSoaMatrix3 mat, VmathSoaVector3 scaleVec );
-
-/*
- * Prepend (pre-multiply) a scale transformation to a 3x3 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline VmathSoaMatrix3 vmathSoaM3PrependScale_V( VmathSoaVector3 scaleVec, VmathSoaMatrix3 mat );
-
-/*
- * Multiply two 3x3 matrices per element
- */
-static inline VmathSoaMatrix3 vmathSoaM3MulPerElem_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1 );
-
-/*
- * Compute the absolute value of a 3x3 matrix per element
- */
-static inline VmathSoaMatrix3 vmathSoaM3AbsPerElem_V( VmathSoaMatrix3 mat );
-
-/*
- * Transpose of a 3x3 matrix
- */
-static inline VmathSoaMatrix3 vmathSoaM3Transpose_V( VmathSoaMatrix3 mat );
-
-/*
- * Compute the inverse of a 3x3 matrix
- * NOTE: 
- * Result is unpredictable when the determinant of mat is equal to or near 0.
- */
-static inline VmathSoaMatrix3 vmathSoaM3Inverse_V( VmathSoaMatrix3 mat );
-
-/*
- * Determinant of a 3x3 matrix
- */
-static inline vec_float4 vmathSoaM3Determinant_V( VmathSoaMatrix3 mat );
-
-/*
- * Conditionally select between two 3x3 matrices
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline VmathSoaMatrix3 vmathSoaM3Select_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1, vec_uint4 select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3x3 matrix
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaM3Print_V( VmathSoaMatrix3 mat );
-
-/*
- * Print a 3x3 matrix and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaM3Prints_V( VmathSoaMatrix3 mat, const char *name );
-
-#endif
-
-/*
- * Construct a 4x4 matrix containing the specified columns
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeFromCols_V( VmathSoaVector4 col0, VmathSoaVector4 col1, VmathSoaVector4 col2, VmathSoaVector4 col3 );
-
-/*
- * Construct a 4x4 matrix from a 3x4 transformation matrix
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeFromT3_V( VmathSoaTransform3 mat );
-
-/*
- * Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeFromM3V3_V( VmathSoaMatrix3 mat, VmathSoaVector3 translateVec );
-
-/*
- * Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeFromQV3_V( VmathSoaQuat unitQuat, VmathSoaVector3 translateVec );
-
-/*
- * Set all elements of a 4x4 matrix to the same scalar value
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeFromScalar_V( vec_float4 scalar );
-
-/*
- * Replicate an AoS 4x4 matrix
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeFromAos_V( VmathMatrix4 mat );
-
-/*
- * Insert four AoS 4x4 matrices
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeFrom4Aos_V( VmathMatrix4 mat0, VmathMatrix4 mat1, VmathMatrix4 mat2, VmathMatrix4 mat3 );
-
-/*
- * Extract four AoS 4x4 matrices
- */
-static inline void vmathSoaM4Get4Aos_V( VmathSoaMatrix4 mat, VmathMatrix4 *result0, VmathMatrix4 *result1, VmathMatrix4 *result2, VmathMatrix4 *result3 );
-
-/*
- * Set the upper-left 3x3 submatrix
- * NOTE: 
- * This function does not change the bottom row elements.
- */
-static inline void vmathSoaM4SetUpper3x3_V( VmathSoaMatrix4 *result, VmathSoaMatrix3 mat3 );
-
-/*
- * Get the upper-left 3x3 submatrix of a 4x4 matrix
- */
-static inline VmathSoaMatrix3 vmathSoaM4GetUpper3x3_V( VmathSoaMatrix4 mat );
-
-/*
- * Set translation component
- * NOTE: 
- * This function does not change the bottom row elements.
- */
-static inline void vmathSoaM4SetTranslation_V( VmathSoaMatrix4 *result, VmathSoaVector3 translateVec );
-
-/*
- * Get the translation component of a 4x4 matrix
- */
-static inline VmathSoaVector3 vmathSoaM4GetTranslation_V( VmathSoaMatrix4 mat );
-
-/*
- * Set column 0 of a 4x4 matrix
- */
-static inline void vmathSoaM4SetCol0_V( VmathSoaMatrix4 *result, VmathSoaVector4 col0 );
-
-/*
- * Set column 1 of a 4x4 matrix
- */
-static inline void vmathSoaM4SetCol1_V( VmathSoaMatrix4 *result, VmathSoaVector4 col1 );
-
-/*
- * Set column 2 of a 4x4 matrix
- */
-static inline void vmathSoaM4SetCol2_V( VmathSoaMatrix4 *result, VmathSoaVector4 col2 );
-
-/*
- * Set column 3 of a 4x4 matrix
- */
-static inline void vmathSoaM4SetCol3_V( VmathSoaMatrix4 *result, VmathSoaVector4 col3 );
-
-/*
- * Get column 0 of a 4x4 matrix
- */
-static inline VmathSoaVector4 vmathSoaM4GetCol0_V( VmathSoaMatrix4 mat );
-
-/*
- * Get column 1 of a 4x4 matrix
- */
-static inline VmathSoaVector4 vmathSoaM4GetCol1_V( VmathSoaMatrix4 mat );
-
-/*
- * Get column 2 of a 4x4 matrix
- */
-static inline VmathSoaVector4 vmathSoaM4GetCol2_V( VmathSoaMatrix4 mat );
-
-/*
- * Get column 3 of a 4x4 matrix
- */
-static inline VmathSoaVector4 vmathSoaM4GetCol3_V( VmathSoaMatrix4 mat );
-
-/*
- * Set the column of a 4x4 matrix referred to by the specified index
- */
-static inline void vmathSoaM4SetCol_V( VmathSoaMatrix4 *result, int col, VmathSoaVector4 vec );
-
-/*
- * Set the row of a 4x4 matrix referred to by the specified index
- */
-static inline void vmathSoaM4SetRow_V( VmathSoaMatrix4 *result, int row, VmathSoaVector4 vec );
-
-/*
- * Get the column of a 4x4 matrix referred to by the specified index
- */
-static inline VmathSoaVector4 vmathSoaM4GetCol_V( VmathSoaMatrix4 mat, int col );
-
-/*
- * Get the row of a 4x4 matrix referred to by the specified index
- */
-static inline VmathSoaVector4 vmathSoaM4GetRow_V( VmathSoaMatrix4 mat, int row );
-
-/*
- * Set the element of a 4x4 matrix referred to by column and row indices
- */
-static inline void vmathSoaM4SetElem_V( VmathSoaMatrix4 *result, int col, int row, vec_float4 val );
-
-/*
- * Get the element of a 4x4 matrix referred to by column and row indices
- */
-static inline vec_float4 vmathSoaM4GetElem_V( VmathSoaMatrix4 mat, int col, int row );
-
-/*
- * Add two 4x4 matrices
- */
-static inline VmathSoaMatrix4 vmathSoaM4Add_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1 );
-
-/*
- * Subtract a 4x4 matrix from another 4x4 matrix
- */
-static inline VmathSoaMatrix4 vmathSoaM4Sub_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1 );
-
-/*
- * Negate all elements of a 4x4 matrix
- */
-static inline VmathSoaMatrix4 vmathSoaM4Neg_V( VmathSoaMatrix4 mat );
-
-/*
- * Multiply a 4x4 matrix by a scalar
- */
-static inline VmathSoaMatrix4 vmathSoaM4ScalarMul_V( VmathSoaMatrix4 mat, vec_float4 scalar );
-
-/*
- * Multiply a 4x4 matrix by a 4-D vector
- */
-static inline VmathSoaVector4 vmathSoaM4MulV4_V( VmathSoaMatrix4 mat, VmathSoaVector4 vec );
-
-/*
- * Multiply a 4x4 matrix by a 3-D vector
- */
-static inline VmathSoaVector4 vmathSoaM4MulV3_V( VmathSoaMatrix4 mat, VmathSoaVector3 vec );
-
-/*
- * Multiply a 4x4 matrix by a 3-D point
- */
-static inline VmathSoaVector4 vmathSoaM4MulP3_V( VmathSoaMatrix4 mat, VmathSoaPoint3 pnt );
-
-/*
- * Multiply two 4x4 matrices
- */
-static inline VmathSoaMatrix4 vmathSoaM4Mul_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1 );
-
-/*
- * Multiply a 4x4 matrix by a 3x4 transformation matrix
- */
-static inline VmathSoaMatrix4 vmathSoaM4MulT3_V( VmathSoaMatrix4 mat, VmathSoaTransform3 tfrm );
-
-/*
- * Construct an identity 4x4 matrix
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeIdentity_V( );
-
-/*
- * Construct a 4x4 matrix to rotate around the x axis
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeRotationX_V( vec_float4 radians );
-
-/*
- * Construct a 4x4 matrix to rotate around the y axis
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeRotationY_V( vec_float4 radians );
-
-/*
- * Construct a 4x4 matrix to rotate around the z axis
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeRotationZ_V( vec_float4 radians );
-
-/*
- * Construct a 4x4 matrix to rotate around the x, y, and z axes
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeRotationZYX_V( VmathSoaVector3 radiansXYZ );
-
-/*
- * Construct a 4x4 matrix to rotate around a unit-length 3-D vector
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeRotationAxis_V( vec_float4 radians, VmathSoaVector3 unitVec );
-
-/*
- * Construct a rotation matrix from a unit-length quaternion
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeRotationQ_V( VmathSoaQuat unitQuat );
-
-/*
- * Construct a 4x4 matrix to perform scaling
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeScale_V( VmathSoaVector3 scaleVec );
-
-/*
- * Construct a 4x4 matrix to perform translation
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeTranslation_V( VmathSoaVector3 translateVec );
-
-/*
- * Construct viewing matrix based on eye position, position looked at, and up direction
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeLookAt_V( VmathSoaPoint3 eyePos, VmathSoaPoint3 lookAtPos, VmathSoaVector3 upVec );
-
-/*
- * Construct a perspective projection matrix
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakePerspective_V( vec_float4 fovyRadians, vec_float4 aspect, vec_float4 zNear, vec_float4 zFar );
-
-/*
- * Construct a perspective projection matrix based on frustum
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeFrustum_V( vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar );
-
-/*
- * Construct an orthographic projection matrix
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeOrthographic_V( vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar );
-
-/*
- * Append (post-multiply) a scale transformation to a 4x4 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline VmathSoaMatrix4 vmathSoaM4AppendScale_V( VmathSoaMatrix4 mat, VmathSoaVector3 scaleVec );
-
-/*
- * Prepend (pre-multiply) a scale transformation to a 4x4 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline VmathSoaMatrix4 vmathSoaM4PrependScale_V( VmathSoaVector3 scaleVec, VmathSoaMatrix4 mat );
-
-/*
- * Multiply two 4x4 matrices per element
- */
-static inline VmathSoaMatrix4 vmathSoaM4MulPerElem_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1 );
-
-/*
- * Compute the absolute value of a 4x4 matrix per element
- */
-static inline VmathSoaMatrix4 vmathSoaM4AbsPerElem_V( VmathSoaMatrix4 mat );
-
-/*
- * Transpose of a 4x4 matrix
- */
-static inline VmathSoaMatrix4 vmathSoaM4Transpose_V( VmathSoaMatrix4 mat );
-
-/*
- * Compute the inverse of a 4x4 matrix
- * NOTE: 
- * Result is unpredictable when the determinant of mat is equal to or near 0.
- */
-static inline VmathSoaMatrix4 vmathSoaM4Inverse_V( VmathSoaMatrix4 mat );
-
-/*
- * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix
- * NOTE: 
- * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.  The result is unpredictable when the determinant of mat is equal to or near 0.
- */
-static inline VmathSoaMatrix4 vmathSoaM4AffineInverse_V( VmathSoaMatrix4 mat );
-
-/*
- * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix
- * NOTE: 
- * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.
- */
-static inline VmathSoaMatrix4 vmathSoaM4OrthoInverse_V( VmathSoaMatrix4 mat );
-
-/*
- * Determinant of a 4x4 matrix
- */
-static inline vec_float4 vmathSoaM4Determinant_V( VmathSoaMatrix4 mat );
-
-/*
- * Conditionally select between two 4x4 matrices
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline VmathSoaMatrix4 vmathSoaM4Select_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1, vec_uint4 select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 4x4 matrix
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaM4Print_V( VmathSoaMatrix4 mat );
-
-/*
- * Print a 4x4 matrix and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaM4Prints_V( VmathSoaMatrix4 mat, const char *name );
-
-#endif
-
-/*
- * Construct a 3x4 transformation matrix containing the specified columns
- */
-static inline VmathSoaTransform3 vmathSoaT3MakeFromCols_V( VmathSoaVector3 col0, VmathSoaVector3 col1, VmathSoaVector3 col2, VmathSoaVector3 col3 );
-
-/*
- * Construct a 3x4 transformation matrix from a 3x3 matrix and a 3-D vector
- */
-static inline VmathSoaTransform3 vmathSoaT3MakeFromM3V3_V( VmathSoaMatrix3 tfrm, VmathSoaVector3 translateVec );
-
-/*
- * Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector
- */
-static inline VmathSoaTransform3 vmathSoaT3MakeFromQV3_V( VmathSoaQuat unitQuat, VmathSoaVector3 translateVec );
-
-/*
- * Set all elements of a 3x4 transformation matrix to the same scalar value
- */
-static inline VmathSoaTransform3 vmathSoaT3MakeFromScalar_V( vec_float4 scalar );
-
-/*
- * Replicate an AoS 3x4 transformation matrix
- */
-static inline VmathSoaTransform3 vmathSoaT3MakeFromAos_V( VmathTransform3 tfrm );
-
-/*
- * Insert four AoS 3x4 transformation matrices
- */
-static inline VmathSoaTransform3 vmathSoaT3MakeFrom4Aos_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1, VmathTransform3 tfrm2, VmathTransform3 tfrm3 );
-
-/*
- * Extract four AoS 3x4 transformation matrices
- */
-static inline void vmathSoaT3Get4Aos_V( VmathSoaTransform3 tfrm, VmathTransform3 *result0, VmathTransform3 *result1, VmathTransform3 *result2, VmathTransform3 *result3 );
-
-/*
- * Set the upper-left 3x3 submatrix
- */
-static inline void vmathSoaT3SetUpper3x3_V( VmathSoaTransform3 *result, VmathSoaMatrix3 mat3 );
-
-/*
- * Get the upper-left 3x3 submatrix of a 3x4 transformation matrix
- */
-static inline VmathSoaMatrix3 vmathSoaT3GetUpper3x3_V( VmathSoaTransform3 tfrm );
-
-/*
- * Set translation component
- */
-static inline void vmathSoaT3SetTranslation_V( VmathSoaTransform3 *result, VmathSoaVector3 translateVec );
-
-/*
- * Get the translation component of a 3x4 transformation matrix
- */
-static inline VmathSoaVector3 vmathSoaT3GetTranslation_V( VmathSoaTransform3 tfrm );
-
-/*
- * Set column 0 of a 3x4 transformation matrix
- */
-static inline void vmathSoaT3SetCol0_V( VmathSoaTransform3 *result, VmathSoaVector3 col0 );
-
-/*
- * Set column 1 of a 3x4 transformation matrix
- */
-static inline void vmathSoaT3SetCol1_V( VmathSoaTransform3 *result, VmathSoaVector3 col1 );
-
-/*
- * Set column 2 of a 3x4 transformation matrix
- */
-static inline void vmathSoaT3SetCol2_V( VmathSoaTransform3 *result, VmathSoaVector3 col2 );
-
-/*
- * Set column 3 of a 3x4 transformation matrix
- */
-static inline void vmathSoaT3SetCol3_V( VmathSoaTransform3 *result, VmathSoaVector3 col3 );
-
-/*
- * Get column 0 of a 3x4 transformation matrix
- */
-static inline VmathSoaVector3 vmathSoaT3GetCol0_V( VmathSoaTransform3 tfrm );
-
-/*
- * Get column 1 of a 3x4 transformation matrix
- */
-static inline VmathSoaVector3 vmathSoaT3GetCol1_V( VmathSoaTransform3 tfrm );
-
-/*
- * Get column 2 of a 3x4 transformation matrix
- */
-static inline VmathSoaVector3 vmathSoaT3GetCol2_V( VmathSoaTransform3 tfrm );
-
-/*
- * Get column 3 of a 3x4 transformation matrix
- */
-static inline VmathSoaVector3 vmathSoaT3GetCol3_V( VmathSoaTransform3 tfrm );
-
-/*
- * Set the column of a 3x4 transformation matrix referred to by the specified index
- */
-static inline void vmathSoaT3SetCol_V( VmathSoaTransform3 *result, int col, VmathSoaVector3 vec );
-
-/*
- * Set the row of a 3x4 transformation matrix referred to by the specified index
- */
-static inline void vmathSoaT3SetRow_V( VmathSoaTransform3 *result, int row, VmathSoaVector4 vec );
-
-/*
- * Get the column of a 3x4 transformation matrix referred to by the specified index
- */
-static inline VmathSoaVector3 vmathSoaT3GetCol_V( VmathSoaTransform3 tfrm, int col );
-
-/*
- * Get the row of a 3x4 transformation matrix referred to by the specified index
- */
-static inline VmathSoaVector4 vmathSoaT3GetRow_V( VmathSoaTransform3 tfrm, int row );
-
-/*
- * Set the element of a 3x4 transformation matrix referred to by column and row indices
- */
-static inline void vmathSoaT3SetElem_V( VmathSoaTransform3 *result, int col, int row, vec_float4 val );
-
-/*
- * Get the element of a 3x4 transformation matrix referred to by column and row indices
- */
-static inline vec_float4 vmathSoaT3GetElem_V( VmathSoaTransform3 tfrm, int col, int row );
-
-/*
- * Multiply a 3x4 transformation matrix by a 3-D vector
- */
-static inline VmathSoaVector3 vmathSoaT3MulV3_V( VmathSoaTransform3 tfrm, VmathSoaVector3 vec );
-
-/*
- * Multiply a 3x4 transformation matrix by a 3-D point
- */
-static inline VmathSoaPoint3 vmathSoaT3MulP3_V( VmathSoaTransform3 tfrm, VmathSoaPoint3 pnt );
-
-/*
- * Multiply two 3x4 transformation matrices
- */
-static inline VmathSoaTransform3 vmathSoaT3Mul_V( VmathSoaTransform3 tfrm0, VmathSoaTransform3 tfrm1 );
-
-/*
- * Construct an identity 3x4 transformation matrix
- */
-static inline VmathSoaTransform3 vmathSoaT3MakeIdentity_V( );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the x axis
- */
-static inline VmathSoaTransform3 vmathSoaT3MakeRotationX_V( vec_float4 radians );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the y axis
- */
-static inline VmathSoaTransform3 vmathSoaT3MakeRotationY_V( vec_float4 radians );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the z axis
- */
-static inline VmathSoaTransform3 vmathSoaT3MakeRotationZ_V( vec_float4 radians );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the x, y, and z axes
- */
-static inline VmathSoaTransform3 vmathSoaT3MakeRotationZYX_V( VmathSoaVector3 radiansXYZ );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector
- */
-static inline VmathSoaTransform3 vmathSoaT3MakeRotationAxis_V( vec_float4 radians, VmathSoaVector3 unitVec );
-
-/*
- * Construct a rotation matrix from a unit-length quaternion
- */
-static inline VmathSoaTransform3 vmathSoaT3MakeRotationQ_V( VmathSoaQuat unitQuat );
-
-/*
- * Construct a 3x4 transformation matrix to perform scaling
- */
-static inline VmathSoaTransform3 vmathSoaT3MakeScale_V( VmathSoaVector3 scaleVec );
-
-/*
- * Construct a 3x4 transformation matrix to perform translation
- */
-static inline VmathSoaTransform3 vmathSoaT3MakeTranslation_V( VmathSoaVector3 translateVec );
-
-/*
- * Append (post-multiply) a scale transformation to a 3x4 transformation matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline VmathSoaTransform3 vmathSoaT3AppendScale_V( VmathSoaTransform3 tfrm, VmathSoaVector3 scaleVec );
-
-/*
- * Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline VmathSoaTransform3 vmathSoaT3PrependScale_V( VmathSoaVector3 scaleVec, VmathSoaTransform3 tfrm );
-
-/*
- * Multiply two 3x4 transformation matrices per element
- */
-static inline VmathSoaTransform3 vmathSoaT3MulPerElem_V( VmathSoaTransform3 tfrm0, VmathSoaTransform3 tfrm1 );
-
-/*
- * Compute the absolute value of a 3x4 transformation matrix per element
- */
-static inline VmathSoaTransform3 vmathSoaT3AbsPerElem_V( VmathSoaTransform3 tfrm );
-
-/*
- * Inverse of a 3x4 transformation matrix
- * NOTE: 
- * Result is unpredictable when the determinant of the left 3x3 submatrix is equal to or near 0.
- */
-static inline VmathSoaTransform3 vmathSoaT3Inverse_V( VmathSoaTransform3 tfrm );
-
-/*
- * Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix
- * NOTE: 
- * This can be used to achieve better performance than a general inverse when the specified 3x4 transformation matrix meets the given restrictions.
- */
-static inline VmathSoaTransform3 vmathSoaT3OrthoInverse_V( VmathSoaTransform3 tfrm );
-
-/*
- * Conditionally select between two 3x4 transformation matrices
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline VmathSoaTransform3 vmathSoaT3Select_V( VmathSoaTransform3 tfrm0, VmathSoaTransform3 tfrm1, vec_uint4 select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3x4 transformation matrix
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaT3Print_V( VmathSoaTransform3 tfrm );
-
-/*
- * Print a 3x4 transformation matrix and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaT3Prints_V( VmathSoaTransform3 tfrm, const char *name );
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#include "vectormath_soa.h"
-#include "vec_soa_v.h"
-#include "quat_soa_v.h"
-#include "mat_soa_v.h"
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_SOA_C_V_PPU_H
+#define _VECTORMATH_SOA_C_V_PPU_H
+
+#include <math.h>
+#include <altivec.h>
+#include "vectormath_aos_v.h"
+
+#ifdef _VECTORMATH_DEBUG
+#include <stdio.h>  /* pulled in for debug builds only; the *Print_V / *Prints_V declarations use the same guard */
+#endif /* _VECTORMATH_DEBUG */
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+#ifndef _VECTORMATH_SOA_C_TYPES_H
+#define _VECTORMATH_SOA_C_TYPES_H
+
+/* A set of four 3-D vectors in structure-of-arrays format: each member
+ * gathers one component for the whole set, instead of one member per vector. */
+typedef struct _VmathSoaVector3
+{
+    vec_float4 x;  /* x components of the set (presumably one SIMD lane per vector) */
+    vec_float4 y;  /* y components of the set */
+    vec_float4 z;  /* z components of the set */
+} VmathSoaVector3;
+
+/* A set of four 4-D vectors in structure-of-arrays format: each member
+ * gathers one component for the whole set, instead of one member per vector. */
+typedef struct _VmathSoaVector4
+{
+    vec_float4 x;  /* x components of the set (presumably one SIMD lane per vector) */
+    vec_float4 y;  /* y components of the set */
+    vec_float4 z;  /* z components of the set */
+    vec_float4 w;  /* w components of the set */
+} VmathSoaVector4;
+
+/* A set of four 3-D points in structure-of-arrays format.  The members mirror
+ * VmathSoaVector3, but points are kept as a distinct type from vectors. */
+typedef struct _VmathSoaPoint3
+{
+    vec_float4 x;  /* x coordinates of the set (presumably one SIMD lane per point) */
+    vec_float4 y;  /* y coordinates of the set */
+    vec_float4 z;  /* z coordinates of the set */
+} VmathSoaPoint3;
+
+/* A set of four quaternions in structure-of-arrays format: each member
+ * gathers one component for the whole set, instead of one member per quaternion. */
+typedef struct _VmathSoaQuat
+{
+    vec_float4 x;  /* x components of the set (presumably one SIMD lane per quaternion) */
+    vec_float4 y;  /* y components of the set */
+    vec_float4 z;  /* z components of the set */
+    vec_float4 w;  /* w components of the set; NOTE(review): presumably the scalar part - confirm in quat_soa_v.h */
+} VmathSoaQuat;
+
+/* A set of four 3x3 matrices in structure-of-arrays format, stored as three
+ * columns; each column is itself an SoA 3-D vector covering the whole set. */
+typedef struct _VmathSoaMatrix3
+{
+    VmathSoaVector3 col0;  /* column 0 of every matrix in the set */
+    VmathSoaVector3 col1;  /* column 1 of every matrix in the set */
+    VmathSoaVector3 col2;  /* column 2 of every matrix in the set */
+} VmathSoaMatrix3;
+
+/* A set of four 4x4 matrices in structure-of-arrays format, stored as four
+ * columns; each column is itself an SoA 4-D vector covering the whole set. */
+typedef struct _VmathSoaMatrix4
+{
+    VmathSoaVector4 col0;  /* column 0 of every matrix in the set */
+    VmathSoaVector4 col1;  /* column 1 of every matrix in the set */
+    VmathSoaVector4 col2;  /* column 2 of every matrix in the set */
+    VmathSoaVector4 col3;  /* column 3 of every matrix in the set */
+} VmathSoaMatrix4;
+
+/* A set of four 3x4 transformation matrices in structure-of-arrays format,
+ * stored as four 3-element columns; each column is an SoA 3-D vector. */
+typedef struct _VmathSoaTransform3
+{
+    VmathSoaVector3 col0;  /* column 0 of every transform in the set */
+    VmathSoaVector3 col1;  /* column 1 of every transform in the set */
+    VmathSoaVector3 col2;  /* column 2 of every transform in the set */
+    VmathSoaVector3 col3;  /* column 3; NOTE(review): presumably the translation part - confirm in mat_soa_v.h */
+} VmathSoaTransform3;
+
+#endif /* _VECTORMATH_SOA_C_TYPES_H */
+
+/*
+ * Construct a 3-D vector from x, y, and z elements
+ */
+static inline VmathSoaVector3 vmathSoaV3MakeFromElems_V( vec_float4 x, vec_float4 y, vec_float4 z );
+
+/*
+ * Copy elements from a 3-D point into a 3-D vector
+ */
+static inline VmathSoaVector3 vmathSoaV3MakeFromP3_V( VmathSoaPoint3 pnt );
+
+/*
+ * Set all elements of a 3-D vector to the same scalar value
+ */
+static inline VmathSoaVector3 vmathSoaV3MakeFromScalar_V( vec_float4 scalar );
+
+/*
+ * Replicate an AoS 3-D vector
+ */
+static inline VmathSoaVector3 vmathSoaV3MakeFromAos_V( VmathVector3 vec );
+
+/*
+ * Insert four AoS 3-D vectors
+ */
+static inline VmathSoaVector3 vmathSoaV3MakeFrom4Aos_V( VmathVector3 vec0, VmathVector3 vec1, VmathVector3 vec2, VmathVector3 vec3 );
+
+/*
+ * Extract four AoS 3-D vectors
+ */
+static inline void vmathSoaV3Get4Aos_V( VmathSoaVector3 vec, VmathVector3 *result0, VmathVector3 *result1, VmathVector3 *result2, VmathVector3 *result3 );
+
+/*
+ * Set the x element of a 3-D vector
+ */
+static inline void vmathSoaV3SetX_V( VmathSoaVector3 *result, vec_float4 x );
+
+/*
+ * Set the y element of a 3-D vector
+ */
+static inline void vmathSoaV3SetY_V( VmathSoaVector3 *result, vec_float4 y );
+
+/*
+ * Set the z element of a 3-D vector
+ */
+static inline void vmathSoaV3SetZ_V( VmathSoaVector3 *result, vec_float4 z );
+
+/*
+ * Get the x element of a 3-D vector
+ */
+static inline vec_float4 vmathSoaV3GetX_V( VmathSoaVector3 vec );
+
+/*
+ * Get the y element of a 3-D vector
+ */
+static inline vec_float4 vmathSoaV3GetY_V( VmathSoaVector3 vec );
+
+/*
+ * Get the z element of a 3-D vector
+ */
+static inline vec_float4 vmathSoaV3GetZ_V( VmathSoaVector3 vec );
+
+/*
+ * Set an x, y, or z element of a 3-D vector by index
+ */
+static inline void vmathSoaV3SetElem_V( VmathSoaVector3 *result, int idx, vec_float4 value );
+
+/*
+ * Get an x, y, or z element of a 3-D vector by index
+ */
+static inline vec_float4 vmathSoaV3GetElem_V( VmathSoaVector3 vec, int idx );
+
+/*
+ * Add two 3-D vectors
+ */
+static inline VmathSoaVector3 vmathSoaV3Add_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
+
+/*
+ * Subtract a 3-D vector from another 3-D vector
+ */
+static inline VmathSoaVector3 vmathSoaV3Sub_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
+
+/*
+ * Add a 3-D vector to a 3-D point
+ */
+static inline VmathSoaPoint3 vmathSoaV3AddP3_V( VmathSoaVector3 vec, VmathSoaPoint3 pnt );
+
+/*
+ * Multiply a 3-D vector by a scalar
+ */
+static inline VmathSoaVector3 vmathSoaV3ScalarMul_V( VmathSoaVector3 vec, vec_float4 scalar );
+
+/*
+ * Divide a 3-D vector by a scalar
+ */
+static inline VmathSoaVector3 vmathSoaV3ScalarDiv_V( VmathSoaVector3 vec, vec_float4 scalar );
+
+/*
+ * Negate all elements of a 3-D vector
+ */
+static inline VmathSoaVector3 vmathSoaV3Neg_V( VmathSoaVector3 vec );
+
+/*
+ * Construct x axis (takes no arguments; declared with void so C sees a real prototype)
+ */
+static inline VmathSoaVector3 vmathSoaV3MakeXAxis_V( void );
+
+/*
+ * Construct y axis (takes no arguments; declared with void so C sees a real prototype)
+ */
+static inline VmathSoaVector3 vmathSoaV3MakeYAxis_V( void );
+
+/*
+ * Construct z axis (takes no arguments; declared with void so C sees a real prototype)
+ */
+static inline VmathSoaVector3 vmathSoaV3MakeZAxis_V( void );
+
+/*
+ * Multiply two 3-D vectors per element
+ */
+static inline VmathSoaVector3 vmathSoaV3MulPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
+
+/*
+ * Divide two 3-D vectors per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function divf4.
+ */
+static inline VmathSoaVector3 vmathSoaV3DivPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
+
+/*
+ * Compute the reciprocal of a 3-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function recipf4.
+ */
+static inline VmathSoaVector3 vmathSoaV3RecipPerElem_V( VmathSoaVector3 vec );
+
+/*
+ * Compute the square root of a 3-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function sqrtf4.
+ */
+static inline VmathSoaVector3 vmathSoaV3SqrtPerElem_V( VmathSoaVector3 vec );
+
+/*
+ * Compute the reciprocal square root of a 3-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function rsqrtf4.
+ */
+static inline VmathSoaVector3 vmathSoaV3RsqrtPerElem_V( VmathSoaVector3 vec );
+
+/*
+ * Compute the absolute value of a 3-D vector per element
+ */
+static inline VmathSoaVector3 vmathSoaV3AbsPerElem_V( VmathSoaVector3 vec );
+
+/*
+ * Copy sign from one 3-D vector to another, per element
+ */
+static inline VmathSoaVector3 vmathSoaV3CopySignPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
+
+/*
+ * Maximum of two 3-D vectors per element
+ */
+static inline VmathSoaVector3 vmathSoaV3MaxPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
+
+/*
+ * Minimum of two 3-D vectors per element
+ */
+static inline VmathSoaVector3 vmathSoaV3MinPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
+
+/*
+ * Maximum element of a 3-D vector
+ */
+static inline vec_float4 vmathSoaV3MaxElem_V( VmathSoaVector3 vec );
+
+/*
+ * Minimum element of a 3-D vector
+ */
+static inline vec_float4 vmathSoaV3MinElem_V( VmathSoaVector3 vec );
+
+/*
+ * Compute the sum of all elements of a 3-D vector
+ */
+static inline vec_float4 vmathSoaV3Sum_V( VmathSoaVector3 vec );
+
+/*
+ * Compute the dot product of two 3-D vectors
+ */
+static inline vec_float4 vmathSoaV3Dot_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
+
+/*
+ * Compute the square of the length of a 3-D vector
+ */
+static inline vec_float4 vmathSoaV3LengthSqr_V( VmathSoaVector3 vec );
+
+/*
+ * Compute the length of a 3-D vector
+ */
+static inline vec_float4 vmathSoaV3Length_V( VmathSoaVector3 vec );
+
+/*
+ * Normalize a 3-D vector
+ * NOTE: 
+ * The result is unpredictable when all elements of vec are at or near zero.
+ */
+static inline VmathSoaVector3 vmathSoaV3Normalize_V( VmathSoaVector3 vec );
+
+/*
+ * Compute cross product of two 3-D vectors
+ */
+static inline VmathSoaVector3 vmathSoaV3Cross_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
+
+/*
+ * Outer product of two 3-D vectors
+ */
+static inline VmathSoaMatrix3 vmathSoaV3Outer_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
+
+/*
+ * Pre-multiply a row vector by a 3x3 matrix
+ */
+static inline VmathSoaVector3 vmathSoaV3RowMul_V( VmathSoaVector3 vec, VmathSoaMatrix3 mat );
+
+/*
+ * Cross-product matrix of a 3-D vector
+ */
+static inline VmathSoaMatrix3 vmathSoaV3CrossMatrix_V( VmathSoaVector3 vec );
+
+/*
+ * Create cross-product matrix and multiply
+ * NOTE: 
+ * Faster than separately creating a cross-product matrix and multiplying.
+ */
+static inline VmathSoaMatrix3 vmathSoaV3CrossMatrixMul_V( VmathSoaVector3 vec, VmathSoaMatrix3 mat );
+
+/*
+ * Linear interpolation between two 3-D vectors
+ * NOTE: 
+ * Does not clamp t between 0 and 1.
+ */
+static inline VmathSoaVector3 vmathSoaV3Lerp_V( vec_float4 t, VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
+
+/*
+ * Spherical linear interpolation between two 3-D vectors
+ * NOTE: 
+ * The result is unpredictable if the vectors point in opposite directions.
+ * Does not clamp t between 0 and 1.
+ */
+static inline VmathSoaVector3 vmathSoaV3Slerp_V( vec_float4 t, VmathSoaVector3 unitVec0, VmathSoaVector3 unitVec1 );
+
+/*
+ * Conditionally select between two 3-D vectors
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline VmathSoaVector3 vmathSoaV3Select_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1, vec_uint4 select1 );
+
+/*
+ * Load four three-float 3-D vectors, stored in three quadwords
+ */
+static inline void vmathSoaV3LoadXYZArray_V( VmathSoaVector3 *vec, const vec_float4 *threeQuads );
+
+/*
+ * Store four slots of an SoA 3-D vector in three quadwords
+ */
+static inline void vmathSoaV3StoreXYZArray_V( VmathSoaVector3 vec, vec_float4 *threeQuads );
+
+/*
+ * Store eight slots of two SoA 3-D vectors as half-floats
+ */
+static inline void vmathSoaV3StoreHalfFloats_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1, vec_ushort8 *threeQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3-D vector
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaV3Print_V( VmathSoaVector3 vec );
+
+/*
+ * Print a 3-D vector and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaV3Prints_V( VmathSoaVector3 vec, const char *name );
+
+#endif /* _VECTORMATH_DEBUG */
+
+/*
+ * Construct a 4-D vector from x, y, z, and w elements
+ */
+static inline VmathSoaVector4 vmathSoaV4MakeFromElems_V( vec_float4 x, vec_float4 y, vec_float4 z, vec_float4 w );
+
+/*
+ * Construct a 4-D vector from a 3-D vector and a scalar
+ */
+static inline VmathSoaVector4 vmathSoaV4MakeFromV3Scalar_V( VmathSoaVector3 xyz, vec_float4 w );
+
+/*
+ * Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0
+ */
+static inline VmathSoaVector4 vmathSoaV4MakeFromV3_V( VmathSoaVector3 vec );
+
+/*
+ * Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1
+ */
+static inline VmathSoaVector4 vmathSoaV4MakeFromP3_V( VmathSoaPoint3 pnt );
+
+/*
+ * Copy elements from a quaternion into a 4-D vector
+ */
+static inline VmathSoaVector4 vmathSoaV4MakeFromQ_V( VmathSoaQuat quat );
+
+/*
+ * Set all elements of a 4-D vector to the same scalar value
+ */
+static inline VmathSoaVector4 vmathSoaV4MakeFromScalar_V( vec_float4 scalar );
+
+/*
+ * Replicate an AoS 4-D vector
+ */
+static inline VmathSoaVector4 vmathSoaV4MakeFromAos_V( VmathVector4 vec );
+
+/*
+ * Insert four AoS 4-D vectors
+ */
+static inline VmathSoaVector4 vmathSoaV4MakeFrom4Aos_V( VmathVector4 vec0, VmathVector4 vec1, VmathVector4 vec2, VmathVector4 vec3 );
+
+/*
+ * Extract four AoS 4-D vectors
+ */
+static inline void vmathSoaV4Get4Aos_V( VmathSoaVector4 vec, VmathVector4 *result0, VmathVector4 *result1, VmathVector4 *result2, VmathVector4 *result3 );
+
+/*
+ * Set the x, y, and z elements of a 4-D vector
+ * NOTE: 
+ * This function does not change the w element.
+ */
+static inline void vmathSoaV4SetXYZ_V( VmathSoaVector4 *result, VmathSoaVector3 vec );
+
+/*
+ * Get the x, y, and z elements of a 4-D vector
+ */
+static inline VmathSoaVector3 vmathSoaV4GetXYZ_V( VmathSoaVector4 vec );
+
+/*
+ * Set the x element of a 4-D vector
+ */
+static inline void vmathSoaV4SetX_V( VmathSoaVector4 *result, vec_float4 x );
+
+/*
+ * Set the y element of a 4-D vector
+ */
+static inline void vmathSoaV4SetY_V( VmathSoaVector4 *result, vec_float4 y );
+
+/*
+ * Set the z element of a 4-D vector
+ */
+static inline void vmathSoaV4SetZ_V( VmathSoaVector4 *result, vec_float4 z );
+
+/*
+ * Set the w element of a 4-D vector
+ */
+static inline void vmathSoaV4SetW_V( VmathSoaVector4 *result, vec_float4 w );
+
+/*
+ * Get the x element of a 4-D vector
+ */
+static inline vec_float4 vmathSoaV4GetX_V( VmathSoaVector4 vec );
+
+/*
+ * Get the y element of a 4-D vector
+ */
+static inline vec_float4 vmathSoaV4GetY_V( VmathSoaVector4 vec );
+
+/*
+ * Get the z element of a 4-D vector
+ */
+static inline vec_float4 vmathSoaV4GetZ_V( VmathSoaVector4 vec );
+
+/*
+ * Get the w element of a 4-D vector
+ */
+static inline vec_float4 vmathSoaV4GetW_V( VmathSoaVector4 vec );
+
+/*
+ * Set an x, y, z, or w element of a 4-D vector by index
+ */
+static inline void vmathSoaV4SetElem_V( VmathSoaVector4 *result, int idx, vec_float4 value );
+
+/*
+ * Get an x, y, z, or w element of a 4-D vector by index
+ */
+static inline vec_float4 vmathSoaV4GetElem_V( VmathSoaVector4 vec, int idx );
+
+/*
+ * Add two 4-D vectors
+ */
+static inline VmathSoaVector4 vmathSoaV4Add_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
+
+/*
+ * Subtract a 4-D vector from another 4-D vector
+ */
+static inline VmathSoaVector4 vmathSoaV4Sub_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
+
+/*
+ * Multiply a 4-D vector by a scalar
+ */
+static inline VmathSoaVector4 vmathSoaV4ScalarMul_V( VmathSoaVector4 vec, vec_float4 scalar );
+
+/*
+ * Divide a 4-D vector by a scalar
+ */
+static inline VmathSoaVector4 vmathSoaV4ScalarDiv_V( VmathSoaVector4 vec, vec_float4 scalar );
+
+/*
+ * Negate all elements of a 4-D vector
+ */
+static inline VmathSoaVector4 vmathSoaV4Neg_V( VmathSoaVector4 vec );
+
+/*
+ * Construct x axis (takes no arguments; declared with void so C sees a real prototype)
+ */
+static inline VmathSoaVector4 vmathSoaV4MakeXAxis_V( void );
+
+/*
+ * Construct y axis (takes no arguments; declared with void so C sees a real prototype)
+ */
+static inline VmathSoaVector4 vmathSoaV4MakeYAxis_V( void );
+
+/*
+ * Construct z axis (takes no arguments; declared with void so C sees a real prototype)
+ */
+static inline VmathSoaVector4 vmathSoaV4MakeZAxis_V( void );
+
+/*
+ * Construct w axis (takes no arguments; declared with void so C sees a real prototype)
+ */
+static inline VmathSoaVector4 vmathSoaV4MakeWAxis_V( void );
+
+/*
+ * Multiply two 4-D vectors per element
+ */
+static inline VmathSoaVector4 vmathSoaV4MulPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
+
+/*
+ * Divide two 4-D vectors per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function divf4.
+ */
+static inline VmathSoaVector4 vmathSoaV4DivPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
+
+/*
+ * Compute the reciprocal of a 4-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function recipf4.
+ */
+static inline VmathSoaVector4 vmathSoaV4RecipPerElem_V( VmathSoaVector4 vec );
+
+/*
+ * Compute the square root of a 4-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function sqrtf4.
+ */
+static inline VmathSoaVector4 vmathSoaV4SqrtPerElem_V( VmathSoaVector4 vec );
+
+/*
+ * Compute the reciprocal square root of a 4-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function rsqrtf4.
+ */
+static inline VmathSoaVector4 vmathSoaV4RsqrtPerElem_V( VmathSoaVector4 vec );
+
+/*
+ * Compute the absolute value of a 4-D vector per element
+ */
+static inline VmathSoaVector4 vmathSoaV4AbsPerElem_V( VmathSoaVector4 vec );
+
+/*
+ * Copy sign from one 4-D vector to another, per element
+ */
+static inline VmathSoaVector4 vmathSoaV4CopySignPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
+
+/*
+ * Maximum of two 4-D vectors per element
+ */
+static inline VmathSoaVector4 vmathSoaV4MaxPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
+
+/*
+ * Minimum of two 4-D vectors per element
+ */
+static inline VmathSoaVector4 vmathSoaV4MinPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
+
+/*
+ * Maximum element of a 4-D vector
+ */
+static inline vec_float4 vmathSoaV4MaxElem_V( VmathSoaVector4 vec );
+
+/*
+ * Minimum element of a 4-D vector
+ */
+static inline vec_float4 vmathSoaV4MinElem_V( VmathSoaVector4 vec );
+
+/*
+ * Compute the sum of all elements of a 4-D vector
+ */
+static inline vec_float4 vmathSoaV4Sum_V( VmathSoaVector4 vec );
+
+/*
+ * Compute the dot product of two 4-D vectors
+ */
+static inline vec_float4 vmathSoaV4Dot_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
+
+/*
+ * Compute the square of the length of a 4-D vector
+ */
+static inline vec_float4 vmathSoaV4LengthSqr_V( VmathSoaVector4 vec );
+
+/*
+ * Compute the length of a 4-D vector
+ */
+static inline vec_float4 vmathSoaV4Length_V( VmathSoaVector4 vec );
+
+/*
+ * Normalize a 4-D vector
+ * NOTE: 
+ * The result is unpredictable when all elements of vec are at or near zero.
+ */
+static inline VmathSoaVector4 vmathSoaV4Normalize_V( VmathSoaVector4 vec );
+
+/*
+ * Outer product of two 4-D vectors
+ */
+static inline VmathSoaMatrix4 vmathSoaV4Outer_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
+
+/*
+ * Linear interpolation between two 4-D vectors
+ * NOTE: 
+ * Does not clamp t between 0 and 1.
+ */
+static inline VmathSoaVector4 vmathSoaV4Lerp_V( vec_float4 t, VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
+
+/*
+ * Spherical linear interpolation between two 4-D vectors
+ * NOTE: 
+ * The result is unpredictable if the vectors point in opposite directions.
+ * Does not clamp t between 0 and 1.
+ */
+static inline VmathSoaVector4 vmathSoaV4Slerp_V( vec_float4 t, VmathSoaVector4 unitVec0, VmathSoaVector4 unitVec1 );
+
+/*
+ * Conditionally select between two 4-D vectors
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline VmathSoaVector4 vmathSoaV4Select_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1, vec_uint4 select1 );
+
+/*
+ * Store four slots of an SoA 4-D vector as half-floats
+ */
+static inline void vmathSoaV4StoreHalfFloats_V( VmathSoaVector4 vec, vec_ushort8 *twoQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 4-D vector
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaV4Print_V( VmathSoaVector4 vec );
+
+/*
+ * Print a 4-D vector and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaV4Prints_V( VmathSoaVector4 vec, const char *name );
+
+#endif /* _VECTORMATH_DEBUG */
+
+/*
+ * Construct a 3-D point from x, y, and z elements
+ */
+static inline VmathSoaPoint3 vmathSoaP3MakeFromElems_V( vec_float4 x, vec_float4 y, vec_float4 z );
+
+/*
+ * Copy elements from a 3-D vector into a 3-D point
+ */
+static inline VmathSoaPoint3 vmathSoaP3MakeFromV3_V( VmathSoaVector3 vec );
+
+/*
+ * Set all elements of a 3-D point to the same scalar value
+ */
+static inline VmathSoaPoint3 vmathSoaP3MakeFromScalar_V( vec_float4 scalar );
+
+/*
+ * Replicate an AoS 3-D point
+ */
+static inline VmathSoaPoint3 vmathSoaP3MakeFromAos_V( VmathPoint3 pnt );
+
+/*
+ * Insert four AoS 3-D points
+ */
+static inline VmathSoaPoint3 vmathSoaP3MakeFrom4Aos_V( VmathPoint3 pnt0, VmathPoint3 pnt1, VmathPoint3 pnt2, VmathPoint3 pnt3 );
+
+/*
+ * Extract four AoS 3-D points
+ */
+static inline void vmathSoaP3Get4Aos_V( VmathSoaPoint3 pnt, VmathPoint3 *result0, VmathPoint3 *result1, VmathPoint3 *result2, VmathPoint3 *result3 );
+
+/*
+ * Set the x element of a 3-D point
+ */
+static inline void vmathSoaP3SetX_V( VmathSoaPoint3 *result, vec_float4 x );
+
+/*
+ * Set the y element of a 3-D point
+ */
+static inline void vmathSoaP3SetY_V( VmathSoaPoint3 *result, vec_float4 y );
+
+/*
+ * Set the z element of a 3-D point
+ */
+static inline void vmathSoaP3SetZ_V( VmathSoaPoint3 *result, vec_float4 z );
+
+/*
+ * Get the x element of a 3-D point
+ */
+static inline vec_float4 vmathSoaP3GetX_V( VmathSoaPoint3 pnt );
+
+/*
+ * Get the y element of a 3-D point
+ */
+static inline vec_float4 vmathSoaP3GetY_V( VmathSoaPoint3 pnt );
+
+/*
+ * Get the z element of a 3-D point
+ */
+static inline vec_float4 vmathSoaP3GetZ_V( VmathSoaPoint3 pnt );
+
+/*
+ * Set an x, y, or z element of a 3-D point by index
+ */
+static inline void vmathSoaP3SetElem_V( VmathSoaPoint3 *result, int idx, vec_float4 value );
+
+/*
+ * Get an x, y, or z element of a 3-D point by index
+ */
+static inline vec_float4 vmathSoaP3GetElem_V( VmathSoaPoint3 pnt, int idx );
+
+/*
+ * Subtract a 3-D point from another 3-D point
+ */
+static inline VmathSoaVector3 vmathSoaP3Sub_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 );
+
+/*
+ * Add a 3-D vector to a 3-D point
+ */
+static inline VmathSoaPoint3 vmathSoaP3AddV3_V( VmathSoaPoint3 pnt, VmathSoaVector3 vec );
+
+/*
+ * Subtract a 3-D vector from a 3-D point
+ */
+static inline VmathSoaPoint3 vmathSoaP3SubV3_V( VmathSoaPoint3 pnt, VmathSoaVector3 vec );
+
+/*
+ * Multiply two 3-D points per element
+ */
+static inline VmathSoaPoint3 vmathSoaP3MulPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 );
+
+/*
+ * Divide two 3-D points per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function divf4.
+ */
+static inline VmathSoaPoint3 vmathSoaP3DivPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 );
+
+/*
+ * Compute the reciprocal of a 3-D point per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function recipf4.
+ */
+static inline VmathSoaPoint3 vmathSoaP3RecipPerElem_V( VmathSoaPoint3 pnt );
+
+/*
+ * Compute the square root of a 3-D point per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function sqrtf4.
+ */
+static inline VmathSoaPoint3 vmathSoaP3SqrtPerElem_V( VmathSoaPoint3 pnt );
+
+/*
+ * Compute the reciprocal square root of a 3-D point per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function rsqrtf4.
+ */
+static inline VmathSoaPoint3 vmathSoaP3RsqrtPerElem_V( VmathSoaPoint3 pnt );
+
+/*
+ * Compute the absolute value of a 3-D point per element
+ */
+static inline VmathSoaPoint3 vmathSoaP3AbsPerElem_V( VmathSoaPoint3 pnt );
+
+/*
+ * Copy sign from one 3-D point to another, per element
+ */
+static inline VmathSoaPoint3 vmathSoaP3CopySignPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 );
+
+/*
+ * Maximum of two 3-D points per element
+ */
+static inline VmathSoaPoint3 vmathSoaP3MaxPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 );
+
+/*
+ * Minimum of two 3-D points per element
+ */
+static inline VmathSoaPoint3 vmathSoaP3MinPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 );
+
+/*
+ * Maximum element of a 3-D point
+ */
+static inline vec_float4 vmathSoaP3MaxElem_V( VmathSoaPoint3 pnt );
+
+/*
+ * Minimum element of a 3-D point
+ */
+static inline vec_float4 vmathSoaP3MinElem_V( VmathSoaPoint3 pnt );
+
+/*
+ * Compute the sum of all elements of a 3-D point
+ */
+static inline vec_float4 vmathSoaP3Sum_V( VmathSoaPoint3 pnt );
+
+/*
+ * Apply uniform scale to a 3-D point
+ */
+static inline VmathSoaPoint3 vmathSoaP3Scale_V( VmathSoaPoint3 pnt, vec_float4 scaleVal );
+
+/*
+ * Apply non-uniform scale to a 3-D point
+ */
+static inline VmathSoaPoint3 vmathSoaP3NonUniformScale_V( VmathSoaPoint3 pnt, VmathSoaVector3 scaleVec );
+
+/*
+ * Scalar projection of a 3-D point on a unit-length 3-D vector
+ */
+static inline vec_float4 vmathSoaP3Projection_V( VmathSoaPoint3 pnt, VmathSoaVector3 unitVec );
+
+/*
+ * Compute the square of the distance of a 3-D point from the coordinate-system origin
+ */
+static inline vec_float4 vmathSoaP3DistSqrFromOrigin_V( VmathSoaPoint3 pnt );
+
+/*
+ * Compute the distance of a 3-D point from the coordinate-system origin
+ */
+static inline vec_float4 vmathSoaP3DistFromOrigin_V( VmathSoaPoint3 pnt );
+
+/*
+ * Compute the square of the distance between two 3-D points
+ */
+static inline vec_float4 vmathSoaP3DistSqr_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 );
+
+/*
+ * Compute the distance between two 3-D points
+ */
+static inline vec_float4 vmathSoaP3Dist_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 );
+
+/*
+ * Linear interpolation between two 3-D points
+ * NOTE: 
+ * Does not clamp t between 0 and 1.
+ */
+static inline VmathSoaPoint3 vmathSoaP3Lerp_V( vec_float4 t, VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 );
+
+/*
+ * Conditionally select between two 3-D points
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline VmathSoaPoint3 vmathSoaP3Select_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1, vec_uint4 select1 );
+
+/*
+ * Load four three-float 3-D points, stored in three quadwords
+ */
+static inline void vmathSoaP3LoadXYZArray_V( VmathSoaPoint3 *pnt, const vec_float4 *threeQuads );
+
+/*
+ * Store four slots of an SoA 3-D point in three quadwords
+ */
+static inline void vmathSoaP3StoreXYZArray_V( VmathSoaPoint3 pnt, vec_float4 *threeQuads );
+
+/*
+ * Store eight slots of two SoA 3-D points as half-floats
+ */
+static inline void vmathSoaP3StoreHalfFloats_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1, vec_ushort8 *threeQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3-D point
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaP3Print_V( VmathSoaPoint3 pnt );
+
+/*
+ * Print a 3-D point and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaP3Prints_V( VmathSoaPoint3 pnt, const char *name );
+
+#endif
+
+/*
+ * Construct a quaternion from x, y, z, and w elements
+ */
+static inline VmathSoaQuat vmathSoaQMakeFromElems_V( vec_float4 x, vec_float4 y, vec_float4 z, vec_float4 w );
+
+/*
+ * Construct a quaternion from a 3-D vector and a scalar
+ */
+static inline VmathSoaQuat vmathSoaQMakeFromV3Scalar_V( VmathSoaVector3 xyz, vec_float4 w );
+
+/*
+ * Copy elements from a 4-D vector into a quaternion
+ */
+static inline VmathSoaQuat vmathSoaQMakeFromV4_V( VmathSoaVector4 vec );
+
+/*
+ * Convert a rotation matrix to a unit-length quaternion
+ */
+static inline VmathSoaQuat vmathSoaQMakeFromM3_V( VmathSoaMatrix3 rotMat );
+
+/*
+ * Set all elements of a quaternion to the same scalar value
+ */
+static inline VmathSoaQuat vmathSoaQMakeFromScalar_V( vec_float4 scalar );
+
+/*
+ * Replicate an AoS quaternion
+ */
+static inline VmathSoaQuat vmathSoaQMakeFromAos_V( VmathQuat quat );
+
+/*
+ * Insert four AoS quaternions
+ */
+static inline VmathSoaQuat vmathSoaQMakeFrom4Aos_V( VmathQuat quat0, VmathQuat quat1, VmathQuat quat2, VmathQuat quat3 );
+
+/*
+ * Extract four AoS quaternions
+ */
+static inline void vmathSoaQGet4Aos_V( VmathSoaQuat quat, VmathQuat *result0, VmathQuat *result1, VmathQuat *result2, VmathQuat *result3 );
+
+/*
+ * Set the x, y, and z elements of a quaternion
+ * NOTE: 
+ * This function does not change the w element.
+ */
+static inline void vmathSoaQSetXYZ_V( VmathSoaQuat *result, VmathSoaVector3 vec );
+
+/*
+ * Get the x, y, and z elements of a quaternion
+ */
+static inline VmathSoaVector3 vmathSoaQGetXYZ_V( VmathSoaQuat quat );
+
+/*
+ * Set the x element of a quaternion
+ */
+static inline void vmathSoaQSetX_V( VmathSoaQuat *result, vec_float4 x );
+
+/*
+ * Set the y element of a quaternion
+ */
+static inline void vmathSoaQSetY_V( VmathSoaQuat *result, vec_float4 y );
+
+/*
+ * Set the z element of a quaternion
+ */
+static inline void vmathSoaQSetZ_V( VmathSoaQuat *result, vec_float4 z );
+
+/*
+ * Set the w element of a quaternion
+ */
+static inline void vmathSoaQSetW_V( VmathSoaQuat *result, vec_float4 w );
+
+/*
+ * Get the x element of a quaternion
+ */
+static inline vec_float4 vmathSoaQGetX_V( VmathSoaQuat quat );
+
+/*
+ * Get the y element of a quaternion
+ */
+static inline vec_float4 vmathSoaQGetY_V( VmathSoaQuat quat );
+
+/*
+ * Get the z element of a quaternion
+ */
+static inline vec_float4 vmathSoaQGetZ_V( VmathSoaQuat quat );
+
+/*
+ * Get the w element of a quaternion
+ */
+static inline vec_float4 vmathSoaQGetW_V( VmathSoaQuat quat );
+
+/*
+ * Set an x, y, z, or w element of a quaternion by index
+ */
+static inline void vmathSoaQSetElem_V( VmathSoaQuat *result, int idx, vec_float4 value );
+
+/*
+ * Get an x, y, z, or w element of a quaternion by index
+ */
+static inline vec_float4 vmathSoaQGetElem_V( VmathSoaQuat quat, int idx );
+
+/*
+ * Add two quaternions
+ */
+static inline VmathSoaQuat vmathSoaQAdd_V( VmathSoaQuat quat0, VmathSoaQuat quat1 );
+
+/*
+ * Subtract a quaternion from another quaternion
+ */
+static inline VmathSoaQuat vmathSoaQSub_V( VmathSoaQuat quat0, VmathSoaQuat quat1 );
+
+/*
+ * Multiply two quaternions
+ */
+static inline VmathSoaQuat vmathSoaQMul_V( VmathSoaQuat quat0, VmathSoaQuat quat1 );
+
+/*
+ * Multiply a quaternion by a scalar
+ */
+static inline VmathSoaQuat vmathSoaQScalarMul_V( VmathSoaQuat quat, vec_float4 scalar );
+
+/*
+ * Divide a quaternion by a scalar
+ */
+static inline VmathSoaQuat vmathSoaQScalarDiv_V( VmathSoaQuat quat, vec_float4 scalar );
+
+/*
+ * Negate all elements of a quaternion
+ */
+static inline VmathSoaQuat vmathSoaQNeg_V( VmathSoaQuat quat );
+
+/*
+ * Construct an identity quaternion (takes no arguments)
+ */
+static inline VmathSoaQuat vmathSoaQMakeIdentity_V( void );
+
+/*
+ * Construct a quaternion to rotate between two unit-length 3-D vectors
+ * NOTE: 
+ * The result is unpredictable if unitVec0 and unitVec1 point in opposite directions.
+ */
+static inline VmathSoaQuat vmathSoaQMakeRotationArc_V( VmathSoaVector3 unitVec0, VmathSoaVector3 unitVec1 );
+
+/*
+ * Construct a quaternion to rotate around a unit-length 3-D vector
+ */
+static inline VmathSoaQuat vmathSoaQMakeRotationAxis_V( vec_float4 radians, VmathSoaVector3 unitVec );
+
+/*
+ * Construct a quaternion to rotate around the x axis
+ */
+static inline VmathSoaQuat vmathSoaQMakeRotationX_V( vec_float4 radians );
+
+/*
+ * Construct a quaternion to rotate around the y axis
+ */
+static inline VmathSoaQuat vmathSoaQMakeRotationY_V( vec_float4 radians );
+
+/*
+ * Construct a quaternion to rotate around the z axis
+ */
+static inline VmathSoaQuat vmathSoaQMakeRotationZ_V( vec_float4 radians );
+
+/*
+ * Compute the conjugate of a quaternion
+ */
+static inline VmathSoaQuat vmathSoaQConj_V( VmathSoaQuat quat );
+
+/*
+ * Use a unit-length quaternion to rotate a 3-D vector
+ */
+static inline VmathSoaVector3 vmathSoaQRotate_V( VmathSoaQuat unitQuat, VmathSoaVector3 vec );
+
+/*
+ * Compute the dot product of two quaternions
+ */
+static inline vec_float4 vmathSoaQDot_V( VmathSoaQuat quat0, VmathSoaQuat quat1 );
+
+/*
+ * Compute the norm of a quaternion
+ */
+static inline vec_float4 vmathSoaQNorm_V( VmathSoaQuat quat );
+
+/*
+ * Compute the length of a quaternion
+ */
+static inline vec_float4 vmathSoaQLength_V( VmathSoaQuat quat );
+
+/*
+ * Normalize a quaternion
+ * NOTE: 
+ * The result is unpredictable when all elements of quat are at or near zero.
+ */
+static inline VmathSoaQuat vmathSoaQNormalize_V( VmathSoaQuat quat );
+
+/*
+ * Linear interpolation between two quaternions
+ * NOTE: 
+ * Does not clamp t between 0 and 1.
+ */
+static inline VmathSoaQuat vmathSoaQLerp_V( vec_float4 t, VmathSoaQuat quat0, VmathSoaQuat quat1 );
+
+/*
+ * Spherical linear interpolation between two quaternions
+ * NOTE: 
+ * Interpolates along the shortest path between orientations.
+ * Does not clamp t between 0 and 1.
+ */
+static inline VmathSoaQuat vmathSoaQSlerp_V( vec_float4 t, VmathSoaQuat unitQuat0, VmathSoaQuat unitQuat1 );
+
+/*
+ * Spherical quadrangle interpolation
+ */
+static inline VmathSoaQuat vmathSoaQSquad_V( vec_float4 t, VmathSoaQuat unitQuat0, VmathSoaQuat unitQuat1, VmathSoaQuat unitQuat2, VmathSoaQuat unitQuat3 );
+
+/*
+ * Conditionally select between two quaternions
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline VmathSoaQuat vmathSoaQSelect_V( VmathSoaQuat quat0, VmathSoaQuat quat1, vec_uint4 select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a quaternion
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaQPrint_V( VmathSoaQuat quat );
+
+/*
+ * Print a quaternion and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaQPrints_V( VmathSoaQuat quat, const char *name );
+
+#endif
+
+/*
+ * Construct a 3x3 matrix containing the specified columns
+ */
+static inline VmathSoaMatrix3 vmathSoaM3MakeFromCols_V( VmathSoaVector3 col0, VmathSoaVector3 col1, VmathSoaVector3 col2 );
+
+/*
+ * Construct a 3x3 rotation matrix from a unit-length quaternion
+ */
+static inline VmathSoaMatrix3 vmathSoaM3MakeFromQ_V( VmathSoaQuat unitQuat );
+
+/*
+ * Set all elements of a 3x3 matrix to the same scalar value
+ */
+static inline VmathSoaMatrix3 vmathSoaM3MakeFromScalar_V( vec_float4 scalar );
+
+/*
+ * Replicate an AoS 3x3 matrix
+ */
+static inline VmathSoaMatrix3 vmathSoaM3MakeFromAos_V( VmathMatrix3 mat );
+
+/*
+ * Insert four AoS 3x3 matrices
+ */
+static inline VmathSoaMatrix3 vmathSoaM3MakeFrom4Aos_V( VmathMatrix3 mat0, VmathMatrix3 mat1, VmathMatrix3 mat2, VmathMatrix3 mat3 );
+
+/*
+ * Extract four AoS 3x3 matrices
+ */
+static inline void vmathSoaM3Get4Aos_V( VmathSoaMatrix3 mat, VmathMatrix3 *result0, VmathMatrix3 *result1, VmathMatrix3 *result2, VmathMatrix3 *result3 );
+
+/*
+ * Set column 0 of a 3x3 matrix
+ */
+static inline void vmathSoaM3SetCol0_V( VmathSoaMatrix3 *result, VmathSoaVector3 col0 );
+
+/*
+ * Set column 1 of a 3x3 matrix
+ */
+static inline void vmathSoaM3SetCol1_V( VmathSoaMatrix3 *result, VmathSoaVector3 col1 );
+
+/*
+ * Set column 2 of a 3x3 matrix
+ */
+static inline void vmathSoaM3SetCol2_V( VmathSoaMatrix3 *result, VmathSoaVector3 col2 );
+
+/*
+ * Get column 0 of a 3x3 matrix
+ */
+static inline VmathSoaVector3 vmathSoaM3GetCol0_V( VmathSoaMatrix3 mat );
+
+/*
+ * Get column 1 of a 3x3 matrix
+ */
+static inline VmathSoaVector3 vmathSoaM3GetCol1_V( VmathSoaMatrix3 mat );
+
+/*
+ * Get column 2 of a 3x3 matrix
+ */
+static inline VmathSoaVector3 vmathSoaM3GetCol2_V( VmathSoaMatrix3 mat );
+
+/*
+ * Set the column of a 3x3 matrix referred to by the specified index
+ */
+static inline void vmathSoaM3SetCol_V( VmathSoaMatrix3 *result, int col, VmathSoaVector3 vec );
+
+/*
+ * Set the row of a 3x3 matrix referred to by the specified index
+ */
+static inline void vmathSoaM3SetRow_V( VmathSoaMatrix3 *result, int row, VmathSoaVector3 vec );
+
+/*
+ * Get the column of a 3x3 matrix referred to by the specified index
+ */
+static inline VmathSoaVector3 vmathSoaM3GetCol_V( VmathSoaMatrix3 mat, int col );
+
+/*
+ * Get the row of a 3x3 matrix referred to by the specified index
+ */
+static inline VmathSoaVector3 vmathSoaM3GetRow_V( VmathSoaMatrix3 mat, int row );
+
+/*
+ * Set the element of a 3x3 matrix referred to by column and row indices
+ */
+static inline void vmathSoaM3SetElem_V( VmathSoaMatrix3 *result, int col, int row, vec_float4 val );
+
+/*
+ * Get the element of a 3x3 matrix referred to by column and row indices
+ */
+static inline vec_float4 vmathSoaM3GetElem_V( VmathSoaMatrix3 mat, int col, int row );
+
+/*
+ * Add two 3x3 matrices
+ */
+static inline VmathSoaMatrix3 vmathSoaM3Add_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1 );
+
+/*
+ * Subtract a 3x3 matrix from another 3x3 matrix
+ */
+static inline VmathSoaMatrix3 vmathSoaM3Sub_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1 );
+
+/*
+ * Negate all elements of a 3x3 matrix
+ */
+static inline VmathSoaMatrix3 vmathSoaM3Neg_V( VmathSoaMatrix3 mat );
+
+/*
+ * Multiply a 3x3 matrix by a scalar
+ */
+static inline VmathSoaMatrix3 vmathSoaM3ScalarMul_V( VmathSoaMatrix3 mat, vec_float4 scalar );
+
+/*
+ * Multiply a 3x3 matrix by a 3-D vector
+ */
+static inline VmathSoaVector3 vmathSoaM3MulV3_V( VmathSoaMatrix3 mat, VmathSoaVector3 vec );
+
+/*
+ * Multiply two 3x3 matrices
+ */
+static inline VmathSoaMatrix3 vmathSoaM3Mul_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1 );
+
+/*
+ * Construct an identity 3x3 matrix (takes no arguments)
+ */
+static inline VmathSoaMatrix3 vmathSoaM3MakeIdentity_V( void );
+
+/*
+ * Construct a 3x3 matrix to rotate around the x axis
+ */
+static inline VmathSoaMatrix3 vmathSoaM3MakeRotationX_V( vec_float4 radians );
+
+/*
+ * Construct a 3x3 matrix to rotate around the y axis
+ */
+static inline VmathSoaMatrix3 vmathSoaM3MakeRotationY_V( vec_float4 radians );
+
+/*
+ * Construct a 3x3 matrix to rotate around the z axis
+ */
+static inline VmathSoaMatrix3 vmathSoaM3MakeRotationZ_V( vec_float4 radians );
+
+/*
+ * Construct a 3x3 matrix to rotate around the x, y, and z axes
+ */
+static inline VmathSoaMatrix3 vmathSoaM3MakeRotationZYX_V( VmathSoaVector3 radiansXYZ );
+
+/*
+ * Construct a 3x3 matrix to rotate around a unit-length 3-D vector
+ */
+static inline VmathSoaMatrix3 vmathSoaM3MakeRotationAxis_V( vec_float4 radians, VmathSoaVector3 unitVec );
+
+/*
+ * Construct a 3x3 rotation matrix from a unit-length quaternion
+ */
+static inline VmathSoaMatrix3 vmathSoaM3MakeRotationQ_V( VmathSoaQuat unitQuat );
+
+/*
+ * Construct a 3x3 matrix to perform scaling
+ */
+static inline VmathSoaMatrix3 vmathSoaM3MakeScale_V( VmathSoaVector3 scaleVec );
+
+/*
+ * Append (post-multiply) a scale transformation to a 3x3 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline VmathSoaMatrix3 vmathSoaM3AppendScale_V( VmathSoaMatrix3 mat, VmathSoaVector3 scaleVec );
+
+/*
+ * Prepend (pre-multiply) a scale transformation to a 3x3 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline VmathSoaMatrix3 vmathSoaM3PrependScale_V( VmathSoaVector3 scaleVec, VmathSoaMatrix3 mat );
+
+/*
+ * Multiply two 3x3 matrices per element
+ */
+static inline VmathSoaMatrix3 vmathSoaM3MulPerElem_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1 );
+
+/*
+ * Compute the absolute value of a 3x3 matrix per element
+ */
+static inline VmathSoaMatrix3 vmathSoaM3AbsPerElem_V( VmathSoaMatrix3 mat );
+
+/*
+ * Transpose of a 3x3 matrix
+ */
+static inline VmathSoaMatrix3 vmathSoaM3Transpose_V( VmathSoaMatrix3 mat );
+
+/*
+ * Compute the inverse of a 3x3 matrix
+ * NOTE: 
+ * Result is unpredictable when the determinant of mat is equal to or near 0.
+ */
+static inline VmathSoaMatrix3 vmathSoaM3Inverse_V( VmathSoaMatrix3 mat );
+
+/*
+ * Determinant of a 3x3 matrix
+ */
+static inline vec_float4 vmathSoaM3Determinant_V( VmathSoaMatrix3 mat );
+
+/*
+ * Conditionally select between two 3x3 matrices
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline VmathSoaMatrix3 vmathSoaM3Select_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1, vec_uint4 select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3x3 matrix
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaM3Print_V( VmathSoaMatrix3 mat );
+
+/*
+ * Print a 3x3 matrix and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaM3Prints_V( VmathSoaMatrix3 mat, const char *name );
+
+#endif
+
+/*
+ * Construct a 4x4 matrix containing the specified columns
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeFromCols_V( VmathSoaVector4 col0, VmathSoaVector4 col1, VmathSoaVector4 col2, VmathSoaVector4 col3 );
+
+/*
+ * Construct a 4x4 matrix from a 3x4 transformation matrix
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeFromT3_V( VmathSoaTransform3 mat );
+
+/*
+ * Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeFromM3V3_V( VmathSoaMatrix3 mat, VmathSoaVector3 translateVec );
+
+/*
+ * Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeFromQV3_V( VmathSoaQuat unitQuat, VmathSoaVector3 translateVec );
+
+/*
+ * Set all elements of a 4x4 matrix to the same scalar value
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeFromScalar_V( vec_float4 scalar );
+
+/*
+ * Replicate an AoS 4x4 matrix
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeFromAos_V( VmathMatrix4 mat );
+
+/*
+ * Insert four AoS 4x4 matrices
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeFrom4Aos_V( VmathMatrix4 mat0, VmathMatrix4 mat1, VmathMatrix4 mat2, VmathMatrix4 mat3 );
+
+/*
+ * Extract four AoS 4x4 matrices
+ */
+static inline void vmathSoaM4Get4Aos_V( VmathSoaMatrix4 mat, VmathMatrix4 *result0, VmathMatrix4 *result1, VmathMatrix4 *result2, VmathMatrix4 *result3 );
+
+/*
+ * Set the upper-left 3x3 submatrix of a 4x4 matrix
+ * NOTE: 
+ * This function does not change the bottom row elements.
+ */
+static inline void vmathSoaM4SetUpper3x3_V( VmathSoaMatrix4 *result, VmathSoaMatrix3 mat3 );
+
+/*
+ * Get the upper-left 3x3 submatrix of a 4x4 matrix
+ */
+static inline VmathSoaMatrix3 vmathSoaM4GetUpper3x3_V( VmathSoaMatrix4 mat );
+
+/*
+ * Set the translation component of a 4x4 matrix
+ * NOTE: 
+ * This function does not change the bottom row elements.
+ */
+static inline void vmathSoaM4SetTranslation_V( VmathSoaMatrix4 *result, VmathSoaVector3 translateVec );
+
+/*
+ * Get the translation component of a 4x4 matrix
+ */
+static inline VmathSoaVector3 vmathSoaM4GetTranslation_V( VmathSoaMatrix4 mat );
+
+/*
+ * Set column 0 of a 4x4 matrix
+ */
+static inline void vmathSoaM4SetCol0_V( VmathSoaMatrix4 *result, VmathSoaVector4 col0 );
+
+/*
+ * Set column 1 of a 4x4 matrix
+ */
+static inline void vmathSoaM4SetCol1_V( VmathSoaMatrix4 *result, VmathSoaVector4 col1 );
+
+/*
+ * Set column 2 of a 4x4 matrix
+ */
+static inline void vmathSoaM4SetCol2_V( VmathSoaMatrix4 *result, VmathSoaVector4 col2 );
+
+/*
+ * Set column 3 of a 4x4 matrix
+ */
+static inline void vmathSoaM4SetCol3_V( VmathSoaMatrix4 *result, VmathSoaVector4 col3 );
+
+/*
+ * Get column 0 of a 4x4 matrix
+ */
+static inline VmathSoaVector4 vmathSoaM4GetCol0_V( VmathSoaMatrix4 mat );
+
+/*
+ * Get column 1 of a 4x4 matrix
+ */
+static inline VmathSoaVector4 vmathSoaM4GetCol1_V( VmathSoaMatrix4 mat );
+
+/*
+ * Get column 2 of a 4x4 matrix
+ */
+static inline VmathSoaVector4 vmathSoaM4GetCol2_V( VmathSoaMatrix4 mat );
+
+/*
+ * Get column 3 of a 4x4 matrix
+ */
+static inline VmathSoaVector4 vmathSoaM4GetCol3_V( VmathSoaMatrix4 mat );
+
+/*
+ * Set the column of a 4x4 matrix referred to by the specified index
+ */
+static inline void vmathSoaM4SetCol_V( VmathSoaMatrix4 *result, int col, VmathSoaVector4 vec );
+
+/*
+ * Set the row of a 4x4 matrix referred to by the specified index
+ */
+static inline void vmathSoaM4SetRow_V( VmathSoaMatrix4 *result, int row, VmathSoaVector4 vec );
+
+/*
+ * Get the column of a 4x4 matrix referred to by the specified index
+ */
+static inline VmathSoaVector4 vmathSoaM4GetCol_V( VmathSoaMatrix4 mat, int col );
+
+/*
+ * Get the row of a 4x4 matrix referred to by the specified index
+ */
+static inline VmathSoaVector4 vmathSoaM4GetRow_V( VmathSoaMatrix4 mat, int row );
+
+/*
+ * Set the element of a 4x4 matrix referred to by column and row indices
+ */
+static inline void vmathSoaM4SetElem_V( VmathSoaMatrix4 *result, int col, int row, vec_float4 val );
+
+/*
+ * Get the element of a 4x4 matrix referred to by column and row indices
+ */
+static inline vec_float4 vmathSoaM4GetElem_V( VmathSoaMatrix4 mat, int col, int row );
+
+/*
+ * Add two 4x4 matrices
+ */
+static inline VmathSoaMatrix4 vmathSoaM4Add_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1 );
+
+/*
+ * Subtract a 4x4 matrix from another 4x4 matrix
+ */
+static inline VmathSoaMatrix4 vmathSoaM4Sub_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1 );
+
+/*
+ * Negate all elements of a 4x4 matrix
+ */
+static inline VmathSoaMatrix4 vmathSoaM4Neg_V( VmathSoaMatrix4 mat );
+
+/*
+ * Multiply a 4x4 matrix by a scalar
+ */
+static inline VmathSoaMatrix4 vmathSoaM4ScalarMul_V( VmathSoaMatrix4 mat, vec_float4 scalar );
+
+/*
+ * Multiply a 4x4 matrix by a 4-D vector
+ */
+static inline VmathSoaVector4 vmathSoaM4MulV4_V( VmathSoaMatrix4 mat, VmathSoaVector4 vec );
+
+/*
+ * Multiply a 4x4 matrix by a 3-D vector
+ */
+static inline VmathSoaVector4 vmathSoaM4MulV3_V( VmathSoaMatrix4 mat, VmathSoaVector3 vec );
+
+/*
+ * Multiply a 4x4 matrix by a 3-D point
+ */
+static inline VmathSoaVector4 vmathSoaM4MulP3_V( VmathSoaMatrix4 mat, VmathSoaPoint3 pnt );
+
+/*
+ * Multiply two 4x4 matrices
+ */
+static inline VmathSoaMatrix4 vmathSoaM4Mul_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1 );
+
+/*
+ * Multiply a 4x4 matrix by a 3x4 transformation matrix
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MulT3_V( VmathSoaMatrix4 mat, VmathSoaTransform3 tfrm );
+
+/*
+ * Construct an identity 4x4 matrix (takes no arguments)
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeIdentity_V( void );
+
+/*
+ * Construct a 4x4 matrix to rotate around the x axis
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeRotationX_V( vec_float4 radians );
+
+/*
+ * Construct a 4x4 matrix to rotate around the y axis
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeRotationY_V( vec_float4 radians );
+
+/*
+ * Construct a 4x4 matrix to rotate around the z axis
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeRotationZ_V( vec_float4 radians );
+
+/*
+ * Construct a 4x4 matrix to rotate around the x, y, and z axes
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeRotationZYX_V( VmathSoaVector3 radiansXYZ );
+
+/*
+ * Construct a 4x4 matrix to rotate around a unit-length 3-D vector
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeRotationAxis_V( vec_float4 radians, VmathSoaVector3 unitVec );
+
+/*
+ * Construct a 4x4 rotation matrix from a unit-length quaternion
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeRotationQ_V( VmathSoaQuat unitQuat );
+
+/*
+ * Construct a 4x4 matrix to perform scaling
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeScale_V( VmathSoaVector3 scaleVec );
+
+/*
+ * Construct a 4x4 matrix to perform translation
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeTranslation_V( VmathSoaVector3 translateVec );
+
+/*
+ * Construct viewing matrix based on eye position, position looked at, and up direction
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeLookAt_V( VmathSoaPoint3 eyePos, VmathSoaPoint3 lookAtPos, VmathSoaVector3 upVec );
+
+/*
+ * Construct a perspective projection matrix
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakePerspective_V( vec_float4 fovyRadians, vec_float4 aspect, vec_float4 zNear, vec_float4 zFar );
+
+/*
+ * Construct a perspective projection matrix based on frustum
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeFrustum_V( vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar );
+
+/*
+ * Construct an orthographic projection matrix
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeOrthographic_V( vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar );
+
+/*
+ * Append (post-multiply) a scale transformation to a 4x4 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline VmathSoaMatrix4 vmathSoaM4AppendScale_V( VmathSoaMatrix4 mat, VmathSoaVector3 scaleVec );
+
+/*
+ * Prepend (pre-multiply) a scale transformation to a 4x4 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline VmathSoaMatrix4 vmathSoaM4PrependScale_V( VmathSoaVector3 scaleVec, VmathSoaMatrix4 mat );
+
+/*
+ * Multiply two 4x4 matrices per element
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MulPerElem_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1 );
+
+/*
+ * Compute the absolute value of a 4x4 matrix per element
+ */
+static inline VmathSoaMatrix4 vmathSoaM4AbsPerElem_V( VmathSoaMatrix4 mat );
+
+/*
+ * Transpose of a 4x4 matrix
+ */
+static inline VmathSoaMatrix4 vmathSoaM4Transpose_V( VmathSoaMatrix4 mat );
+
+/*
+ * Compute the inverse of a 4x4 matrix
+ * NOTE: 
+ * Result is unpredictable when the determinant of mat is equal to or near 0.
+ */
+static inline VmathSoaMatrix4 vmathSoaM4Inverse_V( VmathSoaMatrix4 mat );
+
+/*
+ * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix
+ * NOTE: 
+ * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.  The result is unpredictable when the determinant of mat is equal to or near 0.
+ */
+static inline VmathSoaMatrix4 vmathSoaM4AffineInverse_V( VmathSoaMatrix4 mat );
+
+/*
+ * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix
+ * NOTE: 
+ * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.
+ */
+static inline VmathSoaMatrix4 vmathSoaM4OrthoInverse_V( VmathSoaMatrix4 mat );
+
+/*
+ * Determinant of a 4x4 matrix
+ */
+static inline vec_float4 vmathSoaM4Determinant_V( VmathSoaMatrix4 mat );
+
+/*
+ * Conditionally select between two 4x4 matrices
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline VmathSoaMatrix4 vmathSoaM4Select_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1, vec_uint4 select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 4x4 matrix
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaM4Print_V( VmathSoaMatrix4 mat );
+
+/*
+ * Print a 4x4 matrix and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaM4Prints_V( VmathSoaMatrix4 mat, const char *name );
+
+#endif
+
+/*
+ * Construct a 3x4 transformation matrix containing the specified columns
+ */
+static inline VmathSoaTransform3 vmathSoaT3MakeFromCols_V( VmathSoaVector3 col0, VmathSoaVector3 col1, VmathSoaVector3 col2, VmathSoaVector3 col3 );
+
+/*
+ * Construct a 3x4 transformation matrix from a 3x3 matrix and a 3-D vector
+ */
+static inline VmathSoaTransform3 vmathSoaT3MakeFromM3V3_V( VmathSoaMatrix3 tfrm, VmathSoaVector3 translateVec );
+
+/*
+ * Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector
+ */
+static inline VmathSoaTransform3 vmathSoaT3MakeFromQV3_V( VmathSoaQuat unitQuat, VmathSoaVector3 translateVec );
+
+/*
+ * Set all elements of a 3x4 transformation matrix to the same scalar value
+ */
+static inline VmathSoaTransform3 vmathSoaT3MakeFromScalar_V( vec_float4 scalar );
+
+/*
+ * Replicate an AoS 3x4 transformation matrix
+ */
+static inline VmathSoaTransform3 vmathSoaT3MakeFromAos_V( VmathTransform3 tfrm );
+
+/*
+ * Insert four AoS 3x4 transformation matrices
+ */
+static inline VmathSoaTransform3 vmathSoaT3MakeFrom4Aos_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1, VmathTransform3 tfrm2, VmathTransform3 tfrm3 );
+
+/*
+ * Extract four AoS 3x4 transformation matrices
+ */
+static inline void vmathSoaT3Get4Aos_V( VmathSoaTransform3 tfrm, VmathTransform3 *result0, VmathTransform3 *result1, VmathTransform3 *result2, VmathTransform3 *result3 );
+
+/*
+ * Set the upper-left 3x3 submatrix of a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3SetUpper3x3_V( VmathSoaTransform3 *result, VmathSoaMatrix3 mat3 );
+
+/*
+ * Get the upper-left 3x3 submatrix of a 3x4 transformation matrix
+ */
+static inline VmathSoaMatrix3 vmathSoaT3GetUpper3x3_V( VmathSoaTransform3 tfrm );
+
+/*
+ * Set the translation component of a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3SetTranslation_V( VmathSoaTransform3 *result, VmathSoaVector3 translateVec );
+
+/*
+ * Get the translation component of a 3x4 transformation matrix
+ */
+static inline VmathSoaVector3 vmathSoaT3GetTranslation_V( VmathSoaTransform3 tfrm );
+
+/*
+ * Set column 0 of a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3SetCol0_V( VmathSoaTransform3 *result, VmathSoaVector3 col0 );
+
+/*
+ * Set column 1 of a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3SetCol1_V( VmathSoaTransform3 *result, VmathSoaVector3 col1 );
+
+/*
+ * Set column 2 of a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3SetCol2_V( VmathSoaTransform3 *result, VmathSoaVector3 col2 );
+
+/*
+ * Set column 3 of a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3SetCol3_V( VmathSoaTransform3 *result, VmathSoaVector3 col3 );
+
+/*
+ * Get column 0 of a 3x4 transformation matrix
+ */
+static inline VmathSoaVector3 vmathSoaT3GetCol0_V( VmathSoaTransform3 tfrm );
+
+/*
+ * Get column 1 of a 3x4 transformation matrix
+ */
+static inline VmathSoaVector3 vmathSoaT3GetCol1_V( VmathSoaTransform3 tfrm );
+
+/*
+ * Get column 2 of a 3x4 transformation matrix
+ */
+static inline VmathSoaVector3 vmathSoaT3GetCol2_V( VmathSoaTransform3 tfrm );
+
+/*
+ * Get column 3 of a 3x4 transformation matrix
+ */
+static inline VmathSoaVector3 vmathSoaT3GetCol3_V( VmathSoaTransform3 tfrm );
+
+/*
+ * Set the column of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline void vmathSoaT3SetCol_V( VmathSoaTransform3 *result, int col, VmathSoaVector3 vec );
+
+/*
+ * Set the row of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline void vmathSoaT3SetRow_V( VmathSoaTransform3 *result, int row, VmathSoaVector4 vec );
+
+/*
+ * Get the column of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline VmathSoaVector3 vmathSoaT3GetCol_V( VmathSoaTransform3 tfrm, int col );
+
+/*
+ * Get the row of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline VmathSoaVector4 vmathSoaT3GetRow_V( VmathSoaTransform3 tfrm, int row );
+
+/*
+ * Set the element of a 3x4 transformation matrix referred to by column and row indices
+ */
+static inline void vmathSoaT3SetElem_V( VmathSoaTransform3 *result, int col, int row, vec_float4 val );
+
+/*
+ * Get the element of a 3x4 transformation matrix referred to by column and row indices
+ */
+static inline vec_float4 vmathSoaT3GetElem_V( VmathSoaTransform3 tfrm, int col, int row );
+
+/*
+ * Multiply a 3x4 transformation matrix by a 3-D vector
+ */
+static inline VmathSoaVector3 vmathSoaT3MulV3_V( VmathSoaTransform3 tfrm, VmathSoaVector3 vec );
+
+/*
+ * Multiply a 3x4 transformation matrix by a 3-D point
+ */
+static inline VmathSoaPoint3 vmathSoaT3MulP3_V( VmathSoaTransform3 tfrm, VmathSoaPoint3 pnt );
+
+/*
+ * Multiply two 3x4 transformation matrices
+ */
+static inline VmathSoaTransform3 vmathSoaT3Mul_V( VmathSoaTransform3 tfrm0, VmathSoaTransform3 tfrm1 );
+
+/*
+ * Construct an identity 3x4 transformation matrix (takes no arguments)
+ */
+static inline VmathSoaTransform3 vmathSoaT3MakeIdentity_V( void );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the x axis
+ */
+static inline VmathSoaTransform3 vmathSoaT3MakeRotationX_V( vec_float4 radians );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the y axis
+ */
+static inline VmathSoaTransform3 vmathSoaT3MakeRotationY_V( vec_float4 radians );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the z axis
+ */
+static inline VmathSoaTransform3 vmathSoaT3MakeRotationZ_V( vec_float4 radians );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the x, y, and z axes
+ */
+static inline VmathSoaTransform3 vmathSoaT3MakeRotationZYX_V( VmathSoaVector3 radiansXYZ );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector
+ */
+static inline VmathSoaTransform3 vmathSoaT3MakeRotationAxis_V( vec_float4 radians, VmathSoaVector3 unitVec );
+
+/*
+ * Construct a 3x4 rotation transformation matrix from a unit-length quaternion
+ */
+static inline VmathSoaTransform3 vmathSoaT3MakeRotationQ_V( VmathSoaQuat unitQuat );
+
+/*
+ * Construct a 3x4 transformation matrix to perform scaling
+ */
+static inline VmathSoaTransform3 vmathSoaT3MakeScale_V( VmathSoaVector3 scaleVec );
+
+/*
+ * Construct a 3x4 transformation matrix to perform translation
+ */
+static inline VmathSoaTransform3 vmathSoaT3MakeTranslation_V( VmathSoaVector3 translateVec );
+
+/*
+ * Append (post-multiply) a scale transformation to a 3x4 transformation matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline VmathSoaTransform3 vmathSoaT3AppendScale_V( VmathSoaTransform3 tfrm, VmathSoaVector3 scaleVec );
+
+/*
+ * Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline VmathSoaTransform3 vmathSoaT3PrependScale_V( VmathSoaVector3 scaleVec, VmathSoaTransform3 tfrm );
+
+/*
+ * Multiply two 3x4 transformation matrices per element
+ */
+static inline VmathSoaTransform3 vmathSoaT3MulPerElem_V( VmathSoaTransform3 tfrm0, VmathSoaTransform3 tfrm1 );
+
+/*
+ * Compute the absolute value of a 3x4 transformation matrix per element
+ */
+static inline VmathSoaTransform3 vmathSoaT3AbsPerElem_V( VmathSoaTransform3 tfrm );
+
+/*
+ * Inverse of a 3x4 transformation matrix
+ * NOTE: 
+ * Result is unpredictable when the determinant of the left 3x3 submatrix is equal to or near 0.
+ */
+static inline VmathSoaTransform3 vmathSoaT3Inverse_V( VmathSoaTransform3 tfrm );
+
+/*
+ * Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix
+ * NOTE: 
+ * This can be used to achieve better performance than a general inverse when the specified 3x4 transformation matrix meets the given restrictions.
+ */
+static inline VmathSoaTransform3 vmathSoaT3OrthoInverse_V( VmathSoaTransform3 tfrm );
+
+/*
+ * Conditionally select between two 3x4 transformation matrices
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline VmathSoaTransform3 vmathSoaT3Select_V( VmathSoaTransform3 tfrm0, VmathSoaTransform3 tfrm1, vec_uint4 select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3x4 transformation matrix
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaT3Print_V( VmathSoaTransform3 tfrm );
+
+/*
+ * Print a 3x4 transformation matrix and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaT3Prints_V( VmathSoaTransform3 tfrm, const char *name );
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#include "vectormath_soa.h"
+#include "vec_soa_v.h"
+#include "quat_soa_v.h"
+#include "mat_soa_v.h"
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/ppu/cpp/boolInVec.h b/Extras/vectormathlibrary/include/vectormath/ppu/cpp/boolInVec.h
index d928c0f58..dc678b803 100644
--- a/Extras/vectormathlibrary/include/vectormath/ppu/cpp/boolInVec.h
+++ b/Extras/vectormathlibrary/include/vectormath/ppu/cpp/boolInVec.h
@@ -1,261 +1,261 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _BOOLINVEC_H
-#define _BOOLINVEC_H
-
-#include <math.h>
-#include <altivec.h>
-#include "vec_types.h"
-#undef bool
-
-namespace Vectormath {
-
-class floatInVec;
-
-//--------------------------------------------------------------------------------------------------
-// boolInVec class
-//
-
-class boolInVec
-{
-    private:
-        vec_uint4 mData;
-
-        inline boolInVec(vec_uint4 vec);
-    public:
-        inline boolInVec() {}
-
-        // matches standard type conversions
-        //
-        inline boolInVec(floatInVec vec);
-
-        // explicit cast from bool
-        //
-        explicit inline boolInVec(bool scalar);
-
-#ifdef _VECTORMATH_NO_SCALAR_CAST
-        // explicit cast to bool
-        // 
-        inline bool getAsBool() const;
-#else
-        // implicit cast to bool
-        // 
-        inline operator bool() const;
-#endif
-        
-        // get vector data
-        // bool value is splatted across all word slots of vector as 0 (false) or -1 (true)
-        //
-        inline vec_uint4 get128() const;
-
-        // operators
-        //
-        inline const boolInVec operator ! () const;
-        inline boolInVec& operator = (boolInVec vec);
-        inline boolInVec& operator &= (boolInVec vec);
-        inline boolInVec& operator ^= (boolInVec vec);
-        inline boolInVec& operator |= (boolInVec vec);
-
-        // friend functions
-        //
-        friend inline const boolInVec operator == (boolInVec vec0, boolInVec vec1);
-        friend inline const boolInVec operator != (boolInVec vec0, boolInVec vec1);
-        friend inline const boolInVec operator < (floatInVec vec0, floatInVec vec1);
-        friend inline const boolInVec operator <= (floatInVec vec0, floatInVec vec1);
-        friend inline const boolInVec operator > (floatInVec vec0, floatInVec vec1);
-        friend inline const boolInVec operator >= (floatInVec vec0, floatInVec vec1);
-        friend inline const boolInVec operator == (floatInVec vec0, floatInVec vec1);
-        friend inline const boolInVec operator != (floatInVec vec0, floatInVec vec1);
-        friend inline const boolInVec operator & (boolInVec vec0, boolInVec vec1);
-        friend inline const boolInVec operator ^ (boolInVec vec0, boolInVec vec1);
-        friend inline const boolInVec operator | (boolInVec vec0, boolInVec vec1);
-        friend inline const boolInVec select(boolInVec vec0, boolInVec vec1, boolInVec select_vec1);
-};
-
-//--------------------------------------------------------------------------------------------------
-// boolInVec functions
-//
-
-// operators
-//
-inline const boolInVec operator == (boolInVec vec0, boolInVec vec1);
-inline const boolInVec operator != (boolInVec vec0, boolInVec vec1);
-inline const boolInVec operator & (boolInVec vec0, boolInVec vec1);
-inline const boolInVec operator ^ (boolInVec vec0, boolInVec vec1);
-inline const boolInVec operator | (boolInVec vec0, boolInVec vec1);
-
-// select between vec0 and vec1 using boolInVec.
-// false selects vec0, true selects vec1
-//
-inline const boolInVec select(boolInVec vec0, boolInVec vec1, boolInVec select_vec1);
-
-} // namespace Vectormath
-
-//--------------------------------------------------------------------------------------------------
-// boolInVec implementation
-//
-
-#include "floatInVec.h"
-
-namespace Vectormath {
-
-inline
-boolInVec::boolInVec(vec_uint4 vec)
-{
-    mData = vec;
-}
-
-inline
-boolInVec::boolInVec(floatInVec vec)
-{
-    *this = (vec != floatInVec(0.0f));
-}
-
-inline
-boolInVec::boolInVec(bool scalar)
-{
-#ifdef __GNUC__
-    if (__builtin_constant_p(scalar))
-    {
-        const unsigned int mask = -(int)scalar;
-        mData = (vec_uint4){mask, mask, mask, mask};
-    }
-    else
-#endif
-    {
-        unsigned int mask = -(int)scalar;
-        vec_uint4 vec = vec_ld(0, &mask);
-        mData = vec_splat(vec_perm(vec, vec, vec_lvsl(0, &mask)), 0);
-    }
-}
-
-#ifdef _VECTORMATH_NO_SCALAR_CAST
-inline
-bool
-boolInVec::getAsBool() const
-#else
-inline
-boolInVec::operator bool() const
-#endif
-{
-    return vec_all_gt(mData, ((vec_uint4){0,0,0,0}));
-}
-
-inline
-vec_uint4
-boolInVec::get128() const
-{
-    return mData;
-}
-
-inline
-const boolInVec
-boolInVec::operator ! () const
-{
-    return boolInVec(vec_nor(mData, mData));
-}
-
-inline
-boolInVec&
-boolInVec::operator = (boolInVec vec)
-{
-    mData = vec.mData;
-    return *this;
-}
-
-inline
-boolInVec&
-boolInVec::operator &= (boolInVec vec)
-{
-    *this = *this & vec;
-    return *this;
-}
-
-inline
-boolInVec&
-boolInVec::operator ^= (boolInVec vec)
-{
-    *this = *this ^ vec;
-    return *this;
-}
-
-inline
-boolInVec&
-boolInVec::operator |= (boolInVec vec)
-{
-    *this = *this | vec;
-    return *this;
-}
-
-inline
-const boolInVec
-operator == (boolInVec vec0, boolInVec vec1)
-{
-    return boolInVec((vec_uint4)vec_cmpeq(vec0.get128(), vec1.get128()));
-}
-
-inline
-const boolInVec
-operator != (boolInVec vec0, boolInVec vec1)
-{
-    return !(vec0 == vec1);
-}
-    
-inline
-const boolInVec
-operator & (boolInVec vec0, boolInVec vec1)
-{
-    return boolInVec(vec_and(vec0.get128(), vec1.get128()));
-}
-
-inline
-const boolInVec
-operator | (boolInVec vec0, boolInVec vec1)
-{
-    return boolInVec(vec_or(vec0.get128(), vec1.get128()));
-}
-
-inline
-const boolInVec
-operator ^ (boolInVec vec0, boolInVec vec1)
-{
-    return boolInVec(vec_xor(vec0.get128(), vec1.get128()));
-}
-
-inline
-const boolInVec
-select(boolInVec vec0, boolInVec vec1, boolInVec select_vec1)
-{
-    return boolInVec(vec_sel(vec0.get128(), vec1.get128(), select_vec1.get128()));
-}
- 
-} // namespace Vectormath
-
-#endif // boolInVec_h
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _BOOLINVEC_H
+#define _BOOLINVEC_H
+
+#include <math.h>
+#include <altivec.h>
+#include "../c/vec_types.h"
+#undef bool
+
+namespace Vectormath {
+
+class floatInVec;
+
+//--------------------------------------------------------------------------------------------------
+// boolInVec class
+// Holds one boolean replicated across all word slots of a vec_uint4 (see get128 below).
+
+class boolInVec
+{
+    private:
+        vec_uint4 mData;
+
+        inline boolInVec(vec_uint4 vec); // wraps a raw mask vector as-is
+    public:
+        inline boolInVec() {} // does not initialize mData
+
+        // matches standard type conversions
+        // (true when the float compares not-equal to 0.0f)
+        inline boolInVec(floatInVec vec);
+
+        // explicit cast from bool
+        // (mask is computed as -(int)scalar and replicated to every word slot)
+        explicit inline boolInVec(bool scalar);
+
+#ifdef _VECTORMATH_NO_SCALAR_CAST
+        // explicit cast to bool
+        // (true only when all word slots of the mask are greater than zero)
+        inline bool getAsBool() const;
+#else
+        // implicit cast to bool
+        // (true only when all word slots of the mask are greater than zero)
+        inline operator bool() const;
+#endif
+        
+        // get vector data
+        // bool value is splatted across all word slots of vector as 0 (false) or -1 (true)
+        //
+        inline vec_uint4 get128() const;
+
+        // operators
+        // (the compound assignments are implemented with the binary friend operators)
+        inline const boolInVec operator ! () const;
+        inline boolInVec& operator = (boolInVec vec);
+        inline boolInVec& operator &= (boolInVec vec);
+        inline boolInVec& operator ^= (boolInVec vec);
+        inline boolInVec& operator |= (boolInVec vec);
+
+        // friend functions
+        // (several of these build their result through the private vec_uint4 constructor)
+        friend inline const boolInVec operator == (boolInVec vec0, boolInVec vec1);
+        friend inline const boolInVec operator != (boolInVec vec0, boolInVec vec1);
+        friend inline const boolInVec operator < (floatInVec vec0, floatInVec vec1);
+        friend inline const boolInVec operator <= (floatInVec vec0, floatInVec vec1);
+        friend inline const boolInVec operator > (floatInVec vec0, floatInVec vec1);
+        friend inline const boolInVec operator >= (floatInVec vec0, floatInVec vec1);
+        friend inline const boolInVec operator == (floatInVec vec0, floatInVec vec1);
+        friend inline const boolInVec operator != (floatInVec vec0, floatInVec vec1);
+        friend inline const boolInVec operator & (boolInVec vec0, boolInVec vec1);
+        friend inline const boolInVec operator ^ (boolInVec vec0, boolInVec vec1);
+        friend inline const boolInVec operator | (boolInVec vec0, boolInVec vec1);
+        friend inline const boolInVec select(boolInVec vec0, boolInVec vec1, boolInVec select_vec1);
+};
+
+//--------------------------------------------------------------------------------------------------
+// boolInVec functions
+//
+
+// operators
+//
+inline const boolInVec operator == (boolInVec vec0, boolInVec vec1);
+inline const boolInVec operator != (boolInVec vec0, boolInVec vec1);
+inline const boolInVec operator & (boolInVec vec0, boolInVec vec1);
+inline const boolInVec operator ^ (boolInVec vec0, boolInVec vec1);
+inline const boolInVec operator | (boolInVec vec0, boolInVec vec1);
+
+// select between vec0 and vec1 using boolInVec.
+// false selects vec0, true selects vec1
+//
+inline const boolInVec select(boolInVec vec0, boolInVec vec1, boolInVec select_vec1);
+
+} // namespace Vectormath
+
+//--------------------------------------------------------------------------------------------------
+// boolInVec implementation
+//
+
+#include "floatInVec.h"
+
+namespace Vectormath {
+
+// Private constructor: store the given raw mask vector as-is.
+inline boolInVec::boolInVec(vec_uint4 vec)
+    : mData(vec)
+{
+}
+
+// Float-to-bool conversion: true when vec compares not-equal to 0.0f.
+inline boolInVec::boolInVec(floatInVec vec)
+{
+    *this = (vec != floatInVec(0.0f)) ? *this = (vec != floatInVec(0.0f)), *this : (vec != floatInVec(0.0f));
+}
+
+inline
+boolInVec::boolInVec(bool scalar) // replicate a scalar bool into every word slot of mData
+{
+#ifdef __GNUC__
+    if (__builtin_constant_p(scalar)) // compile-time constant: build the vector as a literal
+    {
+        const unsigned int mask = -(int)scalar; // 0 for false, all bits set for true
+        mData = (vec_uint4){mask, mask, mask, mask};
+    }
+    else
+#endif
+    {
+        unsigned int mask = -(int)scalar; // 0 for false, all bits set for true
+        vec_uint4 vec = vec_ld(0, &mask); // vec_ld fetches the aligned quadword that contains mask
+        mData = vec_splat(vec_perm(vec, vec, vec_lvsl(0, &mask)), 0); // vec_lvsl pattern moves mask to slot 0, then splat it
+    }
+}
+
+#ifdef _VECTORMATH_NO_SCALAR_CAST
+inline
+bool
+boolInVec::getAsBool() const
+#else
+inline
+boolInVec::operator bool() const
+#endif
+{ // single body shared by whichever signature the preprocessor selected above
+    return vec_all_gt(mData, ((vec_uint4){0,0,0,0})); // true only if every word slot is > 0 (unsigned compare)
+}
+
+// Accessor for the raw mask vector held by this boolInVec.
+inline vec_uint4 boolInVec::get128() const
+{
+    const vec_uint4 raw = mData;
+    return raw;
+}
+
+// Logical NOT: complement the mask by taking vec_nor of it with itself.
+inline const boolInVec boolInVec::operator ! () const
+{
+    const vec_uint4 inverted = vec_nor(mData, mData);
+    return boolInVec(inverted);
+}
+
+// Assignment: copy the other value's mask vector and return this object.
+inline boolInVec& boolInVec::operator = (boolInVec vec)
+{
+    boolInVec& self = *this;
+    self.mData = vec.mData;
+    return self;
+}
+
+// AND-assign: combine with vec through operator & and store the result in place.
+inline boolInVec& boolInVec::operator &= (boolInVec vec)
+{
+    const boolInVec combined = *this & vec;
+    *this = combined;
+    return *this;
+}
+
+// XOR-assign: combine with vec through operator ^ and store the result in place.
+inline boolInVec& boolInVec::operator ^= (boolInVec vec)
+{
+    const boolInVec combined = *this ^ vec;
+    *this = combined;
+    return *this;
+}
+
+// OR-assign: combine with vec through operator | and store the result in place.
+inline boolInVec& boolInVec::operator |= (boolInVec vec)
+{
+    const boolInVec combined = *this | vec;
+    *this = combined;
+    return *this;
+}
+
+// Equality: compare the two masks with vec_cmpeq and wrap the resulting mask.
+inline const boolInVec operator == (boolInVec vec0, boolInVec vec1)
+{
+    const vec_uint4 equalMask = (vec_uint4)vec_cmpeq(vec0.get128(), vec1.get128());
+    return boolInVec(equalMask);
+}
+
+// Inequality: logical NOT of operator ==.
+inline const boolInVec operator != (boolInVec vec0, boolInVec vec1)
+{
+    const boolInVec same = (vec0 == vec1);
+    return !same;
+}
+    
+// Logical AND: bitwise vec_and of the two masks.
+inline const boolInVec operator & (boolInVec vec0, boolInVec vec1)
+{
+    const vec_uint4 both = vec_and(vec0.get128(), vec1.get128());
+    return boolInVec(both);
+}
+
+// Logical OR: bitwise vec_or of the two masks.
+inline const boolInVec operator | (boolInVec vec0, boolInVec vec1)
+{
+    const vec_uint4 either = vec_or(vec0.get128(), vec1.get128());
+    return boolInVec(either);
+}
+
+// Logical XOR: bitwise vec_xor of the two masks.
+inline const boolInVec operator ^ (boolInVec vec0, boolInVec vec1)
+{
+    const vec_uint4 differing = vec_xor(vec0.get128(), vec1.get128());
+    return boolInVec(differing);
+}
+
+// Select via vec_sel: false in select_vec1 picks vec0, true picks vec1.
+inline const boolInVec select(boolInVec vec0, boolInVec vec1, boolInVec select_vec1)
+{
+    const vec_uint4 chosen = vec_sel(vec0.get128(), vec1.get128(), select_vec1.get128());
+    return boolInVec(chosen);
+}
+ 
+} // namespace Vectormath
+
+#endif // _BOOLINVEC_H
diff --git a/Extras/vectormathlibrary/include/vectormath/ppu/cpp/floatInVec.h b/Extras/vectormathlibrary/include/vectormath/ppu/cpp/floatInVec.h
index 54eb72523..22b549b76 100644
--- a/Extras/vectormathlibrary/include/vectormath/ppu/cpp/floatInVec.h
+++ b/Extras/vectormathlibrary/include/vectormath/ppu/cpp/floatInVec.h
@@ -1,361 +1,361 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _FLOATINVEC_H
-#define _FLOATINVEC_H
-
-#include <math.h>
-#include <altivec.h>
-#include <stddef.h>
-#include "vec_types.h"
-#include "simdmath.h"
-#undef bool
-
-namespace Vectormath {
-
-class boolInVec;
-
-//--------------------------------------------------------------------------------------------------
-// floatInVec class
-//
-
-class floatInVec
-{
-    private:
-        vec_float4 mData;
-
-        inline floatInVec(vec_float4 vec);
-    public:
-        inline floatInVec() {}
-
-        // matches standard type conversions
-        //
-        inline floatInVec(boolInVec vec);
-
-        // construct from a slot of vec_float4
-        //
-        inline floatInVec(vec_float4 vec, int slot);
-        
-        // explicit cast from float
-        //
-        explicit inline floatInVec(float scalar);
-
-#ifdef _VECTORMATH_NO_SCALAR_CAST
-        // explicit cast to float
-        // 
-        inline float getAsFloat() const;
-#else
-        // implicit cast to float
-        //
-        inline operator float() const;
-#endif
-
-        // get vector data
-        // float value is splatted across all word slots of vector
-        //
-        inline vec_float4 get128() const;
-
-        // operators
-        // 
-        inline const floatInVec operator ++ (int);
-        inline const floatInVec operator -- (int);
-        inline floatInVec& operator ++ ();
-        inline floatInVec& operator -- ();
-        inline const floatInVec operator - () const;
-        inline floatInVec& operator = (floatInVec vec);
-        inline floatInVec& operator *= (floatInVec vec);
-        inline floatInVec& operator /= (floatInVec vec);
-        inline floatInVec& operator += (floatInVec vec);
-        inline floatInVec& operator -= (floatInVec vec);
-
-        // friend functions
-        //
-        friend inline const floatInVec operator * (floatInVec vec0, floatInVec vec1);
-        friend inline const floatInVec operator / (floatInVec vec0, floatInVec vec1);
-        friend inline const floatInVec operator + (floatInVec vec0, floatInVec vec1);
-        friend inline const floatInVec operator - (floatInVec vec0, floatInVec vec1);
-        friend inline const floatInVec select(floatInVec vec0, floatInVec vec1, boolInVec select_vec1);
-};
-
-//--------------------------------------------------------------------------------------------------
-// floatInVec functions
-//
-
-// operators
-// 
-inline const floatInVec operator * (floatInVec vec0, floatInVec vec1);
-inline const floatInVec operator / (floatInVec vec0, floatInVec vec1);
-inline const floatInVec operator + (floatInVec vec0, floatInVec vec1);
-inline const floatInVec operator - (floatInVec vec0, floatInVec vec1);
-inline const boolInVec operator < (floatInVec vec0, floatInVec vec1);
-inline const boolInVec operator <= (floatInVec vec0, floatInVec vec1);
-inline const boolInVec operator > (floatInVec vec0, floatInVec vec1);
-inline const boolInVec operator >= (floatInVec vec0, floatInVec vec1);
-inline const boolInVec operator == (floatInVec vec0, floatInVec vec1);
-inline const boolInVec operator != (floatInVec vec0, floatInVec vec1);
-
-// select between vec0 and vec1 using boolInVec.
-// false selects vec0, true selects vec1
-//
-inline const floatInVec select(floatInVec vec0, floatInVec vec1, boolInVec select_vec1);
-
-} // namespace Vectormath
-
-//--------------------------------------------------------------------------------------------------
-// floatInVec implementation
-//
-
-#include "boolInVec.h"
-
-namespace Vectormath {
-
-inline
-floatInVec::floatInVec(vec_float4 vec)
-{
-    mData = vec;
-}
-
-inline
-floatInVec::floatInVec(boolInVec vec)
-{
-    mData = vec_ctf(vec_sub((vec_uint4){0,0,0,0}, vec.get128()), 0);
-}
-
-inline
-floatInVec::floatInVec(vec_float4 vec, int slot)
-{
-#ifdef __GNUC__
-    if (__builtin_constant_p(slot))
-    {
-        mData = vec_splat(vec, slot);
-    }
-    else
-#endif
-    {
-        const vec_uchar16 shiftpattern = vec_lvsl(0, (float *)(size_t)(slot << 2));
-        mData = vec_splat(vec_perm(vec, vec, shiftpattern), 0);
-    }
-}
-
-inline
-floatInVec::floatInVec(float scalar)
-{
-#ifdef __GNUC__
-    if (__builtin_constant_p(scalar))
-    {
-        mData = (vec_float4){scalar, scalar, scalar, scalar};
-    }
-    else
-#endif
-    {
-        vec_float4 vec = vec_ld(0, &scalar);
-        mData = vec_splat(vec_perm(vec, vec, vec_lvsl(0, &scalar)), 0);
-    }
-}
-
-#ifdef _VECTORMATH_NO_SCALAR_CAST
-inline
-float
-floatInVec::getAsFloat() const
-#else
-inline
-floatInVec::operator float() const
-#endif
-{
-    return *((float *)&mData);
-}
-
-inline
-vec_float4
-floatInVec::get128() const
-{
-    return mData;
-}
-
-inline
-const floatInVec
-floatInVec::operator ++ (int)
-{
-    vec_float4 olddata = mData;
-    operator ++();
-    return floatInVec(olddata);
-}
-
-inline
-const floatInVec
-floatInVec::operator -- (int)
-{
-    vec_float4 olddata = mData;
-    operator --();
-    return floatInVec(olddata);
-}
-
-inline
-floatInVec&
-floatInVec::operator ++ ()
-{
-    *this += floatInVec((vec_float4){1.0f,1.0f,1.0f,1.0f});
-    return *this;
-}
-
-inline
-floatInVec&
-floatInVec::operator -- ()
-{
-    *this -= floatInVec((vec_float4){1.0f,1.0f,1.0f,1.0f});
-    return *this;
-}
-
-inline
-const floatInVec
-floatInVec::operator - () const
-{
-    return floatInVec((vec_float4)vec_xor((vec_uint4)mData, (vec_uint4){0x80000000,0x80000000,0x80000000,0x80000000}));
-}
-
-inline
-floatInVec&
-floatInVec::operator = (floatInVec vec)
-{
-    mData = vec.mData;
-    return *this;
-}
-
-inline
-floatInVec&
-floatInVec::operator *= (floatInVec vec)
-{
-    *this = *this * vec;
-    return *this;
-}
-
-inline
-floatInVec&
-floatInVec::operator /= (floatInVec vec)
-{
-    *this = *this / vec;
-    return *this;
-}
-
-inline
-floatInVec&
-floatInVec::operator += (floatInVec vec)
-{
-    *this = *this + vec;
-    return *this;
-}
-
-inline
-floatInVec&
-floatInVec::operator -= (floatInVec vec)
-{
-    *this = *this - vec;
-    return *this;
-}
-
-inline
-const floatInVec
-operator * (floatInVec vec0, floatInVec vec1)
-{
-    return floatInVec(vec_madd(vec0.get128(), vec1.get128(), (vec_float4){0,0,0,0}));
-}
-
-inline
-const floatInVec
-operator / (floatInVec num, floatInVec den)
-{
-    return floatInVec(divf4(num.get128(), den.get128()));
-}
-
-inline
-const floatInVec
-operator + (floatInVec vec0, floatInVec vec1)
-{
-    return floatInVec(vec_add(vec0.get128(), vec1.get128()));
-}
-
-inline
-const floatInVec
-operator - (floatInVec vec0, floatInVec vec1)
-{
-    return floatInVec(vec_sub(vec0.get128(), vec1.get128()));
-}
-
-inline
-const boolInVec
-operator < (floatInVec vec0, floatInVec vec1)
-{
-    return boolInVec((vec_uint4)vec_cmpgt(vec1.get128(), vec0.get128()));
-}
-
-inline
-const boolInVec
-operator <= (floatInVec vec0, floatInVec vec1)
-{
-    return !(vec0 > vec1);
-}
-
-inline
-const boolInVec
-operator > (floatInVec vec0, floatInVec vec1)
-{
-    return boolInVec((vec_uint4)vec_cmpgt(vec0.get128(), vec1.get128()));
-}
-
-inline
-const boolInVec
-operator >= (floatInVec vec0, floatInVec vec1)
-{
-    return !(vec0 < vec1);
-}
-
-inline
-const boolInVec
-operator == (floatInVec vec0, floatInVec vec1)
-{
-    return boolInVec((vec_uint4)vec_cmpeq(vec0.get128(), vec1.get128()));
-}
-
-inline
-const boolInVec
-operator != (floatInVec vec0, floatInVec vec1)
-{
-    return !(vec0 == vec1);
-}
-    
-inline
-const floatInVec
-select(floatInVec vec0, floatInVec vec1, boolInVec select_vec1)
-{
-    return floatInVec(vec_sel(vec0.get128(), vec1.get128(), select_vec1.get128()));
-}
-
-} // namespace Vectormath
-
-#endif // floatInVec_h
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _FLOATINVEC_H
+#define _FLOATINVEC_H
+
+#include <math.h>
+#include <altivec.h>
+#include <stddef.h>
+#include <simdmath.h>
+#include "../c/vec_types.h"
+#undef bool
+
+namespace Vectormath {
+
+class boolInVec;
+
+//--------------------------------------------------------------------------------------------------
+// floatInVec class
+//
+
+class floatInVec  // a single float value held in an AltiVec vec_float4
+{
+    private:
+        vec_float4 mData;  // get128() documents the value as splatted across all word slots
+
+        inline floatInVec(vec_float4 vec);  // stores vec as-is; used by the friend functions below
+    public:
+        inline floatInVec() {}  // leaves mData uninitialized
+
+        // matches standard type conversions
+        // (true converts to 1.0f and false to 0.0f, assuming boolInVec holds an all-ones/zero mask)
+        inline floatInVec(boolInVec vec);
+
+        // construct from a slot of vec_float4
+        // (the selected element is broadcast to every slot)
+        inline floatInVec(vec_float4 vec, int slot);
+        
+        // explicit cast from float
+        // (`explicit` prevents a plain float from converting silently)
+        explicit inline floatInVec(float scalar);
+
+#ifdef _VECTORMATH_NO_SCALAR_CAST
+        // explicit cast to float
+        // (replaces the implicit conversion when _VECTORMATH_NO_SCALAR_CAST is defined)
+        inline float getAsFloat() const;
+#else
+        // implicit cast to float
+        // (default configuration; returns the first element of the vector)
+        inline operator float() const;
+#endif
+
+        // get vector data
+        // float value is splatted across all word slots of vector
+        //
+        inline vec_float4 get128() const;
+
+        // operators
+        // (postfix forms return the previous value; operands are passed by value)
+        inline const floatInVec operator ++ (int);
+        inline const floatInVec operator -- (int);
+        inline floatInVec& operator ++ ();
+        inline floatInVec& operator -- ();
+        inline const floatInVec operator - () const;
+        inline floatInVec& operator = (floatInVec vec);
+        inline floatInVec& operator *= (floatInVec vec);
+        inline floatInVec& operator /= (floatInVec vec);
+        inline floatInVec& operator += (floatInVec vec);
+        inline floatInVec& operator -= (floatInVec vec);
+
+        // friend functions
+        // (friendship gives these access to the private vec_float4 constructor)
+        friend inline const floatInVec operator * (floatInVec vec0, floatInVec vec1);
+        friend inline const floatInVec operator / (floatInVec vec0, floatInVec vec1);
+        friend inline const floatInVec operator + (floatInVec vec0, floatInVec vec1);
+        friend inline const floatInVec operator - (floatInVec vec0, floatInVec vec1);
+        friend inline const floatInVec select(floatInVec vec0, floatInVec vec1, boolInVec select_vec1);
+};
+
+//--------------------------------------------------------------------------------------------------
+// floatInVec functions
+//
+
+// operators
+// 
+inline const floatInVec operator * (floatInVec vec0, floatInVec vec1);
+inline const floatInVec operator / (floatInVec vec0, floatInVec vec1);
+inline const floatInVec operator + (floatInVec vec0, floatInVec vec1);
+inline const floatInVec operator - (floatInVec vec0, floatInVec vec1);
+inline const boolInVec operator < (floatInVec vec0, floatInVec vec1);
+inline const boolInVec operator <= (floatInVec vec0, floatInVec vec1);
+inline const boolInVec operator > (floatInVec vec0, floatInVec vec1);
+inline const boolInVec operator >= (floatInVec vec0, floatInVec vec1);
+inline const boolInVec operator == (floatInVec vec0, floatInVec vec1);
+inline const boolInVec operator != (floatInVec vec0, floatInVec vec1);
+
+// select between vec0 and vec1 using boolInVec.
+// false selects vec0, true selects vec1
+//
+inline const floatInVec select(floatInVec vec0, floatInVec vec1, boolInVec select_vec1);
+
+} // namespace Vectormath
+
+//--------------------------------------------------------------------------------------------------
+// floatInVec implementation
+//
+
+#include "boolInVec.h"
+
+namespace Vectormath {
+
+inline
+floatInVec::floatInVec(vec_float4 vec)  // private ctor: wraps a raw vector without modifying it
+{
+    mData = vec;  // no splat is performed here; the vector is stored exactly as given
+}
+
+inline
+floatInVec::floatInVec(boolInVec vec)  // converts a boolean mask to a float value
+{
+    mData = vec_ctf(vec_sub((vec_uint4){0,0,0,0}, vec.get128()), 0);  // 0 - mask, then unsigned->float: an all-ones word (as vec_cmp* yields for true) gives 1.0f, a zero word gives 0.0f
+}
+
+inline
+floatInVec::floatInVec(vec_float4 vec, int slot)  // broadcasts element `slot` of vec to all four slots
+{
+#ifdef __GNUC__
+    if (__builtin_constant_p(slot))  // vec_splat requires a compile-time element index
+    {
+        mData = vec_splat(vec, slot);
+    }
+    else
+#endif
+    {
+        const vec_uchar16 shiftpattern = vec_lvsl(0, (float *)(size_t)(slot << 2));  // slot*4 is used as a fake address (never dereferenced) to get a rotate-left-by-(slot*4)-bytes permute pattern
+        mData = vec_splat(vec_perm(vec, vec, shiftpattern), 0);  // rotate the chosen element into slot 0, then splat it
+    }
+}
+
+inline
+floatInVec::floatInVec(float scalar)  // broadcasts a scalar float to all four slots
+{
+#ifdef __GNUC__
+    if (__builtin_constant_p(scalar))  // compile-time constant: build the vector literal directly
+    {
+        mData = (vec_float4){scalar, scalar, scalar, scalar};
+    }
+    else
+#endif
+    {
+        vec_float4 vec = vec_ld(0, &scalar);  // loads the 16-byte-aligned quadword that contains scalar
+        mData = vec_splat(vec_perm(vec, vec, vec_lvsl(0, &scalar)), 0);  // rotate scalar into slot 0 using its address offset, then splat it
+    }
+}
+
+#ifdef _VECTORMATH_NO_SCALAR_CAST
+inline
+float
+floatInVec::getAsFloat() const  // explicit accessor variant; shares the body below
+#else
+inline
+floatInVec::operator float() const  // implicit conversion variant; shares the body below
+#endif
+{
+    return *((float *)&mData);  // reads the first float (slot 0) of the vector through a pointer cast
+}
+
+inline
+vec_float4
+floatInVec::get128() const  // returns the underlying vector unchanged
+{
+    return mData;
+}
+
+inline
+const floatInVec
+floatInVec::operator ++ (int)  // postfix increment: returns the value held before incrementing
+{
+    vec_float4 olddata = mData;  // snapshot taken before the prefix form modifies mData
+    operator ++();
+    return floatInVec(olddata);
+}
+
+inline
+const floatInVec
+floatInVec::operator -- (int)  // postfix decrement: returns the value held before decrementing
+{
+    vec_float4 olddata = mData;  // snapshot taken before the prefix form modifies mData
+    operator --();
+    return floatInVec(olddata);
+}
+
+inline
+floatInVec&
+floatInVec::operator ++ ()  // prefix increment: returns *this after the update
+{
+    *this += floatInVec((vec_float4){1.0f,1.0f,1.0f,1.0f});  // adds 1.0f to every slot
+    return *this;
+}
+
+inline
+floatInVec&
+floatInVec::operator -- ()  // prefix decrement: returns *this after the update
+{
+    *this -= floatInVec((vec_float4){1.0f,1.0f,1.0f,1.0f});  // subtracts 1.0f from every slot
+    return *this;
+}
+
+inline
+const floatInVec
+floatInVec::operator - () const  // unary minus
+{
+    return floatInVec((vec_float4)vec_xor((vec_uint4)mData, (vec_uint4){0x80000000,0x80000000,0x80000000,0x80000000}));  // negates by toggling the top (sign) bit of each 32-bit word
+}
+
+inline
+floatInVec&
+floatInVec::operator = (floatInVec vec)  // copies the vector payload; argument is taken by value
+{
+    mData = vec.mData;
+    return *this;
+}
+
+inline
+floatInVec&
+floatInVec::operator *= (floatInVec vec)  // in-place multiply; returns *this
+{
+    *this = *this * vec;  // delegates to the free binary operator *
+    return *this;
+}
+
+inline
+floatInVec&
+floatInVec::operator /= (floatInVec vec)  // in-place divide; returns *this
+{
+    *this = *this / vec;  // delegates to the free binary operator /
+    return *this;
+}
+
+inline
+floatInVec&
+floatInVec::operator += (floatInVec vec)  // in-place add; returns *this
+{
+    *this = *this + vec;  // delegates to the free binary operator +
+    return *this;
+}
+
+inline
+floatInVec&
+floatInVec::operator -= (floatInVec vec)  // in-place subtract; returns *this
+{
+    *this = *this - vec;  // delegates to the free binary operator -
+    return *this;
+}
+
+inline
+const floatInVec
+operator * (floatInVec vec0, floatInVec vec1)  // per-slot product of the two operands
+{
+    return floatInVec(vec_madd(vec0.get128(), vec1.get128(), (vec_float4){0,0,0,0}));  // multiply expressed as multiply-add with a zero addend
+}
+
+inline
+const floatInVec
+operator / (floatInVec num, floatInVec den)  // per-slot quotient num / den
+{
+    return floatInVec(divf4(num.get128(), den.get128()));  // divf4 is not defined in this file; presumably supplied by <simdmath.h>
+}
+
+inline
+const floatInVec
+operator + (floatInVec vec0, floatInVec vec1)  // per-slot sum of the two operands
+{
+    return floatInVec(vec_add(vec0.get128(), vec1.get128()));
+}
+
+inline
+const floatInVec
+operator - (floatInVec vec0, floatInVec vec1)  // per-slot difference vec0 - vec1
+{
+    return floatInVec(vec_sub(vec0.get128(), vec1.get128()));
+}
+
+inline
+const boolInVec
+operator < (floatInVec vec0, floatInVec vec1)  // vec0 < vec1 as a boolInVec mask
+{
+    return boolInVec((vec_uint4)vec_cmpgt(vec1.get128(), vec0.get128()));  // a < b is evaluated as b > a by swapping the vec_cmpgt operands
+}
+
+inline
+const boolInVec
+operator <= (floatInVec vec0, floatInVec vec1)  // vec0 <= vec1 as a boolInVec mask
+{
+    return !(vec0 > vec1);  // NOTE(review): defined as the negation of >, so presumably true for unordered (NaN) operands - confirm this is intended
+}
+
+inline
+const boolInVec
+operator > (floatInVec vec0, floatInVec vec1)  // vec0 > vec1 as a boolInVec mask
+{
+    return boolInVec((vec_uint4)vec_cmpgt(vec0.get128(), vec1.get128()));  // compare result is reinterpreted as vec_uint4 and wrapped
+}
+
+inline
+const boolInVec
+operator >= (floatInVec vec0, floatInVec vec1)  // vec0 >= vec1 as a boolInVec mask
+{
+    return !(vec0 < vec1);  // NOTE(review): defined as the negation of <, so presumably true for unordered (NaN) operands - confirm this is intended
+}
+
+inline
+const boolInVec
+operator == (floatInVec vec0, floatInVec vec1)  // vec0 == vec1 as a boolInVec mask
+{
+    return boolInVec((vec_uint4)vec_cmpeq(vec0.get128(), vec1.get128()));  // compare result is reinterpreted as vec_uint4 and wrapped
+}
+
+inline
+const boolInVec
+operator != (floatInVec vec0, floatInVec vec1)  // vec0 != vec1 as a boolInVec mask
+{
+    return !(vec0 == vec1);  // defined as the negation of ==
+}
+    
+inline
+const floatInVec
+select(floatInVec vec0, floatInVec vec1, boolInVec select_vec1)  // false selects vec0, true selects vec1
+{
+    return floatInVec(vec_sel(vec0.get128(), vec1.get128(), select_vec1.get128()));  // vec_sel takes bits from vec1 where the mask bit is set, otherwise from vec0
+}
+
+} // namespace Vectormath
+
+#endif // _FLOATINVEC_H
diff --git a/Extras/vectormathlibrary/include/vectormath/ppu/cpp/mat_aos.h b/Extras/vectormathlibrary/include/vectormath/ppu/cpp/mat_aos.h
index 77a184725..11cdcddf6 100644
--- a/Extras/vectormathlibrary/include/vectormath/ppu/cpp/mat_aos.h
+++ b/Extras/vectormathlibrary/include/vectormath/ppu/cpp/mat_aos.h
@@ -1,2188 +1,2188 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_MAT_AOS_CPP_H
-#define _VECTORMATH_MAT_AOS_CPP_H
-
-namespace Vectormath {
-namespace Aos {
-
-//-----------------------------------------------------------------------------
-// Constants
-// for shuffles, words are labeled [x,y,z,w] [a,b,c,d]
-
-#define _VECTORMATH_PERM_ZBWX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B, _VECTORMATH_PERM_W, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PERM_XCYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_C, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PERM_XYAB ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A, _VECTORMATH_PERM_B })
-#define _VECTORMATH_PERM_ZWCD ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_W, _VECTORMATH_PERM_C, _VECTORMATH_PERM_D })
-#define _VECTORMATH_PERM_XZBX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B, _VECTORMATH_PERM_X })     
-#define _VECTORMATH_PERM_CXXX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PERM_YAXX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PERM_XAZC ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_A, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_C })
-#define _VECTORMATH_PERM_YXWZ ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X, _VECTORMATH_PERM_W, _VECTORMATH_PERM_Z })
-#define _VECTORMATH_PERM_YBWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_B, _VECTORMATH_PERM_W, _VECTORMATH_PERM_D })
-#define _VECTORMATH_PERM_XYCX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_C, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PERM_YCXY ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y })
-#define _VECTORMATH_PERM_CXYC ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_C })
-#define _VECTORMATH_PERM_ZAYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PERM_BZXX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_B, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PERM_XZYA ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A })
-#define _VECTORMATH_PERM_ZXXB ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X, _VECTORMATH_PERM_B })
-#define _VECTORMATH_PERM_YXXC ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X, _VECTORMATH_PERM_C })
-#define _VECTORMATH_PERM_BBYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_B, _VECTORMATH_PERM_B, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PI_OVER_2 1.570796327f
-
-//-----------------------------------------------------------------------------
-// Definitions
-
-inline Matrix3::Matrix3( const Matrix3 & mat )
-{
-    mCol0 = mat.mCol0;
-    mCol1 = mat.mCol1;
-    mCol2 = mat.mCol2;
-}
-
-inline Matrix3::Matrix3( float scalar )
-{
-    mCol0 = Vector3( scalar );
-    mCol1 = Vector3( scalar );
-    mCol2 = Vector3( scalar );
-}
-
-inline Matrix3::Matrix3( floatInVec scalar )
-{
-    mCol0 = Vector3( scalar );
-    mCol1 = Vector3( scalar );
-    mCol2 = Vector3( scalar );
-}
-
-inline Matrix3::Matrix3( Quat unitQuat )
-{
-    vec_float4 xyzw_2, wwww, yzxw, zxyw, yzxw_2, zxyw_2;
-    vec_float4 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
-    vec_uint4 select_x = _VECTORMATH_MASK_0xF000;
-    vec_uint4 select_z = _VECTORMATH_MASK_0x00F0;
-    xyzw_2 = vec_add( unitQuat.get128(), unitQuat.get128() );
-    wwww = vec_splat( unitQuat.get128(), 3 );
-    yzxw = vec_perm( unitQuat.get128(), unitQuat.get128(), _VECTORMATH_PERM_YZXW );
-    zxyw = vec_perm( unitQuat.get128(), unitQuat.get128(), _VECTORMATH_PERM_ZXYW );
-    yzxw_2 = vec_perm( xyzw_2, xyzw_2, _VECTORMATH_PERM_YZXW );
-    zxyw_2 = vec_perm( xyzw_2, xyzw_2, _VECTORMATH_PERM_ZXYW );
-    tmp0 = vec_madd( yzxw_2, wwww, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    tmp1 = vec_nmsub( yzxw, yzxw_2, ((vec_float4){1.0f,1.0f,1.0f,1.0f}) );
-    tmp2 = vec_madd( yzxw, xyzw_2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    tmp0 = vec_madd( zxyw, xyzw_2, tmp0 );
-    tmp1 = vec_nmsub( zxyw, zxyw_2, tmp1 );
-    tmp2 = vec_nmsub( zxyw_2, wwww, tmp2 );
-    tmp3 = vec_sel( tmp0, tmp1, select_x );
-    tmp4 = vec_sel( tmp1, tmp2, select_x );
-    tmp5 = vec_sel( tmp2, tmp0, select_x );
-    mCol0 = Vector3( vec_sel( tmp3, tmp2, select_z ) );
-    mCol1 = Vector3( vec_sel( tmp4, tmp0, select_z ) );
-    mCol2 = Vector3( vec_sel( tmp5, tmp1, select_z ) );
-}
-
-inline Matrix3::Matrix3( Vector3 _col0, Vector3 _col1, Vector3 _col2 )
-{
-    mCol0 = _col0;
-    mCol1 = _col1;
-    mCol2 = _col2;
-}
-
-inline Matrix3 & Matrix3::setCol0( Vector3 _col0 )
-{
-    mCol0 = _col0;
-    return *this;
-}
-
-inline Matrix3 & Matrix3::setCol1( Vector3 _col1 )
-{
-    mCol1 = _col1;
-    return *this;
-}
-
-inline Matrix3 & Matrix3::setCol2( Vector3 _col2 )
-{
-    mCol2 = _col2;
-    return *this;
-}
-
-inline Matrix3 & Matrix3::setCol( int col, Vector3 vec )
-{
-    *(&mCol0 + col) = vec;
-    return *this;
-}
-
-inline Matrix3 & Matrix3::setRow( int row, Vector3 vec )
-{
-    mCol0.setElem( row, vec.getElem( 0 ) );
-    mCol1.setElem( row, vec.getElem( 1 ) );
-    mCol2.setElem( row, vec.getElem( 2 ) );
-    return *this;
-}
-
-inline Matrix3 & Matrix3::setElem( int col, int row, float val )
-{
-    (*this)[col].setElem(row, val);
-    return *this;
-}
-
-inline Matrix3 & Matrix3::setElem( int col, int row, floatInVec val )
-{
-    Vector3 tmpV3_0;
-    tmpV3_0 = this->getCol( col );
-    tmpV3_0.setElem( row, val );
-    this->setCol( col, tmpV3_0 );
-    return *this;
-}
-
-inline const floatInVec Matrix3::getElem( int col, int row ) const
-{
-    return this->getCol( col ).getElem( row );
-}
-
-inline const Vector3 Matrix3::getCol0( ) const
-{
-    return mCol0;
-}
-
-inline const Vector3 Matrix3::getCol1( ) const
-{
-    return mCol1;
-}
-
-inline const Vector3 Matrix3::getCol2( ) const
-{
-    return mCol2;
-}
-
-inline const Vector3 Matrix3::getCol( int col ) const
-{
-    return *(&mCol0 + col);
-}
-
-inline const Vector3 Matrix3::getRow( int row ) const
-{
-    return Vector3( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ) );
-}
-
-inline Vector3 & Matrix3::operator []( int col )
-{
-    return *(&mCol0 + col);
-}
-
-inline const Vector3 Matrix3::operator []( int col ) const
-{
-    return *(&mCol0 + col);
-}
-
-inline Matrix3 & Matrix3::operator =( const Matrix3 & mat )
-{
-    mCol0 = mat.mCol0;
-    mCol1 = mat.mCol1;
-    mCol2 = mat.mCol2;
-    return *this;
-}
-
-inline const Matrix3 transpose( const Matrix3 & mat )
-{
-    vec_float4 tmp0, tmp1, res0, res1, res2;
-    tmp0 = vec_mergeh( mat.getCol0().get128(), mat.getCol2().get128() );
-    tmp1 = vec_mergel( mat.getCol0().get128(), mat.getCol2().get128() );
-    res0 = vec_mergeh( tmp0, mat.getCol1().get128() );
-    res1 = vec_perm( tmp0, mat.getCol1().get128(), _VECTORMATH_PERM_ZBWX );
-    res2 = vec_perm( tmp1, mat.getCol1().get128(), _VECTORMATH_PERM_XCYX );
-    return Matrix3(
-        Vector3( res0 ),
-        Vector3( res1 ),
-        Vector3( res2 )
-    );
-}
-
-inline const Matrix3 inverse( const Matrix3 & mat )
-{
-    vec_float4 tmp0, tmp1, tmp2, tmp3, tmp4, dot, invdet, inv0, inv1, inv2;
-    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    tmp2 = _vmathVfCross( mat.getCol0().get128(), mat.getCol1().get128() );
-    tmp0 = _vmathVfCross( mat.getCol1().get128(), mat.getCol2().get128() );
-    tmp1 = _vmathVfCross( mat.getCol2().get128(), mat.getCol0().get128() );
-    dot = _vmathVfDot3( tmp2, mat.getCol2().get128() );
-    dot = vec_splat( dot, 0 );
-    invdet = recipf4( dot );
-    tmp3 = vec_mergeh( tmp0, tmp2 );
-    tmp4 = vec_mergel( tmp0, tmp2 );
-    inv0 = vec_mergeh( tmp3, tmp1 );
-    inv1 = vec_perm( tmp3, tmp1, _VECTORMATH_PERM_ZBWX );
-    inv2 = vec_perm( tmp4, tmp1, _VECTORMATH_PERM_XCYX );
-    inv0 = vec_madd( inv0, invdet, zero );
-    inv1 = vec_madd( inv1, invdet, zero );
-    inv2 = vec_madd( inv2, invdet, zero );
-    return Matrix3(
-        Vector3( inv0 ),
-        Vector3( inv1 ),
-        Vector3( inv2 )
-    );
-}
-
-inline const floatInVec determinant( const Matrix3 & mat )
-{
-    return dot( mat.getCol2(), cross( mat.getCol0(), mat.getCol1() ) );
-}
-
-inline const Matrix3 Matrix3::operator +( const Matrix3 & mat ) const
-{
-    return Matrix3(
-        ( mCol0 + mat.mCol0 ),
-        ( mCol1 + mat.mCol1 ),
-        ( mCol2 + mat.mCol2 )
-    );
-}
-
-inline const Matrix3 Matrix3::operator -( const Matrix3 & mat ) const
-{
-    return Matrix3(
-        ( mCol0 - mat.mCol0 ),
-        ( mCol1 - mat.mCol1 ),
-        ( mCol2 - mat.mCol2 )
-    );
-}
-
-inline Matrix3 & Matrix3::operator +=( const Matrix3 & mat )
-{
-    *this = *this + mat;
-    return *this;
-}
-
-inline Matrix3 & Matrix3::operator -=( const Matrix3 & mat )
-{
-    *this = *this - mat;
-    return *this;
-}
-
-inline const Matrix3 Matrix3::operator -( ) const
-{
-    return Matrix3(
-        ( -mCol0 ),
-        ( -mCol1 ),
-        ( -mCol2 )
-    );
-}
-
-inline const Matrix3 absPerElem( const Matrix3 & mat )
-{
-    return Matrix3(
-        absPerElem( mat.getCol0() ),
-        absPerElem( mat.getCol1() ),
-        absPerElem( mat.getCol2() )
-    );
-}
-
-inline const Matrix3 Matrix3::operator *( float scalar ) const
-{
-    return *this * floatInVec(scalar);
-}
-
-inline const Matrix3 Matrix3::operator *( floatInVec scalar ) const
-{
-    return Matrix3(
-        ( mCol0 * scalar ),
-        ( mCol1 * scalar ),
-        ( mCol2 * scalar )
-    );
-}
-
-inline Matrix3 & Matrix3::operator *=( float scalar )
-{
-    return *this *= floatInVec(scalar);
-}
-
-inline Matrix3 & Matrix3::operator *=( floatInVec scalar )
-{
-    *this = *this * scalar;
-    return *this;
-}
-
-inline const Matrix3 operator *( float scalar, const Matrix3 & mat )
-{
-    return floatInVec(scalar) * mat;
-}
-
-inline const Matrix3 operator *( floatInVec scalar, const Matrix3 & mat )
-{
-    return mat * scalar;
-}
-
-inline const Vector3 Matrix3::operator *( Vector3 vec ) const
-{
-    vec_float4 res;
-    vec_float4 xxxx, yyyy, zzzz;
-    xxxx = vec_splat( vec.get128(), 0 );
-    yyyy = vec_splat( vec.get128(), 1 );
-    zzzz = vec_splat( vec.get128(), 2 );
-    res = vec_madd( mCol0.get128(), xxxx, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    res = vec_madd( mCol1.get128(), yyyy, res );
-    res = vec_madd( mCol2.get128(), zzzz, res );
-    return Vector3( res );
-}
-
-inline const Matrix3 Matrix3::operator *( const Matrix3 & mat ) const
-{
-    return Matrix3(
-        ( *this * mat.mCol0 ),
-        ( *this * mat.mCol1 ),
-        ( *this * mat.mCol2 )
-    );
-}
-
-inline Matrix3 & Matrix3::operator *=( const Matrix3 & mat )
-{
-    *this = *this * mat;
-    return *this;
-}
-
-inline const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 )
-{
-    return Matrix3(
-        mulPerElem( mat0.getCol0(), mat1.getCol0() ),
-        mulPerElem( mat0.getCol1(), mat1.getCol1() ),
-        mulPerElem( mat0.getCol2(), mat1.getCol2() )
-    );
-}
-
-inline const Matrix3 Matrix3::identity( )
-{
-    return Matrix3(
-        Vector3::xAxis( ),
-        Vector3::yAxis( ),
-        Vector3::zAxis( )
-    );
-}
-
-inline const Matrix3 Matrix3::rotationX( float radians )
-{
-    return rotationX( floatInVec(radians) );
-}
-
-inline const Matrix3 Matrix3::rotationX( floatInVec radians )
-{
-    vec_float4 s, c, res1, res2;
-    vec_uint4 select_y, select_z;
-    vec_float4 zero;
-    select_y = _VECTORMATH_MASK_0x0F00;
-    select_z = _VECTORMATH_MASK_0x00F0;
-    zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    sincosf4( radians.get128(), &s, &c );
-    res1 = vec_sel( zero, c, select_y );
-    res1 = vec_sel( res1, s, select_z );
-    res2 = vec_sel( zero, negatef4(s), select_y );
-    res2 = vec_sel( res2, c, select_z );
-    return Matrix3(
-        Vector3::xAxis( ),
-        Vector3( res1 ),
-        Vector3( res2 )
-    );
-}
-
-inline const Matrix3 Matrix3::rotationY( float radians )
-{
-    return rotationY( floatInVec(radians) );
-}
-
-inline const Matrix3 Matrix3::rotationY( floatInVec radians )
-{
-    vec_float4 s, c, res0, res2;
-    vec_uint4 select_x, select_z;
-    vec_float4 zero;
-    select_x = _VECTORMATH_MASK_0xF000;
-    select_z = _VECTORMATH_MASK_0x00F0;
-    zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    sincosf4( radians.get128(), &s, &c );
-    res0 = vec_sel( zero, c, select_x );
-    res0 = vec_sel( res0, negatef4(s), select_z );
-    res2 = vec_sel( zero, s, select_x );
-    res2 = vec_sel( res2, c, select_z );
-    return Matrix3(
-        Vector3( res0 ),
-        Vector3::yAxis( ),
-        Vector3( res2 )
-    );
-}
-
-inline const Matrix3 Matrix3::rotationZ( float radians )
-{
-    return rotationZ( floatInVec(radians) );
-}
-
-inline const Matrix3 Matrix3::rotationZ( floatInVec radians )
-{
-    vec_float4 s, c, res0, res1;
-    vec_uint4 select_x, select_y;
-    vec_float4 zero;
-    select_x = _VECTORMATH_MASK_0xF000;
-    select_y = _VECTORMATH_MASK_0x0F00;
-    zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    sincosf4( radians.get128(), &s, &c );
-    res0 = vec_sel( zero, c, select_x );
-    res0 = vec_sel( res0, s, select_y );
-    res1 = vec_sel( zero, negatef4(s), select_x );
-    res1 = vec_sel( res1, c, select_y );
-    return Matrix3(
-        Vector3( res0 ),
-        Vector3( res1 ),
-        Vector3::zAxis( )
-    );
-}
-
-inline const Matrix3 Matrix3::rotationZYX( Vector3 radiansXYZ )
-{
-    vec_float4 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
-    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    angles = Vector4( radiansXYZ, 0.0f ).get128();
-    sincosf4( angles, &s, &c );
-    negS = negatef4( s );
-    Z0 = vec_mergel( c, s );
-    Z1 = vec_mergel( negS, c );
-    Z1 = vec_andc( Z1, (vec_float4)_VECTORMATH_MASK_0x000F );
-    Y0 = vec_perm( negS, c, _VECTORMATH_PERM_BBYX );
-    Y1 = vec_perm( c, s, _VECTORMATH_PERM_BBYX );
-    X0 = vec_splat( s, 0 );
-    X1 = vec_splat( c, 0 );
-    tmp = vec_madd( Z0, Y1, zero );
-    return Matrix3(
-        Vector3( vec_madd( Z0, Y0, zero ) ),
-        Vector3( vec_madd( Z1, X1, vec_madd( tmp, X0, zero ) ) ),
-        Vector3( vec_nmsub( Z1, X0, vec_madd( tmp, X1, zero ) ) )
-    );
-}
-
-inline const Matrix3 Matrix3::rotation( float radians, Vector3 unitVec )
-{
-    return rotation( floatInVec(radians), unitVec );
-}
-
-inline const Matrix3 Matrix3::rotation( floatInVec radians, Vector3 unitVec )
-{
-    vec_float4 axis, s, c, oneMinusC, axisS, negAxisS, xxxx, yyyy, zzzz, tmp0, tmp1, tmp2;
-    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    axis = unitVec.get128();
-    sincosf4( radians.get128(), &s, &c );
-    xxxx = vec_splat( axis, 0 );
-    yyyy = vec_splat( axis, 1 );
-    zzzz = vec_splat( axis, 2 );
-    oneMinusC = vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), c );
-    axisS = vec_madd( axis, s, zero );
-    negAxisS = negatef4( axisS );
-    tmp0 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_XZBX );
-    tmp1 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_CXXX );
-    tmp2 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_YAXX );
-    tmp0 = vec_sel( tmp0, c, _VECTORMATH_MASK_0xF000 );
-    tmp1 = vec_sel( tmp1, c, _VECTORMATH_MASK_0x0F00 );
-    tmp2 = vec_sel( tmp2, c, _VECTORMATH_MASK_0x00F0 );
-    return Matrix3(
-        Vector3( vec_madd( vec_madd( axis, xxxx, zero ), oneMinusC, tmp0 ) ),
-        Vector3( vec_madd( vec_madd( axis, yyyy, zero ), oneMinusC, tmp1 ) ),
-        Vector3( vec_madd( vec_madd( axis, zzzz, zero ), oneMinusC, tmp2 ) )
-    );
-}
-
-inline const Matrix3 Matrix3::rotation( Quat unitQuat )
-{
-    return Matrix3( unitQuat );
-}
-
-inline const Matrix3 Matrix3::scale( Vector3 scaleVec )
-{
-    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    return Matrix3(
-        Vector3( vec_sel( zero, scaleVec.get128(), _VECTORMATH_MASK_0xF000 ) ),
-        Vector3( vec_sel( zero, scaleVec.get128(), _VECTORMATH_MASK_0x0F00 ) ),
-        Vector3( vec_sel( zero, scaleVec.get128(), _VECTORMATH_MASK_0x00F0 ) )
-    );
-}
-
-inline const Matrix3 appendScale( const Matrix3 & mat, Vector3 scaleVec )
-{
-    return Matrix3(
-        ( mat.getCol0() * scaleVec.getX( ) ),
-        ( mat.getCol1() * scaleVec.getY( ) ),
-        ( mat.getCol2() * scaleVec.getZ( ) )
-    );
-}
-
-inline const Matrix3 prependScale( Vector3 scaleVec, const Matrix3 & mat )
-{
-    return Matrix3(
-        mulPerElem( mat.getCol0(), scaleVec ),
-        mulPerElem( mat.getCol1(), scaleVec ),
-        mulPerElem( mat.getCol2(), scaleVec )
-    );
-}
-
-inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, bool select1 )
-{
-    return Matrix3(
-        select( mat0.getCol0(), mat1.getCol0(), select1 ),
-        select( mat0.getCol1(), mat1.getCol1(), select1 ),
-        select( mat0.getCol2(), mat1.getCol2(), select1 )
-    );
-}
-
-inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, boolInVec select1 )
-{
-    return Matrix3(
-        select( mat0.getCol0(), mat1.getCol0(), select1 ),
-        select( mat0.getCol1(), mat1.getCol1(), select1 ),
-        select( mat0.getCol2(), mat1.getCol2(), select1 )
-    );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( const Matrix3 & mat )
-{
-    print( mat.getRow( 0 ) );
-    print( mat.getRow( 1 ) );
-    print( mat.getRow( 2 ) );
-}
-
-inline void print( const Matrix3 & mat, const char * name )
-{
-    printf("%s:\n", name);
-    print( mat );
-}
-
-#endif
-
-inline Matrix4::Matrix4( const Matrix4 & mat )
-{
-    mCol0 = mat.mCol0;
-    mCol1 = mat.mCol1;
-    mCol2 = mat.mCol2;
-    mCol3 = mat.mCol3;
-}
-
-inline Matrix4::Matrix4( float scalar )
-{
-    mCol0 = Vector4( scalar );
-    mCol1 = Vector4( scalar );
-    mCol2 = Vector4( scalar );
-    mCol3 = Vector4( scalar );
-}
-
-inline Matrix4::Matrix4( floatInVec scalar )
-{
-    mCol0 = Vector4( scalar );
-    mCol1 = Vector4( scalar );
-    mCol2 = Vector4( scalar );
-    mCol3 = Vector4( scalar );
-}
-
-inline Matrix4::Matrix4( const Transform3 & mat )
-{
-    mCol0 = Vector4( mat.getCol0(), 0.0f );
-    mCol1 = Vector4( mat.getCol1(), 0.0f );
-    mCol2 = Vector4( mat.getCol2(), 0.0f );
-    mCol3 = Vector4( mat.getCol3(), 1.0f );
-}
-
-inline Matrix4::Matrix4( Vector4 _col0, Vector4 _col1, Vector4 _col2, Vector4 _col3 )
-{
-    mCol0 = _col0;
-    mCol1 = _col1;
-    mCol2 = _col2;
-    mCol3 = _col3;
-}
-
-inline Matrix4::Matrix4( const Matrix3 & mat, Vector3 translateVec )
-{
-    mCol0 = Vector4( mat.getCol0(), 0.0f );
-    mCol1 = Vector4( mat.getCol1(), 0.0f );
-    mCol2 = Vector4( mat.getCol2(), 0.0f );
-    mCol3 = Vector4( translateVec, 1.0f );
-}
-
-inline Matrix4::Matrix4( Quat unitQuat, Vector3 translateVec )
-{
-    Matrix3 mat;
-    mat = Matrix3( unitQuat );
-    mCol0 = Vector4( mat.getCol0(), 0.0f );
-    mCol1 = Vector4( mat.getCol1(), 0.0f );
-    mCol2 = Vector4( mat.getCol2(), 0.0f );
-    mCol3 = Vector4( translateVec, 1.0f );
-}
-
-inline Matrix4 & Matrix4::setCol0( Vector4 _col0 )
-{
-    mCol0 = _col0;
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setCol1( Vector4 _col1 )
-{
-    mCol1 = _col1;
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setCol2( Vector4 _col2 )
-{
-    mCol2 = _col2;
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setCol3( Vector4 _col3 )
-{
-    mCol3 = _col3;
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setCol( int col, Vector4 vec )
-{
-    *(&mCol0 + col) = vec;
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setRow( int row, Vector4 vec )
-{
-    mCol0.setElem( row, vec.getElem( 0 ) );
-    mCol1.setElem( row, vec.getElem( 1 ) );
-    mCol2.setElem( row, vec.getElem( 2 ) );
-    mCol3.setElem( row, vec.getElem( 3 ) );
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setElem( int col, int row, float val )
-{
-    (*this)[col].setElem(row, val);
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setElem( int col, int row, floatInVec val )
-{
-    Vector4 tmpV3_0;
-    tmpV3_0 = this->getCol( col );
-    tmpV3_0.setElem( row, val );
-    this->setCol( col, tmpV3_0 );
-    return *this;
-}
-
-inline const floatInVec Matrix4::getElem( int col, int row ) const
-{
-    return this->getCol( col ).getElem( row );
-}
-
-inline const Vector4 Matrix4::getCol0( ) const
-{
-    return mCol0;
-}
-
-inline const Vector4 Matrix4::getCol1( ) const
-{
-    return mCol1;
-}
-
-inline const Vector4 Matrix4::getCol2( ) const
-{
-    return mCol2;
-}
-
-inline const Vector4 Matrix4::getCol3( ) const
-{
-    return mCol3;
-}
-
-inline const Vector4 Matrix4::getCol( int col ) const
-{
-    return *(&mCol0 + col);
-}
-
-inline const Vector4 Matrix4::getRow( int row ) const
-{
-    return Vector4( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ), mCol3.getElem( row ) );
-}
-
-inline Vector4 & Matrix4::operator []( int col )
-{
-    return *(&mCol0 + col);
-}
-
-inline const Vector4 Matrix4::operator []( int col ) const
-{
-    return *(&mCol0 + col);
-}
-
-inline Matrix4 & Matrix4::operator =( const Matrix4 & mat )
-{
-    mCol0 = mat.mCol0;
-    mCol1 = mat.mCol1;
-    mCol2 = mat.mCol2;
-    mCol3 = mat.mCol3;
-    return *this;
-}
-
-inline const Matrix4 transpose( const Matrix4 & mat )
-{
-    vec_float4 tmp0, tmp1, tmp2, tmp3, res0, res1, res2, res3;
-    tmp0 = vec_mergeh( mat.getCol0().get128(), mat.getCol2().get128() );
-    tmp1 = vec_mergeh( mat.getCol1().get128(), mat.getCol3().get128() );
-    tmp2 = vec_mergel( mat.getCol0().get128(), mat.getCol2().get128() );
-    tmp3 = vec_mergel( mat.getCol1().get128(), mat.getCol3().get128() );
-    res0 = vec_mergeh( tmp0, tmp1 );
-    res1 = vec_mergel( tmp0, tmp1 );
-    res2 = vec_mergeh( tmp2, tmp3 );
-    res3 = vec_mergel( tmp2, tmp3 );
-    return Matrix4(
-        Vector4( res0 ),
-        Vector4( res1 ),
-        Vector4( res2 ),
-        Vector4( res3 )
-    );
-}
-
-inline const Matrix4 inverse( const Matrix4 & mat )
-{
-    /* function implementation based on code from STIDC SDK:           */
-    /* --------------------------------------------------------------  */
-    /* PLEASE DO NOT MODIFY THIS SECTION                               */
-    /* This prolog section is automatically generated.                 */
-    /*                                                                 */
-    /* (C)Copyright                                                    */
-    /* Sony Computer Entertainment, Inc.,                              */
-    /* Toshiba Corporation,                                            */
-    /* International Business Machines Corporation,                    */
-    /* 2001,2002.                                                      */
-    /* S/T/I Confidential Information                                  */
-    /* --------------------------------------------------------------  */
-    vector float in0, in1, in2, in3;
-    vector float tmp0, tmp1, tmp2, tmp3;
-    vector float cof0, cof1, cof2, cof3;
-    vector float t0, t1, t2, t3;
-    vector float t01, t02, t03, t12, t23;
-    vector float t1r, t2r;
-    vector float t01r, t02r, t03r, t12r, t23r;
-    vector float t1r3, t1r3r;
-    vector float det, det0, det1, det2, det3, invdet;
-    vector float vzero = (vector float){0.0};
-    in0 = mat.getCol0().get128();
-    in1 = mat.getCol1().get128();
-    in2 = mat.getCol2().get128();
-    in3 = mat.getCol3().get128();
-    /* Perform transform of the input matrix of the form:
-     *    A B C D
-     *    E F G H
-     *    I J K L
-     *    M N O P
-     *
-     * The pseudo transpose of the input matrix is trans:
-     *    A E I M
-     *    J N B F
-     *    C G K O
-     *    L P D H
-     */
-    tmp0 = vec_perm(in0, in1, _VECTORMATH_PERM_XAZC);	/* A E C G */
-    tmp1 = vec_perm(in2, in3, _VECTORMATH_PERM_XAZC);	/* I M K O */
-    tmp2 = vec_perm(in0, in1, _VECTORMATH_PERM_YBWD);	/* B F D H */
-    tmp3 = vec_perm(in2, in3, _VECTORMATH_PERM_YBWD);	/* J N L P */
-    t0 = vec_perm(tmp0, tmp1, _VECTORMATH_PERM_XYAB);	/* A E I M */
-    t1 = vec_perm(tmp3, tmp2, _VECTORMATH_PERM_XYAB);	/* J N B F */
-    t2 = vec_perm(tmp0, tmp1, _VECTORMATH_PERM_ZWCD);	/* C G K O */
-    t3 = vec_perm(tmp3, tmp2, _VECTORMATH_PERM_ZWCD);	/* L P D H */
-    /* Generate a cofactor matrix. The computed cofactors reside in
-     * cof0, cof1, cof2, cof3.
-     */
-    t23 = vec_madd(t2, t3, vzero);		/* CL GP KD OH */
-    t23 = vec_perm(t23, t23, _VECTORMATH_PERM_YXWZ);	/* GP CL OH KD */
-    cof0 = vec_nmsub(t1, t23, vzero);		/* -(JGP NCL FOH BKD) */
-    cof1 = vec_nmsub(t0, t23, vzero);		/* -(AGP ECL IOH MKD) */
-    t23r = vec_sld(t23, t23, 8);			/* OH KD GP CL */
-    cof0 = vec_madd(t1, t23r, cof0);		/* JOH NKD BGP FCL + cof0 */
-    cof1 = vec_madd(t0, t23r, cof1);		/* AOH EKD IGP MCL + cof1 */
-    cof1 = vec_sld(cof1, cof1, 8);		/* IGP MCL AOH EKD - IOH MKD AGP ECL */
-    t12 = vec_madd(t1, t2, vzero);		/* JC NG BK FO */
-    t12 = vec_perm(t12, t12, _VECTORMATH_PERM_YXWZ);	/* NG JC FO BK */
-    cof0 = vec_madd(t3, t12, cof0);		/* LNG PJC DFO HBK + cof0 */
-    cof3 = vec_madd(t0, t12, vzero);		/* ANG EJC IFO MBK */
-    t12r = vec_sld(t12, t12, 8);			/* FO BK NG JC */
-    cof0 = vec_nmsub(t3, t12r, cof0);		/* cof0 - LFO PBK DNG HJC */
-    cof3 = vec_nmsub(t0, t12r, cof3);		/* cof3 - AFO EBK ING MJC */
-    cof3 = vec_sld(cof3, cof3, 8);		/* ING MJC AFO EBK - IFO MBK ANG EJC */
-    t1r = vec_sld(t1, t1, 8);			/* B F J N */
-    t2r = vec_sld(t2, t2, 8);			/* K O C G */
-    t1r3 = vec_madd(t1r, t3, vzero);		/* BL FP JD NH */
-    t1r3 = vec_perm(t1r3, t1r3, _VECTORMATH_PERM_YXWZ);	/* FP BL NH JD */
-    cof0 = vec_madd(t2r, t1r3, cof0);		/* KFP OBL CNH GJD + cof0 */
-    cof2 = vec_madd(t0, t1r3, vzero);		/* AFP EBL INH MJD */
-    t1r3r = vec_sld(t1r3, t1r3, 8);		/* NH JD FP BL */
-    cof0 = vec_nmsub(t2r, t1r3r, cof0);		/* cof0 - KNH OJD CFP GBL */
-    cof2 = vec_nmsub(t0, t1r3r, cof2);		/* cof2 - ANH EJD IFP MBL */
-    cof2 = vec_sld(cof2, cof2, 8);		/* IFP MBL ANH EJD - INH MJD AFP EBL */
-    t01 = vec_madd(t0, t1, vzero);		/* AJ EN IB MF */
-    t01 = vec_perm(t01, t01, _VECTORMATH_PERM_YXWZ);	/* EN AJ MF IB */
-    cof2 = vec_nmsub(t3, t01, cof2);		/* cof2 - LEN PAJ DMF HIB */
-    cof3 = vec_madd(t2r, t01, cof3);		/* KEN OAJ CMF GIB + cof3 */ 
-    t01r = vec_sld(t01, t01, 8);			/* MF IB EN AJ */
-    cof2 = vec_madd(t3, t01r, cof2);		/* LMF PIB DEN HAJ + cof2 */
-    cof3 = vec_nmsub(t2r, t01r, cof3);		/* cof3 - KMF OIB CEN GAJ */
-    t03 = vec_madd(t0, t3, vzero);		/* AL EP ID MH */
-    t03 = vec_perm(t03, t03, _VECTORMATH_PERM_YXWZ);	/* EP AL MH ID */
-    cof1 = vec_nmsub(t2r, t03, cof1);		/* cof1 - KEP OAL CMH GID */
-    cof2 = vec_madd(t1, t03, cof2);		/* JEP NAL BMH FID + cof2 */
-    t03r = vec_sld(t03, t03, 8);			/* MH ID EP AL */
-    cof1 = vec_madd(t2r, t03r, cof1);		/* KMH OID CEP GAL + cof1 */
-    cof2 = vec_nmsub(t1, t03r, cof2);		/* cof2 - JMH NID BEP FAL */ 
-    t02 = vec_madd(t0, t2r, vzero);		/* AK EO IC MG */
-    t02 = vec_perm(t02, t02, _VECTORMATH_PERM_YXWZ);	/* E0 AK MG IC */
-    cof1 = vec_madd(t3, t02, cof1);		/* LEO PAK DMG HIC + cof1 */
-    cof3 = vec_nmsub(t1, t02, cof3);		/* cof3 - JEO NAK BMG FIC */
-    t02r = vec_sld(t02, t02, 8);			/* MG IC EO AK */
-    cof1 = vec_nmsub(t3, t02r, cof1);		/* cof1 - LMG PIC DEO HAK */
-    cof3 = vec_madd(t1, t02r, cof3);		/* JMG NIC BEO FAK + cof3 */
-    /* Compute the determinant of the matrix 
-     *
-     * det = sum_across(t0 * cof0);
-     *
-     * We perform a sum across the entire vector so that 
-     * we don't have to splat the result when multiplying the
-     * cofactors by the inverse of the determinant.
-     */
-    det  = vec_madd(t0, cof0, vzero);
-    det0 = vec_splat(det, 0);
-    det1 = vec_splat(det, 1);
-    det2 = vec_splat(det, 2);
-    det3 = vec_splat(det, 3);
-    det  = vec_add(det0, det1);
-    det2 = vec_add(det2, det3);
-    det  = vec_add(det, det2);
-    /* Compute the reciprocal of the determinant.
-     */
-    invdet = recipf4(det);
-    /* Multiply the cofactors by the reciprocal of the determinant.
-     */ 
-    return Matrix4(
-        Vector4( vec_madd(cof0, invdet, vzero) ),
-        Vector4( vec_madd(cof1, invdet, vzero) ),
-        Vector4( vec_madd(cof2, invdet, vzero) ),
-        Vector4( vec_madd(cof3, invdet, vzero) )
-    );
-}
-
-inline const Matrix4 affineInverse( const Matrix4 & mat )
-{
-    Transform3 affineMat;
-    affineMat.setCol0( mat.getCol0().getXYZ( ) );
-    affineMat.setCol1( mat.getCol1().getXYZ( ) );
-    affineMat.setCol2( mat.getCol2().getXYZ( ) );
-    affineMat.setCol3( mat.getCol3().getXYZ( ) );
-    return Matrix4( inverse( affineMat ) );
-}
-
-inline const Matrix4 orthoInverse( const Matrix4 & mat )
-{
-    Transform3 affineMat;
-    affineMat.setCol0( mat.getCol0().getXYZ( ) );
-    affineMat.setCol1( mat.getCol1().getXYZ( ) );
-    affineMat.setCol2( mat.getCol2().getXYZ( ) );
-    affineMat.setCol3( mat.getCol3().getXYZ( ) );
-    return Matrix4( orthoInverse( affineMat ) );
-}
-
-inline const floatInVec determinant( const Matrix4 & mat )
-{
-    /* function implementation based on code from STIDC SDK:           */
-    /* --------------------------------------------------------------  */
-    /* PLEASE DO NOT MODIFY THIS SECTION                               */
-    /* This prolog section is automatically generated.                 */
-    /*                                                                 */
-    /* (C)Copyright                                                    */
-    /* Sony Computer Entertainment, Inc.,                              */
-    /* Toshiba Corporation,                                            */
-    /* International Business Machines Corporation,                    */
-    /* 2001,2002.                                                      */
-    /* S/T/I Confidential Information                                  */
-    /* --------------------------------------------------------------  */
-    vector float in0, in1, in2, in3;
-    vector float tmp0, tmp1, tmp2, tmp3;
-    vector float cof0;
-    vector float t0, t1, t2, t3;
-    vector float t12, t23;
-    vector float t1r, t2r;
-    vector float t12r, t23r;
-    vector float t1r3, t1r3r;
-    vector float vzero = (vector float){0.0};
-    in0 = mat.getCol0().get128();
-    in1 = mat.getCol1().get128();
-    in2 = mat.getCol2().get128();
-    in3 = mat.getCol3().get128();
-    /* Perform transform of the input matrix of the form:
-     *    A B C D
-     *    E F G H
-     *    I J K L
-     *    M N O P
-     *
-     * The pseudo transpose of the input matrix is trans:
-     *    A E I M
-     *    J N B F
-     *    C G K O
-     *    L P D H
-     */
-    tmp0 = vec_perm(in0, in1, _VECTORMATH_PERM_XAZC);	/* A E C G */
-    tmp1 = vec_perm(in2, in3, _VECTORMATH_PERM_XAZC);	/* I M K O */
-    tmp2 = vec_perm(in0, in1, _VECTORMATH_PERM_YBWD);	/* B F D H */
-    tmp3 = vec_perm(in2, in3, _VECTORMATH_PERM_YBWD);	/* J N L P */
-    t0 = vec_perm(tmp0, tmp1, _VECTORMATH_PERM_XYAB);	/* A E I M */
-    t1 = vec_perm(tmp3, tmp2, _VECTORMATH_PERM_XYAB);	/* J N B F */
-    t2 = vec_perm(tmp0, tmp1, _VECTORMATH_PERM_ZWCD);	/* C G K O */
-    t3 = vec_perm(tmp3, tmp2, _VECTORMATH_PERM_ZWCD);	/* L P D H */
-    /* Generate a cofactor matrix. The computed cofactors reside in
-     * cof0, cof1, cof2, cof3.
-     */
-    t23 = vec_madd(t2, t3, vzero);		/* CL GP KD OH */
-    t23 = vec_perm(t23, t23, _VECTORMATH_PERM_YXWZ);	/* GP CL OH KD */
-    cof0 = vec_nmsub(t1, t23, vzero);		/* -(JGP NCL FOH BKD) */
-    t23r = vec_sld(t23, t23, 8);			/* OH KD GP CL */
-    cof0 = vec_madd(t1, t23r, cof0);		/* JOH NKD BGP FCL + cof0 */
-    t12 = vec_madd(t1, t2, vzero);		/* JC NG BK FO */
-    t12 = vec_perm(t12, t12, _VECTORMATH_PERM_YXWZ);	/* NG JC FO BK */
-    cof0 = vec_madd(t3, t12, cof0);		/* LNG PJC DFO HBK + cof0 */
-    t12r = vec_sld(t12, t12, 8);			/* FO BK NG JC */
-    cof0 = vec_nmsub(t3, t12r, cof0);		/* cof0 - LFO PBK DNG HJC */
-    t1r = vec_sld(t1, t1, 8);			/* B F J N */
-    t2r = vec_sld(t2, t2, 8);			/* K O C G */
-    t1r3 = vec_madd(t1r, t3, vzero);		/* BL FP JD NH */
-    t1r3 = vec_perm(t1r3, t1r3, _VECTORMATH_PERM_YXWZ);	/* FP BL NH JD */
-    cof0 = vec_madd(t2r, t1r3, cof0);		/* KFP OBL CNH GJD + cof0 */
-    t1r3r = vec_sld(t1r3, t1r3, 8);		/* NH JD FP BL */
-    cof0 = vec_nmsub(t2r, t1r3r, cof0);		/* cof0 - KNH OJD CFP GBL */
-    return floatInVec( _vmathVfDot4(t0,cof0), 0 );
-}
-
-inline const Matrix4 Matrix4::operator +( const Matrix4 & mat ) const
-{
-    return Matrix4(
-        ( mCol0 + mat.mCol0 ),
-        ( mCol1 + mat.mCol1 ),
-        ( mCol2 + mat.mCol2 ),
-        ( mCol3 + mat.mCol3 )
-    );
-}
-
-inline const Matrix4 Matrix4::operator -( const Matrix4 & mat ) const
-{
-    return Matrix4(
-        ( mCol0 - mat.mCol0 ),
-        ( mCol1 - mat.mCol1 ),
-        ( mCol2 - mat.mCol2 ),
-        ( mCol3 - mat.mCol3 )
-    );
-}
-
-inline Matrix4 & Matrix4::operator +=( const Matrix4 & mat )
-{
-    *this = *this + mat;
-    return *this;
-}
-
-inline Matrix4 & Matrix4::operator -=( const Matrix4 & mat )
-{
-    *this = *this - mat;
-    return *this;
-}
-
-inline const Matrix4 Matrix4::operator -( ) const
-{
-    return Matrix4(
-        ( -mCol0 ),
-        ( -mCol1 ),
-        ( -mCol2 ),
-        ( -mCol3 )
-    );
-}
-
-inline const Matrix4 absPerElem( const Matrix4 & mat )
-{
-    return Matrix4(
-        absPerElem( mat.getCol0() ),
-        absPerElem( mat.getCol1() ),
-        absPerElem( mat.getCol2() ),
-        absPerElem( mat.getCol3() )
-    );
-}
-
-inline const Matrix4 Matrix4::operator *( float scalar ) const
-{
-    return *this * floatInVec(scalar);
-}
-
-inline const Matrix4 Matrix4::operator *( floatInVec scalar ) const
-{
-    return Matrix4(
-        ( mCol0 * scalar ),
-        ( mCol1 * scalar ),
-        ( mCol2 * scalar ),
-        ( mCol3 * scalar )
-    );
-}
-
-inline Matrix4 & Matrix4::operator *=( float scalar )
-{
-    return *this *= floatInVec(scalar);
-}
-
-inline Matrix4 & Matrix4::operator *=( floatInVec scalar )
-{
-    *this = *this * scalar;
-    return *this;
-}
-
-inline const Matrix4 operator *( float scalar, const Matrix4 & mat )
-{
-    return floatInVec(scalar) * mat;
-}
-
-inline const Matrix4 operator *( floatInVec scalar, const Matrix4 & mat )
-{
-    return mat * scalar;
-}
-
-inline const Vector4 Matrix4::operator *( Vector4 vec ) const
-{
-    vec_float4 tmp0, tmp1, res;
-    vec_float4 xxxx, yyyy, zzzz, wwww;
-    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    xxxx = vec_splat( vec.get128(), 0 );
-    yyyy = vec_splat( vec.get128(), 1 );
-    zzzz = vec_splat( vec.get128(), 2 );
-    wwww = vec_splat( vec.get128(), 3 );
-    tmp0 = vec_madd( mCol0.get128(), xxxx, zero );
-    tmp1 = vec_madd( mCol1.get128(), yyyy, zero );
-    tmp0 = vec_madd( mCol2.get128(), zzzz, tmp0 );
-    tmp1 = vec_madd( mCol3.get128(), wwww, tmp1 );
-    res = vec_add( tmp0, tmp1 );
-    return Vector4( res );
-}
-
-inline const Vector4 Matrix4::operator *( Vector3 vec ) const
-{
-    vec_float4 res;
-    vec_float4 xxxx, yyyy, zzzz;
-    xxxx = vec_splat( vec.get128(), 0 );
-    yyyy = vec_splat( vec.get128(), 1 );
-    zzzz = vec_splat( vec.get128(), 2 );
-    res = vec_madd( mCol0.get128(), xxxx, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    res = vec_madd( mCol1.get128(), yyyy, res );
-    res = vec_madd( mCol2.get128(), zzzz, res );
-    return Vector4( res );
-}
-
-inline const Vector4 Matrix4::operator *( Point3 pnt ) const
-{
-    vec_float4 tmp0, tmp1, res;
-    vec_float4 xxxx, yyyy, zzzz;
-    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    xxxx = vec_splat( pnt.get128(), 0 );
-    yyyy = vec_splat( pnt.get128(), 1 );
-    zzzz = vec_splat( pnt.get128(), 2 );
-    tmp0 = vec_madd( mCol0.get128(), xxxx, zero );
-    tmp1 = vec_madd( mCol1.get128(), yyyy, zero );
-    tmp0 = vec_madd( mCol2.get128(), zzzz, tmp0 );
-    tmp1 = vec_add( mCol3.get128(), tmp1 );
-    res = vec_add( tmp0, tmp1 );
-    return Vector4( res );
-}
-
-inline const Matrix4 Matrix4::operator *( const Matrix4 & mat ) const
-{
-    return Matrix4(
-        ( *this * mat.mCol0 ),
-        ( *this * mat.mCol1 ),
-        ( *this * mat.mCol2 ),
-        ( *this * mat.mCol3 )
-    );
-}
-
-inline Matrix4 & Matrix4::operator *=( const Matrix4 & mat )
-{
-    *this = *this * mat;
-    return *this;
-}
-
-inline const Matrix4 Matrix4::operator *( const Transform3 & tfrm ) const
-{
-    return Matrix4(
-        ( *this * tfrm.getCol0() ),
-        ( *this * tfrm.getCol1() ),
-        ( *this * tfrm.getCol2() ),
-        ( *this * Point3( tfrm.getCol3() ) )
-    );
-}
-
-inline Matrix4 & Matrix4::operator *=( const Transform3 & tfrm )
-{
-    *this = *this * tfrm;
-    return *this;
-}
-
-inline const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 )
-{
-    return Matrix4(
-        mulPerElem( mat0.getCol0(), mat1.getCol0() ),
-        mulPerElem( mat0.getCol1(), mat1.getCol1() ),
-        mulPerElem( mat0.getCol2(), mat1.getCol2() ),
-        mulPerElem( mat0.getCol3(), mat1.getCol3() )
-    );
-}
-
-inline const Matrix4 Matrix4::identity( )
-{
-    return Matrix4(
-        Vector4::xAxis( ),
-        Vector4::yAxis( ),
-        Vector4::zAxis( ),
-        Vector4::wAxis( )
-    );
-}
-
-inline Matrix4 & Matrix4::setUpper3x3( const Matrix3 & mat3 )
-{
-    mCol0.setXYZ( mat3.getCol0() );
-    mCol1.setXYZ( mat3.getCol1() );
-    mCol2.setXYZ( mat3.getCol2() );
-    return *this;
-}
-
-inline const Matrix3 Matrix4::getUpper3x3( ) const
-{
-    return Matrix3(
-        mCol0.getXYZ( ),
-        mCol1.getXYZ( ),
-        mCol2.getXYZ( )
-    );
-}
-
-inline Matrix4 & Matrix4::setTranslation( Vector3 translateVec )
-{
-    mCol3.setXYZ( translateVec );
-    return *this;
-}
-
-inline const Vector3 Matrix4::getTranslation( ) const
-{
-    return mCol3.getXYZ( );
-}
-
-inline const Matrix4 Matrix4::rotationX( float radians )
-{
-    return rotationX( floatInVec(radians) );
-}
-
-inline const Matrix4 Matrix4::rotationX( floatInVec radians )
-{
-    vec_float4 s, c, res1, res2;
-    vec_uint4 select_y, select_z;
-    vec_float4 zero;
-    select_y = _VECTORMATH_MASK_0x0F00;
-    select_z = _VECTORMATH_MASK_0x00F0;
-    zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    sincosf4( radians.get128(), &s, &c );
-    res1 = vec_sel( zero, c, select_y );
-    res1 = vec_sel( res1, s, select_z );
-    res2 = vec_sel( zero, negatef4(s), select_y );
-    res2 = vec_sel( res2, c, select_z );
-    return Matrix4(
-        Vector4::xAxis( ),
-        Vector4( res1 ),
-        Vector4( res2 ),
-        Vector4::wAxis( )
-    );
-}
-
-inline const Matrix4 Matrix4::rotationY( float radians )
-{
-    return rotationY( floatInVec(radians) );
-}
-
-inline const Matrix4 Matrix4::rotationY( floatInVec radians )
-{
-    vec_float4 s, c, res0, res2;
-    vec_uint4 select_x, select_z;
-    vec_float4 zero;
-    select_x = _VECTORMATH_MASK_0xF000;
-    select_z = _VECTORMATH_MASK_0x00F0;
-    zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    sincosf4( radians.get128(), &s, &c );
-    res0 = vec_sel( zero, c, select_x );
-    res0 = vec_sel( res0, negatef4(s), select_z );
-    res2 = vec_sel( zero, s, select_x );
-    res2 = vec_sel( res2, c, select_z );
-    return Matrix4(
-        Vector4( res0 ),
-        Vector4::yAxis( ),
-        Vector4( res2 ),
-        Vector4::wAxis( )
-    );
-}
-
-inline const Matrix4 Matrix4::rotationZ( float radians )
-{
-    return rotationZ( floatInVec(radians) );
-}
-
-inline const Matrix4 Matrix4::rotationZ( floatInVec radians )
-{
-    vec_float4 s, c, res0, res1;
-    vec_uint4 select_x, select_y;
-    vec_float4 zero;
-    select_x = _VECTORMATH_MASK_0xF000;
-    select_y = _VECTORMATH_MASK_0x0F00;
-    zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    sincosf4( radians.get128(), &s, &c );
-    res0 = vec_sel( zero, c, select_x );
-    res0 = vec_sel( res0, s, select_y );
-    res1 = vec_sel( zero, negatef4(s), select_x );
-    res1 = vec_sel( res1, c, select_y );
-    return Matrix4(
-        Vector4( res0 ),
-        Vector4( res1 ),
-        Vector4::zAxis( ),
-        Vector4::wAxis( )
-    );
-}
-
-inline const Matrix4 Matrix4::rotationZYX( Vector3 radiansXYZ )
-{
-    vec_float4 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
-    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    angles = Vector4( radiansXYZ, 0.0f ).get128();
-    sincosf4( angles, &s, &c );
-    negS = negatef4( s );
-    Z0 = vec_mergel( c, s );
-    Z1 = vec_mergel( negS, c );
-    Z1 = vec_andc( Z1, (vec_float4)_VECTORMATH_MASK_0x000F );
-    Y0 = vec_perm( negS, c, _VECTORMATH_PERM_BBYX );
-    Y1 = vec_perm( c, s, _VECTORMATH_PERM_BBYX );
-    X0 = vec_splat( s, 0 );
-    X1 = vec_splat( c, 0 );
-    tmp = vec_madd( Z0, Y1, zero );
-    return Matrix4(
-        Vector4( vec_madd( Z0, Y0, zero ) ),
-        Vector4( vec_madd( Z1, X1, vec_madd( tmp, X0, zero ) ) ),
-        Vector4( vec_nmsub( Z1, X0, vec_madd( tmp, X1, zero ) ) ),
-        Vector4::wAxis( )
-    );
-}
-
-inline const Matrix4 Matrix4::rotation( float radians, Vector3 unitVec )
-{
-    return rotation( floatInVec(radians), unitVec );
-}
-
-inline const Matrix4 Matrix4::rotation( floatInVec radians, Vector3 unitVec )
-{
-    vec_float4 axis, s, c, oneMinusC, axisS, negAxisS, xxxx, yyyy, zzzz, tmp0, tmp1, tmp2, zeroW;
-    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    axis = unitVec.get128();
-    sincosf4( radians.get128(), &s, &c );
-    xxxx = vec_splat( axis, 0 );
-    yyyy = vec_splat( axis, 1 );
-    zzzz = vec_splat( axis, 2 );
-    oneMinusC = vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), c );
-    axisS = vec_madd( axis, s, zero );
-    negAxisS = negatef4( axisS );
-    tmp0 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_XZBX );
-    tmp1 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_CXXX );
-    tmp2 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_YAXX );
-    tmp0 = vec_sel( tmp0, c, _VECTORMATH_MASK_0xF000 );
-    tmp1 = vec_sel( tmp1, c, _VECTORMATH_MASK_0x0F00 );
-    tmp2 = vec_sel( tmp2, c, _VECTORMATH_MASK_0x00F0 );
-    zeroW = (vec_float4)_VECTORMATH_MASK_0x000F;
-    axis = vec_andc( axis, zeroW );
-    tmp0 = vec_andc( tmp0, zeroW );
-    tmp1 = vec_andc( tmp1, zeroW );
-    tmp2 = vec_andc( tmp2, zeroW );
-    return Matrix4(
-        Vector4( vec_madd( vec_madd( axis, xxxx, zero ), oneMinusC, tmp0 ) ),
-        Vector4( vec_madd( vec_madd( axis, yyyy, zero ), oneMinusC, tmp1 ) ),
-        Vector4( vec_madd( vec_madd( axis, zzzz, zero ), oneMinusC, tmp2 ) ),
-        Vector4::wAxis( )
-    );
-}
-
-inline const Matrix4 Matrix4::rotation( Quat unitQuat )
-{
-    return Matrix4( Transform3::rotation( unitQuat ) );
-}
-
-inline const Matrix4 Matrix4::scale( Vector3 scaleVec )
-{
-    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    return Matrix4(
-        Vector4( vec_sel( zero, scaleVec.get128(), _VECTORMATH_MASK_0xF000 ) ),
-        Vector4( vec_sel( zero, scaleVec.get128(), _VECTORMATH_MASK_0x0F00 ) ),
-        Vector4( vec_sel( zero, scaleVec.get128(), _VECTORMATH_MASK_0x00F0 ) ),
-        Vector4::wAxis( )
-    );
-}
-
-inline const Matrix4 appendScale( const Matrix4 & mat, Vector3 scaleVec )
-{
-    return Matrix4(
-        ( mat.getCol0() * scaleVec.getX( ) ),
-        ( mat.getCol1() * scaleVec.getY( ) ),
-        ( mat.getCol2() * scaleVec.getZ( ) ),
-        mat.getCol3()
-    );
-}
-
-inline const Matrix4 prependScale( Vector3 scaleVec, const Matrix4 & mat )
-{
-    Vector4 scale4;
-    scale4 = Vector4( scaleVec, 1.0f );
-    return Matrix4(
-        mulPerElem( mat.getCol0(), scale4 ),
-        mulPerElem( mat.getCol1(), scale4 ),
-        mulPerElem( mat.getCol2(), scale4 ),
-        mulPerElem( mat.getCol3(), scale4 )
-    );
-}
-
-inline const Matrix4 Matrix4::translation( Vector3 translateVec )
-{
-    return Matrix4(
-        Vector4::xAxis( ),
-        Vector4::yAxis( ),
-        Vector4::zAxis( ),
-        Vector4( translateVec, 1.0f )
-    );
-}
-
-inline const Matrix4 Matrix4::lookAt( Point3 eyePos, Point3 lookAtPos, Vector3 upVec )
-{
-    Matrix4 m4EyeFrame;
-    Vector3 v3X, v3Y, v3Z;
-    v3Y = normalize( upVec );
-    v3Z = normalize( ( eyePos - lookAtPos ) );
-    v3X = normalize( cross( v3Y, v3Z ) );
-    v3Y = cross( v3Z, v3X );
-    m4EyeFrame = Matrix4( Vector4( v3X ), Vector4( v3Y ), Vector4( v3Z ), Vector4( eyePos ) );
-    return orthoInverse( m4EyeFrame );
-}
-
-inline const Matrix4 Matrix4::perspective( float fovyRadians, float aspect, float zNear, float zFar )
-{
-    float f, rangeInv;
-    vec_float4 zero, col0, col1, col2, col3;
-    union { vec_float4 v; float s[4]; } tmp;
-    f = tanf( _VECTORMATH_PI_OVER_2 - fovyRadians * 0.5f );
-    rangeInv = 1.0f / ( zNear - zFar );
-    zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    tmp.v = zero;
-    tmp.s[0] = f / aspect;
-    col0 = tmp.v;
-    tmp.v = zero;
-    tmp.s[1] = f;
-    col1 = tmp.v;
-    tmp.v = zero;
-    tmp.s[2] = ( zNear + zFar ) * rangeInv;
-    tmp.s[3] = -1.0f;
-    col2 = tmp.v;
-    tmp.v = zero;
-    tmp.s[2] = zNear * zFar * rangeInv * 2.0f;
-    col3 = tmp.v;
-    return Matrix4(
-        Vector4( col0 ),
-        Vector4( col1 ),
-        Vector4( col2 ),
-        Vector4( col3 )
-    );
-}
-
-inline const Matrix4 Matrix4::frustum( float left, float right, float bottom, float top, float zNear, float zFar )
-{
-    /* function implementation based on code from STIDC SDK:           */
-    /* --------------------------------------------------------------  */
-    /* PLEASE DO NOT MODIFY THIS SECTION                               */
-    /* This prolog section is automatically generated.                 */
-    /*                                                                 */
-    /* (C)Copyright                                                    */
-    /* Sony Computer Entertainment, Inc.,                              */
-    /* Toshiba Corporation,                                            */
-    /* International Business Machines Corporation,                    */
-    /* 2001,2002.                                                      */
-    /* S/T/I Confidential Information                                  */
-    /* --------------------------------------------------------------  */
-    vec_float4 lbf, rtn;
-    vec_float4 diff, sum, inv_diff;
-    vec_float4 diagonal, column, near2;
-    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    union { vec_float4 v; float s[4]; } l, f, r, n, b, t;
-    l.s[0] = left;
-    f.s[0] = zFar;
-    r.s[0] = right;
-    n.s[0] = zNear;
-    b.s[0] = bottom;
-    t.s[0] = top;
-    lbf = vec_mergeh( l.v, f.v );
-    rtn = vec_mergeh( r.v, n.v );
-    lbf = vec_mergeh( lbf, b.v );
-    rtn = vec_mergeh( rtn, t.v );
-    diff = vec_sub( rtn, lbf );
-    sum  = vec_add( rtn, lbf );
-    inv_diff = recipf4( diff );
-    near2 = vec_splat( n.v, 0 );
-    near2 = vec_add( near2, near2 );
-    diagonal = vec_madd( near2, inv_diff, zero );
-    column = vec_madd( sum, inv_diff, zero );
-    return Matrix4(
-        Vector4( vec_sel( zero, diagonal, _VECTORMATH_MASK_0xF000 ) ),
-        Vector4( vec_sel( zero, diagonal, _VECTORMATH_MASK_0x0F00 ) ),
-        Vector4( vec_sel( column, ((vec_float4){-1.0f,-1.0f,-1.0f,-1.0f}), _VECTORMATH_MASK_0x000F ) ),
-        Vector4( vec_sel( zero, vec_madd( diagonal, vec_splat( f.v, 0 ), zero ), _VECTORMATH_MASK_0x00F0 ) )
-    );
-}
-
-inline const Matrix4 Matrix4::orthographic( float left, float right, float bottom, float top, float zNear, float zFar )
-{
-    /* function implementation based on code from STIDC SDK:           */
-    /* --------------------------------------------------------------  */
-    /* PLEASE DO NOT MODIFY THIS SECTION                               */
-    /* This prolog section is automatically generated.                 */
-    /*                                                                 */
-    /* (C)Copyright                                                    */
-    /* Sony Computer Entertainment, Inc.,                              */
-    /* Toshiba Corporation,                                            */
-    /* International Business Machines Corporation,                    */
-    /* 2001,2002.                                                      */
-    /* S/T/I Confidential Information                                  */
-    /* --------------------------------------------------------------  */
-    vec_float4 lbf, rtn;
-    vec_float4 diff, sum, inv_diff, neg_inv_diff;
-    vec_float4 diagonal, column;
-    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    union { vec_float4 v; float s[4]; } l, f, r, n, b, t;
-    l.s[0] = left;
-    f.s[0] = zFar;
-    r.s[0] = right;
-    n.s[0] = zNear;
-    b.s[0] = bottom;
-    t.s[0] = top;
-    lbf = vec_mergeh( l.v, f.v );
-    rtn = vec_mergeh( r.v, n.v );
-    lbf = vec_mergeh( lbf, b.v );
-    rtn = vec_mergeh( rtn, t.v );
-    diff = vec_sub( rtn, lbf );
-    sum  = vec_add( rtn, lbf );
-    inv_diff = recipf4( diff );
-    neg_inv_diff = negatef4( inv_diff );
-    diagonal = vec_add( inv_diff, inv_diff );
-    column = vec_madd( sum, vec_sel( neg_inv_diff, inv_diff, _VECTORMATH_MASK_0x00F0 ), zero );
-    return Matrix4(
-        Vector4( vec_sel( zero, diagonal, _VECTORMATH_MASK_0xF000 ) ),
-        Vector4( vec_sel( zero, diagonal, _VECTORMATH_MASK_0x0F00 ) ),
-        Vector4( vec_sel( zero, diagonal, _VECTORMATH_MASK_0x00F0 ) ),
-        Vector4( vec_sel( column, ((vec_float4){1.0f,1.0f,1.0f,1.0f}), _VECTORMATH_MASK_0x000F ) )
-    );
-}
-
-inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, bool select1 )
-{
-    return Matrix4(
-        select( mat0.getCol0(), mat1.getCol0(), select1 ),
-        select( mat0.getCol1(), mat1.getCol1(), select1 ),
-        select( mat0.getCol2(), mat1.getCol2(), select1 ),
-        select( mat0.getCol3(), mat1.getCol3(), select1 )
-    );
-}
-
-inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, boolInVec select1 )
-{
-    return Matrix4(
-        select( mat0.getCol0(), mat1.getCol0(), select1 ),
-        select( mat0.getCol1(), mat1.getCol1(), select1 ),
-        select( mat0.getCol2(), mat1.getCol2(), select1 ),
-        select( mat0.getCol3(), mat1.getCol3(), select1 )
-    );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( const Matrix4 & mat )
-{
-    print( mat.getRow( 0 ) );
-    print( mat.getRow( 1 ) );
-    print( mat.getRow( 2 ) );
-    print( mat.getRow( 3 ) );
-}
-
-inline void print( const Matrix4 & mat, const char * name )
-{
-    printf("%s:\n", name);
-    print( mat );
-}
-
-#endif
-
-inline Transform3::Transform3( const Transform3 & tfrm )
-{
-    mCol0 = tfrm.mCol0;
-    mCol1 = tfrm.mCol1;
-    mCol2 = tfrm.mCol2;
-    mCol3 = tfrm.mCol3;
-}
-
-inline Transform3::Transform3( float scalar )
-{
-    mCol0 = Vector3( scalar );
-    mCol1 = Vector3( scalar );
-    mCol2 = Vector3( scalar );
-    mCol3 = Vector3( scalar );
-}
-
-inline Transform3::Transform3( floatInVec scalar )
-{
-    mCol0 = Vector3( scalar );
-    mCol1 = Vector3( scalar );
-    mCol2 = Vector3( scalar );
-    mCol3 = Vector3( scalar );
-}
-
-inline Transform3::Transform3( Vector3 _col0, Vector3 _col1, Vector3 _col2, Vector3 _col3 )
-{
-    mCol0 = _col0;
-    mCol1 = _col1;
-    mCol2 = _col2;
-    mCol3 = _col3;
-}
-
-inline Transform3::Transform3( const Matrix3 & tfrm, Vector3 translateVec )
-{
-    this->setUpper3x3( tfrm );
-    this->setTranslation( translateVec );
-}
-
-inline Transform3::Transform3( Quat unitQuat, Vector3 translateVec )
-{
-    this->setUpper3x3( Matrix3( unitQuat ) );
-    this->setTranslation( translateVec );
-}
-
-inline Transform3 & Transform3::setCol0( Vector3 _col0 )
-{
-    mCol0 = _col0;
-    return *this;
-}
-
-inline Transform3 & Transform3::setCol1( Vector3 _col1 )
-{
-    mCol1 = _col1;
-    return *this;
-}
-
-inline Transform3 & Transform3::setCol2( Vector3 _col2 )
-{
-    mCol2 = _col2;
-    return *this;
-}
-
-inline Transform3 & Transform3::setCol3( Vector3 _col3 )
-{
-    mCol3 = _col3;
-    return *this;
-}
-
-inline Transform3 & Transform3::setCol( int col, Vector3 vec )
-{
-    *(&mCol0 + col) = vec;
-    return *this;
-}
-
-inline Transform3 & Transform3::setRow( int row, Vector4 vec )
-{
-    mCol0.setElem( row, vec.getElem( 0 ) );
-    mCol1.setElem( row, vec.getElem( 1 ) );
-    mCol2.setElem( row, vec.getElem( 2 ) );
-    mCol3.setElem( row, vec.getElem( 3 ) );
-    return *this;
-}
-
-inline Transform3 & Transform3::setElem( int col, int row, float val )
-{
-    (*this)[col].setElem(row, val);
-    return *this;
-}
-
-inline Transform3 & Transform3::setElem( int col, int row, floatInVec val )
-{
-    Vector3 tmpV3_0;
-    tmpV3_0 = this->getCol( col );
-    tmpV3_0.setElem( row, val );
-    this->setCol( col, tmpV3_0 );
-    return *this;
-}
-
-inline const floatInVec Transform3::getElem( int col, int row ) const
-{
-    return this->getCol( col ).getElem( row );
-}
-
-inline const Vector3 Transform3::getCol0( ) const
-{
-    return mCol0;
-}
-
-inline const Vector3 Transform3::getCol1( ) const
-{
-    return mCol1;
-}
-
-inline const Vector3 Transform3::getCol2( ) const
-{
-    return mCol2;
-}
-
-inline const Vector3 Transform3::getCol3( ) const
-{
-    return mCol3;
-}
-
-inline const Vector3 Transform3::getCol( int col ) const
-{
-    return *(&mCol0 + col);
-}
-
-inline const Vector4 Transform3::getRow( int row ) const
-{
-    return Vector4( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ), mCol3.getElem( row ) );
-}
-
-inline Vector3 & Transform3::operator []( int col )
-{
-    return *(&mCol0 + col);
-}
-
-inline const Vector3 Transform3::operator []( int col ) const
-{
-    return *(&mCol0 + col);
-}
-
-inline Transform3 & Transform3::operator =( const Transform3 & tfrm )
-{
-    mCol0 = tfrm.mCol0;
-    mCol1 = tfrm.mCol1;
-    mCol2 = tfrm.mCol2;
-    mCol3 = tfrm.mCol3;
-    return *this;
-}
-
-inline const Transform3 inverse( const Transform3 & tfrm )
-{
-    vec_float4 inv0, inv1, inv2, inv3;
-    vec_float4 tmp0, tmp1, tmp2, tmp3, tmp4, dot, invdet;
-    vec_float4 xxxx, yyyy, zzzz;
-    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    tmp2 = _vmathVfCross( tfrm.getCol0().get128(), tfrm.getCol1().get128() );
-    tmp0 = _vmathVfCross( tfrm.getCol1().get128(), tfrm.getCol2().get128() );
-    tmp1 = _vmathVfCross( tfrm.getCol2().get128(), tfrm.getCol0().get128() );
-    inv3 = negatef4( tfrm.getCol3().get128() );
-    dot = _vmathVfDot3( tmp2, tfrm.getCol2().get128() );
-    dot = vec_splat( dot, 0 );
-    invdet = recipf4( dot );
-    tmp3 = vec_mergeh( tmp0, tmp2 );
-    tmp4 = vec_mergel( tmp0, tmp2 );
-    inv0 = vec_mergeh( tmp3, tmp1 );
-    xxxx = vec_splat( inv3, 0 );
-    inv1 = vec_perm( tmp3, tmp1, _VECTORMATH_PERM_ZBWX );
-    inv2 = vec_perm( tmp4, tmp1, _VECTORMATH_PERM_XCYX );
-    yyyy = vec_splat( inv3, 1 );
-    zzzz = vec_splat( inv3, 2 );
-    inv3 = vec_madd( inv0, xxxx, zero );
-    inv3 = vec_madd( inv1, yyyy, inv3 );
-    inv3 = vec_madd( inv2, zzzz, inv3 );
-    inv0 = vec_madd( inv0, invdet, zero );
-    inv1 = vec_madd( inv1, invdet, zero );
-    inv2 = vec_madd( inv2, invdet, zero );
-    inv3 = vec_madd( inv3, invdet, zero );
-    return Transform3(
-        Vector3( inv0 ),
-        Vector3( inv1 ),
-        Vector3( inv2 ),
-        Vector3( inv3 )
-    );
-}
-
-inline const Transform3 orthoInverse( const Transform3 & tfrm )
-{
-    vec_float4 inv0, inv1, inv2, inv3;
-    vec_float4 tmp0, tmp1;
-    vec_float4 xxxx, yyyy, zzzz;
-    tmp0 = vec_mergeh( tfrm.getCol0().get128(), tfrm.getCol2().get128() );
-    tmp1 = vec_mergel( tfrm.getCol0().get128(), tfrm.getCol2().get128() );
-    inv3 = negatef4( tfrm.getCol3().get128() );
-    inv0 = vec_mergeh( tmp0, tfrm.getCol1().get128() );
-    xxxx = vec_splat( inv3, 0 );
-    inv1 = vec_perm( tmp0, tfrm.getCol1().get128(), _VECTORMATH_PERM_ZBWX );
-    inv2 = vec_perm( tmp1, tfrm.getCol1().get128(), _VECTORMATH_PERM_XCYX );
-    yyyy = vec_splat( inv3, 1 );
-    zzzz = vec_splat( inv3, 2 );
-    inv3 = vec_madd( inv0, xxxx, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    inv3 = vec_madd( inv1, yyyy, inv3 );
-    inv3 = vec_madd( inv2, zzzz, inv3 );
-    return Transform3(
-        Vector3( inv0 ),
-        Vector3( inv1 ),
-        Vector3( inv2 ),
-        Vector3( inv3 )
-    );
-}
-
-inline const Transform3 absPerElem( const Transform3 & tfrm )
-{
-    return Transform3(
-        absPerElem( tfrm.getCol0() ),
-        absPerElem( tfrm.getCol1() ),
-        absPerElem( tfrm.getCol2() ),
-        absPerElem( tfrm.getCol3() )
-    );
-}
-
-inline const Vector3 Transform3::operator *( Vector3 vec ) const
-{
-    vec_float4 res;
-    vec_float4 xxxx, yyyy, zzzz;
-    xxxx = vec_splat( vec.get128(), 0 );
-    yyyy = vec_splat( vec.get128(), 1 );
-    zzzz = vec_splat( vec.get128(), 2 );
-    res = vec_madd( mCol0.get128(), xxxx, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    res = vec_madd( mCol1.get128(), yyyy, res );
-    res = vec_madd( mCol2.get128(), zzzz, res );
-    return Vector3( res );
-}
-
-inline const Point3 Transform3::operator *( Point3 pnt ) const
-{
-    vec_float4 tmp0, tmp1, res;
-    vec_float4 xxxx, yyyy, zzzz;
-    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    xxxx = vec_splat( pnt.get128(), 0 );
-    yyyy = vec_splat( pnt.get128(), 1 );
-    zzzz = vec_splat( pnt.get128(), 2 );
-    tmp0 = vec_madd( mCol0.get128(), xxxx, zero );
-    tmp1 = vec_madd( mCol1.get128(), yyyy, zero );
-    tmp0 = vec_madd( mCol2.get128(), zzzz, tmp0 );
-    tmp1 = vec_add( mCol3.get128(), tmp1 );
-    res = vec_add( tmp0, tmp1 );
-    return Point3( res );
-}
-
-inline const Transform3 Transform3::operator *( const Transform3 & tfrm ) const
-{
-    return Transform3(
-        ( *this * tfrm.mCol0 ),
-        ( *this * tfrm.mCol1 ),
-        ( *this * tfrm.mCol2 ),
-        Vector3( ( *this * Point3( tfrm.mCol3 ) ) )
-    );
-}
-
-inline Transform3 & Transform3::operator *=( const Transform3 & tfrm )
-{
-    *this = *this * tfrm;
-    return *this;
-}
-
-inline const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 )
-{
-    return Transform3(
-        mulPerElem( tfrm0.getCol0(), tfrm1.getCol0() ),
-        mulPerElem( tfrm0.getCol1(), tfrm1.getCol1() ),
-        mulPerElem( tfrm0.getCol2(), tfrm1.getCol2() ),
-        mulPerElem( tfrm0.getCol3(), tfrm1.getCol3() )
-    );
-}
-
-inline const Transform3 Transform3::identity( )
-{
-    return Transform3(
-        Vector3::xAxis( ),
-        Vector3::yAxis( ),
-        Vector3::zAxis( ),
-        Vector3( 0.0f )
-    );
-}
-
-inline Transform3 & Transform3::setUpper3x3( const Matrix3 & tfrm )
-{
-    mCol0 = tfrm.getCol0();
-    mCol1 = tfrm.getCol1();
-    mCol2 = tfrm.getCol2();
-    return *this;
-}
-
-inline const Matrix3 Transform3::getUpper3x3( ) const
-{
-    return Matrix3( mCol0, mCol1, mCol2 );
-}
-
-inline Transform3 & Transform3::setTranslation( Vector3 translateVec )
-{
-    mCol3 = translateVec;
-    return *this;
-}
-
-inline const Vector3 Transform3::getTranslation( ) const
-{
-    return mCol3;
-}
-
-inline const Transform3 Transform3::rotationX( float radians )
-{
-    return rotationX( floatInVec(radians) );
-}
-
-inline const Transform3 Transform3::rotationX( floatInVec radians )
-{
-    vec_float4 s, c, res1, res2;
-    vec_uint4 select_y, select_z;
-    vec_float4 zero;
-    select_y = _VECTORMATH_MASK_0x0F00;
-    select_z = _VECTORMATH_MASK_0x00F0;
-    zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    sincosf4( radians.get128(), &s, &c );
-    res1 = vec_sel( zero, c, select_y );
-    res1 = vec_sel( res1, s, select_z );
-    res2 = vec_sel( zero, negatef4(s), select_y );
-    res2 = vec_sel( res2, c, select_z );
-    return Transform3(
-        Vector3::xAxis( ),
-        Vector3( res1 ),
-        Vector3( res2 ),
-        Vector3( 0.0f )
-    );
-}
-
-inline const Transform3 Transform3::rotationY( float radians )
-{
-    return rotationY( floatInVec(radians) );
-}
-
-inline const Transform3 Transform3::rotationY( floatInVec radians )
-{
-    vec_float4 s, c, res0, res2;
-    vec_uint4 select_x, select_z;
-    vec_float4 zero;
-    select_x = _VECTORMATH_MASK_0xF000;
-    select_z = _VECTORMATH_MASK_0x00F0;
-    zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    sincosf4( radians.get128(), &s, &c );
-    res0 = vec_sel( zero, c, select_x );
-    res0 = vec_sel( res0, negatef4(s), select_z );
-    res2 = vec_sel( zero, s, select_x );
-    res2 = vec_sel( res2, c, select_z );
-    return Transform3(
-        Vector3( res0 ),
-        Vector3::yAxis( ),
-        Vector3( res2 ),
-        Vector3( 0.0f )
-    );
-}
-
-inline const Transform3 Transform3::rotationZ( float radians )
-{
-    return rotationZ( floatInVec(radians) );
-}
-
-inline const Transform3 Transform3::rotationZ( floatInVec radians )
-{
-    vec_float4 s, c, res0, res1;
-    vec_uint4 select_x, select_y;
-    vec_float4 zero;
-    select_x = _VECTORMATH_MASK_0xF000;
-    select_y = _VECTORMATH_MASK_0x0F00;
-    zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    sincosf4( radians.get128(), &s, &c );
-    res0 = vec_sel( zero, c, select_x );
-    res0 = vec_sel( res0, s, select_y );
-    res1 = vec_sel( zero, negatef4(s), select_x );
-    res1 = vec_sel( res1, c, select_y );
-    return Transform3(
-        Vector3( res0 ),
-        Vector3( res1 ),
-        Vector3::zAxis( ),
-        Vector3( 0.0f )
-    );
-}
-
-inline const Transform3 Transform3::rotationZYX( Vector3 radiansXYZ )
-{
-    vec_float4 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
-    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    angles = Vector4( radiansXYZ, 0.0f ).get128();
-    sincosf4( angles, &s, &c );
-    negS = negatef4( s );
-    Z0 = vec_mergel( c, s );
-    Z1 = vec_mergel( negS, c );
-    Z1 = vec_andc( Z1, (vec_float4)_VECTORMATH_MASK_0x000F );
-    Y0 = vec_perm( negS, c, _VECTORMATH_PERM_BBYX );
-    Y1 = vec_perm( c, s, _VECTORMATH_PERM_BBYX );
-    X0 = vec_splat( s, 0 );
-    X1 = vec_splat( c, 0 );
-    tmp = vec_madd( Z0, Y1, zero );
-    return Transform3(
-        Vector3( vec_madd( Z0, Y0, zero ) ),
-        Vector3( vec_madd( Z1, X1, vec_madd( tmp, X0, zero ) ) ),
-        Vector3( vec_nmsub( Z1, X0, vec_madd( tmp, X1, zero ) ) ),
-        Vector3( 0.0f )
-    );
-}
-
-inline const Transform3 Transform3::rotation( float radians, Vector3 unitVec )
-{
-    return rotation( floatInVec(radians), unitVec );
-}
-
-inline const Transform3 Transform3::rotation( floatInVec radians, Vector3 unitVec )
-{
-    return Transform3( Matrix3::rotation( radians, unitVec ), Vector3( 0.0f ) );
-}
-
-inline const Transform3 Transform3::rotation( Quat unitQuat )
-{
-    return Transform3( Matrix3( unitQuat ), Vector3( 0.0f ) );
-}
-
-inline const Transform3 Transform3::scale( Vector3 scaleVec )
-{
-    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-    return Transform3(
-        Vector3( vec_sel( zero, scaleVec.get128(), _VECTORMATH_MASK_0xF000 ) ),
-        Vector3( vec_sel( zero, scaleVec.get128(), _VECTORMATH_MASK_0x0F00 ) ),
-        Vector3( vec_sel( zero, scaleVec.get128(), _VECTORMATH_MASK_0x00F0 ) ),
-        Vector3( 0.0f )
-    );
-}
-
-inline const Transform3 appendScale( const Transform3 & tfrm, Vector3 scaleVec )
-{
-    return Transform3(
-        ( tfrm.getCol0() * scaleVec.getX( ) ),
-        ( tfrm.getCol1() * scaleVec.getY( ) ),
-        ( tfrm.getCol2() * scaleVec.getZ( ) ),
-        tfrm.getCol3()
-    );
-}
-
-inline const Transform3 prependScale( Vector3 scaleVec, const Transform3 & tfrm )
-{
-    return Transform3(
-        mulPerElem( tfrm.getCol0(), scaleVec ),
-        mulPerElem( tfrm.getCol1(), scaleVec ),
-        mulPerElem( tfrm.getCol2(), scaleVec ),
-        mulPerElem( tfrm.getCol3(), scaleVec )
-    );
-}
-
-inline const Transform3 Transform3::translation( Vector3 translateVec )
-{
-    return Transform3(
-        Vector3::xAxis( ),
-        Vector3::yAxis( ),
-        Vector3::zAxis( ),
-        translateVec
-    );
-}
-
-inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, bool select1 )
-{
-    return Transform3(
-        select( tfrm0.getCol0(), tfrm1.getCol0(), select1 ),
-        select( tfrm0.getCol1(), tfrm1.getCol1(), select1 ),
-        select( tfrm0.getCol2(), tfrm1.getCol2(), select1 ),
-        select( tfrm0.getCol3(), tfrm1.getCol3(), select1 )
-    );
-}
-
-inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, boolInVec select1 )
-{
-    return Transform3(
-        select( tfrm0.getCol0(), tfrm1.getCol0(), select1 ),
-        select( tfrm0.getCol1(), tfrm1.getCol1(), select1 ),
-        select( tfrm0.getCol2(), tfrm1.getCol2(), select1 ),
-        select( tfrm0.getCol3(), tfrm1.getCol3(), select1 )
-    );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( const Transform3 & tfrm )
-{
-    print( tfrm.getRow( 0 ) );
-    print( tfrm.getRow( 1 ) );
-    print( tfrm.getRow( 2 ) );
-}
-
-inline void print( const Transform3 & tfrm, const char * name )
-{
-    printf("%s:\n", name);
-    print( tfrm );
-}
-
-#endif
-
-inline Quat::Quat( const Matrix3 & tfrm )
-{
-    vec_float4 res;
-    vec_float4 col0, col1, col2;
-    vec_float4 xx_yy, xx_yy_zz_xx, yy_zz_xx_yy, zz_xx_yy_zz, diagSum, diagDiff;
-    vec_float4 zy_xz_yx, yz_zx_xy, sum, diff;
-    vec_float4 radicand, invSqrt, scale;
-    vec_float4 res0, res1, res2, res3;
-    vec_float4 xx, yy, zz;
-    vec_uint4 select_x = _VECTORMATH_MASK_0xF000;
-    vec_uint4 select_y = _VECTORMATH_MASK_0x0F00;
-    vec_uint4 select_z = _VECTORMATH_MASK_0x00F0;
-    vec_uint4 select_w = _VECTORMATH_MASK_0x000F;
-    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-
-    col0 = tfrm.getCol0().get128();
-    col1 = tfrm.getCol1().get128();
-    col2 = tfrm.getCol2().get128();
-
-    /* four cases: */
-    /* trace > 0 */
-    /* else */
-    /*    xx largest diagonal element */
-    /*    yy largest diagonal element */
-    /*    zz largest diagonal element */
-
-    /* compute quaternion for each case */
-
-    xx_yy = vec_sel( col0, col1, select_y );
-    xx_yy_zz_xx = vec_perm( xx_yy, col2, _VECTORMATH_PERM_XYCX );
-    yy_zz_xx_yy = vec_perm( xx_yy, col2, _VECTORMATH_PERM_YCXY );
-    zz_xx_yy_zz = vec_perm( xx_yy, col2, _VECTORMATH_PERM_CXYC );
-
-    diagSum = vec_add( vec_add( xx_yy_zz_xx, yy_zz_xx_yy ), zz_xx_yy_zz );
-    diagDiff = vec_sub( vec_sub( xx_yy_zz_xx, yy_zz_xx_yy ), zz_xx_yy_zz );
-    radicand = vec_add( vec_sel( diagDiff, diagSum, select_w ), ((vec_float4){1.0f,1.0f,1.0f,1.0f}) );
-    invSqrt = rsqrtf4( radicand );
-
-    zy_xz_yx = vec_sel( col0, col1, select_z );
-    zy_xz_yx = vec_perm( zy_xz_yx, col2, _VECTORMATH_PERM_ZAYX );
-    yz_zx_xy = vec_sel( col0, col1, select_x );
-    yz_zx_xy = vec_perm( yz_zx_xy, col2, _VECTORMATH_PERM_BZXX );
-
-    sum = vec_add( zy_xz_yx, yz_zx_xy );
-    diff = vec_sub( zy_xz_yx, yz_zx_xy );
-
-    scale = vec_madd( invSqrt, ((vec_float4){0.5f,0.5f,0.5f,0.5f}), zero );
-    res0 = vec_perm( sum, diff, _VECTORMATH_PERM_XZYA );
-    res1 = vec_perm( sum, diff, _VECTORMATH_PERM_ZXXB );
-    res2 = vec_perm( sum, diff, _VECTORMATH_PERM_YXXC );
-    res3 = diff;
-    res0 = vec_sel( res0, radicand, select_x );
-    res1 = vec_sel( res1, radicand, select_y );
-    res2 = vec_sel( res2, radicand, select_z );
-    res3 = vec_sel( res3, radicand, select_w );
-    res0 = vec_madd( res0, vec_splat( scale, 0 ), zero );
-    res1 = vec_madd( res1, vec_splat( scale, 1 ), zero );
-    res2 = vec_madd( res2, vec_splat( scale, 2 ), zero );
-    res3 = vec_madd( res3, vec_splat( scale, 3 ), zero );
-
-    /* determine case and select answer */
-
-    xx = vec_splat( col0, 0 );
-    yy = vec_splat( col1, 1 );
-    zz = vec_splat( col2, 2 );
-    res = vec_sel( res0, res1, vec_cmpgt( yy, xx ) );
-    res = vec_sel( res, res2, vec_and( vec_cmpgt( zz, xx ), vec_cmpgt( zz, yy ) ) );
-    res = vec_sel( res, res3, vec_cmpgt( vec_splat( diagSum, 0 ), zero ) );
-    mVec128 = res;
-}
-
-inline const Matrix3 outer( Vector3 tfrm0, Vector3 tfrm1 )
-{
-    return Matrix3(
-        ( tfrm0 * tfrm1.getX( ) ),
-        ( tfrm0 * tfrm1.getY( ) ),
-        ( tfrm0 * tfrm1.getZ( ) )
-    );
-}
-
-inline const Matrix4 outer( Vector4 tfrm0, Vector4 tfrm1 )
-{
-    return Matrix4(
-        ( tfrm0 * tfrm1.getX( ) ),
-        ( tfrm0 * tfrm1.getY( ) ),
-        ( tfrm0 * tfrm1.getZ( ) ),
-        ( tfrm0 * tfrm1.getW( ) )
-    );
-}
-
-inline const Vector3 rowMul( Vector3 vec, const Matrix3 & mat )
-{
-    vec_float4 tmp0, tmp1, mcol0, mcol1, mcol2, res;
-    vec_float4 xxxx, yyyy, zzzz;
-    tmp0 = vec_mergeh( mat.getCol0().get128(), mat.getCol2().get128() );
-    tmp1 = vec_mergel( mat.getCol0().get128(), mat.getCol2().get128() );
-    xxxx = vec_splat( vec.get128(), 0 );
-    mcol0 = vec_mergeh( tmp0, mat.getCol1().get128() );
-    mcol1 = vec_perm( tmp0, mat.getCol1().get128(), _VECTORMATH_PERM_ZBWX );
-    mcol2 = vec_perm( tmp1, mat.getCol1().get128(), _VECTORMATH_PERM_XCYX );
-    yyyy = vec_splat( vec.get128(), 1 );
-    res = vec_madd( mcol0, xxxx, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    zzzz = vec_splat( vec.get128(), 2 );
-    res = vec_madd( mcol1, yyyy, res );
-    res = vec_madd( mcol2, zzzz, res );
-    return Vector3( res );
-}
-
-inline const Matrix3 crossMatrix( Vector3 vec )
-{
-    vec_float4 neg, res0, res1, res2;
-    neg = negatef4( vec.get128() );
-    res0 = vec_perm( vec.get128(), neg, _VECTORMATH_PERM_XZBX );
-    res1 = vec_perm( vec.get128(), neg, _VECTORMATH_PERM_CXXX );
-    res2 = vec_perm( vec.get128(), neg, _VECTORMATH_PERM_YAXX );
-    res0 = vec_andc( res0, (vec_float4)_VECTORMATH_MASK_0xF000 );
-    res1 = vec_andc( res1, (vec_float4)_VECTORMATH_MASK_0x0F00 );
-    res2 = vec_andc( res2, (vec_float4)_VECTORMATH_MASK_0x00F0 );
-    return Matrix3(
-        Vector3( res0 ),
-        Vector3( res1 ),
-        Vector3( res2 )
-    );
-}
-
-inline const Matrix3 crossMatrixMul( Vector3 vec, const Matrix3 & mat )
-{
-    return Matrix3( cross( vec, mat.getCol0() ), cross( vec, mat.getCol1() ), cross( vec, mat.getCol2() ) );
-}
-
-} // namespace Aos
-} // namespace Vectormath
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_MAT_AOS_CPP_H
+#define _VECTORMATH_MAT_AOS_CPP_H
+
+namespace Vectormath {
+namespace Aos {
+
+//-----------------------------------------------------------------------------
+// Constants
+// for shuffles, words are labeled [x,y,z,w] [a,b,c,d]
+
+#define _VECTORMATH_PERM_ZBWX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B, _VECTORMATH_PERM_W, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_XCYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_C, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_XYAB ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A, _VECTORMATH_PERM_B })
+#define _VECTORMATH_PERM_ZWCD ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_W, _VECTORMATH_PERM_C, _VECTORMATH_PERM_D })
+#define _VECTORMATH_PERM_XZBX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B, _VECTORMATH_PERM_X })     
+#define _VECTORMATH_PERM_CXXX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_YAXX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_XAZC ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_A, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_C })
+#define _VECTORMATH_PERM_YXWZ ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X, _VECTORMATH_PERM_W, _VECTORMATH_PERM_Z })
+#define _VECTORMATH_PERM_YBWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_B, _VECTORMATH_PERM_W, _VECTORMATH_PERM_D })
+#define _VECTORMATH_PERM_XYCX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_C, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_YCXY ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y })
+#define _VECTORMATH_PERM_CXYC ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_C })
+#define _VECTORMATH_PERM_ZAYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_BZXX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_B, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_XZYA ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A })
+#define _VECTORMATH_PERM_ZXXB ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X, _VECTORMATH_PERM_B })
+#define _VECTORMATH_PERM_YXXC ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X, _VECTORMATH_PERM_C })
+#define _VECTORMATH_PERM_BBYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_B, _VECTORMATH_PERM_B, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PI_OVER_2 1.570796327f
+
+//-----------------------------------------------------------------------------
+// Definitions
+
+inline Matrix3::Matrix3( const Matrix3 & mat )
+{
+    mCol0 = mat.mCol0;
+    mCol1 = mat.mCol1;
+    mCol2 = mat.mCol2;
+}
+
+inline Matrix3::Matrix3( float scalar )
+{
+    mCol0 = Vector3( scalar );
+    mCol1 = Vector3( scalar );
+    mCol2 = Vector3( scalar );
+}
+
+inline Matrix3::Matrix3( floatInVec scalar )
+{
+    mCol0 = Vector3( scalar );
+    mCol1 = Vector3( scalar );
+    mCol2 = Vector3( scalar );
+}
+
+inline Matrix3::Matrix3( Quat unitQuat )
+{
+    vec_float4 xyzw_2, wwww, yzxw, zxyw, yzxw_2, zxyw_2;
+    vec_float4 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
+    vec_uint4 select_x = _VECTORMATH_MASK_0xF000;
+    vec_uint4 select_z = _VECTORMATH_MASK_0x00F0;
+    xyzw_2 = vec_add( unitQuat.get128(), unitQuat.get128() );
+    wwww = vec_splat( unitQuat.get128(), 3 );
+    yzxw = vec_perm( unitQuat.get128(), unitQuat.get128(), _VECTORMATH_PERM_YZXW );
+    zxyw = vec_perm( unitQuat.get128(), unitQuat.get128(), _VECTORMATH_PERM_ZXYW );
+    yzxw_2 = vec_perm( xyzw_2, xyzw_2, _VECTORMATH_PERM_YZXW );
+    zxyw_2 = vec_perm( xyzw_2, xyzw_2, _VECTORMATH_PERM_ZXYW );
+    tmp0 = vec_madd( yzxw_2, wwww, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    tmp1 = vec_nmsub( yzxw, yzxw_2, ((vec_float4){1.0f,1.0f,1.0f,1.0f}) );
+    tmp2 = vec_madd( yzxw, xyzw_2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    tmp0 = vec_madd( zxyw, xyzw_2, tmp0 );
+    tmp1 = vec_nmsub( zxyw, zxyw_2, tmp1 );
+    tmp2 = vec_nmsub( zxyw_2, wwww, tmp2 );
+    tmp3 = vec_sel( tmp0, tmp1, select_x );
+    tmp4 = vec_sel( tmp1, tmp2, select_x );
+    tmp5 = vec_sel( tmp2, tmp0, select_x );
+    mCol0 = Vector3( vec_sel( tmp3, tmp2, select_z ) );
+    mCol1 = Vector3( vec_sel( tmp4, tmp0, select_z ) );
+    mCol2 = Vector3( vec_sel( tmp5, tmp1, select_z ) );
+}
+
+inline Matrix3::Matrix3( Vector3 _col0, Vector3 _col1, Vector3 _col2 )
+{
+    mCol0 = _col0;
+    mCol1 = _col1;
+    mCol2 = _col2;
+}
+
+inline Matrix3 & Matrix3::setCol0( Vector3 _col0 )
+{
+    mCol0 = _col0;
+    return *this;
+}
+
+inline Matrix3 & Matrix3::setCol1( Vector3 _col1 )
+{
+    mCol1 = _col1;
+    return *this;
+}
+
+inline Matrix3 & Matrix3::setCol2( Vector3 _col2 )
+{
+    mCol2 = _col2;
+    return *this;
+}
+
+inline Matrix3 & Matrix3::setCol( int col, Vector3 vec )
+{
+    *(&mCol0 + col) = vec;
+    return *this;
+}
+
+inline Matrix3 & Matrix3::setRow( int row, Vector3 vec )
+{
+    mCol0.setElem( row, vec.getElem( 0 ) );
+    mCol1.setElem( row, vec.getElem( 1 ) );
+    mCol2.setElem( row, vec.getElem( 2 ) );
+    return *this;
+}
+
+inline Matrix3 & Matrix3::setElem( int col, int row, float val )
+{
+    (*this)[col].setElem(row, val);
+    return *this;
+}
+
+inline Matrix3 & Matrix3::setElem( int col, int row, floatInVec val )
+{
+    Vector3 tmpV3_0;
+    tmpV3_0 = this->getCol( col );
+    tmpV3_0.setElem( row, val );
+    this->setCol( col, tmpV3_0 );
+    return *this;
+}
+
+inline const floatInVec Matrix3::getElem( int col, int row ) const
+{
+    return this->getCol( col ).getElem( row );
+}
+
+inline const Vector3 Matrix3::getCol0( ) const
+{
+    return mCol0;
+}
+
+inline const Vector3 Matrix3::getCol1( ) const
+{
+    return mCol1;
+}
+
+inline const Vector3 Matrix3::getCol2( ) const
+{
+    return mCol2;
+}
+
+inline const Vector3 Matrix3::getCol( int col ) const
+{
+    return *(&mCol0 + col);
+}
+
+inline const Vector3 Matrix3::getRow( int row ) const
+{
+    return Vector3( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ) );
+}
+
+inline Vector3 & Matrix3::operator []( int col )
+{
+    return *(&mCol0 + col);
+}
+
+inline const Vector3 Matrix3::operator []( int col ) const
+{
+    return *(&mCol0 + col);
+}
+
+inline Matrix3 & Matrix3::operator =( const Matrix3 & mat )
+{
+    mCol0 = mat.mCol0;
+    mCol1 = mat.mCol1;
+    mCol2 = mat.mCol2;
+    return *this;
+}
+
+inline const Matrix3 transpose( const Matrix3 & mat )
+{
+    vec_float4 tmp0, tmp1, res0, res1, res2;
+    tmp0 = vec_mergeh( mat.getCol0().get128(), mat.getCol2().get128() );
+    tmp1 = vec_mergel( mat.getCol0().get128(), mat.getCol2().get128() );
+    res0 = vec_mergeh( tmp0, mat.getCol1().get128() );
+    res1 = vec_perm( tmp0, mat.getCol1().get128(), _VECTORMATH_PERM_ZBWX );
+    res2 = vec_perm( tmp1, mat.getCol1().get128(), _VECTORMATH_PERM_XCYX );
+    return Matrix3(
+        Vector3( res0 ),
+        Vector3( res1 ),
+        Vector3( res2 )
+    );
+}
+
+inline const Matrix3 inverse( const Matrix3 & mat )
+{
+    vec_float4 tmp0, tmp1, tmp2, tmp3, tmp4, dot, invdet, inv0, inv1, inv2;
+    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    tmp2 = _vmathVfCross( mat.getCol0().get128(), mat.getCol1().get128() );
+    tmp0 = _vmathVfCross( mat.getCol1().get128(), mat.getCol2().get128() );
+    tmp1 = _vmathVfCross( mat.getCol2().get128(), mat.getCol0().get128() );
+    dot = _vmathVfDot3( tmp2, mat.getCol2().get128() );
+    dot = vec_splat( dot, 0 );
+    invdet = recipf4( dot );
+    tmp3 = vec_mergeh( tmp0, tmp2 );
+    tmp4 = vec_mergel( tmp0, tmp2 );
+    inv0 = vec_mergeh( tmp3, tmp1 );
+    inv1 = vec_perm( tmp3, tmp1, _VECTORMATH_PERM_ZBWX );
+    inv2 = vec_perm( tmp4, tmp1, _VECTORMATH_PERM_XCYX );
+    inv0 = vec_madd( inv0, invdet, zero );
+    inv1 = vec_madd( inv1, invdet, zero );
+    inv2 = vec_madd( inv2, invdet, zero );
+    return Matrix3(
+        Vector3( inv0 ),
+        Vector3( inv1 ),
+        Vector3( inv2 )
+    );
+}
+
+inline const floatInVec determinant( const Matrix3 & mat )
+{
+    return dot( mat.getCol2(), cross( mat.getCol0(), mat.getCol1() ) );
+}
+
+inline const Matrix3 Matrix3::operator +( const Matrix3 & mat ) const
+{
+    return Matrix3(
+        ( mCol0 + mat.mCol0 ),
+        ( mCol1 + mat.mCol1 ),
+        ( mCol2 + mat.mCol2 )
+    );
+}
+
+inline const Matrix3 Matrix3::operator -( const Matrix3 & mat ) const
+{
+    return Matrix3(
+        ( mCol0 - mat.mCol0 ),
+        ( mCol1 - mat.mCol1 ),
+        ( mCol2 - mat.mCol2 )
+    );
+}
+
+inline Matrix3 & Matrix3::operator +=( const Matrix3 & mat )
+{
+    *this = *this + mat;
+    return *this;
+}
+
+inline Matrix3 & Matrix3::operator -=( const Matrix3 & mat )
+{
+    *this = *this - mat;
+    return *this;
+}
+
+inline const Matrix3 Matrix3::operator -( ) const
+{
+    return Matrix3(
+        ( -mCol0 ),
+        ( -mCol1 ),
+        ( -mCol2 )
+    );
+}
+
+inline const Matrix3 absPerElem( const Matrix3 & mat )
+{
+    return Matrix3(
+        absPerElem( mat.getCol0() ),
+        absPerElem( mat.getCol1() ),
+        absPerElem( mat.getCol2() )
+    );
+}
+
+inline const Matrix3 Matrix3::operator *( float scalar ) const
+{
+    return *this * floatInVec(scalar);
+}
+
+inline const Matrix3 Matrix3::operator *( floatInVec scalar ) const
+{
+    return Matrix3(
+        ( mCol0 * scalar ),
+        ( mCol1 * scalar ),
+        ( mCol2 * scalar )
+    );
+}
+
+inline Matrix3 & Matrix3::operator *=( float scalar )
+{
+    return *this *= floatInVec(scalar);
+}
+
+inline Matrix3 & Matrix3::operator *=( floatInVec scalar )
+{
+    *this = *this * scalar;
+    return *this;
+}
+
+inline const Matrix3 operator *( float scalar, const Matrix3 & mat )
+{
+    return floatInVec(scalar) * mat;
+}
+
+inline const Matrix3 operator *( floatInVec scalar, const Matrix3 & mat )
+{
+    return mat * scalar;
+}
+
+inline const Vector3 Matrix3::operator *( Vector3 vec ) const
+{
+    vec_float4 res;
+    vec_float4 xxxx, yyyy, zzzz;
+    xxxx = vec_splat( vec.get128(), 0 );
+    yyyy = vec_splat( vec.get128(), 1 );
+    zzzz = vec_splat( vec.get128(), 2 );
+    res = vec_madd( mCol0.get128(), xxxx, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    res = vec_madd( mCol1.get128(), yyyy, res );
+    res = vec_madd( mCol2.get128(), zzzz, res );
+    return Vector3( res );
+}
+
+inline const Matrix3 Matrix3::operator *( const Matrix3 & mat ) const
+{
+    return Matrix3(
+        ( *this * mat.mCol0 ),
+        ( *this * mat.mCol1 ),
+        ( *this * mat.mCol2 )
+    );
+}
+
+// Replaces this matrix with (this * mat) and returns a reference to this matrix.
+inline Matrix3 & Matrix3::operator *=( const Matrix3 & mat )
+{
+    const Matrix3 product = *this * mat;
+    *this = product;
+    return *this;
+}
+
+// Column-by-column mulPerElem of two matrices (not a matrix product).
+inline const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 )
+{
+    const Vector3 prod0 = mulPerElem( mat0.getCol0(), mat1.getCol0() );
+    const Vector3 prod1 = mulPerElem( mat0.getCol1(), mat1.getCol1() );
+    const Vector3 prod2 = mulPerElem( mat0.getCol2(), mat1.getCol2() );
+    return Matrix3( prod0, prod1, prod2 );
+}
+
+// Builds a matrix whose columns are Vector3::xAxis(), yAxis() and zAxis().
+inline const Matrix3 Matrix3::identity( )
+{
+    const Vector3 axisX = Vector3::xAxis( );
+    const Vector3 axisY = Vector3::yAxis( );
+    const Vector3 axisZ = Vector3::zAxis( );
+    return Matrix3( axisX, axisY, axisZ );
+}
+
+// float convenience overload; forwards to rotationX( floatInVec ).
+inline const Matrix3 Matrix3::rotationX( float radians )
+{
+    const floatInVec angle( radians );
+    return rotationX( angle );
+}
+
+// Builds a rotation about the x axis.
+// Column 0 is the x axis; column 1 takes c through the 0x0F00 mask and s through
+// the 0x00F0 mask; column 2 takes -s and c through the same two masks
+// (s, c are the outputs of sincosf4 on the angle).
+inline const Matrix3 Matrix3::rotationX( floatInVec radians )
+{
+    vec_float4 sinV, cosV;
+    sincosf4( radians.get128(), &sinV, &cosV );
+    vec_uint4 maskY = _VECTORMATH_MASK_0x0F00;
+    vec_uint4 maskZ = _VECTORMATH_MASK_0x00F0;
+    vec_float4 zeros = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vec_float4 col1 = vec_sel( vec_sel( zeros, cosV, maskY ), sinV, maskZ );
+    vec_float4 col2 = vec_sel( vec_sel( zeros, negatef4(sinV), maskY ), cosV, maskZ );
+    return Matrix3(
+        Vector3::xAxis( ),
+        Vector3( col1 ),
+        Vector3( col2 )
+    );
+}
+
+// float convenience overload; forwards to rotationY( floatInVec ).
+inline const Matrix3 Matrix3::rotationY( float radians )
+{
+    const floatInVec angle( radians );
+    return rotationY( angle );
+}
+
+// Builds a rotation about the y axis.
+// Column 1 is the y axis; column 0 takes c through the 0xF000 mask and -s through
+// the 0x00F0 mask; column 2 takes s and c through the same two masks
+// (s, c are the outputs of sincosf4 on the angle).
+inline const Matrix3 Matrix3::rotationY( floatInVec radians )
+{
+    vec_float4 sinV, cosV;
+    sincosf4( radians.get128(), &sinV, &cosV );
+    vec_uint4 maskX = _VECTORMATH_MASK_0xF000;
+    vec_uint4 maskZ = _VECTORMATH_MASK_0x00F0;
+    vec_float4 zeros = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vec_float4 col0 = vec_sel( vec_sel( zeros, cosV, maskX ), negatef4(sinV), maskZ );
+    vec_float4 col2 = vec_sel( vec_sel( zeros, sinV, maskX ), cosV, maskZ );
+    return Matrix3(
+        Vector3( col0 ),
+        Vector3::yAxis( ),
+        Vector3( col2 )
+    );
+}
+
+// float convenience overload; forwards to rotationZ( floatInVec ).
+inline const Matrix3 Matrix3::rotationZ( float radians )
+{
+    const floatInVec angle( radians );
+    return rotationZ( angle );
+}
+
+// Builds a rotation about the z axis.
+// Column 2 is the z axis; column 0 takes c through the 0xF000 mask and s through
+// the 0x0F00 mask; column 1 takes -s and c through the same two masks
+// (s, c are the outputs of sincosf4 on the angle).
+inline const Matrix3 Matrix3::rotationZ( floatInVec radians )
+{
+    vec_float4 sinV, cosV;
+    sincosf4( radians.get128(), &sinV, &cosV );
+    vec_uint4 maskX = _VECTORMATH_MASK_0xF000;
+    vec_uint4 maskY = _VECTORMATH_MASK_0x0F00;
+    vec_float4 zeros = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vec_float4 col0 = vec_sel( vec_sel( zeros, cosV, maskX ), sinV, maskY );
+    vec_float4 col1 = vec_sel( vec_sel( zeros, negatef4(sinV), maskX ), cosV, maskY );
+    return Matrix3(
+        Vector3( col0 ),
+        Vector3( col1 ),
+        Vector3::zAxis( )
+    );
+}
+
+// Builds a rotation matrix from three angles packed in radiansXYZ.
+// The angles are widened to a 4-lane vector (w = 0.0f), run through a single
+// sincosf4 call, and the resulting sine/cosine lanes are shuffled into helper
+// vectors (X*, Y*, Z*) that are combined with multiply-adds into three columns.
+// NOTE(review): the name suggests the composed order is Z * Y * X; confirm
+// against the class declaration.
+inline const Matrix3 Matrix3::rotationZYX( Vector3 radiansXYZ )
+{
+    vec_float4 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
+    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    angles = Vector4( radiansXYZ, 0.0f ).get128();
+    sincosf4( angles, &s, &c );
+    negS = negatef4( s );
+    // vec_mergel interleaves the low halves (lanes 2 and 3) of its operands:
+    // Z0 = ( c[2], s[2], c[3], s[3] ), Z1 = ( -s[2], c[2], -s[3], c[3] ).
+    Z0 = vec_mergel( c, s );
+    Z1 = vec_mergel( negS, c );
+    // Clear the bits of Z1 selected by the 0x000F mask (presumably the w lane).
+    Z1 = vec_andc( Z1, (vec_float4)_VECTORMATH_MASK_0x000F );
+    // Lane layout of Y0/Y1 depends on _VECTORMATH_PERM_BBYX, defined elsewhere.
+    Y0 = vec_perm( negS, c, _VECTORMATH_PERM_BBYX );
+    Y1 = vec_perm( c, s, _VECTORMATH_PERM_BBYX );
+    // Broadcast lane 0 of the sine and cosine vectors.
+    X0 = vec_splat( s, 0 );
+    X1 = vec_splat( c, 0 );
+    tmp = vec_madd( Z0, Y1, zero );
+    // col0 = Z0*Y0, col1 = Z1*X1 + tmp*X0, col2 = tmp*X1 - Z1*X0
+    // (vec_nmsub(a,b,c) evaluates c - a*b).
+    return Matrix3(
+        Vector3( vec_madd( Z0, Y0, zero ) ),
+        Vector3( vec_madd( Z1, X1, vec_madd( tmp, X0, zero ) ) ),
+        Vector3( vec_nmsub( Z1, X0, vec_madd( tmp, X1, zero ) ) )
+    );
+}
+
+// float convenience overload; forwards to rotation( floatInVec, Vector3 ).
+inline const Matrix3 Matrix3::rotation( float radians, Vector3 unitVec )
+{
+    const floatInVec angle( radians );
+    return rotation( angle, unitVec );
+}
+
+// Builds a rotation matrix from an angle and an axis vector.
+// Each column is computed as (axis * axis[i]) * (1 - c) + tmp_i, where tmp_i is
+// a permutation of +/-(axis * s) with c inserted through one lane mask.
+// unitVec is used as given; this function does not normalize it.
+inline const Matrix3 Matrix3::rotation( floatInVec radians, Vector3 unitVec )
+{
+    vec_float4 axis, s, c, oneMinusC, axisS, negAxisS, xxxx, yyyy, zzzz, tmp0, tmp1, tmp2;
+    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    axis = unitVec.get128();
+    sincosf4( radians.get128(), &s, &c );
+    // Broadcast axis lanes 0, 1 and 2.
+    xxxx = vec_splat( axis, 0 );
+    yyyy = vec_splat( axis, 1 );
+    zzzz = vec_splat( axis, 2 );
+    oneMinusC = vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), c );
+    axisS = vec_madd( axis, s, zero );
+    negAxisS = negatef4( axisS );
+    // Lane selection is defined by the _VECTORMATH_PERM_* constants (declared
+    // elsewhere); presumably these gather the off-diagonal +/- axis*s terms.
+    tmp0 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_XZBX );
+    tmp1 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_CXXX );
+    tmp2 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_YAXX );
+    // Insert c into one lane of each column via the 0xF000 / 0x0F00 / 0x00F0 masks.
+    tmp0 = vec_sel( tmp0, c, _VECTORMATH_MASK_0xF000 );
+    tmp1 = vec_sel( tmp1, c, _VECTORMATH_MASK_0x0F00 );
+    tmp2 = vec_sel( tmp2, c, _VECTORMATH_MASK_0x00F0 );
+    return Matrix3(
+        Vector3( vec_madd( vec_madd( axis, xxxx, zero ), oneMinusC, tmp0 ) ),
+        Vector3( vec_madd( vec_madd( axis, yyyy, zero ), oneMinusC, tmp1 ) ),
+        Vector3( vec_madd( vec_madd( axis, zzzz, zero ), oneMinusC, tmp2 ) )
+    );
+}
+
+// Builds a rotation matrix from a quaternion by delegating to the Matrix3( Quat ) constructor.
+inline const Matrix3 Matrix3::rotation( Quat unitQuat )
+{
+    const Matrix3 fromQuat( unitQuat );
+    return fromQuat;
+}
+
+// Builds a scale matrix: each column is a zero vector with scaleVec copied in
+// through one lane mask (0xF000, 0x0F00, 0x00F0 for columns 0, 1, 2).
+inline const Matrix3 Matrix3::scale( Vector3 scaleVec )
+{
+    vec_float4 zeros = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vec_float4 factors = scaleVec.get128();
+    vec_float4 col0 = vec_sel( zeros, factors, _VECTORMATH_MASK_0xF000 );
+    vec_float4 col1 = vec_sel( zeros, factors, _VECTORMATH_MASK_0x0F00 );
+    vec_float4 col2 = vec_sel( zeros, factors, _VECTORMATH_MASK_0x00F0 );
+    return Matrix3( Vector3( col0 ), Vector3( col1 ), Vector3( col2 ) );
+}
+
+// Scales column 0, 1, 2 of mat by scaleVec's X, Y, Z respectively.
+inline const Matrix3 appendScale( const Matrix3 & mat, Vector3 scaleVec )
+{
+    const Vector3 scaled0 = mat.getCol0() * scaleVec.getX( );
+    const Vector3 scaled1 = mat.getCol1() * scaleVec.getY( );
+    const Vector3 scaled2 = mat.getCol2() * scaleVec.getZ( );
+    return Matrix3( scaled0, scaled1, scaled2 );
+}
+
+// Multiplies every column of mat element-wise by scaleVec.
+inline const Matrix3 prependScale( Vector3 scaleVec, const Matrix3 & mat )
+{
+    const Vector3 scaled0 = mulPerElem( mat.getCol0(), scaleVec );
+    const Vector3 scaled1 = mulPerElem( mat.getCol1(), scaleVec );
+    const Vector3 scaled2 = mulPerElem( mat.getCol2(), scaleVec );
+    return Matrix3( scaled0, scaled1, scaled2 );
+}
+
+// Column-wise select between mat0 and mat1, delegating to the Vector3 select
+// with the same bool flag for every column.
+inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, bool select1 )
+{
+    const Vector3 picked0 = select( mat0.getCol0(), mat1.getCol0(), select1 );
+    const Vector3 picked1 = select( mat0.getCol1(), mat1.getCol1(), select1 );
+    const Vector3 picked2 = select( mat0.getCol2(), mat1.getCol2(), select1 );
+    return Matrix3( picked0, picked1, picked2 );
+}
+
+// Column-wise select between mat0 and mat1, delegating to the Vector3 select
+// with the same boolInVec flag for every column.
+inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, boolInVec select1 )
+{
+    const Vector3 picked0 = select( mat0.getCol0(), mat1.getCol0(), select1 );
+    const Vector3 picked1 = select( mat0.getCol1(), mat1.getCol1(), select1 );
+    const Vector3 picked2 = select( mat0.getCol2(), mat1.getCol2(), select1 );
+    return Matrix3( picked0, picked1, picked2 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+// Debug helper: prints the matrix one row at a time (rows 0, 1, 2).
+inline void print( const Matrix3 & mat )
+{
+    for ( int row = 0; row < 3; ++row )
+    {
+        print( mat.getRow( row ) );
+    }
+}
+
+// Debug helper: prints "<name>:" on its own line, then the matrix rows.
+inline void print( const Matrix3 & mat, const char * name )
+{
+    const char * const label = name;
+    printf("%s:\n", label);
+    print( mat );
+}
+
+#endif
+
+// Copy constructor: copies all four columns from mat.
+inline Matrix4::Matrix4( const Matrix4 & mat )
+{
+    this->mCol0 = mat.mCol0;
+    this->mCol1 = mat.mCol1;
+    this->mCol2 = mat.mCol2;
+    this->mCol3 = mat.mCol3;
+}
+
+// Sets every column to Vector4( scalar ).
+inline Matrix4::Matrix4( float scalar )
+{
+    const Vector4 filled( scalar );
+    mCol0 = filled;
+    mCol1 = filled;
+    mCol2 = filled;
+    mCol3 = filled;
+}
+
+// Sets every column to Vector4( scalar ) for a floatInVec scalar.
+inline Matrix4::Matrix4( floatInVec scalar )
+{
+    const Vector4 filled( scalar );
+    mCol0 = filled;
+    mCol1 = filled;
+    mCol2 = filled;
+    mCol3 = filled;
+}
+
+// Widens a Transform3 to 4x4: columns 0..2 get a fourth component of 0.0f,
+// column 3 gets a fourth component of 1.0f.
+inline Matrix4::Matrix4( const Transform3 & mat )
+{
+    const Vector4 widened0( mat.getCol0(), 0.0f );
+    const Vector4 widened1( mat.getCol1(), 0.0f );
+    const Vector4 widened2( mat.getCol2(), 0.0f );
+    const Vector4 widened3( mat.getCol3(), 1.0f );
+    mCol0 = widened0;
+    mCol1 = widened1;
+    mCol2 = widened2;
+    mCol3 = widened3;
+}
+
+// Constructs the matrix from four explicit column vectors.
+inline Matrix4::Matrix4( Vector4 _col0, Vector4 _col1, Vector4 _col2, Vector4 _col3 )
+{
+    this->mCol0 = _col0;
+    this->mCol1 = _col1;
+    this->mCol2 = _col2;
+    this->mCol3 = _col3;
+}
+
+// Builds a 4x4 from a 3x3 and a translation: the 3x3 columns get a fourth
+// component of 0.0f, the translation column gets 1.0f.
+inline Matrix4::Matrix4( const Matrix3 & mat, Vector3 translateVec )
+{
+    const Vector4 widened0( mat.getCol0(), 0.0f );
+    const Vector4 widened1( mat.getCol1(), 0.0f );
+    const Vector4 widened2( mat.getCol2(), 0.0f );
+    const Vector4 widened3( translateVec, 1.0f );
+    mCol0 = widened0;
+    mCol1 = widened1;
+    mCol2 = widened2;
+    mCol3 = widened3;
+}
+
+// Builds a 4x4 from a quaternion and a translation: the quaternion is first
+// converted with Matrix3( Quat ); its columns get a fourth component of 0.0f
+// and the translation column gets 1.0f.
+inline Matrix4::Matrix4( Quat unitQuat, Vector3 translateVec )
+{
+    const Matrix3 rot = Matrix3( unitQuat );
+    mCol0 = Vector4( rot.getCol0(), 0.0f );
+    mCol1 = Vector4( rot.getCol1(), 0.0f );
+    mCol2 = Vector4( rot.getCol2(), 0.0f );
+    mCol3 = Vector4( translateVec, 1.0f );
+}
+
+// Replaces column 0; returns this matrix so calls can be chained.
+inline Matrix4 & Matrix4::setCol0( Vector4 _col0 )
+{
+    this->mCol0 = _col0;
+    return *this;
+}
+
+// Replaces column 1; returns this matrix so calls can be chained.
+inline Matrix4 & Matrix4::setCol1( Vector4 _col1 )
+{
+    this->mCol1 = _col1;
+    return *this;
+}
+
+// Replaces column 2; returns this matrix so calls can be chained.
+inline Matrix4 & Matrix4::setCol2( Vector4 _col2 )
+{
+    this->mCol2 = _col2;
+    return *this;
+}
+
+// Replaces column 3; returns this matrix so calls can be chained.
+inline Matrix4 & Matrix4::setCol3( Vector4 _col3 )
+{
+    this->mCol3 = _col3;
+    return *this;
+}
+
+// Replaces the column at index col, addressed as an offset from mCol0.
+// No range check is performed on col.
+inline Matrix4 & Matrix4::setCol( int col, Vector4 vec )
+{
+    Vector4 * const columns = &mCol0;
+    columns[col] = vec;
+    return *this;
+}
+
+// Writes vec into the given row: element i of vec goes into column i at index row.
+inline Matrix4 & Matrix4::setRow( int row, Vector4 vec )
+{
+    for ( int col = 0; col < 4; ++col )
+    {
+        (*this)[col].setElem( row, vec.getElem( col ) );
+    }
+    return *this;
+}
+
+// Sets a single element (column col, row row) from a float, writing directly
+// into the stored column.
+inline Matrix4 & Matrix4::setElem( int col, int row, float val )
+{
+    Vector4 & column = (*this)[col];
+    column.setElem( row, val );
+    return *this;
+}
+
+// Sets a single element (column col, row row) from a floatInVec by copying the
+// column out, modifying the copy, and storing it back.
+inline Matrix4 & Matrix4::setElem( int col, int row, floatInVec val )
+{
+    Vector4 column = this->getCol( col );
+    column.setElem( row, val );
+    this->setCol( col, column );
+    return *this;
+}
+
+// Returns the element at column col, row row.
+inline const floatInVec Matrix4::getElem( int col, int row ) const
+{
+    const Vector4 column = this->getCol( col );
+    return column.getElem( row );
+}
+
+// Returns a copy of column 0.
+inline const Vector4 Matrix4::getCol0( ) const
+{
+    return this->mCol0;
+}
+
+// Returns a copy of column 1.
+inline const Vector4 Matrix4::getCol1( ) const
+{
+    return this->mCol1;
+}
+
+// Returns a copy of column 2.
+inline const Vector4 Matrix4::getCol2( ) const
+{
+    return this->mCol2;
+}
+
+// Returns a copy of column 3.
+inline const Vector4 Matrix4::getCol3( ) const
+{
+    return this->mCol3;
+}
+
+// Returns a copy of the column at index col, addressed as an offset from mCol0.
+// No range check is performed on col.
+inline const Vector4 Matrix4::getCol( int col ) const
+{
+    const Vector4 * const columns = &mCol0;
+    return columns[col];
+}
+
+// Gathers element `row` from each of the four columns into a new Vector4.
+inline const Vector4 Matrix4::getRow( int row ) const
+{
+    return Vector4(
+        mCol0.getElem( row ),
+        mCol1.getElem( row ),
+        mCol2.getElem( row ),
+        mCol3.getElem( row )
+    );
+}
+
+// Mutable column access: returns a reference to the column at index col,
+// addressed as an offset from mCol0. No range check is performed.
+inline Vector4 & Matrix4::operator []( int col )
+{
+    Vector4 * const columns = &mCol0;
+    return columns[col];
+}
+
+// Read-only column access: returns a copy of the column at index col,
+// addressed as an offset from mCol0. No range check is performed.
+inline const Vector4 Matrix4::operator []( int col ) const
+{
+    const Vector4 * const columns = &mCol0;
+    return columns[col];
+}
+
+// Copy assignment: copies all four columns from mat and returns this matrix.
+inline Matrix4 & Matrix4::operator =( const Matrix4 & mat )
+{
+    this->mCol0 = mat.mCol0;
+    this->mCol1 = mat.mCol1;
+    this->mCol2 = mat.mCol2;
+    this->mCol3 = mat.mCol3;
+    return *this;
+}
+
+// Transposes a 4x4 matrix with two rounds of merge-high / merge-low:
+// first columns (0,2) and (1,3) are interleaved, then the intermediate
+// results are interleaved again to form the four output columns.
+inline const Matrix4 transpose( const Matrix4 & mat )
+{
+    vec_float4 hi02 = vec_mergeh( mat.getCol0().get128(), mat.getCol2().get128() );
+    vec_float4 hi13 = vec_mergeh( mat.getCol1().get128(), mat.getCol3().get128() );
+    vec_float4 lo02 = vec_mergel( mat.getCol0().get128(), mat.getCol2().get128() );
+    vec_float4 lo13 = vec_mergel( mat.getCol1().get128(), mat.getCol3().get128() );
+    vec_float4 out0 = vec_mergeh( hi02, hi13 );
+    vec_float4 out1 = vec_mergel( hi02, hi13 );
+    vec_float4 out2 = vec_mergeh( lo02, lo13 );
+    vec_float4 out3 = vec_mergel( lo02, lo13 );
+    return Matrix4( Vector4( out0 ), Vector4( out1 ), Vector4( out2 ), Vector4( out3 ) );
+}
+
+// Computes the inverse of a general 4x4 matrix.
+// The input columns are shuffled into a "pseudo transpose" (t0..t3), four
+// vectors of cofactors (cof0..cof3) are accumulated with multiply-add /
+// negative-multiply-subtract, the determinant is formed as the lane sum of
+// t0 * cof0, and every cofactor vector is multiplied by recipf4(det).
+// There is no check for a zero determinant before the reciprocal is taken.
+// NOTE(review): the accuracy of recipf4 is defined by the SIMD math library,
+// not here - confirm whether it is an estimate or a refined reciprocal.
+inline const Matrix4 inverse( const Matrix4 & mat )
+{
+    /* function implementation based on code from STIDC SDK:           */
+    /* --------------------------------------------------------------  */
+    /* PLEASE DO NOT MODIFY THIS SECTION                               */
+    /* This prolog section is automatically generated.                 */
+    /*                                                                 */
+    /* (C)Copyright                                                    */
+    /* Sony Computer Entertainment, Inc.,                              */
+    /* Toshiba Corporation,                                            */
+    /* International Business Machines Corporation,                    */
+    /* 2001,2002.                                                      */
+    /* S/T/I Confidential Information                                  */
+    /* --------------------------------------------------------------  */
+    vector float in0, in1, in2, in3;
+    vector float tmp0, tmp1, tmp2, tmp3;
+    vector float cof0, cof1, cof2, cof3;
+    vector float t0, t1, t2, t3;
+    vector float t01, t02, t03, t12, t23;
+    vector float t1r, t2r;
+    vector float t01r, t02r, t03r, t12r, t23r;
+    vector float t1r3, t1r3r;
+    vector float det, det0, det1, det2, det3, invdet;
+    vector float vzero = (vector float){0.0};
+    in0 = mat.getCol0().get128();
+    in1 = mat.getCol1().get128();
+    in2 = mat.getCol2().get128();
+    in3 = mat.getCol3().get128();
+    /* Perform transform of the input matrix of the form:
+     *    A B C D
+     *    E F G H
+     *    I J K L
+     *    M N O P
+     *
+     * The pseudo transpose of the input matrix is trans:
+     *    A E I M
+     *    J N B F
+     *    C G K O
+     *    L P D H
+     */
+    tmp0 = vec_perm(in0, in1, _VECTORMATH_PERM_XAZC);	/* A E C G */
+    tmp1 = vec_perm(in2, in3, _VECTORMATH_PERM_XAZC);	/* I M K O */
+    tmp2 = vec_perm(in0, in1, _VECTORMATH_PERM_YBWD);	/* B F D H */
+    tmp3 = vec_perm(in2, in3, _VECTORMATH_PERM_YBWD);	/* J N L P */
+    t0 = vec_perm(tmp0, tmp1, _VECTORMATH_PERM_XYAB);	/* A E I M */
+    t1 = vec_perm(tmp3, tmp2, _VECTORMATH_PERM_XYAB);	/* J N B F */
+    t2 = vec_perm(tmp0, tmp1, _VECTORMATH_PERM_ZWCD);	/* C G K O */
+    t3 = vec_perm(tmp3, tmp2, _VECTORMATH_PERM_ZWCD);	/* L P D H */
+    /* Generate a cofactor matrix. The computed cofactors reside in
+     * cof0, cof1, cof2, cof3.
+     * Throughout, vec_sld(x, x, 8) rotates a vector by two lanes and
+     * vec_nmsub(a, b, c) evaluates c - a*b.
+     */
+    t23 = vec_madd(t2, t3, vzero);		/* CL GP KD OH */
+    t23 = vec_perm(t23, t23, _VECTORMATH_PERM_YXWZ);	/* GP CL OH KD */
+    cof0 = vec_nmsub(t1, t23, vzero);		/* -(JGP NCL FOH BKD) */
+    cof1 = vec_nmsub(t0, t23, vzero);		/* -(AGP ECL IOH MKD) */
+    t23r = vec_sld(t23, t23, 8);			/* OH KD GP CL */
+    cof0 = vec_madd(t1, t23r, cof0);		/* JOH NKD BGP FCL + cof0 */
+    cof1 = vec_madd(t0, t23r, cof1);		/* AOH EKD IGP MCL + cof1 */
+    cof1 = vec_sld(cof1, cof1, 8);		/* IGP MCL AOH EKD - IOH MKD AGP ECL */
+    t12 = vec_madd(t1, t2, vzero);		/* JC NG BK FO */
+    t12 = vec_perm(t12, t12, _VECTORMATH_PERM_YXWZ);	/* NG JC FO BK */
+    cof0 = vec_madd(t3, t12, cof0);		/* LNG PJC DFO HBK + cof0 */
+    cof3 = vec_madd(t0, t12, vzero);		/* ANG EJC IFO MBK */
+    t12r = vec_sld(t12, t12, 8);			/* FO BK NG JC */
+    cof0 = vec_nmsub(t3, t12r, cof0);		/* cof0 - LFO PBK DNG HJC */
+    cof3 = vec_nmsub(t0, t12r, cof3);		/* cof3 - AFO EBK ING MJC */
+    cof3 = vec_sld(cof3, cof3, 8);		/* ING MJC AFO EBK - IFO MBK ANG EJC */
+    t1r = vec_sld(t1, t1, 8);			/* B F J N */
+    t2r = vec_sld(t2, t2, 8);			/* K O C G */
+    t1r3 = vec_madd(t1r, t3, vzero);		/* BL FP JD NH */
+    t1r3 = vec_perm(t1r3, t1r3, _VECTORMATH_PERM_YXWZ);	/* FP BL NH JD */
+    cof0 = vec_madd(t2r, t1r3, cof0);		/* KFP OBL CNH GJD + cof0 */
+    cof2 = vec_madd(t0, t1r3, vzero);		/* AFP EBL INH MJD */
+    t1r3r = vec_sld(t1r3, t1r3, 8);		/* NH JD FP BL */
+    cof0 = vec_nmsub(t2r, t1r3r, cof0);		/* cof0 - KNH OJD CFP GBL */
+    cof2 = vec_nmsub(t0, t1r3r, cof2);		/* cof2 - ANH EJD IFP MBL */
+    cof2 = vec_sld(cof2, cof2, 8);		/* IFP MBL ANH EJD - INH MJD AFP EBL */
+    t01 = vec_madd(t0, t1, vzero);		/* AJ EN IB MF */
+    t01 = vec_perm(t01, t01, _VECTORMATH_PERM_YXWZ);	/* EN AJ MF IB */
+    cof2 = vec_nmsub(t3, t01, cof2);		/* cof2 - LEN PAJ DMF HIB */
+    cof3 = vec_madd(t2r, t01, cof3);		/* KEN OAJ CMF GIB + cof3 */ 
+    t01r = vec_sld(t01, t01, 8);			/* MF IB EN AJ */
+    cof2 = vec_madd(t3, t01r, cof2);		/* LMF PIB DEN HAJ + cof2 */
+    cof3 = vec_nmsub(t2r, t01r, cof3);		/* cof3 - KMF OIB CEN GAJ */
+    t03 = vec_madd(t0, t3, vzero);		/* AL EP ID MH */
+    t03 = vec_perm(t03, t03, _VECTORMATH_PERM_YXWZ);	/* EP AL MH ID */
+    cof1 = vec_nmsub(t2r, t03, cof1);		/* cof1 - KEP OAL CMH GID */
+    cof2 = vec_madd(t1, t03, cof2);		/* JEP NAL BMH FID + cof2 */
+    t03r = vec_sld(t03, t03, 8);			/* MH ID EP AL */
+    cof1 = vec_madd(t2r, t03r, cof1);		/* KMH OID CEP GAL + cof1 */
+    cof2 = vec_nmsub(t1, t03r, cof2);		/* cof2 - JMH NID BEP FAL */ 
+    t02 = vec_madd(t0, t2r, vzero);		/* AK EO IC MG */
+    t02 = vec_perm(t02, t02, _VECTORMATH_PERM_YXWZ);	/* EO AK MG IC */
+    cof1 = vec_madd(t3, t02, cof1);		/* LEO PAK DMG HIC + cof1 */
+    cof3 = vec_nmsub(t1, t02, cof3);		/* cof3 - JEO NAK BMG FIC */
+    t02r = vec_sld(t02, t02, 8);			/* MG IC EO AK */
+    cof1 = vec_nmsub(t3, t02r, cof1);		/* cof1 - LMG PIC DEO HAK */
+    cof3 = vec_madd(t1, t02r, cof3);		/* JMG NIC BEO FAK + cof3 */
+    /* Compute the determinant of the matrix
+     *
+     * det = sum_across(t0 * cof0);
+     *
+     * We perform a sum across the entire vector so that
+     * we don't have to splat the result when multiplying the
+     * cofactors by the inverse of the determinant.
+     */
+    det  = vec_madd(t0, cof0, vzero);
+    det0 = vec_splat(det, 0);
+    det1 = vec_splat(det, 1);
+    det2 = vec_splat(det, 2);
+    det3 = vec_splat(det, 3);
+    det  = vec_add(det0, det1);
+    det2 = vec_add(det2, det3);
+    det  = vec_add(det, det2);
+    /* Compute the reciprocal of the determinant.
+     */
+    invdet = recipf4(det);
+    /* Multiply the cofactors by the reciprocal of the determinant.
+     */ 
+    return Matrix4(
+        Vector4( vec_madd(cof0, invdet, vzero) ),
+        Vector4( vec_madd(cof1, invdet, vzero) ),
+        Vector4( vec_madd(cof2, invdet, vzero) ),
+        Vector4( vec_madd(cof3, invdet, vzero) )
+    );
+}
+
+// Inverts mat by copying the XYZ part of each column into a Transform3,
+// calling the Transform3 inverse(), and widening the result back to Matrix4.
+// The fourth component of every input column is ignored.
+inline const Matrix4 affineInverse( const Matrix4 & mat )
+{
+    Transform3 xyzPart;
+    xyzPart.setCol0( mat.getCol0().getXYZ( ) );
+    xyzPart.setCol1( mat.getCol1().getXYZ( ) );
+    xyzPart.setCol2( mat.getCol2().getXYZ( ) );
+    xyzPart.setCol3( mat.getCol3().getXYZ( ) );
+    const Transform3 inverted = inverse( xyzPart );
+    return Matrix4( inverted );
+}
+
+// Inverts mat by copying the XYZ part of each column into a Transform3,
+// calling the Transform3 orthoInverse(), and widening the result back to
+// Matrix4. The fourth component of every input column is ignored.
+inline const Matrix4 orthoInverse( const Matrix4 & mat )
+{
+    Transform3 xyzPart;
+    xyzPart.setCol0( mat.getCol0().getXYZ( ) );
+    xyzPart.setCol1( mat.getCol1().getXYZ( ) );
+    xyzPart.setCol2( mat.getCol2().getXYZ( ) );
+    xyzPart.setCol3( mat.getCol3().getXYZ( ) );
+    const Transform3 inverted = orthoInverse( xyzPart );
+    return Matrix4( inverted );
+}
+
+// Computes the determinant of a 4x4 matrix.
+// Uses the same pseudo-transpose and cofactor accumulation as inverse(), but
+// only the first cofactor vector (cof0) is built; the result is formed from
+// t0 and cof0 via _vmathVfDot4.
+// NOTE(review): _vmathVfDot4 and the floatInVec( vec, 0 ) constructor are
+// defined elsewhere - presumably a 4-lane dot product and a lane-0 pick.
+inline const floatInVec determinant( const Matrix4 & mat )
+{
+    /* function implementation based on code from STIDC SDK:           */
+    /* --------------------------------------------------------------  */
+    /* PLEASE DO NOT MODIFY THIS SECTION                               */
+    /* This prolog section is automatically generated.                 */
+    /*                                                                 */
+    /* (C)Copyright                                                    */
+    /* Sony Computer Entertainment, Inc.,                              */
+    /* Toshiba Corporation,                                            */
+    /* International Business Machines Corporation,                    */
+    /* 2001,2002.                                                      */
+    /* S/T/I Confidential Information                                  */
+    /* --------------------------------------------------------------  */
+    vector float in0, in1, in2, in3;
+    vector float tmp0, tmp1, tmp2, tmp3;
+    vector float cof0;
+    vector float t0, t1, t2, t3;
+    vector float t12, t23;
+    vector float t1r, t2r;
+    vector float t12r, t23r;
+    vector float t1r3, t1r3r;
+    vector float vzero = (vector float){0.0};
+    in0 = mat.getCol0().get128();
+    in1 = mat.getCol1().get128();
+    in2 = mat.getCol2().get128();
+    in3 = mat.getCol3().get128();
+    /* Perform transform of the input matrix of the form:
+     *    A B C D
+     *    E F G H
+     *    I J K L
+     *    M N O P
+     *
+     * The pseudo transpose of the input matrix is trans:
+     *    A E I M
+     *    J N B F
+     *    C G K O
+     *    L P D H
+     */
+    tmp0 = vec_perm(in0, in1, _VECTORMATH_PERM_XAZC);	/* A E C G */
+    tmp1 = vec_perm(in2, in3, _VECTORMATH_PERM_XAZC);	/* I M K O */
+    tmp2 = vec_perm(in0, in1, _VECTORMATH_PERM_YBWD);	/* B F D H */
+    tmp3 = vec_perm(in2, in3, _VECTORMATH_PERM_YBWD);	/* J N L P */
+    t0 = vec_perm(tmp0, tmp1, _VECTORMATH_PERM_XYAB);	/* A E I M */
+    t1 = vec_perm(tmp3, tmp2, _VECTORMATH_PERM_XYAB);	/* J N B F */
+    t2 = vec_perm(tmp0, tmp1, _VECTORMATH_PERM_ZWCD);	/* C G K O */
+    t3 = vec_perm(tmp3, tmp2, _VECTORMATH_PERM_ZWCD);	/* L P D H */
+    /* Generate the first vector of cofactors only. The determinant needs
+     * just cof0; cof1, cof2 and cof3 are not computed in this function.
+     */
+    t23 = vec_madd(t2, t3, vzero);		/* CL GP KD OH */
+    t23 = vec_perm(t23, t23, _VECTORMATH_PERM_YXWZ);	/* GP CL OH KD */
+    cof0 = vec_nmsub(t1, t23, vzero);		/* -(JGP NCL FOH BKD) */
+    t23r = vec_sld(t23, t23, 8);			/* OH KD GP CL */
+    cof0 = vec_madd(t1, t23r, cof0);		/* JOH NKD BGP FCL + cof0 */
+    t12 = vec_madd(t1, t2, vzero);		/* JC NG BK FO */
+    t12 = vec_perm(t12, t12, _VECTORMATH_PERM_YXWZ);	/* NG JC FO BK */
+    cof0 = vec_madd(t3, t12, cof0);		/* LNG PJC DFO HBK + cof0 */
+    t12r = vec_sld(t12, t12, 8);			/* FO BK NG JC */
+    cof0 = vec_nmsub(t3, t12r, cof0);		/* cof0 - LFO PBK DNG HJC */
+    t1r = vec_sld(t1, t1, 8);			/* B F J N */
+    t2r = vec_sld(t2, t2, 8);			/* K O C G */
+    t1r3 = vec_madd(t1r, t3, vzero);		/* BL FP JD NH */
+    t1r3 = vec_perm(t1r3, t1r3, _VECTORMATH_PERM_YXWZ);	/* FP BL NH JD */
+    cof0 = vec_madd(t2r, t1r3, cof0);		/* KFP OBL CNH GJD + cof0 */
+    t1r3r = vec_sld(t1r3, t1r3, 8);		/* NH JD FP BL */
+    cof0 = vec_nmsub(t2r, t1r3r, cof0);		/* cof0 - KNH OJD CFP GBL */
+    return floatInVec( _vmathVfDot4(t0,cof0), 0 );
+}
+
+// Column-wise sum of this matrix and mat.
+inline const Matrix4 Matrix4::operator +( const Matrix4 & mat ) const
+{
+    const Vector4 sum0 = mCol0 + mat.mCol0;
+    const Vector4 sum1 = mCol1 + mat.mCol1;
+    const Vector4 sum2 = mCol2 + mat.mCol2;
+    const Vector4 sum3 = mCol3 + mat.mCol3;
+    return Matrix4( sum0, sum1, sum2, sum3 );
+}
+
+// Column-wise difference of this matrix and mat.
+inline const Matrix4 Matrix4::operator -( const Matrix4 & mat ) const
+{
+    const Vector4 diff0 = mCol0 - mat.mCol0;
+    const Vector4 diff1 = mCol1 - mat.mCol1;
+    const Vector4 diff2 = mCol2 - mat.mCol2;
+    const Vector4 diff3 = mCol3 - mat.mCol3;
+    return Matrix4( diff0, diff1, diff2, diff3 );
+}
+
+// Adds mat to this matrix in place and returns a reference to this matrix.
+inline Matrix4 & Matrix4::operator +=( const Matrix4 & mat )
+{
+    const Matrix4 sum = *this + mat;
+    *this = sum;
+    return *this;
+}
+
+// Subtracts mat from this matrix in place and returns a reference to this matrix.
+inline Matrix4 & Matrix4::operator -=( const Matrix4 & mat )
+{
+    const Matrix4 difference = *this - mat;
+    *this = difference;
+    return *this;
+}
+
+// Returns a matrix whose four columns are the negated columns of this matrix.
+inline const Matrix4 Matrix4::operator -( ) const
+{
+    const Vector4 negCol0 = -mCol0;
+    const Vector4 negCol1 = -mCol1;
+    const Vector4 negCol2 = -mCol2;
+    const Vector4 negCol3 = -mCol3;
+    return Matrix4( negCol0, negCol1, negCol2, negCol3 );
+}
+
+// Applies the Vector4 absPerElem to each column of mat and returns the resulting matrix.
+inline const Matrix4 absPerElem( const Matrix4 & mat )
+{
+    const Vector4 abs0 = absPerElem( mat.getCol0() );
+    const Vector4 abs1 = absPerElem( mat.getCol1() );
+    const Vector4 abs2 = absPerElem( mat.getCol2() );
+    const Vector4 abs3 = absPerElem( mat.getCol3() );
+    return Matrix4( abs0, abs1, abs2, abs3 );
+}
+
+// Multiplies the matrix by a float scalar by forwarding to the floatInVec overload.
+inline const Matrix4 Matrix4::operator *( float scalar ) const
+{
+    const floatInVec wrapped( scalar );
+    return *this * wrapped;
+}
+
+// Multiplies each of the four columns by the floatInVec scalar.
+inline const Matrix4 Matrix4::operator *( floatInVec scalar ) const
+{
+    const Vector4 scaled0 = mCol0 * scalar;
+    const Vector4 scaled1 = mCol1 * scalar;
+    const Vector4 scaled2 = mCol2 * scalar;
+    const Vector4 scaled3 = mCol3 * scalar;
+    return Matrix4( scaled0, scaled1, scaled2, scaled3 );
+}
+
+// In-place multiply by a float scalar; forwards to the floatInVec overload.
+inline Matrix4 & Matrix4::operator *=( float scalar )
+{
+    const floatInVec wrapped( scalar );
+    return *this *= wrapped;
+}
+
+// In-place multiply by a floatInVec scalar; returns a reference to this matrix.
+inline Matrix4 & Matrix4::operator *=( floatInVec scalar )
+{
+    const Matrix4 scaled = *this * scalar;
+    *this = scaled;
+    return *this;
+}
+
+// scalar * matrix for a float scalar; forwards to the floatInVec * Matrix4 overload.
+inline const Matrix4 operator *( float scalar, const Matrix4 & mat )
+{
+    const floatInVec wrapped( scalar );
+    return wrapped * mat;
+}
+
+// scalar * matrix for a floatInVec scalar; evaluated as mat * scalar.
+inline const Matrix4 operator *( floatInVec scalar, const Matrix4 & mat )
+{
+    const Matrix4 scaled = mat * scalar;
+    return scaled;
+}
+
+// Matrix * Vector4. Two independent accumulators are used:
+//   accA = col0*vec[0] then + col2*vec[2]
+//   accB = col1*vec[1] then + col3*vec[3]
+// and the result is accA + accB.
+inline const Vector4 Matrix4::operator *( Vector4 vec ) const
+{
+    vec_float4 zeros = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vec_float4 splat0 = vec_splat( vec.get128(), 0 );
+    vec_float4 splat1 = vec_splat( vec.get128(), 1 );
+    vec_float4 splat2 = vec_splat( vec.get128(), 2 );
+    vec_float4 splat3 = vec_splat( vec.get128(), 3 );
+    vec_float4 accA = vec_madd( mCol0.get128(), splat0, zeros );
+    vec_float4 accB = vec_madd( mCol1.get128(), splat1, zeros );
+    accA = vec_madd( mCol2.get128(), splat2, accA );
+    accB = vec_madd( mCol3.get128(), splat3, accB );
+    return Vector4( vec_add( accA, accB ) );
+}
+
+// Matrix * Vector3: accumulates col0*vec[0] + col1*vec[1] + col2*vec[2] with
+// three chained multiply-adds; column 3 does not contribute.
+inline const Vector4 Matrix4::operator *( Vector3 vec ) const
+{
+    vec_float4 splat0 = vec_splat( vec.get128(), 0 );
+    vec_float4 splat1 = vec_splat( vec.get128(), 1 );
+    vec_float4 splat2 = vec_splat( vec.get128(), 2 );
+    vec_float4 acc = vec_madd( mCol0.get128(), splat0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    acc = vec_madd( mCol1.get128(), splat1, acc );
+    acc = vec_madd( mCol2.get128(), splat2, acc );
+    return Vector4( acc );
+}
+
+// Matrix * Point3. Two independent accumulators are used:
+//   accA = col0*pnt[0] then + col2*pnt[2]
+//   accB = col1*pnt[1] then + col3 (column 3 is added unscaled)
+// and the result is accA + accB.
+inline const Vector4 Matrix4::operator *( Point3 pnt ) const
+{
+    vec_float4 zeros = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vec_float4 splat0 = vec_splat( pnt.get128(), 0 );
+    vec_float4 splat1 = vec_splat( pnt.get128(), 1 );
+    vec_float4 splat2 = vec_splat( pnt.get128(), 2 );
+    vec_float4 accA = vec_madd( mCol0.get128(), splat0, zeros );
+    vec_float4 accB = vec_madd( mCol1.get128(), splat1, zeros );
+    accA = vec_madd( mCol2.get128(), splat2, accA );
+    accB = vec_add( mCol3.get128(), accB );
+    return Vector4( vec_add( accA, accB ) );
+}
+
+// Matrix * matrix: each result column is this matrix applied to the matching column of mat.
+inline const Matrix4 Matrix4::operator *( const Matrix4 & mat ) const
+{
+    const Vector4 out0 = *this * mat.mCol0;
+    const Vector4 out1 = *this * mat.mCol1;
+    const Vector4 out2 = *this * mat.mCol2;
+    const Vector4 out3 = *this * mat.mCol3;
+    return Matrix4( out0, out1, out2, out3 );
+}
+
+// Replaces this matrix with (this * mat) and returns a reference to this matrix.
+inline Matrix4 & Matrix4::operator *=( const Matrix4 & mat )
+{
+    const Matrix4 product = *this * mat;
+    *this = product;
+    return *this;
+}
+
+// Matrix4 * Transform3: columns 0..2 of tfrm go through the Vector3 multiply,
+// while column 3 is wrapped in a Point3 and goes through the Point3 multiply.
+inline const Matrix4 Matrix4::operator *( const Transform3 & tfrm ) const
+{
+    const Vector4 out0 = *this * tfrm.getCol0();
+    const Vector4 out1 = *this * tfrm.getCol1();
+    const Vector4 out2 = *this * tfrm.getCol2();
+    const Vector4 out3 = *this * Point3( tfrm.getCol3() );
+    return Matrix4( out0, out1, out2, out3 );
+}
+
+// Replaces this matrix with (this * tfrm) and returns a reference to this matrix.
+inline Matrix4 & Matrix4::operator *=( const Transform3 & tfrm )
+{
+    const Matrix4 product = *this * tfrm;
+    *this = product;
+    return *this;
+}
+
+// Column-by-column mulPerElem of two matrices (not a matrix product).
+inline const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 )
+{
+    const Vector4 prod0 = mulPerElem( mat0.getCol0(), mat1.getCol0() );
+    const Vector4 prod1 = mulPerElem( mat0.getCol1(), mat1.getCol1() );
+    const Vector4 prod2 = mulPerElem( mat0.getCol2(), mat1.getCol2() );
+    const Vector4 prod3 = mulPerElem( mat0.getCol3(), mat1.getCol3() );
+    return Matrix4( prod0, prod1, prod2, prod3 );
+}
+
+// Builds a matrix whose columns are Vector4::xAxis(), yAxis(), zAxis() and wAxis().
+inline const Matrix4 Matrix4::identity( )
+{
+    const Vector4 axisX = Vector4::xAxis( );
+    const Vector4 axisY = Vector4::yAxis( );
+    const Vector4 axisZ = Vector4::zAxis( );
+    const Vector4 axisW = Vector4::wAxis( );
+    return Matrix4( axisX, axisY, axisZ, axisW );
+}
+
+// Overwrites the XYZ part of columns 0..2 with the columns of mat3.
+// Column 3 is not touched by this function.
+inline Matrix4 & Matrix4::setUpper3x3( const Matrix3 & mat3 )
+{
+    this->mCol0.setXYZ( mat3.getCol0() );
+    this->mCol1.setXYZ( mat3.getCol1() );
+    this->mCol2.setXYZ( mat3.getCol2() );
+    return *this;
+}
+
+// Returns a Matrix3 built from the XYZ part of columns 0..2.
+inline const Matrix3 Matrix4::getUpper3x3( ) const
+{
+    const Vector3 upper0 = mCol0.getXYZ( );
+    const Vector3 upper1 = mCol1.getXYZ( );
+    const Vector3 upper2 = mCol2.getXYZ( );
+    return Matrix3( upper0, upper1, upper2 );
+}
+
+// Overwrites the XYZ part of column 3 with translateVec.
+inline Matrix4 & Matrix4::setTranslation( Vector3 translateVec )
+{
+    this->mCol3.setXYZ( translateVec );
+    return *this;
+}
+
+// Returns the XYZ part of column 3.
+inline const Vector3 Matrix4::getTranslation( ) const
+{
+    const Vector3 translation = mCol3.getXYZ( );
+    return translation;
+}
+
+// float convenience overload; forwards to rotationX( floatInVec ).
+inline const Matrix4 Matrix4::rotationX( float radians )
+{
+    const floatInVec angle( radians );
+    return rotationX( angle );
+}
+
+// Builds a 4x4 rotation about the x axis.
+// Columns 0 and 3 are the x and w axes; column 1 takes c through the 0x0F00
+// mask and s through the 0x00F0 mask; column 2 takes -s and c through the
+// same two masks (s, c are the outputs of sincosf4 on the angle).
+inline const Matrix4 Matrix4::rotationX( floatInVec radians )
+{
+    vec_float4 sinV, cosV;
+    sincosf4( radians.get128(), &sinV, &cosV );
+    vec_uint4 maskY = _VECTORMATH_MASK_0x0F00;
+    vec_uint4 maskZ = _VECTORMATH_MASK_0x00F0;
+    vec_float4 zeros = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vec_float4 col1 = vec_sel( vec_sel( zeros, cosV, maskY ), sinV, maskZ );
+    vec_float4 col2 = vec_sel( vec_sel( zeros, negatef4(sinV), maskY ), cosV, maskZ );
+    return Matrix4(
+        Vector4::xAxis( ),
+        Vector4( col1 ),
+        Vector4( col2 ),
+        Vector4::wAxis( )
+    );
+}
+
+// float convenience overload; forwards to rotationY( floatInVec ).
+inline const Matrix4 Matrix4::rotationY( float radians )
+{
+    const floatInVec angle( radians );
+    return rotationY( angle );
+}
+
+// Builds a 4x4 rotation about the y axis.
+// Columns 1 and 3 are the y and w axes; column 0 takes c through the 0xF000
+// mask and -s through the 0x00F0 mask; column 2 takes s and c through the
+// same two masks (s, c are the outputs of sincosf4 on the angle).
+inline const Matrix4 Matrix4::rotationY( floatInVec radians )
+{
+    vec_float4 sinV, cosV;
+    sincosf4( radians.get128(), &sinV, &cosV );
+    vec_uint4 maskX = _VECTORMATH_MASK_0xF000;
+    vec_uint4 maskZ = _VECTORMATH_MASK_0x00F0;
+    vec_float4 zeros = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vec_float4 col0 = vec_sel( vec_sel( zeros, cosV, maskX ), negatef4(sinV), maskZ );
+    vec_float4 col2 = vec_sel( vec_sel( zeros, sinV, maskX ), cosV, maskZ );
+    return Matrix4(
+        Vector4( col0 ),
+        Vector4::yAxis( ),
+        Vector4( col2 ),
+        Vector4::wAxis( )
+    );
+}
+
+// float convenience overload; forwards to rotationZ( floatInVec ).
+inline const Matrix4 Matrix4::rotationZ( float radians )
+{
+    const floatInVec angle( radians );
+    return rotationZ( angle );
+}
+
+// Builds a 4x4 rotation about the z axis.
+// Columns 2 and 3 are the z and w axes; column 0 takes c through the 0xF000
+// mask and s through the 0x0F00 mask; column 1 takes -s and c through the
+// same two masks (s, c are the outputs of sincosf4 on the angle).
+inline const Matrix4 Matrix4::rotationZ( floatInVec radians )
+{
+    vec_float4 sinV, cosV;
+    sincosf4( radians.get128(), &sinV, &cosV );
+    vec_uint4 maskX = _VECTORMATH_MASK_0xF000;
+    vec_uint4 maskY = _VECTORMATH_MASK_0x0F00;
+    vec_float4 zeros = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vec_float4 col0 = vec_sel( vec_sel( zeros, cosV, maskX ), sinV, maskY );
+    vec_float4 col1 = vec_sel( vec_sel( zeros, negatef4(sinV), maskX ), cosV, maskY );
+    return Matrix4(
+        Vector4( col0 ),
+        Vector4( col1 ),
+        Vector4::zAxis( ),
+        Vector4::wAxis( )
+    );
+}
+
+// Builds a 4x4 rotation matrix from three angles packed in radiansXYZ.
+// The angles are widened to a 4-lane vector (w = 0.0f), run through a single
+// sincosf4 call, and the resulting sine/cosine lanes are shuffled into helper
+// vectors (X*, Y*, Z*) that are combined with multiply-adds into the first
+// three columns; the fourth column is Vector4::wAxis().
+// NOTE(review): the name suggests the composed order is Z * Y * X; confirm
+// against the class declaration.
+inline const Matrix4 Matrix4::rotationZYX( Vector3 radiansXYZ )
+{
+    vec_float4 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
+    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    angles = Vector4( radiansXYZ, 0.0f ).get128();
+    sincosf4( angles, &s, &c );
+    negS = negatef4( s );
+    // vec_mergel interleaves the low halves (lanes 2 and 3) of its operands:
+    // Z0 = ( c[2], s[2], c[3], s[3] ), Z1 = ( -s[2], c[2], -s[3], c[3] ).
+    Z0 = vec_mergel( c, s );
+    Z1 = vec_mergel( negS, c );
+    // Clear the bits of Z1 selected by the 0x000F mask (presumably the w lane).
+    Z1 = vec_andc( Z1, (vec_float4)_VECTORMATH_MASK_0x000F );
+    // Lane layout of Y0/Y1 depends on _VECTORMATH_PERM_BBYX, defined elsewhere.
+    Y0 = vec_perm( negS, c, _VECTORMATH_PERM_BBYX );
+    Y1 = vec_perm( c, s, _VECTORMATH_PERM_BBYX );
+    // Broadcast lane 0 of the sine and cosine vectors.
+    X0 = vec_splat( s, 0 );
+    X1 = vec_splat( c, 0 );
+    tmp = vec_madd( Z0, Y1, zero );
+    // col0 = Z0*Y0, col1 = Z1*X1 + tmp*X0, col2 = tmp*X1 - Z1*X0
+    // (vec_nmsub(a,b,c) evaluates c - a*b).
+    return Matrix4(
+        Vector4( vec_madd( Z0, Y0, zero ) ),
+        Vector4( vec_madd( Z1, X1, vec_madd( tmp, X0, zero ) ) ),
+        Vector4( vec_nmsub( Z1, X0, vec_madd( tmp, X1, zero ) ) ),
+        Vector4::wAxis( )
+    );
+}
+
+// float convenience overload; forwards to rotation( floatInVec, Vector3 ).
+inline const Matrix4 Matrix4::rotation( float radians, Vector3 unitVec )
+{
+    const floatInVec angle( radians );
+    return rotation( angle, unitVec );
+}
+
+// Builds a 4x4 rotation matrix from an angle and an axis vector.
+// Each of the first three columns is computed as
+// (axis * axis[i]) * (1 - c) + tmp_i, where tmp_i is a permutation of
+// +/-(axis * s) with c inserted through one lane mask. The fourth column is
+// Vector4::wAxis(). unitVec is used as given; this function does not
+// normalize it.
+inline const Matrix4 Matrix4::rotation( floatInVec radians, Vector3 unitVec )
+{
+    vec_float4 axis, s, c, oneMinusC, axisS, negAxisS, xxxx, yyyy, zzzz, tmp0, tmp1, tmp2, zeroW;
+    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    axis = unitVec.get128();
+    sincosf4( radians.get128(), &s, &c );
+    // Broadcast axis lanes 0, 1 and 2.
+    xxxx = vec_splat( axis, 0 );
+    yyyy = vec_splat( axis, 1 );
+    zzzz = vec_splat( axis, 2 );
+    oneMinusC = vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), c );
+    axisS = vec_madd( axis, s, zero );
+    negAxisS = negatef4( axisS );
+    // Lane selection is defined by the _VECTORMATH_PERM_* constants (declared
+    // elsewhere); presumably these gather the off-diagonal +/- axis*s terms.
+    tmp0 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_XZBX );
+    tmp1 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_CXXX );
+    tmp2 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_YAXX );
+    // Insert c into one lane of each column via the 0xF000 / 0x0F00 / 0x00F0 masks.
+    tmp0 = vec_sel( tmp0, c, _VECTORMATH_MASK_0xF000 );
+    tmp1 = vec_sel( tmp1, c, _VECTORMATH_MASK_0x0F00 );
+    tmp2 = vec_sel( tmp2, c, _VECTORMATH_MASK_0x00F0 );
+    // Clear the bits selected by the 0x000F mask (presumably the w lane) in
+    // every operand that feeds the final multiply-adds, so that lane of the
+    // three rotation columns comes out as zero.
+    zeroW = (vec_float4)_VECTORMATH_MASK_0x000F;
+    axis = vec_andc( axis, zeroW );
+    tmp0 = vec_andc( tmp0, zeroW );
+    tmp1 = vec_andc( tmp1, zeroW );
+    tmp2 = vec_andc( tmp2, zeroW );
+    return Matrix4(
+        Vector4( vec_madd( vec_madd( axis, xxxx, zero ), oneMinusC, tmp0 ) ),
+        Vector4( vec_madd( vec_madd( axis, yyyy, zero ), oneMinusC, tmp1 ) ),
+        Vector4( vec_madd( vec_madd( axis, zzzz, zero ), oneMinusC, tmp2 ) ),
+        Vector4::wAxis( )
+    );
+}
+
+inline const Matrix4 Matrix4::rotation( Quat unitQuat )
+{
+    return Matrix4( Transform3::rotation( unitQuat ) ); // build the Transform3 rotation, then convert it with Matrix4's Transform3 constructor
+}
+
+inline const Matrix4 Matrix4::scale( Vector3 scaleVec )
+{
+    // Each column keeps exactly one lane of scaleVec; all its other lanes come from 'zeros'.
+    const vec_float4 zeros = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    const vec_float4 factors = scaleVec.get128();
+    const Vector4 col0( vec_sel( zeros, factors, _VECTORMATH_MASK_0xF000 ) );
+    const Vector4 col1( vec_sel( zeros, factors, _VECTORMATH_MASK_0x0F00 ) );
+    const Vector4 col2( vec_sel( zeros, factors, _VECTORMATH_MASK_0x00F0 ) );
+    return Matrix4( col0, col1, col2, Vector4::wAxis( ) );
+}
+
+inline const Matrix4 appendScale( const Matrix4 & mat, Vector3 scaleVec )
+{
+    // Columns 0, 1 and 2 are multiplied by the x, y and z factors respectively;
+    // column 3 is passed through unchanged.
+    const Vector4 scaledCol0 = mat.getCol0() * scaleVec.getX( );
+    const Vector4 scaledCol1 = mat.getCol1() * scaleVec.getY( );
+    const Vector4 scaledCol2 = mat.getCol2() * scaleVec.getZ( );
+    return Matrix4( scaledCol0, scaledCol1, scaledCol2, mat.getCol3() );
+}
+
+inline const Matrix4 prependScale( Vector3 scaleVec, const Matrix4 & mat )
+{
+    // Every column, translation included, is multiplied element-wise by
+    // ( scaleVec, 1 ), so the w components are multiplied by 1.
+    const Vector4 rowFactors = Vector4( scaleVec, 1.0f );
+    const Vector4 c0 = mulPerElem( mat.getCol0(), rowFactors );
+    const Vector4 c1 = mulPerElem( mat.getCol1(), rowFactors );
+    const Vector4 c2 = mulPerElem( mat.getCol2(), rowFactors );
+    const Vector4 c3 = mulPerElem( mat.getCol3(), rowFactors );
+    return Matrix4( c0, c1, c2, c3 );
+}
+
+inline const Matrix4 Matrix4::translation( Vector3 translateVec )
+{
+    // x/y/z axis vectors fill columns 0-2; the offset goes into column 3 with w = 1.
+    const Vector4 offsetCol( translateVec, 1.0f );
+    return Matrix4(
+        Vector4::xAxis( ), Vector4::yAxis( ), Vector4::zAxis( ),
+        offsetCol
+    );
+}
+
+inline const Matrix4 Matrix4::lookAt( Point3 eyePos, Point3 lookAtPos, Vector3 upVec )
+{
+    // Assemble the eye's frame (three orthogonal axes plus the eye position as
+    // the fourth column) and return orthoInverse() of that frame.
+    const Vector3 upHint = normalize( upVec );
+    const Vector3 back = normalize( ( eyePos - lookAtPos ) );    // points from the target towards the eye
+    const Vector3 right = normalize( cross( upHint, back ) );
+    const Vector3 up = cross( back, right );                     // re-derived so it is orthogonal to the other two
+    const Matrix4 eyeFrame( Vector4( right ), Vector4( up ), Vector4( back ), Vector4( eyePos ) );
+    return orthoInverse( eyeFrame );
+}
+
+inline const Matrix4 Matrix4::perspective( float fovyRadians, float aspect, float zNear, float zFar )
+{
+    // Each column starts as all zeros and then has its non-zero scalars written
+    // into individual lanes through the float view of a union.
+    union { vec_float4 v; float s[4]; } lanes;
+    const vec_float4 zeros = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    const float cotHalfFovy = tanf( _VECTORMATH_PI_OVER_2 - fovyRadians * 0.5f ); // tan( pi/2 - a ) = cot( a )
+    const float invRange = 1.0f / ( zNear - zFar );
+
+    lanes.v = zeros;
+    lanes.s[0] = cotHalfFovy / aspect;
+    const Vector4 col0( lanes.v );
+
+    lanes.v = zeros;
+    lanes.s[1] = cotHalfFovy;
+    const Vector4 col1( lanes.v );
+
+    lanes.v = zeros;
+    lanes.s[2] = ( zNear + zFar ) * invRange;
+    lanes.s[3] = -1.0f;
+    const Vector4 col2( lanes.v );
+
+    lanes.v = zeros;
+    lanes.s[2] = zNear * zFar * invRange * 2.0f;
+    const Vector4 col3( lanes.v );
+
+    return Matrix4( col0, col1, col2, col3 );
+}
+
+inline const Matrix4 Matrix4::frustum( float left, float right, float bottom, float top, float zNear, float zFar ) // projection matrix built from the six plane values; no check for right==left, top==bottom or zNear==zFar
+{
+    /* function implementation based on code from STIDC SDK:           */
+    /* --------------------------------------------------------------  */
+    /* PLEASE DO NOT MODIFY THIS SECTION                               */
+    /* This prolog section is automatically generated.                 */
+    /*                                                                 */
+    /* (C)Copyright                                                    */
+    /* Sony Computer Entertainment, Inc.,                              */
+    /* Toshiba Corporation,                                            */
+    /* International Business Machines Corporation,                    */
+    /* 2001,2002.                                                      */
+    /* S/T/I Confidential Information                                  */
+    /* --------------------------------------------------------------  */
+    vec_float4 lbf, rtn;
+    vec_float4 diff, sum, inv_diff;
+    vec_float4 diagonal, column, near2;
+    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    union { vec_float4 v; float s[4]; } l, f, r, n, b, t; // NOTE(review): only s[0] of each union is written below; s[1..3] stay uninitialized
+    l.s[0] = left;
+    f.s[0] = zFar;
+    r.s[0] = right;
+    n.s[0] = zNear;
+    b.s[0] = bottom;
+    t.s[0] = top;
+    lbf = vec_mergeh( l.v, f.v ); // vec_mergeh(a,b) = ( a0, b0, a1, b1 )
+    rtn = vec_mergeh( r.v, n.v );
+    lbf = vec_mergeh( lbf, b.v ); // lbf = ( left, bottom, zFar, <uninitialized> )
+    rtn = vec_mergeh( rtn, t.v ); // rtn = ( right, top, zNear, <uninitialized> )
+    diff = vec_sub( rtn, lbf ); // ( r-l, t-b, n-f, - )
+    sum  = vec_add( rtn, lbf ); // ( r+l, t+b, n+f, - )
+    inv_diff = recipf4( diff ); // presumably the per-lane reciprocal (helper defined elsewhere)
+    near2 = vec_splat( n.v, 0 );
+    near2 = vec_add( near2, near2 ); // 2 * zNear in every lane
+    diagonal = vec_madd( near2, inv_diff, zero ); // 2n * inv_diff per lane
+    column = vec_madd( sum, inv_diff, zero ); // sum * inv_diff per lane
+    return Matrix4(
+        Vector4( vec_sel( zero, diagonal, _VECTORMATH_MASK_0xF000 ) ), // keep one lane of diagonal, zeros elsewhere
+        Vector4( vec_sel( zero, diagonal, _VECTORMATH_MASK_0x0F00 ) ),
+        Vector4( vec_sel( column, ((vec_float4){-1.0f,-1.0f,-1.0f,-1.0f}), _VECTORMATH_MASK_0x000F ) ), // the lane derived from uninitialized data is replaced by -1
+        Vector4( vec_sel( zero, vec_madd( diagonal, vec_splat( f.v, 0 ), zero ), _VECTORMATH_MASK_0x00F0 ) ) // single non-zero lane: diagonal * zFar
+    );
+}
+
+inline const Matrix4 Matrix4::orthographic( float left, float right, float bottom, float top, float zNear, float zFar ) // orthographic projection from the six plane values; no check for degenerate (equal) planes
+{
+    /* function implementation based on code from STIDC SDK:           */
+    /* --------------------------------------------------------------  */
+    /* PLEASE DO NOT MODIFY THIS SECTION                               */
+    /* This prolog section is automatically generated.                 */
+    /*                                                                 */
+    /* (C)Copyright                                                    */
+    /* Sony Computer Entertainment, Inc.,                              */
+    /* Toshiba Corporation,                                            */
+    /* International Business Machines Corporation,                    */
+    /* 2001,2002.                                                      */
+    /* S/T/I Confidential Information                                  */
+    /* --------------------------------------------------------------  */
+    vec_float4 lbf, rtn;
+    vec_float4 diff, sum, inv_diff, neg_inv_diff;
+    vec_float4 diagonal, column;
+    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    union { vec_float4 v; float s[4]; } l, f, r, n, b, t; // NOTE(review): only s[0] of each union is written below; s[1..3] stay uninitialized
+    l.s[0] = left;
+    f.s[0] = zFar;
+    r.s[0] = right;
+    n.s[0] = zNear;
+    b.s[0] = bottom;
+    t.s[0] = top;
+    lbf = vec_mergeh( l.v, f.v ); // vec_mergeh(a,b) = ( a0, b0, a1, b1 )
+    rtn = vec_mergeh( r.v, n.v );
+    lbf = vec_mergeh( lbf, b.v ); // lbf = ( left, bottom, zFar, <uninitialized> )
+    rtn = vec_mergeh( rtn, t.v ); // rtn = ( right, top, zNear, <uninitialized> )
+    diff = vec_sub( rtn, lbf ); // ( r-l, t-b, n-f, - )
+    sum  = vec_add( rtn, lbf ); // ( r+l, t+b, n+f, - )
+    inv_diff = recipf4( diff ); // presumably the per-lane reciprocal (helper defined elsewhere)
+    neg_inv_diff = negatef4( inv_diff );
+    diagonal = vec_add( inv_diff, inv_diff ); // 2 * inv_diff per lane
+    column = vec_madd( sum, vec_sel( neg_inv_diff, inv_diff, _VECTORMATH_MASK_0x00F0 ), zero ); // sum * -inv_diff, except the 0x00F0 lane which uses +inv_diff
+    return Matrix4(
+        Vector4( vec_sel( zero, diagonal, _VECTORMATH_MASK_0xF000 ) ), // one lane of diagonal per column, zeros elsewhere
+        Vector4( vec_sel( zero, diagonal, _VECTORMATH_MASK_0x0F00 ) ),
+        Vector4( vec_sel( zero, diagonal, _VECTORMATH_MASK_0x00F0 ) ),
+        Vector4( vec_sel( column, ((vec_float4){1.0f,1.0f,1.0f,1.0f}), _VECTORMATH_MASK_0x000F ) ) // the lane derived from uninitialized data is replaced by 1
+    );
+}
+
+inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, bool select1 )
+{
+    // Column-wise: each column is chosen by the Vector4 select() using the same flag.
+    const Vector4 c0 = select( mat0.getCol0(), mat1.getCol0(), select1 );
+    const Vector4 c1 = select( mat0.getCol1(), mat1.getCol1(), select1 );
+    const Vector4 c2 = select( mat0.getCol2(), mat1.getCol2(), select1 );
+    const Vector4 c3 = select( mat0.getCol3(), mat1.getCol3(), select1 );
+    return Matrix4( c0, c1, c2, c3 );
+}
+
+inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, boolInVec select1 )
+{
+    // Column-wise: each column is chosen by the Vector4 select() using the same boolInVec.
+    const Vector4 c0 = select( mat0.getCol0(), mat1.getCol0(), select1 );
+    const Vector4 c1 = select( mat0.getCol1(), mat1.getCol1(), select1 );
+    const Vector4 c2 = select( mat0.getCol2(), mat1.getCol2(), select1 );
+    const Vector4 c3 = select( mat0.getCol3(), mat1.getCol3(), select1 );
+    return Matrix4( c0, c1, c2, c3 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+inline void print( const Matrix4 & mat )
+{
+    // Debug helper: hands rows 0 through 3, in that order, to the
+    // Vector4 print() overload.
+    for ( int row = 0; row < 4; ++row )
+        print( mat.getRow( row ) );
+}
+
+inline void print( const Matrix4 & mat, const char * name )
+{
+    printf("%s:\n", name); // label line first
+    print( mat ); // then the rows, via the unlabeled overload
+}
+
+#endif
+
+inline Transform3::Transform3( const Transform3 & tfrm )
+    : mCol0( tfrm.mCol0 ),    // member-wise copy of the four columns
+      mCol1( tfrm.mCol1 ),
+      mCol2( tfrm.mCol2 ),
+      mCol3( tfrm.mCol3 )
+{
+}
+
+inline Transform3::Transform3( float scalar )
+    : mCol0( scalar ),    // every column is constructed from the same scalar
+      mCol1( scalar ),
+      mCol2( scalar ),
+      mCol3( scalar )
+{
+}
+
+inline Transform3::Transform3( floatInVec scalar )
+    : mCol0( scalar ),    // every column is constructed from the same floatInVec
+      mCol1( scalar ),
+      mCol2( scalar ),
+      mCol3( scalar )
+{
+}
+
+inline Transform3::Transform3( Vector3 _col0, Vector3 _col1, Vector3 _col2, Vector3 _col3 )
+    : mCol0( _col0 ),    // columns are stored exactly as passed
+      mCol1( _col1 ),
+      mCol2( _col2 ),
+      mCol3( _col3 )
+{
+}
+
+inline Transform3::Transform3( const Matrix3 & tfrm, Vector3 translateVec )
+{
+    // Columns 0-2 come from the 3x3 matrix, column 3 from the translation.
+    setUpper3x3( tfrm ).setTranslation( translateVec );
+}
+
+inline Transform3::Transform3( Quat unitQuat, Vector3 translateVec )
+{
+    // The quaternion is converted to a Matrix3 for columns 0-2; column 3 is the translation.
+    setUpper3x3( Matrix3( unitQuat ) ).setTranslation( translateVec );
+}
+
+inline Transform3 & Transform3::setCol0( Vector3 _col0 )
+{
+    this->mCol0 = _col0;    // overwrite column 0
+    return *this;           // returned so setter calls can be chained
+}
+
+inline Transform3 & Transform3::setCol1( Vector3 _col1 )
+{
+    this->mCol1 = _col1;    // overwrite column 1
+    return *this;           // returned so setter calls can be chained
+}
+
+inline Transform3 & Transform3::setCol2( Vector3 _col2 )
+{
+    this->mCol2 = _col2;    // overwrite column 2
+    return *this;           // returned so setter calls can be chained
+}
+
+inline Transform3 & Transform3::setCol3( Vector3 _col3 )
+{
+    this->mCol3 = _col3;    // overwrite column 3 (the translation column)
+    return *this;           // returned so setter calls can be chained
+}
+
+inline Transform3 & Transform3::setCol( int col, Vector3 vec )
+{
+    (&mCol0)[col] = vec;    // indexes from mCol0 as if the columns were an array; col is not range-checked
+    return *this;
+}
+
+inline Transform3 & Transform3::setRow( int row, Vector4 vec )
+{
+    this->mCol0.setElem( row, vec.getElem( 0 ) );    // element i of vec goes to column i,
+    this->mCol1.setElem( row, vec.getElem( 1 ) );    // at the requested row
+    this->mCol2.setElem( row, vec.getElem( 2 ) );
+    this->mCol3.setElem( row, vec.getElem( 3 ) );
+    return *this;
+}
+
+inline Transform3 & Transform3::setElem( int col, int row, float val )
+{
+    (&mCol0)[col].setElem( row, val );    // same column lookup as operator[], written in place; no range checks
+    return *this;
+}
+
+inline Transform3 & Transform3::setElem( int col, int row, floatInVec val )
+{
+    // Read-modify-write: copy the column out, patch one element, store it back.
+    Vector3 column = this->getCol( col );
+    column.setElem( row, val );
+    this->setCol( col, column );
+    return *this;
+}
+
+inline const floatInVec Transform3::getElem( int col, int row ) const
+{
+    return getCol( col ).getElem( row );    // fetch the column by index, then the element within it
+}
+
+inline const Vector3 Transform3::getCol0( ) const
+{
+    return this->mCol0;    // copy of column 0
+}
+
+inline const Vector3 Transform3::getCol1( ) const
+{
+    return this->mCol1;    // copy of column 1
+}
+
+inline const Vector3 Transform3::getCol2( ) const
+{
+    return this->mCol2;    // copy of column 2
+}
+
+inline const Vector3 Transform3::getCol3( ) const
+{
+    return this->mCol3;    // copy of column 3 (the translation column)
+}
+
+inline const Vector3 Transform3::getCol( int col ) const
+{
+    return (&mCol0)[col];    // indexes from mCol0 as if the columns were an array; col is not range-checked
+}
+
+inline const Vector4 Transform3::getRow( int row ) const
+{
+    return Vector4( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ), mCol3.getElem( row ) ); // gathers element 'row' from each of the four columns
+}
+
+inline Vector3 & Transform3::operator []( int col )
+{
+    return (&mCol0)[col];    // mutable reference to the selected column; col is not range-checked
+}
+
+inline const Vector3 Transform3::operator []( int col ) const
+{
+    return (&mCol0)[col];    // const overload returns the column by value; col is not range-checked
+}
+
+inline Transform3 & Transform3::operator =( const Transform3 & tfrm )
+{
+    this->mCol0 = tfrm.mCol0;    // column-by-column copy
+    this->mCol1 = tfrm.mCol1;
+    this->mCol2 = tfrm.mCol2;
+    this->mCol3 = tfrm.mCol3;
+    return *this;
+}
+
+inline const Transform3 inverse( const Transform3 & tfrm ) // general inverse of a Transform3; nothing here guards against a zero determinant
+{
+    vec_float4 inv0, inv1, inv2, inv3;
+    vec_float4 tmp0, tmp1, tmp2, tmp3, tmp4, dot, invdet;
+    vec_float4 xxxx, yyyy, zzzz;
+    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    tmp2 = _vmathVfCross( tfrm.getCol0().get128(), tfrm.getCol1().get128() ); // presumably col0 x col1 (helper defined elsewhere)
+    tmp0 = _vmathVfCross( tfrm.getCol1().get128(), tfrm.getCol2().get128() ); // presumably col1 x col2
+    tmp1 = _vmathVfCross( tfrm.getCol2().get128(), tfrm.getCol0().get128() ); // presumably col2 x col0
+    inv3 = negatef4( tfrm.getCol3().get128() ); // negated translation column
+    dot = _vmathVfDot3( tmp2, tfrm.getCol2().get128() ); // presumably ( col0 x col1 ) . col2, i.e. the determinant of the 3x3 part
+    dot = vec_splat( dot, 0 );
+    invdet = recipf4( dot ); // presumably 1 / determinant in every lane
+    tmp3 = vec_mergeh( tmp0, tmp2 ); // this and the next four shuffles interleave tmp0/tmp1/tmp2; looks like a 3x3 transpose (perm patterns defined elsewhere)
+    tmp4 = vec_mergel( tmp0, tmp2 );
+    inv0 = vec_mergeh( tmp3, tmp1 );
+    xxxx = vec_splat( inv3, 0 ); // -translation.x in every lane
+    inv1 = vec_perm( tmp3, tmp1, _VECTORMATH_PERM_ZBWX );
+    inv2 = vec_perm( tmp4, tmp1, _VECTORMATH_PERM_XCYX );
+    yyyy = vec_splat( inv3, 1 ); // -translation.y in every lane
+    zzzz = vec_splat( inv3, 2 ); // -translation.z in every lane
+    inv3 = vec_madd( inv0, xxxx, zero ); // inv3 = inv0*(-t.x) + inv1*(-t.y) + inv2*(-t.z)
+    inv3 = vec_madd( inv1, yyyy, inv3 );
+    inv3 = vec_madd( inv2, zzzz, inv3 );
+    inv0 = vec_madd( inv0, invdet, zero ); // finally scale all four columns by invdet
+    inv1 = vec_madd( inv1, invdet, zero );
+    inv2 = vec_madd( inv2, invdet, zero );
+    inv3 = vec_madd( inv3, invdet, zero );
+    return Transform3(
+        Vector3( inv0 ),
+        Vector3( inv1 ),
+        Vector3( inv2 ),
+        Vector3( inv3 )
+    );
+}
+
+inline const Transform3 orthoInverse( const Transform3 & tfrm ) // inverse computed without any division; per the name, meant for transforms whose 3x3 part is orthogonal
+{
+    vec_float4 inv0, inv1, inv2, inv3;
+    vec_float4 tmp0, tmp1;
+    vec_float4 xxxx, yyyy, zzzz;
+    tmp0 = vec_mergeh( tfrm.getCol0().get128(), tfrm.getCol2().get128() ); // vec_mergeh(a,b) = ( a0, b0, a1, b1 )
+    tmp1 = vec_mergel( tfrm.getCol0().get128(), tfrm.getCol2().get128() ); // vec_mergel(a,b) = ( a2, b2, a3, b3 )
+    inv3 = negatef4( tfrm.getCol3().get128() ); // negated translation column
+    inv0 = vec_mergeh( tmp0, tfrm.getCol1().get128() ); // inv0..inv2: shuffled columns; looks like the transpose of the 3x3 part (perm patterns defined elsewhere)
+    xxxx = vec_splat( inv3, 0 ); // -translation.x in every lane
+    inv1 = vec_perm( tmp0, tfrm.getCol1().get128(), _VECTORMATH_PERM_ZBWX );
+    inv2 = vec_perm( tmp1, tfrm.getCol1().get128(), _VECTORMATH_PERM_XCYX );
+    yyyy = vec_splat( inv3, 1 ); // -translation.y in every lane
+    zzzz = vec_splat( inv3, 2 ); // -translation.z in every lane
+    inv3 = vec_madd( inv0, xxxx, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); // inv3 = inv0*(-t.x) + inv1*(-t.y) + inv2*(-t.z)
+    inv3 = vec_madd( inv1, yyyy, inv3 );
+    inv3 = vec_madd( inv2, zzzz, inv3 );
+    return Transform3(
+        Vector3( inv0 ),
+        Vector3( inv1 ),
+        Vector3( inv2 ),
+        Vector3( inv3 )
+    );
+}
+
+inline const Transform3 absPerElem( const Transform3 & tfrm )
+{
+    // Applies the Vector3 absPerElem() to each of the four columns, translation included.
+    const Vector3 abs0 = absPerElem( tfrm.getCol0() );
+    const Vector3 abs1 = absPerElem( tfrm.getCol1() );
+    const Vector3 abs2 = absPerElem( tfrm.getCol2() );
+    const Vector3 abs3 = absPerElem( tfrm.getCol3() );
+    return Transform3( abs0, abs1, abs2, abs3 );
+}
+
+inline const Vector3 Transform3::operator *( Vector3 vec ) const
+{
+    // Computes x*col0 + y*col1 + z*col2 with chained multiply-adds.
+    // Column 3 (the translation) is not used for a Vector3.
+    const vec_float4 splatX = vec_splat( vec.get128(), 0 );
+    const vec_float4 splatY = vec_splat( vec.get128(), 1 );
+    const vec_float4 splatZ = vec_splat( vec.get128(), 2 );
+    vec_float4 acc = vec_madd( mCol0.get128(), splatX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    acc = vec_madd( mCol1.get128(), splatY, acc );
+    acc = vec_madd( mCol2.get128(), splatZ, acc );
+    return Vector3( acc );
+}
+
+inline const Point3 Transform3::operator *( Point3 pnt ) const
+{
+    // Computes x*col0 + y*col1 + z*col2 + col3. Two independent partial sums,
+    // ( x*col0 + z*col2 ) and ( y*col1 + col3 ), are formed first and added at
+    // the end; the order of the floating-point operations is kept as is.
+    const vec_float4 zeros = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    const vec_float4 px = vec_splat( pnt.get128(), 0 );
+    const vec_float4 py = vec_splat( pnt.get128(), 1 );
+    const vec_float4 pz = vec_splat( pnt.get128(), 2 );
+    vec_float4 sumA = vec_madd( mCol0.get128(), px, zeros );
+    vec_float4 sumB = vec_madd( mCol1.get128(), py, zeros );
+    sumA = vec_madd( mCol2.get128(), pz, sumA );
+    sumB = vec_add( mCol3.get128(), sumB );
+    return Point3( vec_add( sumA, sumB ) );
+}
+
+inline const Transform3 Transform3::operator *( const Transform3 & tfrm ) const
+{
+    // Columns 0-2 of tfrm go through the Vector3 product; column 3 is wrapped
+    // in a Point3 so that the Point3 product (which also adds mCol3) is used.
+    const Transform3 & lhs = *this;
+    return Transform3(
+        lhs * tfrm.mCol0, lhs * tfrm.mCol1, lhs * tfrm.mCol2,
+        Vector3( lhs * Point3( tfrm.mCol3 ) ) );
+}
+
+inline Transform3 & Transform3::operator *=( const Transform3 & tfrm )
+{
+    // The product is fully evaluated into a temporary before being assigned back.
+    return ( *this = *this * tfrm );
+}
+
+inline const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 )
+{
+    // Element-wise product, done column by column with the Vector3 mulPerElem().
+    const Vector3 prod0 = mulPerElem( tfrm0.getCol0(), tfrm1.getCol0() );
+    const Vector3 prod1 = mulPerElem( tfrm0.getCol1(), tfrm1.getCol1() );
+    const Vector3 prod2 = mulPerElem( tfrm0.getCol2(), tfrm1.getCol2() );
+    const Vector3 prod3 = mulPerElem( tfrm0.getCol3(), tfrm1.getCol3() );
+    return Transform3( prod0, prod1, prod2, prod3 );
+}
+
+inline const Transform3 Transform3::identity( )
+{
+    // x/y/z axis vectors for columns 0-2, and Vector3( 0.0f ) as the translation column.
+    const Vector3 noOffset( 0.0f );
+    return Transform3(
+        Vector3::xAxis( ), Vector3::yAxis( ), Vector3::zAxis( ),
+        noOffset
+    );
+}
+
+inline Transform3 & Transform3::setUpper3x3( const Matrix3 & tfrm )
+{
+    this->mCol0 = tfrm.getCol0();    // replace columns 0-2 only;
+    this->mCol1 = tfrm.getCol1();    // mCol3 (the translation) is left untouched
+    this->mCol2 = tfrm.getCol2();
+    return *this;
+}
+
+inline const Matrix3 Transform3::getUpper3x3( ) const
+{
+    return Matrix3( this->mCol0, this->mCol1, this->mCol2 );    // columns 0-2 only; the translation column is dropped
+}
+
+inline Transform3 & Transform3::setTranslation( Vector3 translateVec )
+{
+    this->mCol3 = translateVec;    // the translation is stored in column 3
+    return *this;                  // returned so setter calls can be chained
+}
+
+inline const Vector3 Transform3::getTranslation( ) const
+{
+    return this->mCol3;    // the translation is stored in column 3
+}
+
+inline const Transform3 Transform3::rotationX( float radians )
+{
+    return Transform3::rotationX( floatInVec( radians ) );    // wrap the scalar angle and defer to the floatInVec overload
+}
+
+inline const Transform3 Transform3::rotationX( floatInVec radians )
+{
+    // Rotation about the x axis. With s = sin and c = cos of the angle, the columns are
+    //   xAxis, ( 0, c, s ), ( 0, -s, c )   followed by a Vector3( 0.0f ) translation.
+    // Lanes that no select writes keep the 0 they received from 'zeros'.
+    vec_float4 sine, cosine;
+    const vec_float4 zeros = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    const vec_uint4 laneY = _VECTORMATH_MASK_0x0F00;
+    const vec_uint4 laneZ = _VECTORMATH_MASK_0x00F0;
+    sincosf4( radians.get128(), &sine, &cosine );
+
+    const vec_float4 col1 = vec_sel( vec_sel( zeros, cosine, laneY ), sine, laneZ );
+    const vec_float4 col2 = vec_sel( vec_sel( zeros, negatef4( sine ), laneY ), cosine, laneZ );
+    return Transform3(
+        Vector3::xAxis( ),
+        Vector3( col1 ),
+        Vector3( col2 ),
+        Vector3( 0.0f )
+    );
+}
+
+inline const Transform3 Transform3::rotationY( float radians )
+{
+    return Transform3::rotationY( floatInVec( radians ) );    // wrap the scalar angle and defer to the floatInVec overload
+}
+
+inline const Transform3 Transform3::rotationY( floatInVec radians )
+{
+    // Rotation about the y axis. With s = sin and c = cos of the angle, the columns are
+    //   ( c, 0, -s ), yAxis, ( s, 0, c )   followed by a Vector3( 0.0f ) translation.
+    // Lanes that no select writes keep the 0 they received from 'zeros'.
+    vec_float4 sine, cosine;
+    const vec_float4 zeros = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    const vec_uint4 laneX = _VECTORMATH_MASK_0xF000;
+    const vec_uint4 laneZ = _VECTORMATH_MASK_0x00F0;
+    sincosf4( radians.get128(), &sine, &cosine );
+
+    const vec_float4 col0 = vec_sel( vec_sel( zeros, cosine, laneX ), negatef4( sine ), laneZ );
+    const vec_float4 col2 = vec_sel( vec_sel( zeros, sine, laneX ), cosine, laneZ );
+    return Transform3(
+        Vector3( col0 ),
+        Vector3::yAxis( ),
+        Vector3( col2 ),
+        Vector3( 0.0f )
+    );
+}
+
+inline const Transform3 Transform3::rotationZ( float radians )
+{
+    return Transform3::rotationZ( floatInVec( radians ) );    // wrap the scalar angle and defer to the floatInVec overload
+}
+
+inline const Transform3 Transform3::rotationZ( floatInVec radians )
+{
+    // Rotation about the z axis. With s = sin and c = cos of the angle, the columns are
+    //   ( c, s, 0 ), ( -s, c, 0 ), zAxis   followed by a Vector3( 0.0f ) translation.
+    // Lanes that no select writes keep the 0 they received from 'zeros'.
+    vec_float4 sine, cosine;
+    const vec_float4 zeros = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    const vec_uint4 laneX = _VECTORMATH_MASK_0xF000;
+    const vec_uint4 laneY = _VECTORMATH_MASK_0x0F00;
+    sincosf4( radians.get128(), &sine, &cosine );
+
+    const vec_float4 col0 = vec_sel( vec_sel( zeros, cosine, laneX ), sine, laneY );
+    const vec_float4 col1 = vec_sel( vec_sel( zeros, negatef4( sine ), laneX ), cosine, laneY );
+    return Transform3(
+        Vector3( col0 ),
+        Vector3( col1 ),
+        Vector3::zAxis( ),
+        Vector3( 0.0f )
+    );
+}
+
+inline const Transform3 Transform3::rotationZYX( Vector3 radiansXYZ ) // rotation built from the three angles packed in radiansXYZ; translation column is Vector3( 0.0f )
+{
+    vec_float4 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
+    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    angles = Vector4( radiansXYZ, 0.0f ).get128(); // w lane carries angle 0, hence sin 0 / cos 1 there
+    sincosf4( angles, &s, &c ); // sine and cosine of all four lanes in one call
+    negS = negatef4( s );
+    Z0 = vec_mergel( c, s ); // vec_mergel interleaves elements 2 and 3: ( c.z, s.z, c.w, s.w )
+    Z1 = vec_mergel( negS, c ); // ( -s.z, c.z, -s.w, c.w )
+    Z1 = vec_andc( Z1, (vec_float4)_VECTORMATH_MASK_0x000F ); // vec_andc(a,b) = a & ~b: clears the lane covered by the mask (presumably w)
+    Y0 = vec_perm( negS, c, _VECTORMATH_PERM_BBYX ); // lane shuffle of -sin/cos; the BBYX pattern is defined outside this chunk
+    Y1 = vec_perm( c, s, _VECTORMATH_PERM_BBYX );
+    X0 = vec_splat( s, 0 ); // sin of lane 0 (the X angle, per the parameter name) in every lane
+    X1 = vec_splat( c, 0 ); // cos of lane 0 in every lane
+    tmp = vec_madd( Z0, Y1, zero ); // product shared by columns 1 and 2
+    return Transform3(
+        Vector3( vec_madd( Z0, Y0, zero ) ),
+        Vector3( vec_madd( Z1, X1, vec_madd( tmp, X0, zero ) ) ),
+        Vector3( vec_nmsub( Z1, X0, vec_madd( tmp, X1, zero ) ) ), // vec_nmsub(a,b,c) = c - a*b
+        Vector3( 0.0f )
+    );
+}
+
+inline const Transform3 Transform3::rotation( float radians, Vector3 unitVec )
+{
+    return Transform3::rotation( floatInVec( radians ), unitVec );    // wrap the scalar angle and defer to the floatInVec overload
+}
+
+inline const Transform3 Transform3::rotation( floatInVec radians, Vector3 unitVec )
+{
+    return Transform3( Matrix3::rotation( radians, unitVec ), Vector3( 0.0f ) ); // Matrix3 axis-angle rotation as columns 0-2, Vector3( 0.0f ) as the translation
+}
+
+inline const Transform3 Transform3::rotation( Quat unitQuat )
+{
+    return Transform3( Matrix3( unitQuat ), Vector3( 0.0f ) ); // quaternion converted by the Matrix3 constructor; Vector3( 0.0f ) as the translation
+}
+
+inline const Transform3 Transform3::scale( Vector3 scaleVec )
+{
+    // Each column keeps exactly one lane of scaleVec; all its other lanes come from 'zeros'.
+    const vec_float4 zeros = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    const vec_float4 factors = scaleVec.get128();
+    const Vector3 col0( vec_sel( zeros, factors, _VECTORMATH_MASK_0xF000 ) );
+    const Vector3 col1( vec_sel( zeros, factors, _VECTORMATH_MASK_0x0F00 ) );
+    const Vector3 col2( vec_sel( zeros, factors, _VECTORMATH_MASK_0x00F0 ) );
+    return Transform3( col0, col1, col2, Vector3( 0.0f ) );
+}
+
+inline const Transform3 appendScale( const Transform3 & tfrm, Vector3 scaleVec )
+{
+    // Columns 0, 1 and 2 are multiplied by the x, y and z factors respectively;
+    // column 3 (the translation) is passed through unchanged.
+    const Vector3 scaledCol0 = tfrm.getCol0() * scaleVec.getX( );
+    const Vector3 scaledCol1 = tfrm.getCol1() * scaleVec.getY( );
+    const Vector3 scaledCol2 = tfrm.getCol2() * scaleVec.getZ( );
+    return Transform3( scaledCol0, scaledCol1, scaledCol2, tfrm.getCol3() );
+}
+
+inline const Transform3 prependScale( Vector3 scaleVec, const Transform3 & tfrm )
+{
+    // Every column, translation included, is multiplied element-wise by scaleVec.
+    const Vector3 c0 = mulPerElem( tfrm.getCol0(), scaleVec );
+    const Vector3 c1 = mulPerElem( tfrm.getCol1(), scaleVec );
+    const Vector3 c2 = mulPerElem( tfrm.getCol2(), scaleVec );
+    const Vector3 c3 = mulPerElem( tfrm.getCol3(), scaleVec );
+    return Transform3( c0, c1, c2, c3 );
+}
+
+inline const Transform3 Transform3::translation( Vector3 translateVec )
+{
+    // x/y/z axis vectors fill columns 0-2;
+    // translateVec becomes column 3 exactly as passed.
+    return Transform3(
+        Vector3::xAxis( ), Vector3::yAxis( ), Vector3::zAxis( ),
+        translateVec );
+}
+
+inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, bool select1 )
+{
+    // Column-wise: each column is chosen by the Vector3 select() using the same flag.
+    const Vector3 c0 = select( tfrm0.getCol0(), tfrm1.getCol0(), select1 );
+    const Vector3 c1 = select( tfrm0.getCol1(), tfrm1.getCol1(), select1 );
+    const Vector3 c2 = select( tfrm0.getCol2(), tfrm1.getCol2(), select1 );
+    const Vector3 c3 = select( tfrm0.getCol3(), tfrm1.getCol3(), select1 );
+    return Transform3( c0, c1, c2, c3 );
+}
+
+inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, boolInVec select1 )
+{
+    // Column-wise: each column is chosen by the Vector3 select() using the same boolInVec.
+    const Vector3 c0 = select( tfrm0.getCol0(), tfrm1.getCol0(), select1 );
+    const Vector3 c1 = select( tfrm0.getCol1(), tfrm1.getCol1(), select1 );
+    const Vector3 c2 = select( tfrm0.getCol2(), tfrm1.getCol2(), select1 );
+    const Vector3 c3 = select( tfrm0.getCol3(), tfrm1.getCol3(), select1 );
+    return Transform3( c0, c1, c2, c3 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+inline void print( const Transform3 & tfrm )
+{
+    // Debug helper: hands rows 0, 1 and 2 (each a Vector4) to the Vector4 print() overload.
+    for ( int row = 0; row < 3; ++row )
+        print( tfrm.getRow( row ) );
+}
+
+inline void print( const Transform3 & tfrm, const char * name )
+{
+    printf("%s:\n", name); // label line first
+    print( tfrm ); // then the rows, via the unlabeled overload
+}
+
+#endif
+
+inline Quat::Quat( const Matrix3 & tfrm ) // branch-free conversion: all four candidate quaternions are computed, then one is picked with vector selects
+{
+    vec_float4 res;
+    vec_float4 col0, col1, col2;
+    vec_float4 xx_yy, xx_yy_zz_xx, yy_zz_xx_yy, zz_xx_yy_zz, diagSum, diagDiff;
+    vec_float4 zy_xz_yx, yz_zx_xy, sum, diff;
+    vec_float4 radicand, invSqrt, scale;
+    vec_float4 res0, res1, res2, res3;
+    vec_float4 xx, yy, zz;
+    vec_uint4 select_x = _VECTORMATH_MASK_0xF000;
+    vec_uint4 select_y = _VECTORMATH_MASK_0x0F00;
+    vec_uint4 select_z = _VECTORMATH_MASK_0x00F0;
+    vec_uint4 select_w = _VECTORMATH_MASK_0x000F;
+    vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+
+    col0 = tfrm.getCol0().get128();
+    col1 = tfrm.getCol1().get128();
+    col2 = tfrm.getCol2().get128();
+
+    /* four cases: */
+    /* trace > 0 */
+    /* else */
+    /*    xx largest diagonal element */
+    /*    yy largest diagonal element */
+    /*    zz largest diagonal element */
+
+    /* compute quaternion for each case */
+
+    xx_yy = vec_sel( col0, col1, select_y ); // col0 with its y lane taken from col1
+    xx_yy_zz_xx = vec_perm( xx_yy, col2, _VECTORMATH_PERM_XYCX ); // three rotations of the diagonal, as the variable names spell out
+    yy_zz_xx_yy = vec_perm( xx_yy, col2, _VECTORMATH_PERM_YCXY );
+    zz_xx_yy_zz = vec_perm( xx_yy, col2, _VECTORMATH_PERM_CXYC );
+
+    diagSum = vec_add( vec_add( xx_yy_zz_xx, yy_zz_xx_yy ), zz_xx_yy_zz ); // xx + yy + zz (the trace) in every lane
+    diagDiff = vec_sub( vec_sub( xx_yy_zz_xx, yy_zz_xx_yy ), zz_xx_yy_zz ); // ( xx-yy-zz, yy-zz-xx, zz-xx-yy, xx-yy-zz )
+    radicand = vec_add( vec_sel( diagDiff, diagSum, select_w ), ((vec_float4){1.0f,1.0f,1.0f,1.0f}) ); // lanes x,y,z: 1 + diagDiff; lane w: 1 + trace
+    invSqrt = rsqrtf4( radicand ); // presumably per-lane 1/sqrt (helper defined elsewhere)
+
+    zy_xz_yx = vec_sel( col0, col1, select_z ); // gather off-diagonal elements, in the order the variable names give
+    zy_xz_yx = vec_perm( zy_xz_yx, col2, _VECTORMATH_PERM_ZAYX );
+    yz_zx_xy = vec_sel( col0, col1, select_x );
+    yz_zx_xy = vec_perm( yz_zx_xy, col2, _VECTORMATH_PERM_BZXX );
+
+    sum = vec_add( zy_xz_yx, yz_zx_xy );
+    diff = vec_sub( zy_xz_yx, yz_zx_xy );
+
+    scale = vec_madd( invSqrt, ((vec_float4){0.5f,0.5f,0.5f,0.5f}), zero ); // 0.5 / sqrt( radicand ), one factor per case
+    res0 = vec_perm( sum, diff, _VECTORMATH_PERM_XZYA ); // res0..res3: unscaled candidates for the xx / yy / zz / trace cases
+    res1 = vec_perm( sum, diff, _VECTORMATH_PERM_ZXXB );
+    res2 = vec_perm( sum, diff, _VECTORMATH_PERM_YXXC );
+    res3 = diff;
+    res0 = vec_sel( res0, radicand, select_x ); // each candidate gets its own radicand lane
+    res1 = vec_sel( res1, radicand, select_y );
+    res2 = vec_sel( res2, radicand, select_z );
+    res3 = vec_sel( res3, radicand, select_w );
+    res0 = vec_madd( res0, vec_splat( scale, 0 ), zero ); // scale each candidate by the factor of its own case
+    res1 = vec_madd( res1, vec_splat( scale, 1 ), zero );
+    res2 = vec_madd( res2, vec_splat( scale, 2 ), zero );
+    res3 = vec_madd( res3, vec_splat( scale, 3 ), zero );
+
+    /* determine case and select answer */
+
+    xx = vec_splat( col0, 0 );
+    yy = vec_splat( col1, 1 );
+    zz = vec_splat( col2, 2 );
+    res = vec_sel( res0, res1, vec_cmpgt( yy, xx ) ); // start from the xx case, switch to the yy case when yy > xx
+    res = vec_sel( res, res2, vec_and( vec_cmpgt( zz, xx ), vec_cmpgt( zz, yy ) ) ); // zz case when zz exceeds both
+    res = vec_sel( res, res3, vec_cmpgt( vec_splat( diagSum, 0 ), zero ) ); // trace > 0 overrides the other three
+    mVec128 = res;
+}
+
+inline const Matrix3 outer( Vector3 tfrm0, Vector3 tfrm1 )
+{
+    // Outer product: column j of the result is tfrm0 scaled by element j of tfrm1.
+    const Vector3 col0 = tfrm0 * tfrm1.getX( );
+    const Vector3 col1 = tfrm0 * tfrm1.getY( );
+    const Vector3 col2 = tfrm0 * tfrm1.getZ( );
+    return Matrix3( col0, col1, col2 );
+}
+
+inline const Matrix4 outer( Vector4 tfrm0, Vector4 tfrm1 )
+{
+    // Outer product: column j of the result is tfrm0 scaled by element j of tfrm1.
+    const Vector4 col0 = tfrm0 * tfrm1.getX( );
+    const Vector4 col1 = tfrm0 * tfrm1.getY( );
+    const Vector4 col2 = tfrm0 * tfrm1.getZ( );
+    const Vector4 col3 = tfrm0 * tfrm1.getW( );
+    return Matrix4( col0, col1, col2, col3 );
+}
+
+inline const Vector3 rowMul( Vector3 vec, const Matrix3 & mat ) // per the name, multiplies vec as a row vector by mat
+{
+    vec_float4 tmp0, tmp1, mcol0, mcol1, mcol2, res;
+    vec_float4 xxxx, yyyy, zzzz;
+    tmp0 = vec_mergeh( mat.getCol0().get128(), mat.getCol2().get128() ); // vec_mergeh(a,b) = ( a0, b0, a1, b1 )
+    tmp1 = vec_mergel( mat.getCol0().get128(), mat.getCol2().get128() ); // vec_mergel(a,b) = ( a2, b2, a3, b3 )
+    xxxx = vec_splat( vec.get128(), 0 ); // vec.x in every lane
+    mcol0 = vec_mergeh( tmp0, mat.getCol1().get128() ); // mcol0..mcol2: shuffled columns; looks like the transpose of mat (perm patterns defined elsewhere)
+    mcol1 = vec_perm( tmp0, mat.getCol1().get128(), _VECTORMATH_PERM_ZBWX );
+    mcol2 = vec_perm( tmp1, mat.getCol1().get128(), _VECTORMATH_PERM_XCYX );
+    yyyy = vec_splat( vec.get128(), 1 ); // vec.y in every lane
+    res = vec_madd( mcol0, xxxx, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); // res = mcol0*x + mcol1*y + mcol2*z
+    zzzz = vec_splat( vec.get128(), 2 ); // vec.z in every lane
+    res = vec_madd( mcol1, yyyy, res );
+    res = vec_madd( mcol2, zzzz, res );
+    return Vector3( res );
+}
+
+inline const Matrix3 crossMatrix( Vector3 vec ) // per the name, the matrix form of "cross with vec"; built from shuffles of +vec and -vec
+{
+    vec_float4 neg, res0, res1, res2;
+    neg = negatef4( vec.get128() );
+    res0 = vec_perm( vec.get128(), neg, _VECTORMATH_PERM_XZBX ); // each column mixes lanes of vec and -vec (perm patterns defined elsewhere)
+    res1 = vec_perm( vec.get128(), neg, _VECTORMATH_PERM_CXXX );
+    res2 = vec_perm( vec.get128(), neg, _VECTORMATH_PERM_YAXX );
+    res0 = vec_andc( res0, (vec_float4)_VECTORMATH_MASK_0xF000 ); // vec_andc(a,b) = a & ~b: zero one lane per column (the diagonal entry)
+    res1 = vec_andc( res1, (vec_float4)_VECTORMATH_MASK_0x0F00 );
+    res2 = vec_andc( res2, (vec_float4)_VECTORMATH_MASK_0x00F0 );
+    return Matrix3(
+        Vector3( res0 ),
+        Vector3( res1 ),
+        Vector3( res2 )
+    );
+}
+
+inline const Matrix3 crossMatrixMul( Vector3 vec, const Matrix3 & mat )
+{
+    return Matrix3( cross( vec, mat.getCol0() ), cross( vec, mat.getCol1() ), cross( vec, mat.getCol2() ) ); // column j of the result is cross( vec, column j of mat )
+}
+
+} // namespace Aos
+} // namespace Vectormath
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/ppu/cpp/mat_soa.h b/Extras/vectormathlibrary/include/vectormath/ppu/cpp/mat_soa.h
index 7868cfd4b..8c5d8319a 100644
--- a/Extras/vectormathlibrary/include/vectormath/ppu/cpp/mat_soa.h
+++ b/Extras/vectormathlibrary/include/vectormath/ppu/cpp/mat_soa.h
@@ -1,1744 +1,1744 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_MAT_SOA_CPP_H
-#define _VECTORMATH_MAT_SOA_CPP_H
-
-namespace Vectormath {
-namespace Soa {
-
-//-----------------------------------------------------------------------------
-// Constants
-
-#define _VECTORMATH_PI_OVER_2 1.570796327f
-
-//-----------------------------------------------------------------------------
-// Definitions
-
-inline Matrix3::Matrix3( const Matrix3 & mat )
-{
-    mCol0 = mat.mCol0;
-    mCol1 = mat.mCol1;
-    mCol2 = mat.mCol2;
-}
-
-inline Matrix3::Matrix3( vec_float4 scalar )
-{
-    mCol0 = Vector3( scalar );
-    mCol1 = Vector3( scalar );
-    mCol2 = Vector3( scalar );
-}
-
-inline Matrix3::Matrix3( const Quat & unitQuat )
-{
-    vec_float4 qx, qy, qz, qw, qx2, qy2, qz2, qxqx2, qyqy2, qzqz2, qxqy2, qyqz2, qzqw2, qxqz2, qyqw2, qxqw2;
-    qx = unitQuat.getX();
-    qy = unitQuat.getY();
-    qz = unitQuat.getZ();
-    qw = unitQuat.getW();
-    qx2 = vec_add( qx, qx );
-    qy2 = vec_add( qy, qy );
-    qz2 = vec_add( qz, qz );
-    qxqx2 = vec_madd( qx, qx2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    qxqy2 = vec_madd( qx, qy2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    qxqz2 = vec_madd( qx, qz2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    qxqw2 = vec_madd( qw, qx2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    qyqy2 = vec_madd( qy, qy2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    qyqz2 = vec_madd( qy, qz2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    qyqw2 = vec_madd( qw, qy2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    qzqz2 = vec_madd( qz, qz2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    qzqw2 = vec_madd( qw, qz2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    mCol0 = Vector3( vec_sub( vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), qyqy2 ), qzqz2 ), vec_add( qxqy2, qzqw2 ), vec_sub( qxqz2, qyqw2 ) );
-    mCol1 = Vector3( vec_sub( qxqy2, qzqw2 ), vec_sub( vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), qxqx2 ), qzqz2 ), vec_add( qyqz2, qxqw2 ) );
-    mCol2 = Vector3( vec_add( qxqz2, qyqw2 ), vec_sub( qyqz2, qxqw2 ), vec_sub( vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), qxqx2 ), qyqy2 ) );
-}
-
-inline Matrix3::Matrix3( const Vector3 & _col0, const Vector3 & _col1, const Vector3 & _col2 )
-{
-    mCol0 = _col0;
-    mCol1 = _col1;
-    mCol2 = _col2;
-}
-
-inline Matrix3::Matrix3( const Aos::Matrix3 & mat )
-{
-    mCol0 = Vector3( mat.getCol0() );
-    mCol1 = Vector3( mat.getCol1() );
-    mCol2 = Vector3( mat.getCol2() );
-}
-
-inline Matrix3::Matrix3( const Aos::Matrix3 & mat0, const Aos::Matrix3 & mat1, const Aos::Matrix3 & mat2, const Aos::Matrix3 & mat3 )
-{
-    mCol0 = Vector3( mat0.getCol0(), mat1.getCol0(), mat2.getCol0(), mat3.getCol0() );
-    mCol1 = Vector3( mat0.getCol1(), mat1.getCol1(), mat2.getCol1(), mat3.getCol1() );
-    mCol2 = Vector3( mat0.getCol2(), mat1.getCol2(), mat2.getCol2(), mat3.getCol2() );
-}
-
-inline void Matrix3::get4Aos( Aos::Matrix3 & result0, Aos::Matrix3 & result1, Aos::Matrix3 & result2, Aos::Matrix3 & result3 ) const
-{
-    Aos::Vector3 tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3;
-    mCol0.get4Aos( tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3 );
-    result0.setCol0( tmpV3_0 );
-    result1.setCol0( tmpV3_1 );
-    result2.setCol0( tmpV3_2 );
-    result3.setCol0( tmpV3_3 );
-    mCol1.get4Aos( tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3 );
-    result0.setCol1( tmpV3_0 );
-    result1.setCol1( tmpV3_1 );
-    result2.setCol1( tmpV3_2 );
-    result3.setCol1( tmpV3_3 );
-    mCol2.get4Aos( tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3 );
-    result0.setCol2( tmpV3_0 );
-    result1.setCol2( tmpV3_1 );
-    result2.setCol2( tmpV3_2 );
-    result3.setCol2( tmpV3_3 );
-}
-
-inline Matrix3 & Matrix3::setCol0( const Vector3 & _col0 )
-{
-    mCol0 = _col0;
-    return *this;
-}
-
-inline Matrix3 & Matrix3::setCol1( const Vector3 & _col1 )
-{
-    mCol1 = _col1;
-    return *this;
-}
-
-inline Matrix3 & Matrix3::setCol2( const Vector3 & _col2 )
-{
-    mCol2 = _col2;
-    return *this;
-}
-
-inline Matrix3 & Matrix3::setCol( int col, const Vector3 & vec )
-{
-    *(&mCol0 + col) = vec;
-    return *this;
-}
-
-inline Matrix3 & Matrix3::setRow( int row, const Vector3 & vec )
-{
-    mCol0.setElem( row, vec.getElem( 0 ) );
-    mCol1.setElem( row, vec.getElem( 1 ) );
-    mCol2.setElem( row, vec.getElem( 2 ) );
-    return *this;
-}
-
-inline Matrix3 & Matrix3::setElem( int col, int row, vec_float4 val )
-{
-    Vector3 tmpV3_0;
-    tmpV3_0 = this->getCol( col );
-    tmpV3_0.setElem( row, val );
-    this->setCol( col, tmpV3_0 );
-    return *this;
-}
-
-inline vec_float4 Matrix3::getElem( int col, int row ) const
-{
-    return this->getCol( col ).getElem( row );
-}
-
-inline const Vector3 Matrix3::getCol0( ) const
-{
-    return mCol0;
-}
-
-inline const Vector3 Matrix3::getCol1( ) const
-{
-    return mCol1;
-}
-
-inline const Vector3 Matrix3::getCol2( ) const
-{
-    return mCol2;
-}
-
-inline const Vector3 Matrix3::getCol( int col ) const
-{
-    return *(&mCol0 + col);
-}
-
-inline const Vector3 Matrix3::getRow( int row ) const
-{
-    return Vector3( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ) );
-}
-
-inline Vector3 & Matrix3::operator []( int col )
-{
-    return *(&mCol0 + col);
-}
-
-inline const Vector3 Matrix3::operator []( int col ) const
-{
-    return *(&mCol0 + col);
-}
-
-inline Matrix3 & Matrix3::operator =( const Matrix3 & mat )
-{
-    mCol0 = mat.mCol0;
-    mCol1 = mat.mCol1;
-    mCol2 = mat.mCol2;
-    return *this;
-}
-
-inline const Matrix3 transpose( const Matrix3 & mat )
-{
-    return Matrix3(
-        Vector3( mat.getCol0().getX(), mat.getCol1().getX(), mat.getCol2().getX() ),
-        Vector3( mat.getCol0().getY(), mat.getCol1().getY(), mat.getCol2().getY() ),
-        Vector3( mat.getCol0().getZ(), mat.getCol1().getZ(), mat.getCol2().getZ() )
-    );
-}
-
-inline const Matrix3 inverse( const Matrix3 & mat )
-{
-    Vector3 tmp0, tmp1, tmp2;
-    vec_float4 detinv;
-    tmp0 = cross( mat.getCol1(), mat.getCol2() );
-    tmp1 = cross( mat.getCol2(), mat.getCol0() );
-    tmp2 = cross( mat.getCol0(), mat.getCol1() );
-    detinv = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), dot( mat.getCol2(), tmp2 ) );
-    return Matrix3(
-        Vector3( vec_madd( tmp0.getX(), detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp1.getX(), detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp2.getX(), detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),
-        Vector3( vec_madd( tmp0.getY(), detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp1.getY(), detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp2.getY(), detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),
-        Vector3( vec_madd( tmp0.getZ(), detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp1.getZ(), detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp2.getZ(), detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) )
-    );
-}
-
-inline vec_float4 determinant( const Matrix3 & mat )
-{
-    return dot( mat.getCol2(), cross( mat.getCol0(), mat.getCol1() ) );
-}
-
-inline const Matrix3 Matrix3::operator +( const Matrix3 & mat ) const
-{
-    return Matrix3(
-        ( mCol0 + mat.mCol0 ),
-        ( mCol1 + mat.mCol1 ),
-        ( mCol2 + mat.mCol2 )
-    );
-}
-
-inline const Matrix3 Matrix3::operator -( const Matrix3 & mat ) const
-{
-    return Matrix3(
-        ( mCol0 - mat.mCol0 ),
-        ( mCol1 - mat.mCol1 ),
-        ( mCol2 - mat.mCol2 )
-    );
-}
-
-inline Matrix3 & Matrix3::operator +=( const Matrix3 & mat )
-{
-    *this = *this + mat;
-    return *this;
-}
-
-inline Matrix3 & Matrix3::operator -=( const Matrix3 & mat )
-{
-    *this = *this - mat;
-    return *this;
-}
-
-inline const Matrix3 Matrix3::operator -( ) const
-{
-    return Matrix3(
-        ( -mCol0 ),
-        ( -mCol1 ),
-        ( -mCol2 )
-    );
-}
-
-inline const Matrix3 absPerElem( const Matrix3 & mat )
-{
-    return Matrix3(
-        absPerElem( mat.getCol0() ),
-        absPerElem( mat.getCol1() ),
-        absPerElem( mat.getCol2() )
-    );
-}
-
-inline const Matrix3 Matrix3::operator *( vec_float4 scalar ) const
-{
-    return Matrix3(
-        ( mCol0 * scalar ),
-        ( mCol1 * scalar ),
-        ( mCol2 * scalar )
-    );
-}
-
-inline Matrix3 & Matrix3::operator *=( vec_float4 scalar )
-{
-    *this = *this * scalar;
-    return *this;
-}
-
-inline const Matrix3 operator *( vec_float4 scalar, const Matrix3 & mat )
-{
-    return mat * scalar;
-}
-
-inline const Vector3 Matrix3::operator *( const Vector3 & vec ) const
-{
-    return Vector3(
-        vec_add( vec_add( vec_madd( mCol0.getX(), vec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mCol1.getX(), vec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol2.getX(), vec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),
-        vec_add( vec_add( vec_madd( mCol0.getY(), vec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mCol1.getY(), vec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol2.getY(), vec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),
-        vec_add( vec_add( vec_madd( mCol0.getZ(), vec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mCol1.getZ(), vec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol2.getZ(), vec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) )
-    );
-}
-
-inline const Matrix3 Matrix3::operator *( const Matrix3 & mat ) const
-{
-    return Matrix3(
-        ( *this * mat.mCol0 ),
-        ( *this * mat.mCol1 ),
-        ( *this * mat.mCol2 )
-    );
-}
-
-inline Matrix3 & Matrix3::operator *=( const Matrix3 & mat )
-{
-    *this = *this * mat;
-    return *this;
-}
-
-inline const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 )
-{
-    return Matrix3(
-        mulPerElem( mat0.getCol0(), mat1.getCol0() ),
-        mulPerElem( mat0.getCol1(), mat1.getCol1() ),
-        mulPerElem( mat0.getCol2(), mat1.getCol2() )
-    );
-}
-
-inline const Matrix3 Matrix3::identity( )
-{
-    return Matrix3(
-        Vector3::xAxis( ),
-        Vector3::yAxis( ),
-        Vector3::zAxis( )
-    );
-}
-
-inline const Matrix3 Matrix3::rotationX( vec_float4 radians )
-{
-    vec_float4 s, c;
-    sincosf4( radians, &s, &c );
-    return Matrix3(
-        Vector3::xAxis( ),
-        Vector3( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), c, s ),
-        Vector3( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), negatef4( s ), c )
-    );
-}
-
-inline const Matrix3 Matrix3::rotationY( vec_float4 radians )
-{
-    vec_float4 s, c;
-    sincosf4( radians, &s, &c );
-    return Matrix3(
-        Vector3( c, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), negatef4( s ) ),
-        Vector3::yAxis( ),
-        Vector3( s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), c )
-    );
-}
-
-inline const Matrix3 Matrix3::rotationZ( vec_float4 radians )
-{
-    vec_float4 s, c;
-    sincosf4( radians, &s, &c );
-    return Matrix3(
-        Vector3( c, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        Vector3( negatef4( s ), c, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        Vector3::zAxis( )
-    );
-}
-
-inline const Matrix3 Matrix3::rotationZYX( const Vector3 & radiansXYZ )
-{
-    vec_float4 sX, cX, sY, cY, sZ, cZ, tmp0, tmp1;
-    sincosf4( radiansXYZ.getX(), &sX, &cX );
-    sincosf4( radiansXYZ.getY(), &sY, &cY );
-    sincosf4( radiansXYZ.getZ(), &sZ, &cZ );
-    tmp0 = vec_madd( cZ, sY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    tmp1 = vec_madd( sZ, sY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    return Matrix3(
-        Vector3( vec_madd( cZ, cY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( sZ, cY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), negatef4( sY ) ),
-        Vector3( vec_sub( vec_madd( tmp0, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( sZ, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_add( vec_madd( tmp1, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( cZ, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( cY, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),
-        Vector3( vec_add( vec_madd( tmp0, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( sZ, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_sub( vec_madd( tmp1, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( cZ, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( cY, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) )
-    );
-}
-
-inline const Matrix3 Matrix3::rotation( vec_float4 radians, const Vector3 & unitVec )
-{
-    vec_float4 x, y, z, s, c, oneMinusC, xy, yz, zx;
-    sincosf4( radians, &s, &c );
-    x = unitVec.getX();
-    y = unitVec.getY();
-    z = unitVec.getZ();
-    xy = vec_madd( x, y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    yz = vec_madd( y, z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    zx = vec_madd( z, x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    oneMinusC = vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), c );
-    return Matrix3(
-        Vector3( vec_add( vec_madd( vec_madd( x, x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), c ), vec_add( vec_madd( xy, oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( z, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_sub( vec_madd( zx, oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( y, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ) ),
-        Vector3( vec_sub( vec_madd( xy, oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( z, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_add( vec_madd( vec_madd( y, y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), c ), vec_add( vec_madd( yz, oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( x, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ) ),
-        Vector3( vec_add( vec_madd( zx, oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( y, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_sub( vec_madd( yz, oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( x, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_add( vec_madd( vec_madd( z, z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), c ) )
-    );
-}
-
-inline const Matrix3 Matrix3::rotation( const Quat & unitQuat )
-{
-    return Matrix3( unitQuat );
-}
-
-inline const Matrix3 Matrix3::scale( const Vector3 & scaleVec )
-{
-    return Matrix3(
-        Vector3( scaleVec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        Vector3( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), scaleVec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        Vector3( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), scaleVec.getZ() )
-    );
-}
-
-inline const Matrix3 appendScale( const Matrix3 & mat, const Vector3 & scaleVec )
-{
-    return Matrix3(
-        ( mat.getCol0() * scaleVec.getX( ) ),
-        ( mat.getCol1() * scaleVec.getY( ) ),
-        ( mat.getCol2() * scaleVec.getZ( ) )
-    );
-}
-
-inline const Matrix3 prependScale( const Vector3 & scaleVec, const Matrix3 & mat )
-{
-    return Matrix3(
-        mulPerElem( mat.getCol0(), scaleVec ),
-        mulPerElem( mat.getCol1(), scaleVec ),
-        mulPerElem( mat.getCol2(), scaleVec )
-    );
-}
-
-inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, vec_uint4 select1 )
-{
-    return Matrix3(
-        select( mat0.getCol0(), mat1.getCol0(), select1 ),
-        select( mat0.getCol1(), mat1.getCol1(), select1 ),
-        select( mat0.getCol2(), mat1.getCol2(), select1 )
-    );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( const Matrix3 & mat )
-{
-    Aos::Matrix3 mat0, mat1, mat2, mat3;
-    mat.get4Aos( mat0, mat1, mat2, mat3 );
-    printf("slot 0:\n");
-    print( mat0 );
-    printf("slot 1:\n");
-    print( mat1 );
-    printf("slot 2:\n");
-    print( mat2 );
-    printf("slot 3:\n");
-    print( mat3 );
-}
-
-inline void print( const Matrix3 & mat, const char * name )
-{
-    printf("%s:\n", name);
-    print( mat );
-}
-
-#endif
-
-inline Matrix4::Matrix4( const Matrix4 & mat )
-{
-    mCol0 = mat.mCol0;
-    mCol1 = mat.mCol1;
-    mCol2 = mat.mCol2;
-    mCol3 = mat.mCol3;
-}
-
-inline Matrix4::Matrix4( vec_float4 scalar )
-{
-    mCol0 = Vector4( scalar );
-    mCol1 = Vector4( scalar );
-    mCol2 = Vector4( scalar );
-    mCol3 = Vector4( scalar );
-}
-
-inline Matrix4::Matrix4( const Transform3 & mat )
-{
-    mCol0 = Vector4( mat.getCol0(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    mCol1 = Vector4( mat.getCol1(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    mCol2 = Vector4( mat.getCol2(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    mCol3 = Vector4( mat.getCol3(), ((vec_float4){1.0f,1.0f,1.0f,1.0f}) );
-}
-
-inline Matrix4::Matrix4( const Vector4 & _col0, const Vector4 & _col1, const Vector4 & _col2, const Vector4 & _col3 )
-{
-    mCol0 = _col0;
-    mCol1 = _col1;
-    mCol2 = _col2;
-    mCol3 = _col3;
-}
-
-inline Matrix4::Matrix4( const Matrix3 & mat, const Vector3 & translateVec )
-{
-    mCol0 = Vector4( mat.getCol0(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    mCol1 = Vector4( mat.getCol1(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    mCol2 = Vector4( mat.getCol2(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    mCol3 = Vector4( translateVec, ((vec_float4){1.0f,1.0f,1.0f,1.0f}) );
-}
-
-inline Matrix4::Matrix4( const Quat & unitQuat, const Vector3 & translateVec )
-{
-    Matrix3 mat;
-    mat = Matrix3( unitQuat );
-    mCol0 = Vector4( mat.getCol0(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    mCol1 = Vector4( mat.getCol1(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    mCol2 = Vector4( mat.getCol2(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    mCol3 = Vector4( translateVec, ((vec_float4){1.0f,1.0f,1.0f,1.0f}) );
-}
-
-inline Matrix4::Matrix4( const Aos::Matrix4 & mat )
-{
-    mCol0 = Vector4( mat.getCol0() );
-    mCol1 = Vector4( mat.getCol1() );
-    mCol2 = Vector4( mat.getCol2() );
-    mCol3 = Vector4( mat.getCol3() );
-}
-
-inline Matrix4::Matrix4( const Aos::Matrix4 & mat0, const Aos::Matrix4 & mat1, const Aos::Matrix4 & mat2, const Aos::Matrix4 & mat3 )
-{
-    mCol0 = Vector4( mat0.getCol0(), mat1.getCol0(), mat2.getCol0(), mat3.getCol0() );
-    mCol1 = Vector4( mat0.getCol1(), mat1.getCol1(), mat2.getCol1(), mat3.getCol1() );
-    mCol2 = Vector4( mat0.getCol2(), mat1.getCol2(), mat2.getCol2(), mat3.getCol2() );
-    mCol3 = Vector4( mat0.getCol3(), mat1.getCol3(), mat2.getCol3(), mat3.getCol3() );
-}
-
-inline void Matrix4::get4Aos( Aos::Matrix4 & result0, Aos::Matrix4 & result1, Aos::Matrix4 & result2, Aos::Matrix4 & result3 ) const
-{
-    Aos::Vector4 tmpV4_0, tmpV4_1, tmpV4_2, tmpV4_3;
-    mCol0.get4Aos( tmpV4_0, tmpV4_1, tmpV4_2, tmpV4_3 );
-    result0.setCol0( tmpV4_0 );
-    result1.setCol0( tmpV4_1 );
-    result2.setCol0( tmpV4_2 );
-    result3.setCol0( tmpV4_3 );
-    mCol1.get4Aos( tmpV4_0, tmpV4_1, tmpV4_2, tmpV4_3 );
-    result0.setCol1( tmpV4_0 );
-    result1.setCol1( tmpV4_1 );
-    result2.setCol1( tmpV4_2 );
-    result3.setCol1( tmpV4_3 );
-    mCol2.get4Aos( tmpV4_0, tmpV4_1, tmpV4_2, tmpV4_3 );
-    result0.setCol2( tmpV4_0 );
-    result1.setCol2( tmpV4_1 );
-    result2.setCol2( tmpV4_2 );
-    result3.setCol2( tmpV4_3 );
-    mCol3.get4Aos( tmpV4_0, tmpV4_1, tmpV4_2, tmpV4_3 );
-    result0.setCol3( tmpV4_0 );
-    result1.setCol3( tmpV4_1 );
-    result2.setCol3( tmpV4_2 );
-    result3.setCol3( tmpV4_3 );
-}
-
-inline Matrix4 & Matrix4::setCol0( const Vector4 & _col0 )
-{
-    mCol0 = _col0;
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setCol1( const Vector4 & _col1 )
-{
-    mCol1 = _col1;
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setCol2( const Vector4 & _col2 )
-{
-    mCol2 = _col2;
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setCol3( const Vector4 & _col3 )
-{
-    mCol3 = _col3;
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setCol( int col, const Vector4 & vec )
-{
-    *(&mCol0 + col) = vec;
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setRow( int row, const Vector4 & vec )
-{
-    mCol0.setElem( row, vec.getElem( 0 ) );
-    mCol1.setElem( row, vec.getElem( 1 ) );
-    mCol2.setElem( row, vec.getElem( 2 ) );
-    mCol3.setElem( row, vec.getElem( 3 ) );
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setElem( int col, int row, vec_float4 val )
-{
-    Vector4 tmpV3_0;
-    tmpV3_0 = this->getCol( col );
-    tmpV3_0.setElem( row, val );
-    this->setCol( col, tmpV3_0 );
-    return *this;
-}
-
-inline vec_float4 Matrix4::getElem( int col, int row ) const
-{
-    return this->getCol( col ).getElem( row );
-}
-
-inline const Vector4 Matrix4::getCol0( ) const
-{
-    return mCol0;
-}
-
-inline const Vector4 Matrix4::getCol1( ) const
-{
-    return mCol1;
-}
-
-inline const Vector4 Matrix4::getCol2( ) const
-{
-    return mCol2;
-}
-
-inline const Vector4 Matrix4::getCol3( ) const
-{
-    return mCol3;
-}
-
-inline const Vector4 Matrix4::getCol( int col ) const
-{
-    return *(&mCol0 + col);
-}
-
-inline const Vector4 Matrix4::getRow( int row ) const
-{
-    return Vector4( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ), mCol3.getElem( row ) );
-}
-
-inline Vector4 & Matrix4::operator []( int col )
-{
-    return *(&mCol0 + col);
-}
-
-inline const Vector4 Matrix4::operator []( int col ) const
-{
-    return *(&mCol0 + col);
-}
-
-inline Matrix4 & Matrix4::operator =( const Matrix4 & mat )
-{
-    mCol0 = mat.mCol0;
-    mCol1 = mat.mCol1;
-    mCol2 = mat.mCol2;
-    mCol3 = mat.mCol3;
-    return *this;
-}
-
-inline const Matrix4 transpose( const Matrix4 & mat )
-{
-    return Matrix4(
-        Vector4( mat.getCol0().getX(), mat.getCol1().getX(), mat.getCol2().getX(), mat.getCol3().getX() ),
-        Vector4( mat.getCol0().getY(), mat.getCol1().getY(), mat.getCol2().getY(), mat.getCol3().getY() ),
-        Vector4( mat.getCol0().getZ(), mat.getCol1().getZ(), mat.getCol2().getZ(), mat.getCol3().getZ() ),
-        Vector4( mat.getCol0().getW(), mat.getCol1().getW(), mat.getCol2().getW(), mat.getCol3().getW() )
-    );
-}
-
-inline const Matrix4 inverse( const Matrix4 & mat )
-{
-    Vector4 res0, res1, res2, res3;
-    vec_float4 mA, mB, mC, mD, mE, mF, mG, mH, mI, mJ, mK, mL, mM, mN, mO, mP, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, detInv;
-    mA = mat.getCol0().getX();
-    mB = mat.getCol0().getY();
-    mC = mat.getCol0().getZ();
-    mD = mat.getCol0().getW();
-    mE = mat.getCol1().getX();
-    mF = mat.getCol1().getY();
-    mG = mat.getCol1().getZ();
-    mH = mat.getCol1().getW();
-    mI = mat.getCol2().getX();
-    mJ = mat.getCol2().getY();
-    mK = mat.getCol2().getZ();
-    mL = mat.getCol2().getW();
-    mM = mat.getCol3().getX();
-    mN = mat.getCol3().getY();
-    mO = mat.getCol3().getZ();
-    mP = mat.getCol3().getW();
-    tmp0 = vec_sub( vec_madd( mK, mD, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mC, mL, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmp1 = vec_sub( vec_madd( mO, mH, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mG, mP, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmp2 = vec_sub( vec_madd( mB, mK, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mJ, mC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmp3 = vec_sub( vec_madd( mF, mO, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mN, mG, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmp4 = vec_sub( vec_madd( mJ, mD, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mB, mL, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmp5 = vec_sub( vec_madd( mN, mH, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mF, mP, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    res0.setX( vec_sub( vec_sub( vec_madd( mJ, tmp1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mL, tmp3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mK, tmp5, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ) );
-    res0.setY( vec_sub( vec_sub( vec_madd( mN, tmp0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mP, tmp2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mO, tmp4, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ) );
-    res0.setZ( vec_sub( vec_add( vec_madd( mD, tmp3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mC, tmp5, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mB, tmp1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ) );
-    res0.setW( vec_sub( vec_add( vec_madd( mH, tmp2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mG, tmp4, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mF, tmp0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ) );
-    detInv = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec_add( vec_add( vec_add( vec_madd( mA, res0.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mE, res0.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mI, res0.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mM, res0.getW(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ) );
-    res1.setX( vec_madd( mI, tmp1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    res1.setY( vec_madd( mM, tmp0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    res1.setZ( vec_madd( mA, tmp1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    res1.setW( vec_madd( mE, tmp0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    res3.setX( vec_madd( mI, tmp3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    res3.setY( vec_madd( mM, tmp2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    res3.setZ( vec_madd( mA, tmp3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    res3.setW( vec_madd( mE, tmp2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    res2.setX( vec_madd( mI, tmp5, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    res2.setY( vec_madd( mM, tmp4, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    res2.setZ( vec_madd( mA, tmp5, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    res2.setW( vec_madd( mE, tmp4, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmp0 = vec_sub( vec_madd( mI, mB, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mA, mJ, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmp1 = vec_sub( vec_madd( mM, mF, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mE, mN, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmp2 = vec_sub( vec_madd( mI, mD, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mA, mL, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmp3 = vec_sub( vec_madd( mM, mH, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mE, mP, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmp4 = vec_sub( vec_madd( mI, mC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mA, mK, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmp5 = vec_sub( vec_madd( mM, mG, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mE, mO, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    res2.setX( vec_add( vec_sub( vec_madd( mL, tmp1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mJ, tmp3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), res2.getX() ) );
-    res2.setY( vec_add( vec_sub( vec_madd( mP, tmp0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mN, tmp2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), res2.getY() ) );
-    res2.setZ( vec_sub( vec_sub( vec_madd( mB, tmp3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mD, tmp1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), res2.getZ() ) );
-    res2.setW( vec_sub( vec_sub( vec_madd( mF, tmp2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mH, tmp0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), res2.getW() ) );
-    res3.setX( vec_add( vec_sub( vec_madd( mJ, tmp5, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mK, tmp1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), res3.getX() ) );
-    res3.setY( vec_add( vec_sub( vec_madd( mN, tmp4, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mO, tmp0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), res3.getY() ) );
-    res3.setZ( vec_sub( vec_sub( vec_madd( mC, tmp1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mB, tmp5, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), res3.getZ() ) );
-    res3.setW( vec_sub( vec_sub( vec_madd( mG, tmp0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mF, tmp4, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), res3.getW() ) );
-    res1.setX( vec_sub( vec_sub( vec_madd( mK, tmp3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mL, tmp5, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), res1.getX() ) );
-    res1.setY( vec_sub( vec_sub( vec_madd( mO, tmp2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mP, tmp4, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), res1.getY() ) );
-    res1.setZ( vec_add( vec_sub( vec_madd( mD, tmp5, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mC, tmp3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), res1.getZ() ) );
-    res1.setW( vec_add( vec_sub( vec_madd( mH, tmp4, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mG, tmp2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), res1.getW() ) );
-    return Matrix4(
-        ( res0 * detInv ),
-        ( res1 * detInv ),
-        ( res2 * detInv ),
-        ( res3 * detInv )
-    );
-}
-
-inline const Matrix4 affineInverse( const Matrix4 & mat )
-{
-    Transform3 affineMat;
-    affineMat.setCol0( mat.getCol0().getXYZ( ) );
-    affineMat.setCol1( mat.getCol1().getXYZ( ) );
-    affineMat.setCol2( mat.getCol2().getXYZ( ) );
-    affineMat.setCol3( mat.getCol3().getXYZ( ) );
-    return Matrix4( inverse( affineMat ) );
-}
-
-inline const Matrix4 orthoInverse( const Matrix4 & mat )
-{
-    Transform3 affineMat;
-    affineMat.setCol0( mat.getCol0().getXYZ( ) );
-    affineMat.setCol1( mat.getCol1().getXYZ( ) );
-    affineMat.setCol2( mat.getCol2().getXYZ( ) );
-    affineMat.setCol3( mat.getCol3().getXYZ( ) );
-    return Matrix4( orthoInverse( affineMat ) );
-}
-
-inline vec_float4 determinant( const Matrix4 & mat )
-{
-    vec_float4 dx, dy, dz, dw, mA, mB, mC, mD, mE, mF, mG, mH, mI, mJ, mK, mL, mM, mN, mO, mP, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
-    mA = mat.getCol0().getX();
-    mB = mat.getCol0().getY();
-    mC = mat.getCol0().getZ();
-    mD = mat.getCol0().getW();
-    mE = mat.getCol1().getX();
-    mF = mat.getCol1().getY();
-    mG = mat.getCol1().getZ();
-    mH = mat.getCol1().getW();
-    mI = mat.getCol2().getX();
-    mJ = mat.getCol2().getY();
-    mK = mat.getCol2().getZ();
-    mL = mat.getCol2().getW();
-    mM = mat.getCol3().getX();
-    mN = mat.getCol3().getY();
-    mO = mat.getCol3().getZ();
-    mP = mat.getCol3().getW();
-    tmp0 = vec_sub( vec_madd( mK, mD, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mC, mL, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmp1 = vec_sub( vec_madd( mO, mH, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mG, mP, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmp2 = vec_sub( vec_madd( mB, mK, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mJ, mC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmp3 = vec_sub( vec_madd( mF, mO, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mN, mG, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmp4 = vec_sub( vec_madd( mJ, mD, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mB, mL, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmp5 = vec_sub( vec_madd( mN, mH, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mF, mP, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    dx = vec_sub( vec_sub( vec_madd( mJ, tmp1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mL, tmp3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mK, tmp5, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    dy = vec_sub( vec_sub( vec_madd( mN, tmp0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mP, tmp2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mO, tmp4, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    dz = vec_sub( vec_add( vec_madd( mD, tmp3, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mC, tmp5, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mB, tmp1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    dw = vec_sub( vec_add( vec_madd( mH, tmp2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mG, tmp4, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mF, tmp0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    return vec_add( vec_add( vec_add( vec_madd( mA, dx, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mE, dy, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mI, dz, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mM, dw, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-}
-
-inline const Matrix4 Matrix4::operator +( const Matrix4 & mat ) const
-{
-    return Matrix4(
-        ( mCol0 + mat.mCol0 ),
-        ( mCol1 + mat.mCol1 ),
-        ( mCol2 + mat.mCol2 ),
-        ( mCol3 + mat.mCol3 )
-    );
-}
-
-inline const Matrix4 Matrix4::operator -( const Matrix4 & mat ) const
-{
-    return Matrix4(
-        ( mCol0 - mat.mCol0 ),
-        ( mCol1 - mat.mCol1 ),
-        ( mCol2 - mat.mCol2 ),
-        ( mCol3 - mat.mCol3 )
-    );
-}
-
-inline Matrix4 & Matrix4::operator +=( const Matrix4 & mat )
-{
-    *this = *this + mat;
-    return *this;
-}
-
-inline Matrix4 & Matrix4::operator -=( const Matrix4 & mat )
-{
-    *this = *this - mat;
-    return *this;
-}
-
-inline const Matrix4 Matrix4::operator -( ) const
-{
-    return Matrix4(
-        ( -mCol0 ),
-        ( -mCol1 ),
-        ( -mCol2 ),
-        ( -mCol3 )
-    );
-}
-
-inline const Matrix4 absPerElem( const Matrix4 & mat )
-{
-    return Matrix4(
-        absPerElem( mat.getCol0() ),
-        absPerElem( mat.getCol1() ),
-        absPerElem( mat.getCol2() ),
-        absPerElem( mat.getCol3() )
-    );
-}
-
-inline const Matrix4 Matrix4::operator *( vec_float4 scalar ) const
-{
-    return Matrix4(
-        ( mCol0 * scalar ),
-        ( mCol1 * scalar ),
-        ( mCol2 * scalar ),
-        ( mCol3 * scalar )
-    );
-}
-
-inline Matrix4 & Matrix4::operator *=( vec_float4 scalar )
-{
-    *this = *this * scalar;
-    return *this;
-}
-
-inline const Matrix4 operator *( vec_float4 scalar, const Matrix4 & mat )
-{
-    return mat * scalar;
-}
-
-inline const Vector4 Matrix4::operator *( const Vector4 & vec ) const
-{
-    return Vector4(
-        vec_add( vec_add( vec_add( vec_madd( mCol0.getX(), vec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mCol1.getX(), vec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol2.getX(), vec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol3.getX(), vec.getW(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),
-        vec_add( vec_add( vec_add( vec_madd( mCol0.getY(), vec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mCol1.getY(), vec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol2.getY(), vec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol3.getY(), vec.getW(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),
-        vec_add( vec_add( vec_add( vec_madd( mCol0.getZ(), vec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mCol1.getZ(), vec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol2.getZ(), vec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol3.getZ(), vec.getW(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),
-        vec_add( vec_add( vec_add( vec_madd( mCol0.getW(), vec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mCol1.getW(), vec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol2.getW(), vec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol3.getW(), vec.getW(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) )
-    );
-}
-
-inline const Vector4 Matrix4::operator *( const Vector3 & vec ) const
-{
-    return Vector4(
-        vec_add( vec_add( vec_madd( mCol0.getX(), vec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mCol1.getX(), vec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol2.getX(), vec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),
-        vec_add( vec_add( vec_madd( mCol0.getY(), vec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mCol1.getY(), vec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol2.getY(), vec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),
-        vec_add( vec_add( vec_madd( mCol0.getZ(), vec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mCol1.getZ(), vec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol2.getZ(), vec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),
-        vec_add( vec_add( vec_madd( mCol0.getW(), vec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mCol1.getW(), vec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol2.getW(), vec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) )
-    );
-}
-
-inline const Vector4 Matrix4::operator *( const Point3 & pnt ) const
-{
-    return Vector4(
-        vec_add( vec_add( vec_add( vec_madd( mCol0.getX(), pnt.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mCol1.getX(), pnt.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol2.getX(), pnt.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), mCol3.getX() ),
-        vec_add( vec_add( vec_add( vec_madd( mCol0.getY(), pnt.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mCol1.getY(), pnt.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol2.getY(), pnt.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), mCol3.getY() ),
-        vec_add( vec_add( vec_add( vec_madd( mCol0.getZ(), pnt.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mCol1.getZ(), pnt.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol2.getZ(), pnt.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), mCol3.getZ() ),
-        vec_add( vec_add( vec_add( vec_madd( mCol0.getW(), pnt.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mCol1.getW(), pnt.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol2.getW(), pnt.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), mCol3.getW() )
-    );
-}
-
-inline const Matrix4 Matrix4::operator *( const Matrix4 & mat ) const
-{
-    return Matrix4(
-        ( *this * mat.mCol0 ),
-        ( *this * mat.mCol1 ),
-        ( *this * mat.mCol2 ),
-        ( *this * mat.mCol3 )
-    );
-}
-
-inline Matrix4 & Matrix4::operator *=( const Matrix4 & mat )
-{
-    *this = *this * mat;
-    return *this;
-}
-
-inline const Matrix4 Matrix4::operator *( const Transform3 & tfrm ) const
-{
-    return Matrix4(
-        ( *this * tfrm.getCol0() ),
-        ( *this * tfrm.getCol1() ),
-        ( *this * tfrm.getCol2() ),
-        ( *this * Point3( tfrm.getCol3() ) )
-    );
-}
-
-inline Matrix4 & Matrix4::operator *=( const Transform3 & tfrm )
-{
-    *this = *this * tfrm;
-    return *this;
-}
-
-inline const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 )
-{
-    return Matrix4(
-        mulPerElem( mat0.getCol0(), mat1.getCol0() ),
-        mulPerElem( mat0.getCol1(), mat1.getCol1() ),
-        mulPerElem( mat0.getCol2(), mat1.getCol2() ),
-        mulPerElem( mat0.getCol3(), mat1.getCol3() )
-    );
-}
-
-inline const Matrix4 Matrix4::identity( )
-{
-    return Matrix4(
-        Vector4::xAxis( ),
-        Vector4::yAxis( ),
-        Vector4::zAxis( ),
-        Vector4::wAxis( )
-    );
-}
-
-inline Matrix4 & Matrix4::setUpper3x3( const Matrix3 & mat3 )
-{
-    mCol0.setXYZ( mat3.getCol0() );
-    mCol1.setXYZ( mat3.getCol1() );
-    mCol2.setXYZ( mat3.getCol2() );
-    return *this;
-}
-
-inline const Matrix3 Matrix4::getUpper3x3( ) const
-{
-    return Matrix3(
-        mCol0.getXYZ( ),
-        mCol1.getXYZ( ),
-        mCol2.getXYZ( )
-    );
-}
-
-inline Matrix4 & Matrix4::setTranslation( const Vector3 & translateVec )
-{
-    mCol3.setXYZ( translateVec );
-    return *this;
-}
-
-inline const Vector3 Matrix4::getTranslation( ) const
-{
-    return mCol3.getXYZ( );
-}
-
-inline const Matrix4 Matrix4::rotationX( vec_float4 radians )
-{
-    vec_float4 s, c;
-    sincosf4( radians, &s, &c );
-    return Matrix4(
-        Vector4::xAxis( ),
-        Vector4( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), c, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        Vector4( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), negatef4( s ), c, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        Vector4::wAxis( )
-    );
-}
-
-inline const Matrix4 Matrix4::rotationY( vec_float4 radians )
-{
-    vec_float4 s, c;
-    sincosf4( radians, &s, &c );
-    return Matrix4(
-        Vector4( c, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), negatef4( s ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        Vector4::yAxis( ),
-        Vector4( s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), c, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        Vector4::wAxis( )
-    );
-}
-
-inline const Matrix4 Matrix4::rotationZ( vec_float4 radians )
-{
-    vec_float4 s, c;
-    sincosf4( radians, &s, &c );
-    return Matrix4(
-        Vector4( c, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        Vector4( negatef4( s ), c, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        Vector4::zAxis( ),
-        Vector4::wAxis( )
-    );
-}
-
-inline const Matrix4 Matrix4::rotationZYX( const Vector3 & radiansXYZ )
-{
-    vec_float4 sX, cX, sY, cY, sZ, cZ, tmp0, tmp1;
-    sincosf4( radiansXYZ.getX(), &sX, &cX );
-    sincosf4( radiansXYZ.getY(), &sY, &cY );
-    sincosf4( radiansXYZ.getZ(), &sZ, &cZ );
-    tmp0 = vec_madd( cZ, sY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    tmp1 = vec_madd( sZ, sY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    return Matrix4(
-        Vector4( vec_madd( cZ, cY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( sZ, cY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), negatef4( sY ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        Vector4( vec_sub( vec_madd( tmp0, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( sZ, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_add( vec_madd( tmp1, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( cZ, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( cY, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        Vector4( vec_add( vec_madd( tmp0, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( sZ, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_sub( vec_madd( tmp1, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( cZ, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( cY, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        Vector4::wAxis( )
-    );
-}
-
-inline const Matrix4 Matrix4::rotation( vec_float4 radians, const Vector3 & unitVec )
-{
-    vec_float4 x, y, z, s, c, oneMinusC, xy, yz, zx;
-    sincosf4( radians, &s, &c );
-    x = unitVec.getX();
-    y = unitVec.getY();
-    z = unitVec.getZ();
-    xy = vec_madd( x, y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    yz = vec_madd( y, z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    zx = vec_madd( z, x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    oneMinusC = vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), c );
-    return Matrix4(
-        Vector4( vec_add( vec_madd( vec_madd( x, x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), c ), vec_add( vec_madd( xy, oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( z, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_sub( vec_madd( zx, oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( y, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        Vector4( vec_sub( vec_madd( xy, oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( z, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_add( vec_madd( vec_madd( y, y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), c ), vec_add( vec_madd( yz, oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( x, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        Vector4( vec_add( vec_madd( zx, oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( y, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_sub( vec_madd( yz, oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( x, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_add( vec_madd( vec_madd( z, z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), c ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        Vector4::wAxis( )
-    );
-}
-
-inline const Matrix4 Matrix4::rotation( const Quat & unitQuat )
-{
-    return Matrix4( Transform3::rotation( unitQuat ) );
-}
-
-inline const Matrix4 Matrix4::scale( const Vector3 & scaleVec )
-{
-    return Matrix4(
-        Vector4( scaleVec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        Vector4( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), scaleVec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        Vector4( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), scaleVec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        Vector4::wAxis( )
-    );
-}
-
-inline const Matrix4 appendScale( const Matrix4 & mat, const Vector3 & scaleVec )
-{
-    return Matrix4(
-        ( mat.getCol0() * scaleVec.getX( ) ),
-        ( mat.getCol1() * scaleVec.getY( ) ),
-        ( mat.getCol2() * scaleVec.getZ( ) ),
-        mat.getCol3()
-    );
-}
-
-inline const Matrix4 prependScale( const Vector3 & scaleVec, const Matrix4 & mat )
-{
-    Vector4 scale4;
-    scale4 = Vector4( scaleVec, ((vec_float4){1.0f,1.0f,1.0f,1.0f}) );
-    return Matrix4(
-        mulPerElem( mat.getCol0(), scale4 ),
-        mulPerElem( mat.getCol1(), scale4 ),
-        mulPerElem( mat.getCol2(), scale4 ),
-        mulPerElem( mat.getCol3(), scale4 )
-    );
-}
-
-inline const Matrix4 Matrix4::translation( const Vector3 & translateVec )
-{
-    return Matrix4(
-        Vector4::xAxis( ),
-        Vector4::yAxis( ),
-        Vector4::zAxis( ),
-        Vector4( translateVec, ((vec_float4){1.0f,1.0f,1.0f,1.0f}) )
-    );
-}
-
-inline const Matrix4 Matrix4::lookAt( const Point3 & eyePos, const Point3 & lookAtPos, const Vector3 & upVec )
-{
-    Matrix4 m4EyeFrame;
-    Vector3 v3X, v3Y, v3Z;
-    v3Y = normalize( upVec );
-    v3Z = normalize( ( eyePos - lookAtPos ) );
-    v3X = normalize( cross( v3Y, v3Z ) );
-    v3Y = cross( v3Z, v3X );
-    m4EyeFrame = Matrix4( Vector4( v3X ), Vector4( v3Y ), Vector4( v3Z ), Vector4( eyePos ) );
-    return orthoInverse( m4EyeFrame );
-}
-
-inline const Matrix4 Matrix4::perspective( vec_float4 fovyRadians, vec_float4 aspect, vec_float4 zNear, vec_float4 zFar )
-{
-    vec_float4 f, rangeInv;
-    f = tanf4( vec_sub( ((vec_float4){_VECTORMATH_PI_OVER_2,_VECTORMATH_PI_OVER_2,_VECTORMATH_PI_OVER_2,_VECTORMATH_PI_OVER_2}), vec_madd( ((vec_float4){0.5f,0.5f,0.5f,0.5f}), fovyRadians, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ) );
-    rangeInv = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec_sub( zNear, zFar ) );
-    return Matrix4(
-        Vector4( divf4( f, aspect ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        Vector4( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), f, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        Vector4( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), vec_madd( vec_add( zNear, zFar ), rangeInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), ((vec_float4){-1.0f,-1.0f,-1.0f,-1.0f}) ),
-        Vector4( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), vec_madd( vec_madd( vec_madd( zNear, zFar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), rangeInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), ((vec_float4){2.0f,2.0f,2.0f,2.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) )
-    );
-}
-
-inline const Matrix4 Matrix4::frustum( vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar )
-{
-    vec_float4 sum_rl, sum_tb, sum_nf, inv_rl, inv_tb, inv_nf, n2;
-    sum_rl = vec_add( right, left );
-    sum_tb = vec_add( top, bottom );
-    sum_nf = vec_add( zNear, zFar );
-    inv_rl = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec_sub( right, left ) );
-    inv_tb = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec_sub( top, bottom ) );
-    inv_nf = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec_sub( zNear, zFar ) );
-    n2 = vec_add( zNear, zNear );
-    return Matrix4(
-        Vector4( vec_madd( n2, inv_rl, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        Vector4( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), vec_madd( n2, inv_tb, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        Vector4( vec_madd( sum_rl, inv_rl, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( sum_tb, inv_tb, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( sum_nf, inv_nf, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), ((vec_float4){-1.0f,-1.0f,-1.0f,-1.0f}) ),
-        Vector4( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), vec_madd( vec_madd( n2, inv_nf, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), zFar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) )
-    );
-}
-
-inline const Matrix4 Matrix4::orthographic( vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar )
-{
-    vec_float4 sum_rl, sum_tb, sum_nf, inv_rl, inv_tb, inv_nf;
-    sum_rl = vec_add( right, left );
-    sum_tb = vec_add( top, bottom );
-    sum_nf = vec_add( zNear, zFar );
-    inv_rl = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec_sub( right, left ) );
-    inv_tb = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec_sub( top, bottom ) );
-    inv_nf = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec_sub( zNear, zFar ) );
-    return Matrix4(
-        Vector4( vec_add( inv_rl, inv_rl ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        Vector4( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), vec_add( inv_tb, inv_tb ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        Vector4( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), vec_add( inv_nf, inv_nf ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        Vector4( vec_madd( negatef4( sum_rl ), inv_rl, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( negatef4( sum_tb ), inv_tb, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( sum_nf, inv_nf, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), ((vec_float4){1.0f,1.0f,1.0f,1.0f}) )
-    );
-}
-
-inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, vec_uint4 select1 )
-{
-    return Matrix4(
-        select( mat0.getCol0(), mat1.getCol0(), select1 ),
-        select( mat0.getCol1(), mat1.getCol1(), select1 ),
-        select( mat0.getCol2(), mat1.getCol2(), select1 ),
-        select( mat0.getCol3(), mat1.getCol3(), select1 )
-    );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( const Matrix4 & mat )
-{
-    Aos::Matrix4 mat0, mat1, mat2, mat3;
-    mat.get4Aos( mat0, mat1, mat2, mat3 );
-    printf("slot 0:\n");
-    print( mat0 );
-    printf("slot 1:\n");
-    print( mat1 );
-    printf("slot 2:\n");
-    print( mat2 );
-    printf("slot 3:\n");
-    print( mat3 );
-}
-
-inline void print( const Matrix4 & mat, const char * name )
-{
-    printf("%s:\n", name);
-    print( mat );
-}
-
-#endif
-
-inline Transform3::Transform3( const Transform3 & tfrm )
-{
-    mCol0 = tfrm.mCol0;
-    mCol1 = tfrm.mCol1;
-    mCol2 = tfrm.mCol2;
-    mCol3 = tfrm.mCol3;
-}
-
-inline Transform3::Transform3( vec_float4 scalar )
-{
-    mCol0 = Vector3( scalar );
-    mCol1 = Vector3( scalar );
-    mCol2 = Vector3( scalar );
-    mCol3 = Vector3( scalar );
-}
-
-inline Transform3::Transform3( const Vector3 & _col0, const Vector3 & _col1, const Vector3 & _col2, const Vector3 & _col3 )
-{
-    mCol0 = _col0;
-    mCol1 = _col1;
-    mCol2 = _col2;
-    mCol3 = _col3;
-}
-
-inline Transform3::Transform3( const Matrix3 & tfrm, const Vector3 & translateVec )
-{
-    this->setUpper3x3( tfrm );
-    this->setTranslation( translateVec );
-}
-
-inline Transform3::Transform3( const Quat & unitQuat, const Vector3 & translateVec )
-{
-    this->setUpper3x3( Matrix3( unitQuat ) );
-    this->setTranslation( translateVec );
-}
-
-inline Transform3::Transform3( const Aos::Transform3 & tfrm )
-{
-    mCol0 = Vector3( tfrm.getCol0() );
-    mCol1 = Vector3( tfrm.getCol1() );
-    mCol2 = Vector3( tfrm.getCol2() );
-    mCol3 = Vector3( tfrm.getCol3() );
-}
-
-inline Transform3::Transform3( const Aos::Transform3 & tfrm0, const Aos::Transform3 & tfrm1, const Aos::Transform3 & tfrm2, const Aos::Transform3 & tfrm3 )
-{
-    mCol0 = Vector3( tfrm0.getCol0(), tfrm1.getCol0(), tfrm2.getCol0(), tfrm3.getCol0() );
-    mCol1 = Vector3( tfrm0.getCol1(), tfrm1.getCol1(), tfrm2.getCol1(), tfrm3.getCol1() );
-    mCol2 = Vector3( tfrm0.getCol2(), tfrm1.getCol2(), tfrm2.getCol2(), tfrm3.getCol2() );
-    mCol3 = Vector3( tfrm0.getCol3(), tfrm1.getCol3(), tfrm2.getCol3(), tfrm3.getCol3() );
-}
-
-inline void Transform3::get4Aos( Aos::Transform3 & result0, Aos::Transform3 & result1, Aos::Transform3 & result2, Aos::Transform3 & result3 ) const
-{
-    Aos::Vector3 tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3;
-    mCol0.get4Aos( tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3 );
-    result0.setCol0( tmpV3_0 );
-    result1.setCol0( tmpV3_1 );
-    result2.setCol0( tmpV3_2 );
-    result3.setCol0( tmpV3_3 );
-    mCol1.get4Aos( tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3 );
-    result0.setCol1( tmpV3_0 );
-    result1.setCol1( tmpV3_1 );
-    result2.setCol1( tmpV3_2 );
-    result3.setCol1( tmpV3_3 );
-    mCol2.get4Aos( tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3 );
-    result0.setCol2( tmpV3_0 );
-    result1.setCol2( tmpV3_1 );
-    result2.setCol2( tmpV3_2 );
-    result3.setCol2( tmpV3_3 );
-    mCol3.get4Aos( tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3 );
-    result0.setCol3( tmpV3_0 );
-    result1.setCol3( tmpV3_1 );
-    result2.setCol3( tmpV3_2 );
-    result3.setCol3( tmpV3_3 );
-}
-
-inline Transform3 & Transform3::setCol0( const Vector3 & _col0 )
-{
-    mCol0 = _col0;
-    return *this;
-}
-
-inline Transform3 & Transform3::setCol1( const Vector3 & _col1 )
-{
-    mCol1 = _col1;
-    return *this;
-}
-
-inline Transform3 & Transform3::setCol2( const Vector3 & _col2 )
-{
-    mCol2 = _col2;
-    return *this;
-}
-
-inline Transform3 & Transform3::setCol3( const Vector3 & _col3 )
-{
-    mCol3 = _col3;
-    return *this;
-}
-
-inline Transform3 & Transform3::setCol( int col, const Vector3 & vec )
-{
-    *(&mCol0 + col) = vec;
-    return *this;
-}
-
-inline Transform3 & Transform3::setRow( int row, const Vector4 & vec )
-{
-    mCol0.setElem( row, vec.getElem( 0 ) );
-    mCol1.setElem( row, vec.getElem( 1 ) );
-    mCol2.setElem( row, vec.getElem( 2 ) );
-    mCol3.setElem( row, vec.getElem( 3 ) );
-    return *this;
-}
-
-inline Transform3 & Transform3::setElem( int col, int row, vec_float4 val )
-{
-    Vector3 tmpV3_0;
-    tmpV3_0 = this->getCol( col );
-    tmpV3_0.setElem( row, val );
-    this->setCol( col, tmpV3_0 );
-    return *this;
-}
-
-inline vec_float4 Transform3::getElem( int col, int row ) const
-{
-    return this->getCol( col ).getElem( row );
-}
-
-inline const Vector3 Transform3::getCol0( ) const
-{
-    return mCol0;
-}
-
-inline const Vector3 Transform3::getCol1( ) const
-{
-    return mCol1;
-}
-
-inline const Vector3 Transform3::getCol2( ) const
-{
-    return mCol2;
-}
-
-inline const Vector3 Transform3::getCol3( ) const
-{
-    return mCol3;
-}
-
-inline const Vector3 Transform3::getCol( int col ) const
-{
-    return *(&mCol0 + col);
-}
-
-inline const Vector4 Transform3::getRow( int row ) const
-{
-    return Vector4( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ), mCol3.getElem( row ) );
-}
-
-inline Vector3 & Transform3::operator []( int col )
-{
-    return *(&mCol0 + col);
-}
-
-inline const Vector3 Transform3::operator []( int col ) const
-{
-    return *(&mCol0 + col);
-}
-
-inline Transform3 & Transform3::operator =( const Transform3 & tfrm )
-{
-    mCol0 = tfrm.mCol0;
-    mCol1 = tfrm.mCol1;
-    mCol2 = tfrm.mCol2;
-    mCol3 = tfrm.mCol3;
-    return *this;
-}
-
-inline const Transform3 inverse( const Transform3 & tfrm )
-{
-    Vector3 tmp0, tmp1, tmp2, inv0, inv1, inv2;
-    vec_float4 detinv;
-    tmp0 = cross( tfrm.getCol1(), tfrm.getCol2() );
-    tmp1 = cross( tfrm.getCol2(), tfrm.getCol0() );
-    tmp2 = cross( tfrm.getCol0(), tfrm.getCol1() );
-    detinv = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), dot( tfrm.getCol2(), tmp2 ) );
-    inv0 = Vector3( vec_madd( tmp0.getX(), detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp1.getX(), detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp2.getX(), detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    inv1 = Vector3( vec_madd( tmp0.getY(), detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp1.getY(), detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp2.getY(), detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    inv2 = Vector3( vec_madd( tmp0.getZ(), detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp1.getZ(), detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp2.getZ(), detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    return Transform3(
-        inv0,
-        inv1,
-        inv2,
-        Vector3( ( -( ( inv0 * tfrm.getCol3().getX() ) + ( ( inv1 * tfrm.getCol3().getY() ) + ( inv2 * tfrm.getCol3().getZ() ) ) ) ) )
-    );
-}
-
-inline const Transform3 orthoInverse( const Transform3 & tfrm )
-{
-    Vector3 inv0, inv1, inv2;
-    inv0 = Vector3( tfrm.getCol0().getX(), tfrm.getCol1().getX(), tfrm.getCol2().getX() );
-    inv1 = Vector3( tfrm.getCol0().getY(), tfrm.getCol1().getY(), tfrm.getCol2().getY() );
-    inv2 = Vector3( tfrm.getCol0().getZ(), tfrm.getCol1().getZ(), tfrm.getCol2().getZ() );
-    return Transform3(
-        inv0,
-        inv1,
-        inv2,
-        Vector3( ( -( ( inv0 * tfrm.getCol3().getX() ) + ( ( inv1 * tfrm.getCol3().getY() ) + ( inv2 * tfrm.getCol3().getZ() ) ) ) ) )
-    );
-}
-
-inline const Transform3 absPerElem( const Transform3 & tfrm )
-{
-    return Transform3(
-        absPerElem( tfrm.getCol0() ),
-        absPerElem( tfrm.getCol1() ),
-        absPerElem( tfrm.getCol2() ),
-        absPerElem( tfrm.getCol3() )
-    );
-}
-
-inline const Vector3 Transform3::operator *( const Vector3 & vec ) const
-{
-    return Vector3(
-        vec_add( vec_add( vec_madd( mCol0.getX(), vec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mCol1.getX(), vec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol2.getX(), vec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),
-        vec_add( vec_add( vec_madd( mCol0.getY(), vec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mCol1.getY(), vec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol2.getY(), vec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),
-        vec_add( vec_add( vec_madd( mCol0.getZ(), vec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mCol1.getZ(), vec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol2.getZ(), vec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) )
-    );
-}
-
-inline const Point3 Transform3::operator *( const Point3 & pnt ) const
-{
-    return Point3(
-        vec_add( vec_add( vec_add( vec_madd( mCol0.getX(), pnt.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mCol1.getX(), pnt.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol2.getX(), pnt.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), mCol3.getX() ),
-        vec_add( vec_add( vec_add( vec_madd( mCol0.getY(), pnt.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mCol1.getY(), pnt.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol2.getY(), pnt.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), mCol3.getY() ),
-        vec_add( vec_add( vec_add( vec_madd( mCol0.getZ(), pnt.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mCol1.getZ(), pnt.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol2.getZ(), pnt.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), mCol3.getZ() )
-    );
-}
-
-inline const Transform3 Transform3::operator *( const Transform3 & tfrm ) const
-{
-    return Transform3(
-        ( *this * tfrm.mCol0 ),
-        ( *this * tfrm.mCol1 ),
-        ( *this * tfrm.mCol2 ),
-        Vector3( ( *this * Point3( tfrm.mCol3 ) ) )
-    );
-}
-
-inline Transform3 & Transform3::operator *=( const Transform3 & tfrm )
-{
-    *this = *this * tfrm;
-    return *this;
-}
-
-inline const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 )
-{
-    return Transform3(
-        mulPerElem( tfrm0.getCol0(), tfrm1.getCol0() ),
-        mulPerElem( tfrm0.getCol1(), tfrm1.getCol1() ),
-        mulPerElem( tfrm0.getCol2(), tfrm1.getCol2() ),
-        mulPerElem( tfrm0.getCol3(), tfrm1.getCol3() )
-    );
-}
-
-inline const Transform3 Transform3::identity( )
-{
-    return Transform3(
-        Vector3::xAxis( ),
-        Vector3::yAxis( ),
-        Vector3::zAxis( ),
-        Vector3( ((vec_float4){0.0f,0.0f,0.0f,0.0f}) )
-    );
-}
-
-inline Transform3 & Transform3::setUpper3x3( const Matrix3 & tfrm )
-{
-    mCol0 = tfrm.getCol0();
-    mCol1 = tfrm.getCol1();
-    mCol2 = tfrm.getCol2();
-    return *this;
-}
-
-inline const Matrix3 Transform3::getUpper3x3( ) const
-{
-    return Matrix3( mCol0, mCol1, mCol2 );
-}
-
-inline Transform3 & Transform3::setTranslation( const Vector3 & translateVec )
-{
-    mCol3 = translateVec;
-    return *this;
-}
-
-inline const Vector3 Transform3::getTranslation( ) const
-{
-    return mCol3;
-}
-
-inline const Transform3 Transform3::rotationX( vec_float4 radians )
-{
-    vec_float4 s, c;
-    sincosf4( radians, &s, &c );
-    return Transform3(
-        Vector3::xAxis( ),
-        Vector3( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), c, s ),
-        Vector3( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), negatef4( s ), c ),
-        Vector3( ((vec_float4){0.0f,0.0f,0.0f,0.0f}) )
-    );
-}
-
-inline const Transform3 Transform3::rotationY( vec_float4 radians )
-{
-    vec_float4 s, c;
-    sincosf4( radians, &s, &c );
-    return Transform3(
-        Vector3( c, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), negatef4( s ) ),
-        Vector3::yAxis( ),
-        Vector3( s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), c ),
-        Vector3( ((vec_float4){0.0f,0.0f,0.0f,0.0f}) )
-    );
-}
-
-inline const Transform3 Transform3::rotationZ( vec_float4 radians )
-{
-    vec_float4 s, c;
-    sincosf4( radians, &s, &c );
-    return Transform3(
-        Vector3( c, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        Vector3( negatef4( s ), c, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        Vector3::zAxis( ),
-        Vector3( ((vec_float4){0.0f,0.0f,0.0f,0.0f}) )
-    );
-}
-
-inline const Transform3 Transform3::rotationZYX( const Vector3 & radiansXYZ )
-{
-    vec_float4 sX, cX, sY, cY, sZ, cZ, tmp0, tmp1;
-    sincosf4( radiansXYZ.getX(), &sX, &cX );
-    sincosf4( radiansXYZ.getY(), &sY, &cY );
-    sincosf4( radiansXYZ.getZ(), &sZ, &cZ );
-    tmp0 = vec_madd( cZ, sY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    tmp1 = vec_madd( sZ, sY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    return Transform3(
-        Vector3( vec_madd( cZ, cY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( sZ, cY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), negatef4( sY ) ),
-        Vector3( vec_sub( vec_madd( tmp0, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( sZ, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_add( vec_madd( tmp1, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( cZ, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( cY, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),
-        Vector3( vec_add( vec_madd( tmp0, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( sZ, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_sub( vec_madd( tmp1, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( cZ, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( cY, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),
-        Vector3( ((vec_float4){0.0f,0.0f,0.0f,0.0f}) )
-    );
-}
-
-inline const Transform3 Transform3::rotation( vec_float4 radians, const Vector3 & unitVec )
-{
-    return Transform3( Matrix3::rotation( radians, unitVec ), Vector3( ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-}
-
-inline const Transform3 Transform3::rotation( const Quat & unitQuat )
-{
-    return Transform3( Matrix3( unitQuat ), Vector3( ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-}
-
-inline const Transform3 Transform3::scale( const Vector3 & scaleVec )
-{
-    return Transform3(
-        Vector3( scaleVec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        Vector3( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), scaleVec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        Vector3( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), scaleVec.getZ() ),
-        Vector3( ((vec_float4){0.0f,0.0f,0.0f,0.0f}) )
-    );
-}
-
-inline const Transform3 appendScale( const Transform3 & tfrm, const Vector3 & scaleVec )
-{
-    return Transform3(
-        ( tfrm.getCol0() * scaleVec.getX( ) ),
-        ( tfrm.getCol1() * scaleVec.getY( ) ),
-        ( tfrm.getCol2() * scaleVec.getZ( ) ),
-        tfrm.getCol3()
-    );
-}
-
-inline const Transform3 prependScale( const Vector3 & scaleVec, const Transform3 & tfrm )
-{
-    return Transform3(
-        mulPerElem( tfrm.getCol0(), scaleVec ),
-        mulPerElem( tfrm.getCol1(), scaleVec ),
-        mulPerElem( tfrm.getCol2(), scaleVec ),
-        mulPerElem( tfrm.getCol3(), scaleVec )
-    );
-}
-
-inline const Transform3 Transform3::translation( const Vector3 & translateVec )
-{
-    return Transform3(
-        Vector3::xAxis( ),
-        Vector3::yAxis( ),
-        Vector3::zAxis( ),
-        translateVec
-    );
-}
-
-inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, vec_uint4 select1 )
-{
-    return Transform3(
-        select( tfrm0.getCol0(), tfrm1.getCol0(), select1 ),
-        select( tfrm0.getCol1(), tfrm1.getCol1(), select1 ),
-        select( tfrm0.getCol2(), tfrm1.getCol2(), select1 ),
-        select( tfrm0.getCol3(), tfrm1.getCol3(), select1 )
-    );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( const Transform3 & tfrm )
-{
-    Aos::Transform3 mat0, mat1, mat2, mat3;
-    tfrm.get4Aos( mat0, mat1, mat2, mat3 );
-    printf("slot 0:\n");
-    print( mat0 );
-    printf("slot 1:\n");
-    print( mat1 );
-    printf("slot 2:\n");
-    print( mat2 );
-    printf("slot 3:\n");
-    print( mat3 );
-}
-
-inline void print( const Transform3 & tfrm, const char * name )
-{
-    printf("%s:\n", name);
-    print( tfrm );
-}
-
-#endif
-
-inline Quat::Quat( const Matrix3 & tfrm )
-{
-    vec_float4 trace, radicand, scale, xx, yx, zx, xy, yy, zy, xz, yz, zz, tmpx, tmpy, tmpz, tmpw, qx, qy, qz, qw;
-    vec_uint4 negTrace, ZgtX, ZgtY, YgtX;
-    vec_uint4 largestXorY, largestYorZ, largestZorX;
-
-    xx = tfrm.getCol0().getX();
-    yx = tfrm.getCol0().getY();
-    zx = tfrm.getCol0().getZ();
-    xy = tfrm.getCol1().getX();
-    yy = tfrm.getCol1().getY();
-    zy = tfrm.getCol1().getZ();
-    xz = tfrm.getCol2().getX();
-    yz = tfrm.getCol2().getY();
-    zz = tfrm.getCol2().getZ();
-
-    trace = vec_add( vec_add( xx, yy ), zz );
-
-    negTrace = (vec_uint4)vec_cmpgt( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), trace );
-    ZgtX = (vec_uint4)vec_cmpgt( zz, xx );
-    ZgtY = (vec_uint4)vec_cmpgt( zz, yy );
-    YgtX = (vec_uint4)vec_cmpgt( yy, xx );
-    largestXorY = vec_andc( negTrace, vec_and( ZgtX, ZgtY ) );
-    largestYorZ = vec_and( negTrace, vec_or( YgtX, ZgtX ) );
-    largestZorX = vec_andc( negTrace, vec_andc( YgtX, ZgtY ) );
-    
-    zz = vec_sel( zz, negatef4(zz), largestXorY );
-    xy = vec_sel( xy, negatef4(xy), largestXorY );
-    xx = vec_sel( xx, negatef4(xx), largestYorZ );
-    yz = vec_sel( yz, negatef4(yz), largestYorZ );
-    yy = vec_sel( yy, negatef4(yy), largestZorX );
-    zx = vec_sel( zx, negatef4(zx), largestZorX );
-
-    radicand = vec_add( vec_add( vec_add( xx, yy ), zz ), ((vec_float4){1.0f,1.0f,1.0f,1.0f}) );
-    scale = vec_madd( ((vec_float4){0.5f,0.5f,0.5f,0.5f}), divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( radicand ) ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-
-    tmpx = vec_madd( vec_sub( zy, yz ), scale, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    tmpy = vec_madd( vec_sub( xz, zx ), scale, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    tmpz = vec_madd( vec_sub( yx, xy ), scale, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    tmpw = vec_madd( radicand, scale, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    qx = tmpx;
-    qy = tmpy;
-    qz = tmpz;
-    qw = tmpw;
-
-    qx = vec_sel( qx, tmpw, largestXorY );
-    qy = vec_sel( qy, tmpz, largestXorY );
-    qz = vec_sel( qz, tmpy, largestXorY );
-    qw = vec_sel( qw, tmpx, largestXorY );
-    tmpx = qx;
-    tmpz = qz;
-    qx = vec_sel( qx, qy, largestYorZ );
-    qy = vec_sel( qy, tmpx, largestYorZ );
-    qz = vec_sel( qz, qw, largestYorZ );
-    qw = vec_sel( qw, tmpz, largestYorZ );
-
-    mX = qx;
-    mY = qy;
-    mZ = qz;
-    mW = qw;
-}
-
-inline const Matrix3 outer( const Vector3 & tfrm0, const Vector3 & tfrm1 )
-{
-    return Matrix3(
-        ( tfrm0 * tfrm1.getX( ) ),
-        ( tfrm0 * tfrm1.getY( ) ),
-        ( tfrm0 * tfrm1.getZ( ) )
-    );
-}
-
-inline const Matrix4 outer( const Vector4 & tfrm0, const Vector4 & tfrm1 )
-{
-    return Matrix4(
-        ( tfrm0 * tfrm1.getX( ) ),
-        ( tfrm0 * tfrm1.getY( ) ),
-        ( tfrm0 * tfrm1.getZ( ) ),
-        ( tfrm0 * tfrm1.getW( ) )
-    );
-}
-
-inline const Vector3 rowMul( const Vector3 & vec, const Matrix3 & mat )
-{
-    return Vector3(
-        vec_add( vec_add( vec_madd( vec.getX(), mat.getCol0().getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( vec.getY(), mat.getCol0().getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( vec.getZ(), mat.getCol0().getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),
-        vec_add( vec_add( vec_madd( vec.getX(), mat.getCol1().getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( vec.getY(), mat.getCol1().getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( vec.getZ(), mat.getCol1().getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),
-        vec_add( vec_add( vec_madd( vec.getX(), mat.getCol2().getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( vec.getY(), mat.getCol2().getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( vec.getZ(), mat.getCol2().getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) )
-    );
-}
-
-inline const Matrix3 crossMatrix( const Vector3 & vec )
-{
-    return Matrix3(
-        Vector3( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), vec.getZ(), negatef4( vec.getY() ) ),
-        Vector3( negatef4( vec.getZ() ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), vec.getX() ),
-        Vector3( vec.getY(), negatef4( vec.getX() ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) )
-    );
-}
-
-inline const Matrix3 crossMatrixMul( const Vector3 & vec, const Matrix3 & mat )
-{
-    return Matrix3( cross( vec, mat.getCol0() ), cross( vec, mat.getCol1() ), cross( vec, mat.getCol2() ) );
-}
-
-} // namespace Soa
-} // namespace Vectormath
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_MAT_SOA_CPP_H
+#define _VECTORMATH_MAT_SOA_CPP_H
+
+namespace Vectormath {
+namespace Soa {
+
+//-----------------------------------------------------------------------------
+// Constants
+
+#define _VECTORMATH_PI_OVER_2 1.570796327f
+
+//-----------------------------------------------------------------------------
+// Definitions
+
+inline Matrix3::Matrix3( const Matrix3 & mat ) // Copy constructor: duplicates the three columns of mat.
+{
+    mCol0 = mat.mCol0;
+    mCol1 = mat.mCol1;
+    mCol2 = mat.mCol2;
+}
+
+inline Matrix3::Matrix3( vec_float4 scalar ) // Fill constructor: every column is constructed as Vector3( scalar ).
+{
+    mCol0 = Vector3( scalar );
+    mCol1 = Vector3( scalar );
+    mCol2 = Vector3( scalar );
+}
+
+inline Matrix3::Matrix3( const Quat & unitQuat ) // Builds the three columns from the components of unitQuat; the quaternion is not normalized here.
+{
+    vec_float4 qx, qy, qz, qw, qx2, qy2, qz2, qxqx2, qyqy2, qzqz2, qxqy2, qyqz2, qzqw2, qxqz2, qyqw2, qxqw2;
+    qx = unitQuat.getX();
+    qy = unitQuat.getY();
+    qz = unitQuat.getZ();
+    qw = unitQuat.getW();
+    qx2 = vec_add( qx, qx ); // 2*qx
+    qy2 = vec_add( qy, qy ); // 2*qy
+    qz2 = vec_add( qz, qz ); // 2*qz
+    qxqx2 = vec_madd( qx, qx2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); // multiply-add with a zero addend, i.e. a plain product: 2*qx*qx
+    qxqy2 = vec_madd( qx, qy2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); // 2*qx*qy
+    qxqz2 = vec_madd( qx, qz2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); // 2*qx*qz
+    qxqw2 = vec_madd( qw, qx2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); // 2*qx*qw
+    qyqy2 = vec_madd( qy, qy2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); // 2*qy*qy
+    qyqz2 = vec_madd( qy, qz2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); // 2*qy*qz
+    qyqw2 = vec_madd( qw, qy2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); // 2*qy*qw
+    qzqz2 = vec_madd( qz, qz2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); // 2*qz*qz
+    qzqw2 = vec_madd( qw, qz2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); // 2*qz*qw
+    mCol0 = Vector3( vec_sub( vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), qyqy2 ), qzqz2 ), vec_add( qxqy2, qzqw2 ), vec_sub( qxqz2, qyqw2 ) );
+    mCol1 = Vector3( vec_sub( qxqy2, qzqw2 ), vec_sub( vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), qxqx2 ), qzqz2 ), vec_add( qyqz2, qxqw2 ) );
+    mCol2 = Vector3( vec_add( qxqz2, qyqw2 ), vec_sub( qyqz2, qxqw2 ), vec_sub( vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), qxqx2 ), qyqy2 ) );
+}
+
+inline Matrix3::Matrix3( const Vector3 & _col0, const Vector3 & _col1, const Vector3 & _col2 ) // Constructs the matrix from its three columns.
+{
+    mCol0 = _col0;
+    mCol1 = _col1;
+    mCol2 = _col2;
+}
+
+inline Matrix3::Matrix3( const Aos::Matrix3 & mat ) // Builds each SoA column from the matching column of one AoS matrix.
+{ // NOTE(review): presumably replicates mat across all four slots; that is decided by Vector3( Aos::Vector3 ), not visible here - confirm.
+    mCol0 = Vector3( mat.getCol0() );
+    mCol1 = Vector3( mat.getCol1() );
+    mCol2 = Vector3( mat.getCol2() );
+}
+
+inline Matrix3::Matrix3( const Aos::Matrix3 & mat0, const Aos::Matrix3 & mat1, const Aos::Matrix3 & mat2, const Aos::Matrix3 & mat3 ) // Packs four AoS matrices into one SoA matrix, column by column.
+{ // NOTE(review): slot ordering of mat0..mat3 is determined by the four-argument Vector3 constructor (not visible here) - confirm.
+    mCol0 = Vector3( mat0.getCol0(), mat1.getCol0(), mat2.getCol0(), mat3.getCol0() );
+    mCol1 = Vector3( mat0.getCol1(), mat1.getCol1(), mat2.getCol1(), mat3.getCol1() );
+    mCol2 = Vector3( mat0.getCol2(), mat1.getCol2(), mat2.getCol2(), mat3.getCol2() );
+}
+
+inline void Matrix3::get4Aos( Aos::Matrix3 & result0, Aos::Matrix3 & result1, Aos::Matrix3 & result2, Aos::Matrix3 & result3 ) const // Unpacks this SoA matrix into four AoS matrices (output parameters).
+{
+    Aos::Vector3 tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3; // scratch vectors, reused for each of the three columns
+    mCol0.get4Aos( tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3 ); // column 0 split into four AoS vectors
+    result0.setCol0( tmpV3_0 );
+    result1.setCol0( tmpV3_1 );
+    result2.setCol0( tmpV3_2 );
+    result3.setCol0( tmpV3_3 );
+    mCol1.get4Aos( tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3 ); // column 1
+    result0.setCol1( tmpV3_0 );
+    result1.setCol1( tmpV3_1 );
+    result2.setCol1( tmpV3_2 );
+    result3.setCol1( tmpV3_3 );
+    mCol2.get4Aos( tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3 ); // column 2
+    result0.setCol2( tmpV3_0 );
+    result1.setCol2( tmpV3_1 );
+    result2.setCol2( tmpV3_2 );
+    result3.setCol2( tmpV3_3 );
+}
+
+inline Matrix3 & Matrix3::setCol0( const Vector3 & _col0 ) // Replaces column 0; returns *this so calls can be chained.
+{
+    mCol0 = _col0;
+    return *this;
+}
+
+inline Matrix3 & Matrix3::setCol1( const Vector3 & _col1 ) // Replaces column 1; returns *this so calls can be chained.
+{
+    mCol1 = _col1;
+    return *this;
+}
+
+inline Matrix3 & Matrix3::setCol2( const Vector3 & _col2 ) // Replaces column 2; returns *this so calls can be chained.
+{
+    mCol2 = _col2;
+    return *this;
+}
+
+inline Matrix3 & Matrix3::setCol( int col, const Vector3 & vec ) // Replaces the column selected by index col; returns *this.
+{
+    *(&mCol0 + col) = vec; // pointer offset from mCol0; col is not range-checked. Assumes mCol0..mCol2 are contiguous members - TODO confirm against the class declaration.
+    return *this;
+}
+
+inline Matrix3 & Matrix3::setRow( int row, const Vector3 & vec ) // Writes vec across row `row`: element i of vec goes into column i; returns *this.
+{
+    mCol0.setElem( row, vec.getElem( 0 ) );
+    mCol1.setElem( row, vec.getElem( 1 ) );
+    mCol2.setElem( row, vec.getElem( 2 ) );
+    return *this;
+}
+
+inline Matrix3 & Matrix3::setElem( int col, int row, vec_float4 val ) // Sets one element, addressed as (col, row); returns *this.
+{
+    Vector3 tmpV3_0; // local copy of the column (getCol returns by value), modified and written back
+    tmpV3_0 = this->getCol( col );
+    tmpV3_0.setElem( row, val );
+    this->setCol( col, tmpV3_0 );
+    return *this;
+}
+
+inline vec_float4 Matrix3::getElem( int col, int row ) const // Returns the element at (col, row): column is selected first, then the row within it.
+{
+    return this->getCol( col ).getElem( row );
+}
+
+inline const Vector3 Matrix3::getCol0( ) const // Returns column 0 by value.
+{
+    return mCol0;
+}
+
+inline const Vector3 Matrix3::getCol1( ) const // Returns column 1 by value.
+{
+    return mCol1;
+}
+
+inline const Vector3 Matrix3::getCol2( ) const // Returns column 2 by value.
+{
+    return mCol2;
+}
+
+inline const Vector3 Matrix3::getCol( int col ) const // Returns the column selected by index col, by value.
+{
+    return *(&mCol0 + col); // pointer offset from mCol0; col is not range-checked. Assumes contiguous column members - TODO confirm.
+}
+
+inline const Vector3 Matrix3::getRow( int row ) const // Gathers element `row` of each column into a new Vector3.
+{
+    return Vector3( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ) );
+}
+
+inline Vector3 & Matrix3::operator []( int col ) // Mutable access to a column by index; returns a reference into this matrix.
+{
+    return *(&mCol0 + col); // same unchecked pointer-offset indexing as setCol/getCol
+}
+
+inline const Vector3 Matrix3::operator []( int col ) const // Read-only column access by index; returns a copy, not a reference.
+{
+    return *(&mCol0 + col); // same unchecked pointer-offset indexing as getCol
+}
+
+inline Matrix3 & Matrix3::operator =( const Matrix3 & mat ) // Column-wise assignment; returns *this.
+{
+    mCol0 = mat.mCol0;
+    mCol1 = mat.mCol1;
+    mCol2 = mat.mCol2;
+    return *this;
+}
+
+inline const Matrix3 transpose( const Matrix3 & mat ) // Returns the transpose: result column i is made of component i of each input column.
+{
+    return Matrix3(
+        Vector3( mat.getCol0().getX(), mat.getCol1().getX(), mat.getCol2().getX() ),
+        Vector3( mat.getCol0().getY(), mat.getCol1().getY(), mat.getCol2().getY() ),
+        Vector3( mat.getCol0().getZ(), mat.getCol1().getZ(), mat.getCol2().getZ() )
+    );
+}
+
+inline const Matrix3 inverse( const Matrix3 & mat ) // Inverse via cross products of the columns, scaled by the reciprocal determinant.
+{
+    Vector3 tmp0, tmp1, tmp2;
+    vec_float4 detinv;
+    tmp0 = cross( mat.getCol1(), mat.getCol2() );
+    tmp1 = cross( mat.getCol2(), mat.getCol0() );
+    tmp2 = cross( mat.getCol0(), mat.getCol1() );
+    detinv = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), dot( mat.getCol2(), tmp2 ) ); // 1 / (col2 . (col0 x col1)); no guard against a zero determinant
+    return Matrix3( // tmp0..tmp2 are used as rows: result column i takes component i of each, times detinv
+        Vector3( vec_madd( tmp0.getX(), detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp1.getX(), detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp2.getX(), detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),
+        Vector3( vec_madd( tmp0.getY(), detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp1.getY(), detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp2.getY(), detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),
+        Vector3( vec_madd( tmp0.getZ(), detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp1.getZ(), detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp2.getZ(), detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) )
+    );
+}
+
+inline vec_float4 determinant( const Matrix3 & mat ) // Determinant as the scalar triple product col2 . (col0 x col1).
+{
+    return dot( mat.getCol2(), cross( mat.getCol0(), mat.getCol1() ) );
+}
+
+inline const Matrix3 Matrix3::operator +( const Matrix3 & mat ) const // Column-wise sum of two matrices.
+{
+    return Matrix3(
+        ( mCol0 + mat.mCol0 ),
+        ( mCol1 + mat.mCol1 ),
+        ( mCol2 + mat.mCol2 )
+    );
+}
+
+inline const Matrix3 Matrix3::operator -( const Matrix3 & mat ) const // Column-wise difference of two matrices.
+{
+    return Matrix3(
+        ( mCol0 - mat.mCol0 ),
+        ( mCol1 - mat.mCol1 ),
+        ( mCol2 - mat.mCol2 )
+    );
+}
+
+inline Matrix3 & Matrix3::operator +=( const Matrix3 & mat ) // In-place addition, implemented through binary operator +.
+{
+    *this = *this + mat;
+    return *this;
+}
+
+inline Matrix3 & Matrix3::operator -=( const Matrix3 & mat ) // In-place subtraction, implemented through binary operator -.
+{
+    *this = *this - mat;
+    return *this;
+}
+
+inline const Matrix3 Matrix3::operator -( ) const // Unary minus: negates each column.
+{
+    return Matrix3(
+        ( -mCol0 ),
+        ( -mCol1 ),
+        ( -mCol2 )
+    );
+}
+
+inline const Matrix3 absPerElem( const Matrix3 & mat ) // Applies the Vector3 absPerElem overload to each column.
+{
+    return Matrix3(
+        absPerElem( mat.getCol0() ),
+        absPerElem( mat.getCol1() ),
+        absPerElem( mat.getCol2() )
+    );
+}
+
+inline const Matrix3 Matrix3::operator *( vec_float4 scalar ) const // Multiplies every column by scalar (a vec_float4, so one value per SIMD slot).
+{
+    return Matrix3(
+        ( mCol0 * scalar ),
+        ( mCol1 * scalar ),
+        ( mCol2 * scalar )
+    );
+}
+
+inline Matrix3 & Matrix3::operator *=( vec_float4 scalar ) // In-place scalar multiply, implemented through operator *( vec_float4 ).
+{
+    *this = *this * scalar;
+    return *this;
+}
+
+inline const Matrix3 operator *( vec_float4 scalar, const Matrix3 & mat ) // Scalar-on-the-left form; forwards to mat * scalar.
+{
+    return mat * scalar;
+}
+
+inline const Vector3 Matrix3::operator *( const Vector3 & vec ) const // Matrix * column vector: each result component sums column component times the matching vec component.
+{
+    return Vector3( // the three arguments below are the X, Y and Z results; vec_madd with a zero addend is a plain multiply
+        vec_add( vec_add( vec_madd( mCol0.getX(), vec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mCol1.getX(), vec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol2.getX(), vec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),
+        vec_add( vec_add( vec_madd( mCol0.getY(), vec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mCol1.getY(), vec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol2.getY(), vec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),
+        vec_add( vec_add( vec_madd( mCol0.getZ(), vec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mCol1.getZ(), vec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol2.getZ(), vec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) )
+    );
+}
+
+inline const Matrix3 Matrix3::operator *( const Matrix3 & mat ) const // Matrix product: each result column is *this times the matching column of mat.
+{
+    return Matrix3(
+        ( *this * mat.mCol0 ),
+        ( *this * mat.mCol1 ),
+        ( *this * mat.mCol2 )
+    );
+}
+
+inline Matrix3 & Matrix3::operator *=( const Matrix3 & mat ) // In-place product with mat on the right (*this = *this * mat).
+{
+    *this = *this * mat;
+    return *this;
+}
+
+inline const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 ) // Applies the Vector3 mulPerElem overload to matching columns; not a matrix product.
+{
+    return Matrix3(
+        mulPerElem( mat0.getCol0(), mat1.getCol0() ),
+        mulPerElem( mat0.getCol1(), mat1.getCol1() ),
+        mulPerElem( mat0.getCol2(), mat1.getCol2() )
+    );
+}
+
+inline const Matrix3 Matrix3::identity( ) // Matrix whose columns are Vector3::xAxis(), yAxis() and zAxis().
+{
+    return Matrix3(
+        Vector3::xAxis( ),
+        Vector3::yAxis( ),
+        Vector3::zAxis( )
+    );
+}
+
+inline const Matrix3 Matrix3::rotationX( vec_float4 radians ) // Rotation about the x axis: column 0 is xAxis, columns 1 and 2 are (0, c, s) and (0, -s, c).
+{
+    vec_float4 s, c;
+    sincosf4( radians, &s, &c ); // presumably writes sine to s and cosine to c (helper defined elsewhere) - confirm
+    return Matrix3(
+        Vector3::xAxis( ),
+        Vector3( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), c, s ),
+        Vector3( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), negatef4( s ), c )
+    );
+}
+
+inline const Matrix3 Matrix3::rotationY( vec_float4 radians ) // Rotation about the y axis: columns are (c, 0, -s), yAxis and (s, 0, c).
+{
+    vec_float4 s, c;
+    sincosf4( radians, &s, &c ); // presumably writes sine to s and cosine to c (helper defined elsewhere) - confirm
+    return Matrix3(
+        Vector3( c, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), negatef4( s ) ),
+        Vector3::yAxis( ),
+        Vector3( s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), c )
+    );
+}
+
+inline const Matrix3 Matrix3::rotationZ( vec_float4 radians ) // Rotation about the z axis: columns are (c, s, 0), (-s, c, 0) and zAxis.
+{
+    vec_float4 s, c;
+    sincosf4( radians, &s, &c ); // presumably writes sine to s and cosine to c (helper defined elsewhere) - confirm
+    return Matrix3(
+        Vector3( c, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
+        Vector3( negatef4( s ), c, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
+        Vector3::zAxis( )
+    );
+}
+
+inline const Matrix3 Matrix3::rotationZYX( const Vector3 & radiansXYZ ) // Closed-form columns equal to rotationZ(z) * rotationY(y) * rotationX(x), with angles taken from radiansXYZ.
+{
+    vec_float4 sX, cX, sY, cY, sZ, cZ, tmp0, tmp1;
+    sincosf4( radiansXYZ.getX(), &sX, &cX );
+    sincosf4( radiansXYZ.getY(), &sY, &cY );
+    sincosf4( radiansXYZ.getZ(), &sZ, &cZ );
+    tmp0 = vec_madd( cZ, sY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); // cZ*sY, shared by columns 1 and 2
+    tmp1 = vec_madd( sZ, sY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); // sZ*sY, shared by columns 1 and 2
+    return Matrix3(
+        Vector3( vec_madd( cZ, cY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( sZ, cY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), negatef4( sY ) ),
+        Vector3( vec_sub( vec_madd( tmp0, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( sZ, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_add( vec_madd( tmp1, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( cZ, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( cY, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),
+        Vector3( vec_add( vec_madd( tmp0, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( sZ, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_sub( vec_madd( tmp1, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( cZ, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( cY, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) )
+    );
+}
+
+inline const Matrix3 Matrix3::rotation( vec_float4 radians, const Vector3 & unitVec ) // Rotation by `radians` about axis unitVec; the axis is used as given (not normalized here).
+{
+    vec_float4 x, y, z, s, c, oneMinusC, xy, yz, zx;
+    sincosf4( radians, &s, &c );
+    x = unitVec.getX();
+    y = unitVec.getY();
+    z = unitVec.getZ();
+    xy = vec_madd( x, y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); // pairwise axis products, each reused by two matrix elements
+    yz = vec_madd( y, z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    zx = vec_madd( z, x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    oneMinusC = vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), c );
+    return Matrix3( // diagonal: a*a*(1-c) + c; off-diagonal: ab*(1-c) plus or minus (third axis component)*s
+        Vector3( vec_add( vec_madd( vec_madd( x, x, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), c ), vec_add( vec_madd( xy, oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( z, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_sub( vec_madd( zx, oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( y, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ) ),
+        Vector3( vec_sub( vec_madd( xy, oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( z, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_add( vec_madd( vec_madd( y, y, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), c ), vec_add( vec_madd( yz, oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( x, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ) ),
+        Vector3( vec_add( vec_madd( zx, oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( y, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_sub( vec_madd( yz, oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( x, s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_add( vec_madd( vec_madd( z, z, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), oneMinusC, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), c ) )
+    );
+}
+
+inline const Matrix3 Matrix3::rotation( const Quat & unitQuat ) // Named factory that forwards to the Matrix3( const Quat & ) constructor.
+{
+    return Matrix3( unitQuat );
+}
+
+inline const Matrix3 Matrix3::scale( const Vector3 & scaleVec ) // Diagonal matrix with scaleVec's x, y, z on the diagonal and zeros elsewhere.
+{
+    return Matrix3(
+        Vector3( scaleVec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
+        Vector3( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), scaleVec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
+        Vector3( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), scaleVec.getZ() )
+    );
+}
+
+inline const Matrix3 appendScale( const Matrix3 & mat, const Vector3 & scaleVec ) // Scales column i of mat by component i of scaleVec (same result as mat * Matrix3::scale( scaleVec )).
+{
+    return Matrix3(
+        ( mat.getCol0() * scaleVec.getX( ) ),
+        ( mat.getCol1() * scaleVec.getY( ) ),
+        ( mat.getCol2() * scaleVec.getZ( ) )
+    );
+}
+
+inline const Matrix3 prependScale( const Vector3 & scaleVec, const Matrix3 & mat ) // Multiplies every column of mat per-element by scaleVec, i.e. scales the rows.
+{
+    return Matrix3(
+        mulPerElem( mat.getCol0(), scaleVec ),
+        mulPerElem( mat.getCol1(), scaleVec ),
+        mulPerElem( mat.getCol2(), scaleVec )
+    );
+}
+
+inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, vec_uint4 select1 ) // Column-wise select between mat0 and mat1; mask semantics come from the Vector3 select overload (not visible here).
+{
+    return Matrix3(
+        select( mat0.getCol0(), mat1.getCol0(), select1 ),
+        select( mat0.getCol1(), mat1.getCol1(), select1 ),
+        select( mat0.getCol2(), mat1.getCol2(), select1 )
+    );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+inline void print( const Matrix3 & mat ) // Debug helper: unpacks the SoA matrix and prints the four AoS matrices, labelled slot 0..3.
+{
+    Aos::Matrix3 mat0, mat1, mat2, mat3;
+    mat.get4Aos( mat0, mat1, mat2, mat3 );
+    printf("slot 0:\n");
+    print( mat0 ); // resolves to the print overload for Aos::Matrix3
+    printf("slot 1:\n");
+    print( mat1 );
+    printf("slot 2:\n");
+    print( mat2 );
+    printf("slot 3:\n");
+    print( mat3 );
+}
+
+inline void print( const Matrix3 & mat, const char * name ) // Debug helper: prints "name:" on its own line, then the matrix via print( mat ).
+{
+    printf("%s:\n", name);
+    print( mat );
+}
+
+#endif
+
+inline Matrix4::Matrix4( const Matrix4 & mat ) // Copy constructor: duplicates the four columns of mat.
+{
+    mCol0 = mat.mCol0;
+    mCol1 = mat.mCol1;
+    mCol2 = mat.mCol2;
+    mCol3 = mat.mCol3;
+}
+
+inline Matrix4::Matrix4( vec_float4 scalar ) // Fill constructor: every column is constructed as Vector4( scalar ).
+{
+    mCol0 = Vector4( scalar );
+    mCol1 = Vector4( scalar );
+    mCol2 = Vector4( scalar );
+    mCol3 = Vector4( scalar );
+}
+
+inline Matrix4::Matrix4( const Transform3 & mat ) // Promotes a Transform3 to 4x4: each column gains a fourth component, giving a bottom row of (0, 0, 0, 1).
+{
+    mCol0 = Vector4( mat.getCol0(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    mCol1 = Vector4( mat.getCol1(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    mCol2 = Vector4( mat.getCol2(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    mCol3 = Vector4( mat.getCol3(), ((vec_float4){1.0f,1.0f,1.0f,1.0f}) ); // fourth column gets w = 1
+}
+
+inline Matrix4::Matrix4( const Vector4 & _col0, const Vector4 & _col1, const Vector4 & _col2, const Vector4 & _col3 ) // Constructs the matrix from its four columns.
+{
+    mCol0 = _col0;
+    mCol1 = _col1;
+    mCol2 = _col2;
+    mCol3 = _col3;
+}
+
+inline Matrix4::Matrix4( const Matrix3 & mat, const Vector3 & translateVec ) // 4x4 from a 3x3 block plus a translation column; bottom row is (0, 0, 0, 1).
+{
+    mCol0 = Vector4( mat.getCol0(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    mCol1 = Vector4( mat.getCol1(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    mCol2 = Vector4( mat.getCol2(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    mCol3 = Vector4( translateVec, ((vec_float4){1.0f,1.0f,1.0f,1.0f}) ); // translation goes in column 3 with w = 1
+}
+
+inline Matrix4::Matrix4( const Quat & unitQuat, const Vector3 & translateVec ) // 4x4 from a quaternion rotation plus a translation column; bottom row is (0, 0, 0, 1).
+{
+    Matrix3 mat;
+    mat = Matrix3( unitQuat ); // rotation block comes from the Matrix3( const Quat & ) constructor
+    mCol0 = Vector4( mat.getCol0(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    mCol1 = Vector4( mat.getCol1(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    mCol2 = Vector4( mat.getCol2(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    mCol3 = Vector4( translateVec, ((vec_float4){1.0f,1.0f,1.0f,1.0f}) );
+}
+
+inline Matrix4::Matrix4( const Aos::Matrix4 & mat ) // Builds each SoA column from the matching column of one AoS matrix.
+{ // NOTE(review): presumably replicates mat across all four slots; that is decided by Vector4( Aos::Vector4 ), not visible here - confirm.
+    mCol0 = Vector4( mat.getCol0() );
+    mCol1 = Vector4( mat.getCol1() );
+    mCol2 = Vector4( mat.getCol2() );
+    mCol3 = Vector4( mat.getCol3() );
+}
+
+inline Matrix4::Matrix4( const Aos::Matrix4 & mat0, const Aos::Matrix4 & mat1, const Aos::Matrix4 & mat2, const Aos::Matrix4 & mat3 ) // Packs four AoS matrices into one SoA matrix, column by column.
+{ // NOTE(review): slot ordering of mat0..mat3 is determined by the four-argument Vector4 constructor (not visible here) - confirm.
+    mCol0 = Vector4( mat0.getCol0(), mat1.getCol0(), mat2.getCol0(), mat3.getCol0() );
+    mCol1 = Vector4( mat0.getCol1(), mat1.getCol1(), mat2.getCol1(), mat3.getCol1() );
+    mCol2 = Vector4( mat0.getCol2(), mat1.getCol2(), mat2.getCol2(), mat3.getCol2() );
+    mCol3 = Vector4( mat0.getCol3(), mat1.getCol3(), mat2.getCol3(), mat3.getCol3() );
+}
+
+inline void Matrix4::get4Aos( Aos::Matrix4 & result0, Aos::Matrix4 & result1, Aos::Matrix4 & result2, Aos::Matrix4 & result3 ) const // Unpacks this SoA matrix into four AoS matrices (output parameters).
+{
+    Aos::Vector4 tmpV4_0, tmpV4_1, tmpV4_2, tmpV4_3; // scratch vectors, reused for each of the four columns
+    mCol0.get4Aos( tmpV4_0, tmpV4_1, tmpV4_2, tmpV4_3 ); // column 0 split into four AoS vectors
+    result0.setCol0( tmpV4_0 );
+    result1.setCol0( tmpV4_1 );
+    result2.setCol0( tmpV4_2 );
+    result3.setCol0( tmpV4_3 );
+    mCol1.get4Aos( tmpV4_0, tmpV4_1, tmpV4_2, tmpV4_3 ); // column 1
+    result0.setCol1( tmpV4_0 );
+    result1.setCol1( tmpV4_1 );
+    result2.setCol1( tmpV4_2 );
+    result3.setCol1( tmpV4_3 );
+    mCol2.get4Aos( tmpV4_0, tmpV4_1, tmpV4_2, tmpV4_3 ); // column 2
+    result0.setCol2( tmpV4_0 );
+    result1.setCol2( tmpV4_1 );
+    result2.setCol2( tmpV4_2 );
+    result3.setCol2( tmpV4_3 );
+    mCol3.get4Aos( tmpV4_0, tmpV4_1, tmpV4_2, tmpV4_3 ); // column 3
+    result0.setCol3( tmpV4_0 );
+    result1.setCol3( tmpV4_1 );
+    result2.setCol3( tmpV4_2 );
+    result3.setCol3( tmpV4_3 );
+}
+
+inline Matrix4 & Matrix4::setCol0( const Vector4 & _col0 ) // Replaces column 0; returns *this so calls can be chained.
+{
+    mCol0 = _col0;
+    return *this;
+}
+
+inline Matrix4 & Matrix4::setCol1( const Vector4 & _col1 ) // Replaces column 1; returns *this so calls can be chained.
+{
+    mCol1 = _col1;
+    return *this;
+}
+
+inline Matrix4 & Matrix4::setCol2( const Vector4 & _col2 ) // Replaces column 2; returns *this so calls can be chained.
+{
+    mCol2 = _col2;
+    return *this;
+}
+
+inline Matrix4 & Matrix4::setCol3( const Vector4 & _col3 ) // Replaces column 3; returns *this so calls can be chained.
+{
+    mCol3 = _col3;
+    return *this;
+}
+
+inline Matrix4 & Matrix4::setCol( int col, const Vector4 & vec ) // Replaces the column selected by index col; returns *this.
+{
+    *(&mCol0 + col) = vec; // pointer offset from mCol0; col is not range-checked. Assumes mCol0..mCol3 are contiguous members - TODO confirm against the class declaration.
+    return *this;
+}
+
+inline Matrix4 & Matrix4::setRow( int row, const Vector4 & vec ) // Writes vec across row `row`: element i of vec goes into column i; returns *this.
+{
+    mCol0.setElem( row, vec.getElem( 0 ) );
+    mCol1.setElem( row, vec.getElem( 1 ) );
+    mCol2.setElem( row, vec.getElem( 2 ) );
+    mCol3.setElem( row, vec.getElem( 3 ) );
+    return *this;
+}
+
+inline Matrix4 & Matrix4::setElem( int col, int row, vec_float4 val ) // Sets one element, addressed as (col, row); returns *this.
+{
+    // Read-modify-write of one column: getCol returns a copy, so it has to be stored back.
+    Vector4 tmpV4_0 = getCol( col );
+    tmpV4_0.setElem( row, val );
+    setCol( col, tmpV4_0 );
+    return *this;
+}
+
+inline vec_float4 Matrix4::getElem( int col, int row ) const // Returns the element at (col, row): column is selected first, then the row within it.
+{
+    return this->getCol( col ).getElem( row );
+}
+
+inline const Vector4 Matrix4::getCol0( ) const // Returns column 0 by value.
+{
+    return mCol0;
+}
+
+inline const Vector4 Matrix4::getCol1( ) const // Returns column 1 by value.
+{
+    return mCol1;
+}
+
+inline const Vector4 Matrix4::getCol2( ) const // Returns column 2 by value.
+{
+    return mCol2;
+}
+
+inline const Vector4 Matrix4::getCol3( ) const // Returns column 3 by value.
+{
+    return mCol3;
+}
+
+inline const Vector4 Matrix4::getCol( int col ) const // Returns the column selected by index col, by value.
+{
+    return *(&mCol0 + col); // pointer offset from mCol0; col is not range-checked. Assumes contiguous column members - TODO confirm.
+}
+
+inline const Vector4 Matrix4::getRow( int row ) const // Gathers element `row` of each column into a new Vector4.
+{
+    return Vector4( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ), mCol3.getElem( row ) );
+}
+
+inline Vector4 & Matrix4::operator []( int col ) // Mutable access to a column by index; returns a reference into this matrix.
+{
+    return *(&mCol0 + col); // same unchecked pointer-offset indexing as setCol/getCol
+}
+
+inline const Vector4 Matrix4::operator []( int col ) const // Read-only column access by index; returns a copy, not a reference.
+{
+    return *(&mCol0 + col); // same unchecked pointer-offset indexing as getCol
+}
+
+inline Matrix4 & Matrix4::operator =( const Matrix4 & mat ) // Column-wise assignment; returns *this.
+{
+    mCol0 = mat.mCol0;
+    mCol1 = mat.mCol1;
+    mCol2 = mat.mCol2;
+    mCol3 = mat.mCol3;
+    return *this;
+}
+
+// Returns the transpose of mat: column i of the result collects component i
+// (X, Y, Z, W) of each input column.
+inline const Matrix4 transpose( const Matrix4 & mat )
+{
+    const Vector4 c0 = mat.getCol0();
+    const Vector4 c1 = mat.getCol1();
+    const Vector4 c2 = mat.getCol2();
+    const Vector4 c3 = mat.getCol3();
+    return Matrix4(
+        Vector4( c0.getX(), c1.getX(), c2.getX(), c3.getX() ),
+        Vector4( c0.getY(), c1.getY(), c2.getY(), c3.getY() ),
+        Vector4( c0.getZ(), c1.getZ(), c2.getZ(), c3.getZ() ),
+        Vector4( c0.getW(), c1.getW(), c2.getW(), c3.getW() )
+    );
+}
+
+// Returns the inverse of a general 4x4 matrix.
+// The sixteen elements are named mA..mP in column order (mA..mD = column 0,
+// mE..mH = column 1, and so on).  res0 is computed first, the determinant is
+// formed from it, and res1..res3 are built in two passes (partial products, then
+// completion after tmp0..tmp5 are reused), so the statement order matters.
+// There is no check for a zero determinant.
+inline const Matrix4 inverse( const Matrix4 & mat )
+{
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    const vec_float4 one = ((vec_float4){1.0f,1.0f,1.0f,1.0f});
+    const Vector4 c0 = mat.getCol0();
+    const Vector4 c1 = mat.getCol1();
+    const Vector4 c2 = mat.getCol2();
+    const Vector4 c3 = mat.getCol3();
+    const vec_float4 mA = c0.getX(), mB = c0.getY(), mC = c0.getZ(), mD = c0.getW();
+    const vec_float4 mE = c1.getX(), mF = c1.getY(), mG = c1.getZ(), mH = c1.getW();
+    const vec_float4 mI = c2.getX(), mJ = c2.getY(), mK = c2.getZ(), mL = c2.getW();
+    const vec_float4 mM = c3.getX(), mN = c3.getY(), mO = c3.getZ(), mP = c3.getW();
+    Vector4 res0, res1, res2, res3;
+    vec_float4 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, detInv;
+
+    // First set of 2x2 sub-determinants.
+    tmp0 = vec_sub( vec_madd( mK, mD, zero ), vec_madd( mC, mL, zero ) );
+    tmp1 = vec_sub( vec_madd( mO, mH, zero ), vec_madd( mG, mP, zero ) );
+    tmp2 = vec_sub( vec_madd( mB, mK, zero ), vec_madd( mJ, mC, zero ) );
+    tmp3 = vec_sub( vec_madd( mF, mO, zero ), vec_madd( mN, mG, zero ) );
+    tmp4 = vec_sub( vec_madd( mJ, mD, zero ), vec_madd( mB, mL, zero ) );
+    tmp5 = vec_sub( vec_madd( mN, mH, zero ), vec_madd( mF, mP, zero ) );
+
+    // First result column (unscaled).
+    res0.setX( vec_sub( vec_sub( vec_madd( mJ, tmp1, zero ), vec_madd( mL, tmp3, zero ) ), vec_madd( mK, tmp5, zero ) ) );
+    res0.setY( vec_sub( vec_sub( vec_madd( mN, tmp0, zero ), vec_madd( mP, tmp2, zero ) ), vec_madd( mO, tmp4, zero ) ) );
+    res0.setZ( vec_sub( vec_add( vec_madd( mD, tmp3, zero ), vec_madd( mC, tmp5, zero ) ), vec_madd( mB, tmp1, zero ) ) );
+    res0.setW( vec_sub( vec_add( vec_madd( mH, tmp2, zero ), vec_madd( mG, tmp4, zero ) ), vec_madd( mF, tmp0, zero ) ) );
+
+    // Reciprocal of the determinant, expanded along the first row using res0.
+    detInv = divf4( one, vec_add( vec_add( vec_add( vec_madd( mA, res0.getX(), zero ), vec_madd( mE, res0.getY(), zero ) ), vec_madd( mI, res0.getZ(), zero ) ), vec_madd( mM, res0.getW(), zero ) ) );
+
+    // Partial products for the remaining columns, using the first tmp set.
+    res1.setX( vec_madd( mI, tmp1, zero ) );
+    res1.setY( vec_madd( mM, tmp0, zero ) );
+    res1.setZ( vec_madd( mA, tmp1, zero ) );
+    res1.setW( vec_madd( mE, tmp0, zero ) );
+    res3.setX( vec_madd( mI, tmp3, zero ) );
+    res3.setY( vec_madd( mM, tmp2, zero ) );
+    res3.setZ( vec_madd( mA, tmp3, zero ) );
+    res3.setW( vec_madd( mE, tmp2, zero ) );
+    res2.setX( vec_madd( mI, tmp5, zero ) );
+    res2.setY( vec_madd( mM, tmp4, zero ) );
+    res2.setZ( vec_madd( mA, tmp5, zero ) );
+    res2.setW( vec_madd( mE, tmp4, zero ) );
+
+    // Second set of 2x2 sub-determinants (tmp0..tmp5 are reused).
+    tmp0 = vec_sub( vec_madd( mI, mB, zero ), vec_madd( mA, mJ, zero ) );
+    tmp1 = vec_sub( vec_madd( mM, mF, zero ), vec_madd( mE, mN, zero ) );
+    tmp2 = vec_sub( vec_madd( mI, mD, zero ), vec_madd( mA, mL, zero ) );
+    tmp3 = vec_sub( vec_madd( mM, mH, zero ), vec_madd( mE, mP, zero ) );
+    tmp4 = vec_sub( vec_madd( mI, mC, zero ), vec_madd( mA, mK, zero ) );
+    tmp5 = vec_sub( vec_madd( mM, mG, zero ), vec_madd( mE, mO, zero ) );
+
+    // Complete res2, res3 and res1 by combining with the stored partial products.
+    res2.setX( vec_add( vec_sub( vec_madd( mL, tmp1, zero ), vec_madd( mJ, tmp3, zero ) ), res2.getX() ) );
+    res2.setY( vec_add( vec_sub( vec_madd( mP, tmp0, zero ), vec_madd( mN, tmp2, zero ) ), res2.getY() ) );
+    res2.setZ( vec_sub( vec_sub( vec_madd( mB, tmp3, zero ), vec_madd( mD, tmp1, zero ) ), res2.getZ() ) );
+    res2.setW( vec_sub( vec_sub( vec_madd( mF, tmp2, zero ), vec_madd( mH, tmp0, zero ) ), res2.getW() ) );
+    res3.setX( vec_add( vec_sub( vec_madd( mJ, tmp5, zero ), vec_madd( mK, tmp1, zero ) ), res3.getX() ) );
+    res3.setY( vec_add( vec_sub( vec_madd( mN, tmp4, zero ), vec_madd( mO, tmp0, zero ) ), res3.getY() ) );
+    res3.setZ( vec_sub( vec_sub( vec_madd( mC, tmp1, zero ), vec_madd( mB, tmp5, zero ) ), res3.getZ() ) );
+    res3.setW( vec_sub( vec_sub( vec_madd( mG, tmp0, zero ), vec_madd( mF, tmp4, zero ) ), res3.getW() ) );
+    res1.setX( vec_sub( vec_sub( vec_madd( mK, tmp3, zero ), vec_madd( mL, tmp5, zero ) ), res1.getX() ) );
+    res1.setY( vec_sub( vec_sub( vec_madd( mO, tmp2, zero ), vec_madd( mP, tmp4, zero ) ), res1.getY() ) );
+    res1.setZ( vec_add( vec_sub( vec_madd( mD, tmp5, zero ), vec_madd( mC, tmp3, zero ) ), res1.getZ() ) );
+    res1.setW( vec_add( vec_sub( vec_madd( mH, tmp4, zero ), vec_madd( mG, tmp2, zero ) ), res1.getW() ) );
+
+    // Scale every column by 1/det.
+    return Matrix4(
+        res0 * detInv,
+        res1 * detInv,
+        res2 * detInv,
+        res3 * detInv
+    );
+}
+
+// Inverts mat by way of Transform3: the XYZ part of each of the four columns is
+// copied into a Transform3, that transform is inverted with inverse(), and the
+// result is converted back to a Matrix4.  The W components of mat are not read.
+inline const Matrix4 affineInverse( const Matrix4 & mat )
+{
+    const Transform3 affineMat(
+        mat.getCol0().getXYZ( ),
+        mat.getCol1().getXYZ( ),
+        mat.getCol2().getXYZ( ),
+        mat.getCol3().getXYZ( )
+    );
+    return Matrix4( inverse( affineMat ) );
+}
+
+// Inverts mat by way of Transform3: the XYZ part of each of the four columns is
+// copied into a Transform3, the Transform3 overload of orthoInverse() is applied,
+// and the result is converted back to a Matrix4.  The W components of mat are
+// not read.
+inline const Matrix4 orthoInverse( const Matrix4 & mat )
+{
+    const Transform3 affineMat(
+        mat.getCol0().getXYZ( ),
+        mat.getCol1().getXYZ( ),
+        mat.getCol2().getXYZ( ),
+        mat.getCol3().getXYZ( )
+    );
+    return Matrix4( orthoInverse( affineMat ) );
+}
+
+// Returns the determinant of mat.
+// Elements are named mA..mP in column order.  Six 2x2 sub-determinants feed the
+// four terms dx..dw, which are then combined with the first row (mA, mE, mI, mM).
+inline vec_float4 determinant( const Matrix4 & mat )
+{
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    const Vector4 c0 = mat.getCol0();
+    const Vector4 c1 = mat.getCol1();
+    const Vector4 c2 = mat.getCol2();
+    const Vector4 c3 = mat.getCol3();
+    const vec_float4 mA = c0.getX(), mB = c0.getY(), mC = c0.getZ(), mD = c0.getW();
+    const vec_float4 mE = c1.getX(), mF = c1.getY(), mG = c1.getZ(), mH = c1.getW();
+    const vec_float4 mI = c2.getX(), mJ = c2.getY(), mK = c2.getZ(), mL = c2.getW();
+    const vec_float4 mM = c3.getX(), mN = c3.getY(), mO = c3.getZ(), mP = c3.getW();
+
+    const vec_float4 tmp0 = vec_sub( vec_madd( mK, mD, zero ), vec_madd( mC, mL, zero ) );
+    const vec_float4 tmp1 = vec_sub( vec_madd( mO, mH, zero ), vec_madd( mG, mP, zero ) );
+    const vec_float4 tmp2 = vec_sub( vec_madd( mB, mK, zero ), vec_madd( mJ, mC, zero ) );
+    const vec_float4 tmp3 = vec_sub( vec_madd( mF, mO, zero ), vec_madd( mN, mG, zero ) );
+    const vec_float4 tmp4 = vec_sub( vec_madd( mJ, mD, zero ), vec_madd( mB, mL, zero ) );
+    const vec_float4 tmp5 = vec_sub( vec_madd( mN, mH, zero ), vec_madd( mF, mP, zero ) );
+
+    const vec_float4 dx = vec_sub( vec_sub( vec_madd( mJ, tmp1, zero ), vec_madd( mL, tmp3, zero ) ), vec_madd( mK, tmp5, zero ) );
+    const vec_float4 dy = vec_sub( vec_sub( vec_madd( mN, tmp0, zero ), vec_madd( mP, tmp2, zero ) ), vec_madd( mO, tmp4, zero ) );
+    const vec_float4 dz = vec_sub( vec_add( vec_madd( mD, tmp3, zero ), vec_madd( mC, tmp5, zero ) ), vec_madd( mB, tmp1, zero ) );
+    const vec_float4 dw = vec_sub( vec_add( vec_madd( mH, tmp2, zero ), vec_madd( mG, tmp4, zero ) ), vec_madd( mF, tmp0, zero ) );
+
+    return vec_add( vec_add( vec_add( vec_madd( mA, dx, zero ), vec_madd( mE, dy, zero ) ), vec_madd( mI, dz, zero ) ), vec_madd( mM, dw, zero ) );
+}
+
+// Element-wise sum of two matrices, computed column by column.
+inline const Matrix4 Matrix4::operator +( const Matrix4 & mat ) const
+{
+    const Vector4 sum0 = mCol0 + mat.mCol0;
+    const Vector4 sum1 = mCol1 + mat.mCol1;
+    const Vector4 sum2 = mCol2 + mat.mCol2;
+    const Vector4 sum3 = mCol3 + mat.mCol3;
+    return Matrix4( sum0, sum1, sum2, sum3 );
+}
+
+// Element-wise difference (this minus mat), computed column by column.
+inline const Matrix4 Matrix4::operator -( const Matrix4 & mat ) const
+{
+    const Vector4 diff0 = mCol0 - mat.mCol0;
+    const Vector4 diff1 = mCol1 - mat.mCol1;
+    const Vector4 diff2 = mCol2 - mat.mCol2;
+    const Vector4 diff3 = mCol3 - mat.mCol3;
+    return Matrix4( diff0, diff1, diff2, diff3 );
+}
+
+// In-place addition, implemented through operator + and assignment.
+inline Matrix4 & Matrix4::operator +=( const Matrix4 & mat )
+{
+    return ( *this = *this + mat );
+}
+
+// In-place subtraction, implemented through operator - and assignment.
+inline Matrix4 & Matrix4::operator -=( const Matrix4 & mat )
+{
+    return ( *this = *this - mat );
+}
+
+// Unary minus: returns a matrix whose columns are the negated columns of this one.
+inline const Matrix4 Matrix4::operator -( ) const
+{
+    const Vector4 neg0 = -mCol0;
+    const Vector4 neg1 = -mCol1;
+    const Vector4 neg2 = -mCol2;
+    const Vector4 neg3 = -mCol3;
+    return Matrix4( neg0, neg1, neg2, neg3 );
+}
+
+// Applies the Vector4 absPerElem() to each column of mat and returns the result.
+inline const Matrix4 absPerElem( const Matrix4 & mat )
+{
+    const Vector4 abs0 = absPerElem( mat.getCol0() );
+    const Vector4 abs1 = absPerElem( mat.getCol1() );
+    const Vector4 abs2 = absPerElem( mat.getCol2() );
+    const Vector4 abs3 = absPerElem( mat.getCol3() );
+    return Matrix4( abs0, abs1, abs2, abs3 );
+}
+
+// Multiplies every column of the matrix by scalar.
+inline const Matrix4 Matrix4::operator *( vec_float4 scalar ) const
+{
+    const Vector4 scaled0 = mCol0 * scalar;
+    const Vector4 scaled1 = mCol1 * scalar;
+    const Vector4 scaled2 = mCol2 * scalar;
+    const Vector4 scaled3 = mCol3 * scalar;
+    return Matrix4( scaled0, scaled1, scaled2, scaled3 );
+}
+
+// In-place scalar multiplication, implemented through operator * and assignment.
+inline Matrix4 & Matrix4::operator *=( vec_float4 scalar )
+{
+    return ( *this = *this * scalar );
+}
+
+// Scalar-on-the-left multiplication; forwards to Matrix4::operator *( vec_float4 ).
+inline const Matrix4 operator *( vec_float4 scalar, const Matrix4 & mat )
+{
+    const Matrix4 product = mat * scalar;
+    return product;
+}
+
+// Matrix * 4-vector.  Each output component is the sum over the four columns of
+// (that component of the column) * (matching element of vec), accumulated in
+// column order 0, 1, 2, 3.
+inline const Vector4 Matrix4::operator *( const Vector4 & vec ) const
+{
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    const vec_float4 vx = vec.getX();
+    const vec_float4 vy = vec.getY();
+    const vec_float4 vz = vec.getZ();
+    const vec_float4 vw = vec.getW();
+    const vec_float4 outX = vec_add( vec_add( vec_add( vec_madd( mCol0.getX(), vx, zero ), vec_madd( mCol1.getX(), vy, zero ) ), vec_madd( mCol2.getX(), vz, zero ) ), vec_madd( mCol3.getX(), vw, zero ) );
+    const vec_float4 outY = vec_add( vec_add( vec_add( vec_madd( mCol0.getY(), vx, zero ), vec_madd( mCol1.getY(), vy, zero ) ), vec_madd( mCol2.getY(), vz, zero ) ), vec_madd( mCol3.getY(), vw, zero ) );
+    const vec_float4 outZ = vec_add( vec_add( vec_add( vec_madd( mCol0.getZ(), vx, zero ), vec_madd( mCol1.getZ(), vy, zero ) ), vec_madd( mCol2.getZ(), vz, zero ) ), vec_madd( mCol3.getZ(), vw, zero ) );
+    const vec_float4 outW = vec_add( vec_add( vec_add( vec_madd( mCol0.getW(), vx, zero ), vec_madd( mCol1.getW(), vy, zero ) ), vec_madd( mCol2.getW(), vz, zero ) ), vec_madd( mCol3.getW(), vw, zero ) );
+    return Vector4( outX, outY, outZ, outW );
+}
+
+// Matrix * 3-vector.  Only columns 0..2 take part; column 3 is not added, so
+// the result is the same as multiplying by a 4-vector whose W is zero.
+inline const Vector4 Matrix4::operator *( const Vector3 & vec ) const
+{
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    const vec_float4 vx = vec.getX();
+    const vec_float4 vy = vec.getY();
+    const vec_float4 vz = vec.getZ();
+    const vec_float4 outX = vec_add( vec_add( vec_madd( mCol0.getX(), vx, zero ), vec_madd( mCol1.getX(), vy, zero ) ), vec_madd( mCol2.getX(), vz, zero ) );
+    const vec_float4 outY = vec_add( vec_add( vec_madd( mCol0.getY(), vx, zero ), vec_madd( mCol1.getY(), vy, zero ) ), vec_madd( mCol2.getY(), vz, zero ) );
+    const vec_float4 outZ = vec_add( vec_add( vec_madd( mCol0.getZ(), vx, zero ), vec_madd( mCol1.getZ(), vy, zero ) ), vec_madd( mCol2.getZ(), vz, zero ) );
+    const vec_float4 outW = vec_add( vec_add( vec_madd( mCol0.getW(), vx, zero ), vec_madd( mCol1.getW(), vy, zero ) ), vec_madd( mCol2.getW(), vz, zero ) );
+    return Vector4( outX, outY, outZ, outW );
+}
+
+// Matrix * point.  Columns 0..2 are weighted by the point's X, Y, Z and column 3
+// is added unscaled, i.e. the point is treated as having W equal to one.
+inline const Vector4 Matrix4::operator *( const Point3 & pnt ) const
+{
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    const vec_float4 px = pnt.getX();
+    const vec_float4 py = pnt.getY();
+    const vec_float4 pz = pnt.getZ();
+    const vec_float4 outX = vec_add( vec_add( vec_add( vec_madd( mCol0.getX(), px, zero ), vec_madd( mCol1.getX(), py, zero ) ), vec_madd( mCol2.getX(), pz, zero ) ), mCol3.getX() );
+    const vec_float4 outY = vec_add( vec_add( vec_add( vec_madd( mCol0.getY(), px, zero ), vec_madd( mCol1.getY(), py, zero ) ), vec_madd( mCol2.getY(), pz, zero ) ), mCol3.getY() );
+    const vec_float4 outZ = vec_add( vec_add( vec_add( vec_madd( mCol0.getZ(), px, zero ), vec_madd( mCol1.getZ(), py, zero ) ), vec_madd( mCol2.getZ(), pz, zero ) ), mCol3.getZ() );
+    const vec_float4 outW = vec_add( vec_add( vec_add( vec_madd( mCol0.getW(), px, zero ), vec_madd( mCol1.getW(), py, zero ) ), vec_madd( mCol2.getW(), pz, zero ) ), mCol3.getW() );
+    return Vector4( outX, outY, outZ, outW );
+}
+
+// Matrix product this * mat: each result column is this matrix applied to the
+// corresponding column of mat.
+inline const Matrix4 Matrix4::operator *( const Matrix4 & mat ) const
+{
+    const Matrix4 & lhs = *this;
+    return Matrix4(
+        lhs * mat.mCol0,
+        lhs * mat.mCol1,
+        lhs * mat.mCol2,
+        lhs * mat.mCol3
+    );
+}
+
+// In-place matrix product (this = this * mat); the product is fully formed
+// before it is assigned back.
+inline Matrix4 & Matrix4::operator *=( const Matrix4 & mat )
+{
+    return ( *this = *this * mat );
+}
+
+// Product of this matrix with a Transform3.  Columns 0..2 of tfrm are multiplied
+// as 3-vectors; column 3 is wrapped in a Point3 first so that the Point3
+// overload (which adds this matrix's column 3) is used for it.
+inline const Matrix4 Matrix4::operator *( const Transform3 & tfrm ) const
+{
+    const Matrix4 & lhs = *this;
+    return Matrix4(
+        lhs * tfrm.getCol0(),
+        lhs * tfrm.getCol1(),
+        lhs * tfrm.getCol2(),
+        lhs * Point3( tfrm.getCol3() )
+    );
+}
+
+// In-place product with a Transform3 (this = this * tfrm).
+inline Matrix4 & Matrix4::operator *=( const Transform3 & tfrm )
+{
+    return ( *this = *this * tfrm );
+}
+
+// Applies the Vector4 mulPerElem() to each pair of matching columns of mat0 and
+// mat1 (this is not the matrix product).
+inline const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 )
+{
+    const Vector4 prod0 = mulPerElem( mat0.getCol0(), mat1.getCol0() );
+    const Vector4 prod1 = mulPerElem( mat0.getCol1(), mat1.getCol1() );
+    const Vector4 prod2 = mulPerElem( mat0.getCol2(), mat1.getCol2() );
+    const Vector4 prod3 = mulPerElem( mat0.getCol3(), mat1.getCol3() );
+    return Matrix4( prod0, prod1, prod2, prod3 );
+}
+
+// Builds a matrix whose columns are Vector4::xAxis(), yAxis(), zAxis() and wAxis().
+inline const Matrix4 Matrix4::identity( )
+{
+    const Vector4 col0 = Vector4::xAxis( );
+    const Vector4 col1 = Vector4::yAxis( );
+    const Vector4 col2 = Vector4::zAxis( );
+    const Vector4 col3 = Vector4::wAxis( );
+    return Matrix4( col0, col1, col2, col3 );
+}
+
+// Copies the three columns of mat3 into the XYZ parts of columns 0..2 via
+// setXYZ(); column 3 is not touched.  Returns *this.
+inline Matrix4 & Matrix4::setUpper3x3( const Matrix3 & mat3 )
+{
+    this->mCol0.setXYZ( mat3.getCol0() );
+    this->mCol1.setXYZ( mat3.getCol1() );
+    this->mCol2.setXYZ( mat3.getCol2() );
+    return *this;
+}
+
+// Returns a Matrix3 built from the XYZ parts of columns 0..2.
+inline const Matrix3 Matrix4::getUpper3x3( ) const
+{
+    const Vector3 upper0 = mCol0.getXYZ( );
+    const Vector3 upper1 = mCol1.getXYZ( );
+    const Vector3 upper2 = mCol2.getXYZ( );
+    return Matrix3( upper0, upper1, upper2 );
+}
+
+// Stores translateVec into the XYZ part of column 3 via setXYZ().  Returns *this.
+inline Matrix4 & Matrix4::setTranslation( const Vector3 & translateVec )
+{
+    this->mCol3.setXYZ( translateVec );
+    return *this;
+}
+
+// Returns the XYZ part of column 3.
+inline const Vector3 Matrix4::getTranslation( ) const
+{
+    const Vector3 translation = mCol3.getXYZ( );
+    return translation;
+}
+
+// Builds a rotation about the X axis by `radians`.
+// Columns: xAxis, (0, c, s, 0), (0, -s, c, 0), wAxis, with s/c from sincosf4().
+inline const Matrix4 Matrix4::rotationX( vec_float4 radians )
+{
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vec_float4 s, c;
+    sincosf4( radians, &s, &c );
+    return Matrix4(
+        Vector4::xAxis( ),
+        Vector4( zero, c, s, zero ),
+        Vector4( zero, negatef4( s ), c, zero ),
+        Vector4::wAxis( )
+    );
+}
+
+// Builds a rotation about the Y axis by `radians`.
+// Columns: (c, 0, -s, 0), yAxis, (s, 0, c, 0), wAxis, with s/c from sincosf4().
+inline const Matrix4 Matrix4::rotationY( vec_float4 radians )
+{
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vec_float4 s, c;
+    sincosf4( radians, &s, &c );
+    return Matrix4(
+        Vector4( c, zero, negatef4( s ), zero ),
+        Vector4::yAxis( ),
+        Vector4( s, zero, c, zero ),
+        Vector4::wAxis( )
+    );
+}
+
+// Builds a rotation about the Z axis by `radians`.
+// Columns: (c, s, 0, 0), (-s, c, 0, 0), zAxis, wAxis, with s/c from sincosf4().
+inline const Matrix4 Matrix4::rotationZ( vec_float4 radians )
+{
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vec_float4 s, c;
+    sincosf4( radians, &s, &c );
+    return Matrix4(
+        Vector4( c, s, zero, zero ),
+        Vector4( negatef4( s ), c, zero, zero ),
+        Vector4::zAxis( ),
+        Vector4::wAxis( )
+    );
+}
+
+// Builds a rotation from three angles held in radiansXYZ (X, Y and Z components).
+// The sine/cosine of each angle is taken once and the columns are written out in
+// closed form; cZ*sY and sZ*sY are shared between columns 1 and 2.
+inline const Matrix4 Matrix4::rotationZYX( const Vector3 & radiansXYZ )
+{
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vec_float4 sX, cX, sY, cY, sZ, cZ;
+    sincosf4( radiansXYZ.getX(), &sX, &cX );
+    sincosf4( radiansXYZ.getY(), &sY, &cY );
+    sincosf4( radiansXYZ.getZ(), &sZ, &cZ );
+    const vec_float4 cZsY = vec_madd( cZ, sY, zero );
+    const vec_float4 sZsY = vec_madd( sZ, sY, zero );
+    return Matrix4(
+        Vector4(
+            vec_madd( cZ, cY, zero ),
+            vec_madd( sZ, cY, zero ),
+            negatef4( sY ),
+            zero ),
+        Vector4(
+            vec_sub( vec_madd( cZsY, sX, zero ), vec_madd( sZ, cX, zero ) ),
+            vec_add( vec_madd( sZsY, sX, zero ), vec_madd( cZ, cX, zero ) ),
+            vec_madd( cY, sX, zero ),
+            zero ),
+        Vector4(
+            vec_add( vec_madd( cZsY, cX, zero ), vec_madd( sZ, sX, zero ) ),
+            vec_sub( vec_madd( sZsY, cX, zero ), vec_madd( cZ, sX, zero ) ),
+            vec_madd( cY, cX, zero ),
+            zero ),
+        Vector4::wAxis( )
+    );
+}
+
+// Builds a rotation of `radians` about the axis unitVec.
+// The parameter name indicates the axis should be unit length; it is not
+// normalized here.  Diagonal terms are axis^2 * (1 - c) + c; off-diagonal terms
+// pair a product of two axis components times (1 - c) with the remaining
+// component times s.
+inline const Matrix4 Matrix4::rotation( vec_float4 radians, const Vector3 & unitVec )
+{
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    vec_float4 s, c;
+    sincosf4( radians, &s, &c );
+    const vec_float4 x = unitVec.getX();
+    const vec_float4 y = unitVec.getY();
+    const vec_float4 z = unitVec.getZ();
+    const vec_float4 xy = vec_madd( x, y, zero );
+    const vec_float4 yz = vec_madd( y, z, zero );
+    const vec_float4 zx = vec_madd( z, x, zero );
+    const vec_float4 oneMinusC = vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), c );
+    return Matrix4(
+        Vector4(
+            vec_add( vec_madd( vec_madd( x, x, zero ), oneMinusC, zero ), c ),
+            vec_add( vec_madd( xy, oneMinusC, zero ), vec_madd( z, s, zero ) ),
+            vec_sub( vec_madd( zx, oneMinusC, zero ), vec_madd( y, s, zero ) ),
+            zero ),
+        Vector4(
+            vec_sub( vec_madd( xy, oneMinusC, zero ), vec_madd( z, s, zero ) ),
+            vec_add( vec_madd( vec_madd( y, y, zero ), oneMinusC, zero ), c ),
+            vec_add( vec_madd( yz, oneMinusC, zero ), vec_madd( x, s, zero ) ),
+            zero ),
+        Vector4(
+            vec_add( vec_madd( zx, oneMinusC, zero ), vec_madd( y, s, zero ) ),
+            vec_sub( vec_madd( yz, oneMinusC, zero ), vec_madd( x, s, zero ) ),
+            vec_add( vec_madd( vec_madd( z, z, zero ), oneMinusC, zero ), c ),
+            zero ),
+        Vector4::wAxis( )
+    );
+}
+
+// Builds a rotation matrix from a quaternion by delegating to
+// Transform3::rotation() and converting the result to a Matrix4.
+inline const Matrix4 Matrix4::rotation( const Quat & unitQuat )
+{
+    const Transform3 rotationTfrm = Transform3::rotation( unitQuat );
+    return Matrix4( rotationTfrm );
+}
+
+// Builds a scale matrix: the X, Y, Z components of scaleVec go on the diagonal
+// of columns 0..2, every other element of those columns is zero, and column 3
+// is Vector4::wAxis().
+inline const Matrix4 Matrix4::scale( const Vector3 & scaleVec )
+{
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    return Matrix4(
+        Vector4( scaleVec.getX(), zero, zero, zero ),
+        Vector4( zero, scaleVec.getY(), zero, zero ),
+        Vector4( zero, zero, scaleVec.getZ(), zero ),
+        Vector4::wAxis( )
+    );
+}
+
+// Scales columns 0, 1, 2 of mat by the X, Y, Z components of scaleVec
+// respectively; column 3 is passed through unchanged.
+inline const Matrix4 appendScale( const Matrix4 & mat, const Vector3 & scaleVec )
+{
+    const Vector4 scaled0 = mat.getCol0() * scaleVec.getX( );
+    const Vector4 scaled1 = mat.getCol1() * scaleVec.getY( );
+    const Vector4 scaled2 = mat.getCol2() * scaleVec.getZ( );
+    return Matrix4( scaled0, scaled1, scaled2, mat.getCol3() );
+}
+
+// Multiplies every column of mat, element by element, with (scaleVec, 1), so
+// rows 0..2 are scaled and the fourth row is multiplied by one.
+inline const Matrix4 prependScale( const Vector3 & scaleVec, const Matrix4 & mat )
+{
+    const Vector4 scale4( scaleVec, ((vec_float4){1.0f,1.0f,1.0f,1.0f}) );
+    const Vector4 scaled0 = mulPerElem( mat.getCol0(), scale4 );
+    const Vector4 scaled1 = mulPerElem( mat.getCol1(), scale4 );
+    const Vector4 scaled2 = mulPerElem( mat.getCol2(), scale4 );
+    const Vector4 scaled3 = mulPerElem( mat.getCol3(), scale4 );
+    return Matrix4( scaled0, scaled1, scaled2, scaled3 );
+}
+
+// Builds a translation matrix: columns 0..2 are the x/y/z axis vectors and
+// column 3 is (translateVec, 1).
+inline const Matrix4 Matrix4::translation( const Vector3 & translateVec )
+{
+    const Vector4 lastCol( translateVec, ((vec_float4){1.0f,1.0f,1.0f,1.0f}) );
+    return Matrix4(
+        Vector4::xAxis( ),
+        Vector4::yAxis( ),
+        Vector4::zAxis( ),
+        lastCol
+    );
+}
+
+// Builds a viewing matrix from an eye position, a target position and an up
+// vector.  An eye frame is assembled with Z pointing from the target towards
+// the eye, X = normalize(up x Z), Y = Z x X and the eye position as the fourth
+// column; the result is orthoInverse() of that frame.
+inline const Matrix4 Matrix4::lookAt( const Point3 & eyePos, const Point3 & lookAtPos, const Vector3 & upVec )
+{
+    const Vector3 upDir = normalize( upVec );
+    const Vector3 v3Z = normalize( ( eyePos - lookAtPos ) );
+    const Vector3 v3X = normalize( cross( upDir, v3Z ) );
+    // Recompute Y from Z and X rather than using the normalized up vector directly.
+    const Vector3 v3Y = cross( v3Z, v3X );
+    const Matrix4 m4EyeFrame( Vector4( v3X ), Vector4( v3Y ), Vector4( v3Z ), Vector4( eyePos ) );
+    return orthoInverse( m4EyeFrame );
+}
+
+// Builds a perspective projection from a vertical field of view (radians), an
+// aspect ratio and near/far distances.
+// f = tan( pi/2 - fovy/2 ); rangeInv = 1 / ( zNear - zFar ).
+// There is no guard against zNear == zFar or aspect == 0.
+inline const Matrix4 Matrix4::perspective( vec_float4 fovyRadians, vec_float4 aspect, vec_float4 zNear, vec_float4 zFar )
+{
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    const vec_float4 one = ((vec_float4){1.0f,1.0f,1.0f,1.0f});
+    const vec_float4 two = ((vec_float4){2.0f,2.0f,2.0f,2.0f});
+    const vec_float4 half = ((vec_float4){0.5f,0.5f,0.5f,0.5f});
+    const vec_float4 minusOne = ((vec_float4){-1.0f,-1.0f,-1.0f,-1.0f});
+    const vec_float4 piOver2 = ((vec_float4){_VECTORMATH_PI_OVER_2,_VECTORMATH_PI_OVER_2,_VECTORMATH_PI_OVER_2,_VECTORMATH_PI_OVER_2});
+    const vec_float4 f = tanf4( vec_sub( piOver2, vec_madd( half, fovyRadians, zero ) ) );
+    const vec_float4 rangeInv = divf4( one, vec_sub( zNear, zFar ) );
+    return Matrix4(
+        Vector4( divf4( f, aspect ), zero, zero, zero ),
+        Vector4( zero, f, zero, zero ),
+        Vector4( zero, zero, vec_madd( vec_add( zNear, zFar ), rangeInv, zero ), minusOne ),
+        Vector4( zero, zero, vec_madd( vec_madd( vec_madd( zNear, zFar, zero ), rangeInv, zero ), two, zero ), zero )
+    );
+}
+
+// Builds a perspective projection from the left/right/bottom/top extents and
+// the near/far distances.
+// Uses the sums (right+left, top+bottom, near+far), the reciprocals of
+// (right-left), (top-bottom), (near-far), and n2 = 2 * zNear.
+// There is no guard against a zero extent in any of the three differences.
+inline const Matrix4 Matrix4::frustum( vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar )
+{
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    const vec_float4 one = ((vec_float4){1.0f,1.0f,1.0f,1.0f});
+    const vec_float4 minusOne = ((vec_float4){-1.0f,-1.0f,-1.0f,-1.0f});
+    const vec_float4 sum_rl = vec_add( right, left );
+    const vec_float4 sum_tb = vec_add( top, bottom );
+    const vec_float4 sum_nf = vec_add( zNear, zFar );
+    const vec_float4 inv_rl = divf4( one, vec_sub( right, left ) );
+    const vec_float4 inv_tb = divf4( one, vec_sub( top, bottom ) );
+    const vec_float4 inv_nf = divf4( one, vec_sub( zNear, zFar ) );
+    const vec_float4 n2 = vec_add( zNear, zNear );
+    return Matrix4(
+        Vector4( vec_madd( n2, inv_rl, zero ), zero, zero, zero ),
+        Vector4( zero, vec_madd( n2, inv_tb, zero ), zero, zero ),
+        Vector4( vec_madd( sum_rl, inv_rl, zero ), vec_madd( sum_tb, inv_tb, zero ), vec_madd( sum_nf, inv_nf, zero ), minusOne ),
+        Vector4( zero, zero, vec_madd( vec_madd( n2, inv_nf, zero ), zFar, zero ), zero )
+    );
+}
+
+// Builds an orthographic projection from the left/right/bottom/top extents and
+// the near/far distances.
+// The diagonal holds 2/(right-left), 2/(top-bottom), 2/(near-far), each formed
+// as inv + inv; the last column holds the offsets -(r+l)/(r-l), -(t+b)/(t-b),
+// (n+f)/(n-f) and 1.
+// There is no guard against a zero extent in any of the three differences.
+inline const Matrix4 Matrix4::orthographic( vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar )
+{
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    const vec_float4 one = ((vec_float4){1.0f,1.0f,1.0f,1.0f});
+    const vec_float4 sum_rl = vec_add( right, left );
+    const vec_float4 sum_tb = vec_add( top, bottom );
+    const vec_float4 sum_nf = vec_add( zNear, zFar );
+    const vec_float4 inv_rl = divf4( one, vec_sub( right, left ) );
+    const vec_float4 inv_tb = divf4( one, vec_sub( top, bottom ) );
+    const vec_float4 inv_nf = divf4( one, vec_sub( zNear, zFar ) );
+    return Matrix4(
+        Vector4( vec_add( inv_rl, inv_rl ), zero, zero, zero ),
+        Vector4( zero, vec_add( inv_tb, inv_tb ), zero, zero ),
+        Vector4( zero, zero, vec_add( inv_nf, inv_nf ), zero ),
+        Vector4( vec_madd( negatef4( sum_rl ), inv_rl, zero ), vec_madd( negatef4( sum_tb ), inv_tb, zero ), vec_madd( sum_nf, inv_nf, zero ), one )
+    );
+}
+
+// Applies the Vector4 select() to each pair of matching columns of mat0 and
+// mat1, passing the same select1 mask to every column.
+inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, vec_uint4 select1 )
+{
+    const Vector4 picked0 = select( mat0.getCol0(), mat1.getCol0(), select1 );
+    const Vector4 picked1 = select( mat0.getCol1(), mat1.getCol1(), select1 );
+    const Vector4 picked2 = select( mat0.getCol2(), mat1.getCol2(), select1 );
+    const Vector4 picked3 = select( mat0.getCol3(), mat1.getCol3(), select1 );
+    return Matrix4( picked0, picked1, picked2, picked3 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+// Debug helper: splits mat into four Aos::Matrix4 values with get4Aos() and
+// prints each one under a "slot N:" heading, for N = 0..3.
+inline void print( const Matrix4 & mat )
+{
+    Aos::Matrix4 slot0, slot1, slot2, slot3;
+    mat.get4Aos( slot0, slot1, slot2, slot3 );
+
+    printf("slot 0:\n");
+    print( slot0 );
+
+    printf("slot 1:\n");
+    print( slot1 );
+
+    printf("slot 2:\n");
+    print( slot2 );
+
+    printf("slot 3:\n");
+    print( slot3 );
+}
+
+// Debug helper: prints `name` followed by a colon on its own line, then the
+// matrix itself via print( mat ).
+inline void print( const Matrix4 & mat, const char * name )
+{
+    printf("%s:\n", name);
+    print( mat );
+}
+
+#endif
+
+// Copy constructor: copies all four columns from tfrm.
+inline Transform3::Transform3( const Transform3 & tfrm )
+{
+    this->mCol0 = tfrm.mCol0;
+    this->mCol1 = tfrm.mCol1;
+    this->mCol2 = tfrm.mCol2;
+    this->mCol3 = tfrm.mCol3;
+}
+
+// Sets every column to Vector3( scalar ).
+inline Transform3::Transform3( vec_float4 scalar )
+{
+    const Vector3 filled = Vector3( scalar );
+    mCol0 = filled;
+    mCol1 = filled;
+    mCol2 = filled;
+    mCol3 = filled;
+}
+
+// Constructs a transform directly from its four columns.
+inline Transform3::Transform3( const Vector3 & _col0, const Vector3 & _col1, const Vector3 & _col2, const Vector3 & _col3 )
+{
+    this->mCol0 = _col0;
+    this->mCol1 = _col1;
+    this->mCol2 = _col2;
+    this->mCol3 = _col3;
+}
+
+// Constructs a transform from a 3x3 matrix and a translation, using
+// setUpper3x3() and setTranslation().
+inline Transform3::Transform3( const Matrix3 & tfrm, const Vector3 & translateVec )
+{
+    setUpper3x3( tfrm );
+    setTranslation( translateVec );
+}
+
+// Constructs a transform from a quaternion and a translation: the quaternion is
+// converted to a Matrix3 for the upper 3x3 part, then the translation is set.
+inline Transform3::Transform3( const Quat & unitQuat, const Vector3 & translateVec )
+{
+    const Matrix3 rotationPart = Matrix3( unitQuat );
+    setUpper3x3( rotationPart );
+    setTranslation( translateVec );
+}
+
+// Constructs from a single Aos::Transform3: each column is converted with the
+// Vector3 constructor that takes one AoS column.
+inline Transform3::Transform3( const Aos::Transform3 & tfrm )
+{
+    this->mCol0 = Vector3( tfrm.getCol0() );
+    this->mCol1 = Vector3( tfrm.getCol1() );
+    this->mCol2 = Vector3( tfrm.getCol2() );
+    this->mCol3 = Vector3( tfrm.getCol3() );
+}
+
+// Constructs from four Aos::Transform3 values: column i is built from column i
+// of tfrm0..tfrm3, in that order.
+inline Transform3::Transform3( const Aos::Transform3 & tfrm0, const Aos::Transform3 & tfrm1, const Aos::Transform3 & tfrm2, const Aos::Transform3 & tfrm3 )
+{
+    this->mCol0 = Vector3( tfrm0.getCol0(), tfrm1.getCol0(), tfrm2.getCol0(), tfrm3.getCol0() );
+    this->mCol1 = Vector3( tfrm0.getCol1(), tfrm1.getCol1(), tfrm2.getCol1(), tfrm3.getCol1() );
+    this->mCol2 = Vector3( tfrm0.getCol2(), tfrm1.getCol2(), tfrm2.getCol2(), tfrm3.getCol2() );
+    this->mCol3 = Vector3( tfrm0.getCol3(), tfrm1.getCol3(), tfrm2.getCol3(), tfrm3.getCol3() );
+}
+
+// Splits this transform into four Aos::Transform3 results.
+// Each column is split with Vector3::get4Aos() into four Aos::Vector3 values,
+// which are stored as that column of result0..result3 respectively.  The four
+// temporaries are reused for every column.
+inline void Transform3::get4Aos( Aos::Transform3 & result0, Aos::Transform3 & result1, Aos::Transform3 & result2, Aos::Transform3 & result3 ) const
+{
+    Aos::Vector3 part0, part1, part2, part3;
+
+    // Column 0
+    mCol0.get4Aos( part0, part1, part2, part3 );
+    result0.setCol0( part0 );
+    result1.setCol0( part1 );
+    result2.setCol0( part2 );
+    result3.setCol0( part3 );
+
+    // Column 1
+    mCol1.get4Aos( part0, part1, part2, part3 );
+    result0.setCol1( part0 );
+    result1.setCol1( part1 );
+    result2.setCol1( part2 );
+    result3.setCol1( part3 );
+
+    // Column 2
+    mCol2.get4Aos( part0, part1, part2, part3 );
+    result0.setCol2( part0 );
+    result1.setCol2( part1 );
+    result2.setCol2( part2 );
+    result3.setCol2( part3 );
+
+    // Column 3
+    mCol3.get4Aos( part0, part1, part2, part3 );
+    result0.setCol3( part0 );
+    result1.setCol3( part1 );
+    result2.setCol3( part2 );
+    result3.setCol3( part3 );
+}
+
+inline Transform3 & Transform3::setCol0( const Vector3 & _col0 ) // Replaces column 0; returns *this so calls can be chained.
+{
+    mCol0 = _col0;
+    return *this;
+}
+
+inline Transform3 & Transform3::setCol1( const Vector3 & _col1 ) // Replaces column 1; returns *this so calls can be chained.
+{
+    mCol1 = _col1;
+    return *this;
+}
+
+inline Transform3 & Transform3::setCol2( const Vector3 & _col2 ) // Replaces column 2; returns *this so calls can be chained.
+{
+    mCol2 = _col2;
+    return *this;
+}
+
+inline Transform3 & Transform3::setCol3( const Vector3 & _col3 ) // Replaces column 3 (the translation column); returns *this.
+{
+    mCol3 = _col3;
+    return *this;
+}
+
+inline Transform3 & Transform3::setCol( int col, const Vector3 & vec )
+{
+    ( &mCol0 )[ col ] = vec;   // columns are addressed as an array starting at mCol0; col is not range-checked
+    return *this;
+}
+
+inline Transform3 & Transform3::setRow( int row, const Vector4 & vec ) // Overwrites one row across all four columns; returns *this.
+{
+    mCol0.setElem( row, vec.getElem( 0 ) ); // element i of vec lands in element `row` of column i
+    mCol1.setElem( row, vec.getElem( 1 ) );
+    mCol2.setElem( row, vec.getElem( 2 ) );
+    mCol3.setElem( row, vec.getElem( 3 ) );
+    return *this;
+}
+
+inline Transform3 & Transform3::setElem( int col, int row, vec_float4 val )
+{
+    // Read-modify-write of a single column: fetch a copy, patch one
+    // element, then store the column back. Returns *this.
+    Vector3 column = getCol( col );
+    column.setElem( row, val );
+    return setCol( col, column );
+}
+
+inline vec_float4 Transform3::getElem( int col, int row ) const // Returns element `row` of column `col`.
+{
+    return this->getCol( col ).getElem( row ); // column is fetched by value, then indexed
+}
+
+inline const Vector3 Transform3::getCol0( ) const // Returns a copy of column 0.
+{
+    return mCol0;
+}
+
+inline const Vector3 Transform3::getCol1( ) const // Returns a copy of column 1.
+{
+    return mCol1;
+}
+
+inline const Vector3 Transform3::getCol2( ) const // Returns a copy of column 2.
+{
+    return mCol2;
+}
+
+inline const Vector3 Transform3::getCol3( ) const // Returns a copy of column 3 (the translation column).
+{
+    return mCol3;
+}
+
+inline const Vector3 Transform3::getCol( int col ) const
+{
+    return ( &mCol0 )[ col ];   // copy of the col-th column, counted from mCol0; col is not range-checked
+}
+
+inline const Vector4 Transform3::getRow( int row ) const // Gathers element `row` of each of the four columns into a Vector4.
+{
+    return Vector4( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ), mCol3.getElem( row ) );
+}
+
+inline Vector3 & Transform3::operator []( int col )
+{
+    return ( &mCol0 )[ col ];   // mutable reference to the col-th column; col is not range-checked
+}
+
+inline const Vector3 Transform3::operator []( int col ) const
+{
+    return ( &mCol0 )[ col ];   // by-value copy of the col-th column; col is not range-checked
+}
+
+inline Transform3 & Transform3::operator =( const Transform3 & tfrm )
+{
+    // Column-by-column copy through the chaining setters; yields *this.
+    return setCol0( tfrm.mCol0 )
+          .setCol1( tfrm.mCol1 )
+          .setCol2( tfrm.mCol2 )
+          .setCol3( tfrm.mCol3 );
+}
+
+inline const Transform3 inverse( const Transform3 & tfrm ) // General inverse of an affine transform: inverts the upper 3x3 via cross products, then maps the negated translation through it.
+{
+    Vector3 tmp0, tmp1, tmp2, inv0, inv1, inv2;
+    vec_float4 detinv;
+    tmp0 = cross( tfrm.getCol1(), tfrm.getCol2() ); // the three cross products of column pairs (cofactor vectors of the 3x3 part)
+    tmp1 = cross( tfrm.getCol2(), tfrm.getCol0() );
+    tmp2 = cross( tfrm.getCol0(), tfrm.getCol1() );
+    detinv = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), dot( tfrm.getCol2(), tmp2 ) ); // 1 / determinant (col2 . (col0 x col1)); no guard against a zero determinant
+    inv0 = Vector3( vec_madd( tmp0.getX(), detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp1.getX(), detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp2.getX(), detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ); // inverse column 0 = x components of tmp0..tmp2, scaled by detinv (transposes the cofactor vectors)
+    inv1 = Vector3( vec_madd( tmp0.getY(), detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp1.getY(), detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp2.getY(), detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ); // inverse column 1 = y components
+    inv2 = Vector3( vec_madd( tmp0.getZ(), detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp1.getZ(), detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmp2.getZ(), detinv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ); // inverse column 2 = z components
+    return Transform3(
+        inv0,
+        inv1,
+        inv2,
+        Vector3( ( -( ( inv0 * tfrm.getCol3().getX() ) + ( ( inv1 * tfrm.getCol3().getY() ) + ( inv2 * tfrm.getCol3().getZ() ) ) ) ) ) // new translation = -( inverted 3x3 * original translation )
+    );
+}
+
+// Inverse that transposes the upper 3x3 instead of inverting it. NOTE(review): presumably only valid when that part is orthonormal - confirm with callers.
+inline const Transform3 orthoInverse( const Transform3 & tfrm )
+{
+    Vector3 inv0( tfrm.getCol0().getX(), tfrm.getCol1().getX(), tfrm.getCol2().getX() );   // row x of the input becomes column 0
+    Vector3 inv1( tfrm.getCol0().getY(), tfrm.getCol1().getY(), tfrm.getCol2().getY() );   // row y becomes column 1
+    Vector3 inv2( tfrm.getCol0().getZ(), tfrm.getCol1().getZ(), tfrm.getCol2().getZ() );   // row z becomes column 2
+    return Transform3(
+        inv0,
+        inv1,
+        inv2,
+        Vector3( ( -( ( inv0 * tfrm.getCol3().getX() ) + ( ( inv1 * tfrm.getCol3().getY() ) + ( inv2 * tfrm.getCol3().getZ() ) ) ) ) )   // -( transposed 3x3 * translation )
+    );
+}
+
+// Applies the Vector3 absPerElem to each of the four columns of tfrm.
+inline const Transform3 absPerElem( const Transform3 & tfrm )
+{
+    const Vector3 abs0 = absPerElem( tfrm.getCol0() );
+    const Vector3 abs1 = absPerElem( tfrm.getCol1() );
+    const Vector3 abs2 = absPerElem( tfrm.getCol2() );
+    const Vector3 abs3 = absPerElem( tfrm.getCol3() );
+    return Transform3( abs0, abs1, abs2, abs3 );
+}
+
+inline const Vector3 Transform3::operator *( const Vector3 & vec ) const // Transforms a direction vector by the upper 3x3 only; the translation column mCol3 is not used.
+{
+    return Vector3(
+        vec_add( vec_add( vec_madd( mCol0.getX(), vec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mCol1.getX(), vec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol2.getX(), vec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), // x = col0.x*v.x + col1.x*v.y + col2.x*v.z
+        vec_add( vec_add( vec_madd( mCol0.getY(), vec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mCol1.getY(), vec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol2.getY(), vec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), // y = col0.y*v.x + col1.y*v.y + col2.y*v.z
+        vec_add( vec_add( vec_madd( mCol0.getZ(), vec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mCol1.getZ(), vec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol2.getZ(), vec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ) // z = col0.z*v.x + col1.z*v.y + col2.z*v.z
+    );
+}
+
+inline const Point3 Transform3::operator *( const Point3 & pnt ) const // Transforms a point: upper 3x3 times pnt, plus the translation column mCol3.
+{
+    return Point3(
+        vec_add( vec_add( vec_add( vec_madd( mCol0.getX(), pnt.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mCol1.getX(), pnt.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol2.getX(), pnt.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), mCol3.getX() ), // x = col0.x*p.x + col1.x*p.y + col2.x*p.z + col3.x
+        vec_add( vec_add( vec_add( vec_madd( mCol0.getY(), pnt.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mCol1.getY(), pnt.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol2.getY(), pnt.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), mCol3.getY() ), // y = col0.y*p.x + col1.y*p.y + col2.y*p.z + col3.y
+        vec_add( vec_add( vec_add( vec_madd( mCol0.getZ(), pnt.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mCol1.getZ(), pnt.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mCol2.getZ(), pnt.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), mCol3.getZ() ) // z = col0.z*p.x + col1.z*p.y + col2.z*p.z + col3.z
+    );
+}
+
+// Composes two transforms: columns 0-2 of tfrm go through the Vector3 product, column 3 through the Point3 product.
+inline const Transform3 Transform3::operator *( const Transform3 & tfrm ) const
+{
+    const Vector3 outCol0 = *this * tfrm.mCol0;
+    const Vector3 outCol1 = *this * tfrm.mCol1;
+    const Vector3 outCol2 = *this * tfrm.mCol2;
+    const Vector3 outCol3( *this * Point3( tfrm.mCol3 ) );   // translation is transformed as a point
+    return Transform3( outCol0, outCol1, outCol2, outCol3 );
+}
+
+// In-place composition: *this becomes ( *this * tfrm ); returns *this.
+inline Transform3 & Transform3::operator *=( const Transform3 & tfrm )
+{
+    return ( *this = ( *this ) * tfrm );
+}
+
+// Column-wise mulPerElem of two transforms (not a matrix product).
+inline const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 )
+{
+    const Vector3 prod0 = mulPerElem( tfrm0.getCol0(), tfrm1.getCol0() );
+    const Vector3 prod1 = mulPerElem( tfrm0.getCol1(), tfrm1.getCol1() );
+    const Vector3 prod2 = mulPerElem( tfrm0.getCol2(), tfrm1.getCol2() );
+    const Vector3 prod3 = mulPerElem( tfrm0.getCol3(), tfrm1.getCol3() );
+    return Transform3( prod0, prod1, prod2, prod3 );
+}
+
+// Identity transform: unit axes in columns 0-2 and an all-zero translation column.
+inline const Transform3 Transform3::identity( )
+{
+    const Vector3 axisX = Vector3::xAxis( );
+    const Vector3 axisY = Vector3::yAxis( );
+    const Vector3 axisZ = Vector3::zAxis( );
+    const Vector3 noTranslation( ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    return Transform3( axisX, axisY, axisZ, noTranslation );
+}
+
+inline Transform3 & Transform3::setUpper3x3( const Matrix3 & tfrm )
+{
+    // Overwrites columns 0-2 from the 3x3 matrix; column 3 is left as is.
+    return setCol0( tfrm.getCol0() )
+          .setCol1( tfrm.getCol1() )
+          .setCol2( tfrm.getCol2() );
+}
+
+inline const Matrix3 Transform3::getUpper3x3( ) const // Returns columns 0-2 as a Matrix3; the translation column is dropped.
+{
+    return Matrix3( mCol0, mCol1, mCol2 );
+}
+
+// Stores translateVec in column 3 and returns *this for chaining.
+inline Transform3 & Transform3::setTranslation( const Vector3 & translateVec )
+{
+    return setCol3( translateVec );
+}
+
+inline const Vector3 Transform3::getTranslation( ) const
+{
+    return getCol3( );   // the translation is stored in column 3
+}
+
+inline const Transform3 Transform3::rotationX( vec_float4 radians )
+{
+    vec_float4 sinA, cosA, zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    sincosf4( radians, &sinA, &cosA );   // sine and cosine of the angle, written through the two out-pointers
+    return Transform3(
+        Vector3::xAxis( ),                         // x axis is left unchanged
+        Vector3( zero, cosA, sinA ),
+        Vector3( zero, negatef4( sinA ), cosA ),
+        Vector3( zero )                            // no translation
+    );
+}
+
+inline const Transform3 Transform3::rotationY( vec_float4 radians )
+{
+    vec_float4 sinA, cosA, zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    sincosf4( radians, &sinA, &cosA );   // sine and cosine of the angle, written through the two out-pointers
+    return Transform3(
+        Vector3( cosA, zero, negatef4( sinA ) ),
+        Vector3::yAxis( ),                         // y axis is left unchanged
+        Vector3( sinA, zero, cosA ),
+        Vector3( zero )                            // no translation
+    );
+}
+
+inline const Transform3 Transform3::rotationZ( vec_float4 radians )
+{
+    vec_float4 sinA, cosA, zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    sincosf4( radians, &sinA, &cosA );   // sine and cosine of the angle, written through the two out-pointers
+    return Transform3(
+        Vector3( cosA, sinA, zero ),
+        Vector3( negatef4( sinA ), cosA, zero ),
+        Vector3::zAxis( ),                         // z axis is left unchanged
+        Vector3( zero )                            // no translation
+    );
+}
+
+inline const Transform3 Transform3::rotationZYX( const Vector3 & radiansXYZ ) // Rotation from three per-axis angles; the columns below match the product rotationZ * rotationY * rotationX, with zero translation.
+{
+    vec_float4 sX, cX, sY, cY, sZ, cZ, tmp0, tmp1;
+    sincosf4( radiansXYZ.getX(), &sX, &cX ); // sine/cosine of the X, Y and Z angles
+    sincosf4( radiansXYZ.getY(), &sY, &cY );
+    sincosf4( radiansXYZ.getZ(), &sZ, &cZ );
+    tmp0 = vec_madd( cZ, sY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); // cZ*sY, shared by columns 1 and 2
+    tmp1 = vec_madd( sZ, sY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); // sZ*sY, shared by columns 1 and 2
+    return Transform3(
+        Vector3( vec_madd( cZ, cY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( sZ, cY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), negatef4( sY ) ), // ( cZ*cY, sZ*cY, -sY )
+        Vector3( vec_sub( vec_madd( tmp0, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( sZ, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_add( vec_madd( tmp1, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( cZ, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( cY, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), // ( cZ*sY*sX - sZ*cX, sZ*sY*sX + cZ*cX, cY*sX )
+        Vector3( vec_add( vec_madd( tmp0, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( sZ, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_sub( vec_madd( tmp1, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( cZ, sX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( cY, cX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), // ( cZ*sY*cX + sZ*sX, sZ*sY*cX - cZ*sX, cY*cX )
+        Vector3( ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) // no translation
+    );
+}
+
+inline const Transform3 Transform3::rotation( vec_float4 radians, const Vector3 & unitVec ) // Wraps Matrix3::rotation( radians, unitVec ) in a transform with zero translation.
+{
+    return Transform3( Matrix3::rotation( radians, unitVec ), Vector3( ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+}
+
+inline const Transform3 Transform3::rotation( const Quat & unitQuat ) // Transform whose 3x3 part is Matrix3( unitQuat ) and whose translation is zero.
+{
+    return Transform3( Matrix3( unitQuat ), Vector3( ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+}
+
+// Scaling transform: scaleVec's x, y, z on the diagonal, zeros elsewhere, zero translation.
+inline const Transform3 Transform3::scale( const Vector3 & scaleVec )
+{
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    const Vector3 col0( scaleVec.getX(), zero, zero );
+    const Vector3 col1( zero, scaleVec.getY(), zero );
+    const Vector3 col2( zero, zero, scaleVec.getZ() );
+    return Transform3( col0, col1, col2, Vector3( zero ) );
+}
+
+// Multiplies column 0/1/2 of tfrm by scaleVec's x/y/z respectively and keeps
+// the translation column unchanged.
+inline const Transform3 appendScale( const Transform3 & tfrm, const Vector3 & scaleVec )
+{
+    const Vector3 scaled0 = tfrm.getCol0() * scaleVec.getX( );
+    const Vector3 scaled1 = tfrm.getCol1() * scaleVec.getY( );
+    const Vector3 scaled2 = tfrm.getCol2() * scaleVec.getZ( );
+    return Transform3( scaled0, scaled1, scaled2, tfrm.getCol3() );
+}
+
+// Multiplies every column of tfrm, including the translation, element-wise by scaleVec.
+inline const Transform3 prependScale( const Vector3 & scaleVec, const Transform3 & tfrm )
+{
+    const Vector3 scaled0 = mulPerElem( tfrm.getCol0(), scaleVec );
+    const Vector3 scaled1 = mulPerElem( tfrm.getCol1(), scaleVec );
+    const Vector3 scaled2 = mulPerElem( tfrm.getCol2(), scaleVec );
+    const Vector3 scaled3 = mulPerElem( tfrm.getCol3(), scaleVec );
+    return Transform3( scaled0, scaled1, scaled2, scaled3 );
+}
+
+inline const Transform3 Transform3::translation( const Vector3 & translateVec ) // Pure translation: unit axes in columns 0-2, translateVec in column 3.
+{
+    return Transform3(
+        Vector3::xAxis( ),
+        Vector3::yAxis( ),
+        Vector3::zAxis( ),
+        translateVec
+    );
+}
+
+// Column-wise select between two transforms; the same select1 mask is handed to the Vector3 select for every column.
+inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, vec_uint4 select1 )
+{
+    const Vector3 picked0 = select( tfrm0.getCol0(), tfrm1.getCol0(), select1 );
+    const Vector3 picked1 = select( tfrm0.getCol1(), tfrm1.getCol1(), select1 );
+    const Vector3 picked2 = select( tfrm0.getCol2(), tfrm1.getCol2(), select1 );
+    const Vector3 picked3 = select( tfrm0.getCol3(), tfrm1.getCol3(), select1 );
+    return Transform3( picked0, picked1, picked2, picked3 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+inline void print( const Transform3 & tfrm )
+{
+    Aos::Transform3 slot0, slot1, slot2, slot3;   // one AoS transform per slot of the SoA value
+    tfrm.get4Aos( slot0, slot1, slot2, slot3 );
+    printf("slot 0:\n");
+    print( slot0 );
+    printf("slot 1:\n");
+    print( slot1 );
+    printf("slot 2:\n");
+    print( slot2 );
+    printf("slot 3:\n");
+    print( slot3 );
+}
+
+inline void print( const Transform3 & tfrm, const char * name ) // Labelled debug dump: prints `name`, then the transform.
+{
+    printf("%s:\n", name); // header line of the form "<name>:"
+    print( tfrm ); // hands off to the single-argument print overload
+}
+
+#endif
+
+inline Quat::Quat( const Matrix3 & tfrm ) // Builds a quaternion from a 3x3 matrix without branches, using vector compares and selects. NOTE(review): presumably tfrm must be a pure rotation - confirm.
+{
+    vec_float4 trace, radicand, scale, xx, yx, zx, xy, yy, zy, xz, yz, zz, tmpx, tmpy, tmpz, tmpw, qx, qy, qz, qw;
+    vec_uint4 negTrace, ZgtX, ZgtY, YgtX; // comparison masks
+    vec_uint4 largestXorY, largestYorZ, largestZorX; // masks that drive the sign flips and the final component shuffle
+
+    xx = tfrm.getCol0().getX(); // naming: first letter = component (row), second letter = column
+    yx = tfrm.getCol0().getY();
+    zx = tfrm.getCol0().getZ();
+    xy = tfrm.getCol1().getX();
+    yy = tfrm.getCol1().getY();
+    zy = tfrm.getCol1().getZ();
+    xz = tfrm.getCol2().getX();
+    yz = tfrm.getCol2().getY();
+    zz = tfrm.getCol2().getZ();
+
+    trace = vec_add( vec_add( xx, yy ), zz ); // sum of the diagonal
+
+    negTrace = (vec_uint4)vec_cmpgt( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), trace ); // set where trace < 0
+    ZgtX = (vec_uint4)vec_cmpgt( zz, xx );
+    ZgtY = (vec_uint4)vec_cmpgt( zz, yy );
+    YgtX = (vec_uint4)vec_cmpgt( yy, xx );
+    largestXorY = vec_andc( negTrace, vec_and( ZgtX, ZgtY ) ); // trace < 0 and zz is not greater than both xx and yy
+    largestYorZ = vec_and( negTrace, vec_or( YgtX, ZgtX ) ); // trace < 0 and xx is exceeded by yy or zz
+    largestZorX = vec_andc( negTrace, vec_andc( YgtX, ZgtY ) ); // trace < 0 and not ( yy > xx while zz is not > yy )
+    
+    zz = vec_sel( zz, negatef4(zz), largestXorY ); // sign flips: with a negative trace the radicand below becomes 1 + dominant diagonal term - the other two
+    xy = vec_sel( xy, negatef4(xy), largestXorY );
+    xx = vec_sel( xx, negatef4(xx), largestYorZ );
+    yz = vec_sel( yz, negatef4(yz), largestYorZ );
+    yy = vec_sel( yy, negatef4(yy), largestZorX );
+    zx = vec_sel( zx, negatef4(zx), largestZorX );
+
+    radicand = vec_add( vec_add( vec_add( xx, yy ), zz ), ((vec_float4){1.0f,1.0f,1.0f,1.0f}) ); // 1 + (possibly sign-flipped) diagonal sum
+    scale = vec_madd( ((vec_float4){0.5f,0.5f,0.5f,0.5f}), divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( radicand ) ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); // 0.5 / sqrt( radicand )
+
+    tmpx = vec_madd( vec_sub( zy, yz ), scale, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); // ( zy - yz ) * scale
+    tmpy = vec_madd( vec_sub( xz, zx ), scale, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); // ( xz - zx ) * scale
+    tmpz = vec_madd( vec_sub( yx, xy ), scale, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); // ( yx - xy ) * scale
+    tmpw = vec_madd( radicand, scale, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); // radicand * scale = 0.5 * sqrt( radicand )
+    qx = tmpx; // default assignment, used unchanged wherever none of the masks is set
+    qy = tmpy;
+    qz = tmpz;
+    qw = tmpw;
+
+    qx = vec_sel( qx, tmpw, largestXorY ); // first shuffle: where largestXorY is set, (x,y,z,w) takes (tmpw,tmpz,tmpy,tmpx)
+    qy = vec_sel( qy, tmpz, largestXorY );
+    qz = vec_sel( qz, tmpy, largestXorY );
+    qw = vec_sel( qw, tmpx, largestXorY );
+    tmpx = qx; // keep the pre-swap x and z, which the second shuffle overwrites before reading
+    tmpz = qz;
+    qx = vec_sel( qx, qy, largestYorZ ); // second shuffle: where largestYorZ is set, swap x<->y and z<->w
+    qy = vec_sel( qy, tmpx, largestYorZ );
+    qz = vec_sel( qz, qw, largestYorZ );
+    qw = vec_sel( qw, tmpz, largestYorZ );
+
+    mX = qx;
+    mY = qy;
+    mZ = qz;
+    mW = qw;
+}
+
+// Outer product of two 3-vectors: column i is tfrm0 multiplied by component i of tfrm1.
+inline const Matrix3 outer( const Vector3 & tfrm0, const Vector3 & tfrm1 )
+{
+    const Vector3 col0 = tfrm0 * tfrm1.getX( );
+    const Vector3 col1 = tfrm0 * tfrm1.getY( );
+    const Vector3 col2 = tfrm0 * tfrm1.getZ( );
+    return Matrix3( col0, col1, col2 );
+}
+
+// Outer product of two 4-vectors: column i is tfrm0 multiplied by component i of tfrm1.
+inline const Matrix4 outer( const Vector4 & tfrm0, const Vector4 & tfrm1 )
+{
+    const Vector4 col0 = tfrm0 * tfrm1.getX( );
+    const Vector4 col1 = tfrm0 * tfrm1.getY( );
+    const Vector4 col2 = tfrm0 * tfrm1.getZ( );
+    const Vector4 col3 = tfrm0 * tfrm1.getW( );
+    return Matrix4( col0, col1, col2, col3 );
+}
+
+inline const Vector3 rowMul( const Vector3 & vec, const Matrix3 & mat ) // Row-vector times matrix: component i of the result is the dot product of vec with column i of mat.
+{
+    return Vector3(
+        vec_add( vec_add( vec_madd( vec.getX(), mat.getCol0().getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( vec.getY(), mat.getCol0().getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( vec.getZ(), mat.getCol0().getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), // vec . col0
+        vec_add( vec_add( vec_madd( vec.getX(), mat.getCol1().getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( vec.getY(), mat.getCol1().getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( vec.getZ(), mat.getCol1().getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), // vec . col1
+        vec_add( vec_add( vec_madd( vec.getX(), mat.getCol2().getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( vec.getY(), mat.getCol2().getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( vec.getZ(), mat.getCol2().getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ) // vec . col2
+    );
+}
+
+inline const Matrix3 crossMatrix( const Vector3 & vec )
+{
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});   // the diagonal of the result is zero
+    const Vector3 col0( zero, vec.getZ(), negatef4( vec.getY() ) );
+    const Vector3 col1( negatef4( vec.getZ() ), zero, vec.getX() );
+    const Vector3 col2( vec.getY(), negatef4( vec.getX() ), zero );
+    return Matrix3( col0, col1, col2 );   // skew-symmetric matrix: multiplying it by w gives cross( vec, w )
+}
+
+inline const Matrix3 crossMatrixMul( const Vector3 & vec, const Matrix3 & mat ) // Crosses vec with each column of mat; column i of the result is cross( vec, mat column i ).
+{
+    return Matrix3( cross( vec, mat.getCol0() ), cross( vec, mat.getCol1() ), cross( vec, mat.getCol2() ) );
+}
+
+} // namespace Soa
+} // namespace Vectormath
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/ppu/cpp/quat_aos.h b/Extras/vectormathlibrary/include/vectormath/ppu/cpp/quat_aos.h
index 196a48d98..4ab6ffe08 100644
--- a/Extras/vectormathlibrary/include/vectormath/ppu/cpp/quat_aos.h
+++ b/Extras/vectormathlibrary/include/vectormath/ppu/cpp/quat_aos.h
@@ -1,536 +1,536 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_QUAT_AOS_CPP_H
-#define _VECTORMATH_QUAT_AOS_CPP_H
-//-----------------------------------------------------------------------------
-// Definitions
-
-#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
-#define _VECTORMATH_INTERNAL_FUNCTIONS
-
-#endif
-
-namespace Vectormath {
-namespace Aos {
-
-inline Quat::Quat( float _x, float _y, float _z, float _w )
-{
-    if (__builtin_constant_p(_x) & __builtin_constant_p(_y) &
-        __builtin_constant_p(_z) & __builtin_constant_p(_w)) {
-        mVec128 = (vec_float4){_x, _y, _z, _w};
-    } else {
-        float *pf = (float *)&mVec128;
-        pf[0] = _x;
-        pf[1] = _y;
-        pf[2] = _z;
-        pf[3] = _w;
-    }
-}
-
-inline Quat::Quat( floatInVec _x, floatInVec _y, floatInVec _z, floatInVec _w )
-{
-    vec_float4 xz = vec_mergeh( _x.get128(), _z.get128() );
-    vec_float4 yw = vec_mergeh( _y.get128(), _w.get128() );
-    mVec128 = vec_mergeh( xz, yw );
-}
-
-inline Quat::Quat( Vector3 xyz, float _w )
-{
-    mVec128 = xyz.get128();
-    _vmathVfSetElement(mVec128, _w, 3);
-}
-
-inline Quat::Quat( Vector3 xyz, floatInVec _w )
-{
-    mVec128 = xyz.get128();
-    mVec128 = _vmathVfInsert(mVec128, _w.get128(), 3);
-}
-
-inline Quat::Quat( Vector4 vec )
-{
-    mVec128 = vec.get128();
-}
-
-inline Quat::Quat( float scalar )
-{
-    mVec128 = floatInVec(scalar).get128();
-}
-
-inline Quat::Quat( floatInVec scalar )
-{
-    mVec128 = scalar.get128();
-}
-
-inline Quat::Quat( vec_float4 vf4 )
-{
-    mVec128 = vf4;
-}
-
-inline const Quat Quat::identity( )
-{
-    return Quat( _VECTORMATH_UNIT_0001 );
-}
-
-inline const Quat lerp( float t, Quat quat0, Quat quat1 )
-{
-    return lerp( floatInVec(t), quat0, quat1 );
-}
-
-inline const Quat lerp( floatInVec t, Quat quat0, Quat quat1 )
-{
-    return ( quat0 + ( ( quat1 - quat0 ) * t ) );
-}
-
-inline const Quat slerp( float t, Quat unitQuat0, Quat unitQuat1 )
-{
-    return slerp( floatInVec(t), unitQuat0, unitQuat1 );
-}
-
-inline const Quat slerp( floatInVec t, Quat unitQuat0, Quat unitQuat1 )
-{
-    Quat start;
-    vec_float4 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
-    vec_uint4 selectMask;
-    cosAngle = _vmathVfDot4( unitQuat0.get128(), unitQuat1.get128() );
-    cosAngle = vec_splat( cosAngle, 0 );
-    selectMask = (vec_uint4)vec_cmpgt( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), cosAngle );
-    cosAngle = vec_sel( cosAngle, negatef4( cosAngle ), selectMask );
-    start = Quat( vec_sel( unitQuat0.get128(), negatef4( unitQuat0.get128() ), selectMask ) );
-    selectMask = (vec_uint4)vec_cmpgt( ((vec_float4){_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL}), cosAngle );
-    angle = acosf4( cosAngle );
-    tttt = t.get128();
-    oneMinusT = vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), tttt );
-    angles = vec_mergeh( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), tttt );
-    angles = vec_mergeh( angles, oneMinusT );
-    angles = vec_madd( angles, angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    sines = sinf4( angles );
-    scales = divf4( sines, vec_splat( sines, 0 ) );
-    scale0 = vec_sel( oneMinusT, vec_splat( scales, 1 ), selectMask );
-    scale1 = vec_sel( tttt, vec_splat( scales, 2 ), selectMask );
-    return Quat( vec_madd( start.get128(), scale0, vec_madd( unitQuat1.get128(), scale1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ) );
-}
-
-inline const Quat squad( float t, Quat unitQuat0, Quat unitQuat1, Quat unitQuat2, Quat unitQuat3 )
-{
-    return squad( floatInVec(t), unitQuat0, unitQuat1, unitQuat2, unitQuat3 );
-}
-
-inline const Quat squad( floatInVec t, Quat unitQuat0, Quat unitQuat1, Quat unitQuat2, Quat unitQuat3 )
-{
-    Quat tmp0, tmp1;
-    tmp0 = slerp( t, unitQuat0, unitQuat3 );
-    tmp1 = slerp( t, unitQuat1, unitQuat2 );
-    return slerp( ( ( floatInVec(2.0f) * t ) * ( floatInVec(1.0f) - t ) ), tmp0, tmp1 );
-}
-
-inline vec_float4 Quat::get128( ) const
-{
-    return mVec128;
-}
-
-inline Quat & Quat::operator =( Quat quat )
-{
-    mVec128 = quat.mVec128;
-    return *this;
-}
-
-inline Quat & Quat::setXYZ( Vector3 vec )
-{
-    mVec128 = vec_sel( vec.get128(), mVec128, _VECTORMATH_MASK_0x000F );
-    return *this;
-}
-
-inline const Vector3 Quat::getXYZ( ) const
-{
-    return Vector3( mVec128 );
-}
-
-inline Quat & Quat::setX( float _x )
-{
-    _vmathVfSetElement(mVec128, _x, 0);
-    return *this;
-}
-
-inline Quat & Quat::setX( floatInVec _x )
-{
-    mVec128 = _vmathVfInsert(mVec128, _x.get128(), 0);
-    return *this;
-}
-
-inline const floatInVec Quat::getX( ) const
-{
-    return floatInVec( mVec128, 0 );
-}
-
-inline Quat & Quat::setY( float _y )
-{
-    _vmathVfSetElement(mVec128, _y, 1);
-    return *this;
-}
-
-inline Quat & Quat::setY( floatInVec _y )
-{
-    mVec128 = _vmathVfInsert(mVec128, _y.get128(), 1);
-    return *this;
-}
-
-inline const floatInVec Quat::getY( ) const
-{
-    return floatInVec( mVec128, 1 );
-}
-
-inline Quat & Quat::setZ( float _z )
-{
-    _vmathVfSetElement(mVec128, _z, 2);
-    return *this;
-}
-
-inline Quat & Quat::setZ( floatInVec _z )
-{
-    mVec128 = _vmathVfInsert(mVec128, _z.get128(), 2);
-    return *this;
-}
-
-inline const floatInVec Quat::getZ( ) const
-{
-    return floatInVec( mVec128, 2 );
-}
-
-inline Quat & Quat::setW( float _w )
-{
-    _vmathVfSetElement(mVec128, _w, 3);
-    return *this;
-}
-
-inline Quat & Quat::setW( floatInVec _w )
-{
-    mVec128 = _vmathVfInsert(mVec128, _w.get128(), 3);
-    return *this;
-}
-
-inline const floatInVec Quat::getW( ) const
-{
-    return floatInVec( mVec128, 3 );
-}
-
-inline Quat & Quat::setElem( int idx, float value )
-{
-    _vmathVfSetElement(mVec128, value, idx);
-    return *this;
-}
-
-inline Quat & Quat::setElem( int idx, floatInVec value )
-{
-    mVec128 = _vmathVfInsert(mVec128, value.get128(), idx);
-    return *this;
-}
-
-inline const floatInVec Quat::getElem( int idx ) const
-{
-    return floatInVec( mVec128, idx );
-}
-
-inline VecIdx Quat::operator []( int idx )
-{
-    return VecIdx( mVec128, idx );
-}
-
-inline const floatInVec Quat::operator []( int idx ) const
-{
-    return floatInVec( mVec128, idx );
-}
-
-inline const Quat Quat::operator +( Quat quat ) const
-{
-    return Quat( vec_add( mVec128, quat.mVec128 ) );
-}
-
-inline const Quat Quat::operator -( Quat quat ) const
-{
-    return Quat( vec_sub( mVec128, quat.mVec128 ) );
-}
-
-inline const Quat Quat::operator *( float scalar ) const
-{
-    return *this * floatInVec(scalar);
-}
-
-inline const Quat Quat::operator *( floatInVec scalar ) const
-{
-    return Quat( vec_madd( mVec128, scalar.get128(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-}
-
-inline Quat & Quat::operator +=( Quat quat )
-{
-    *this = *this + quat;
-    return *this;
-}
-
-inline Quat & Quat::operator -=( Quat quat )
-{
-    *this = *this - quat;
-    return *this;
-}
-
-inline Quat & Quat::operator *=( float scalar )
-{
-    *this = *this * scalar;
-    return *this;
-}
-
-inline Quat & Quat::operator *=( floatInVec scalar )
-{
-    *this = *this * scalar;
-    return *this;
-}
-
-inline const Quat Quat::operator /( float scalar ) const
-{
-    return *this / floatInVec(scalar);
-}
-
-inline const Quat Quat::operator /( floatInVec scalar ) const
-{
-    return Quat( divf4( mVec128, scalar.get128() ) );
-}
-
-inline Quat & Quat::operator /=( float scalar )
-{
-    *this = *this / scalar;
-    return *this;
-}
-
-inline Quat & Quat::operator /=( floatInVec scalar )
-{
-    *this = *this / scalar;
-    return *this;
-}
-
-inline const Quat Quat::operator -( ) const
-{
-    return Quat( negatef4( mVec128 ) );
-}
-
-inline const Quat operator *( float scalar, Quat quat )
-{
-    return floatInVec(scalar) * quat;
-}
-
-inline const Quat operator *( floatInVec scalar, Quat quat )
-{
-    return quat * scalar;
-}
-
-inline const floatInVec dot( Quat quat0, Quat quat1 )
-{
-    return floatInVec( _vmathVfDot4( quat0.get128(), quat1.get128() ), 0 );
-}
-
-inline const floatInVec norm( Quat quat )
-{
-    return floatInVec(  _vmathVfDot4( quat.get128(), quat.get128() ), 0 );
-}
-
-inline const floatInVec length( Quat quat )
-{
-    return floatInVec(  sqrtf4(_vmathVfDot4( quat.get128(), quat.get128() )), 0 );
-}
-
-inline const Quat normalize( Quat quat )
-{
-    vec_float4 dot = _vmathVfDot4( quat.get128(), quat.get128() );
-    return Quat( vec_madd( quat.get128(), rsqrtf4( dot ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-}
-
-inline const Quat Quat::rotation( Vector3 unitVec0, Vector3 unitVec1 )
-{
-    Vector3 crossVec;
-    vec_float4 cosAngle, cosAngleX2Plus2, recipCosHalfAngleX2, cosHalfAngleX2, res;
-    cosAngle = _vmathVfDot3( unitVec0.get128(), unitVec1.get128() );
-    cosAngle = vec_splat( cosAngle, 0 );
-    cosAngleX2Plus2 = vec_madd( cosAngle, ((vec_float4){2.0f,2.0f,2.0f,2.0f}), ((vec_float4){2.0f,2.0f,2.0f,2.0f}) );
-    recipCosHalfAngleX2 = rsqrtf4( cosAngleX2Plus2 );
-    cosHalfAngleX2 = vec_madd( recipCosHalfAngleX2, cosAngleX2Plus2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    crossVec = cross( unitVec0, unitVec1 );
-    res = vec_madd( crossVec.get128(), recipCosHalfAngleX2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    res = vec_sel( res, vec_madd( cosHalfAngleX2, ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), _VECTORMATH_MASK_0x000F );
-    return Quat( res );
-}
-
-inline const Quat Quat::rotation( float radians, Vector3 unitVec )
-{
-    return rotation( floatInVec(radians), unitVec );
-}
-
-inline const Quat Quat::rotation( floatInVec radians, Vector3 unitVec )
-{
-    vec_float4 s, c, angle, res;
-    angle = vec_madd( radians.get128(), ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    sincosf4( angle, &s, &c );
-    res = vec_sel( vec_madd( unitVec.get128(), s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), c, _VECTORMATH_MASK_0x000F );
-    return Quat( res );
-}
-
-inline const Quat Quat::rotationX( float radians )
-{
-    return rotationX( floatInVec(radians) );
-}
-
-inline const Quat Quat::rotationX( floatInVec radians )
-{
-    vec_float4 s, c, angle, res;
-    angle = vec_madd( radians.get128(), ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    sincosf4( angle, &s, &c );
-    res = vec_sel( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), s, _VECTORMATH_MASK_0xF000 );
-    res = vec_sel( res, c, _VECTORMATH_MASK_0x000F );
-    return Quat( res );
-}
-
-inline const Quat Quat::rotationY( float radians )
-{
-    return rotationY( floatInVec(radians) );
-}
-
-inline const Quat Quat::rotationY( floatInVec radians )
-{
-    vec_float4 s, c, angle, res;
-    angle = vec_madd( radians.get128(), ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    sincosf4( angle, &s, &c );
-    res = vec_sel( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), s, _VECTORMATH_MASK_0x0F00 );
-    res = vec_sel( res, c, _VECTORMATH_MASK_0x000F );
-    return Quat( res );
-}
-
-inline const Quat Quat::rotationZ( float radians )
-{
-    return rotationZ( floatInVec(radians) );
-}
-
-inline const Quat Quat::rotationZ( floatInVec radians )
-{
-    vec_float4 s, c, angle, res;
-    angle = vec_madd( radians.get128(), ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    sincosf4( angle, &s, &c );
-    res = vec_sel( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), s, _VECTORMATH_MASK_0x00F0 );
-    res = vec_sel( res, c, _VECTORMATH_MASK_0x000F );
-    return Quat( res );
-}
-
-inline const Quat Quat::operator *( Quat quat ) const
-{
-    vec_float4 ldata, rdata, qv, tmp0, tmp1, tmp2, tmp3;
-    vec_float4 product, l_wxyz, r_wxyz, xy, qw;
-    ldata = mVec128;
-    rdata = quat.mVec128;
-    tmp0 = vec_perm( ldata, ldata, _VECTORMATH_PERM_YZXW );
-    tmp1 = vec_perm( rdata, rdata, _VECTORMATH_PERM_ZXYW );
-    tmp2 = vec_perm( ldata, ldata, _VECTORMATH_PERM_ZXYW );
-    tmp3 = vec_perm( rdata, rdata, _VECTORMATH_PERM_YZXW );
-    qv = vec_madd( vec_splat( ldata, 3 ), rdata, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    qv = vec_madd( vec_splat( rdata, 3 ), ldata, qv );
-    qv = vec_madd( tmp0, tmp1, qv );
-    qv = vec_nmsub( tmp2, tmp3, qv );
-    product = vec_madd( ldata, rdata, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    l_wxyz = vec_sld( ldata, ldata, 12 );
-    r_wxyz = vec_sld( rdata, rdata, 12 );
-    qw = vec_nmsub( l_wxyz, r_wxyz, product );
-    xy = vec_madd( l_wxyz, r_wxyz, product );
-    qw = vec_sub( qw, vec_sld( xy, xy, 8 ) );
-    return Quat( vec_sel( qv, qw, _VECTORMATH_MASK_0x000F ) );
-}
-
-inline Quat & Quat::operator *=( Quat quat )
-{
-    *this = *this * quat;
-    return *this;
-}
-
-inline const Vector3 rotate( Quat quat, Vector3 vec )
-{
-    vec_float4 qdata, vdata, product, tmp0, tmp1, tmp2, tmp3, wwww, qv, qw, res;
-    qdata = quat.get128();
-    vdata = vec.get128();
-    tmp0 = vec_perm( qdata, qdata, _VECTORMATH_PERM_YZXW );
-    tmp1 = vec_perm( vdata, vdata, _VECTORMATH_PERM_ZXYW );
-    tmp2 = vec_perm( qdata, qdata, _VECTORMATH_PERM_ZXYW );
-    tmp3 = vec_perm( vdata, vdata, _VECTORMATH_PERM_YZXW );
-    wwww = vec_splat( qdata, 3 );
-    qv = vec_madd( wwww, vdata, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    qv = vec_madd( tmp0, tmp1, qv );
-    qv = vec_nmsub( tmp2, tmp3, qv );
-    product = vec_madd( qdata, vdata, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    qw = vec_madd( vec_sld( qdata, qdata, 4 ), vec_sld( vdata, vdata, 4 ), product );
-    qw = vec_add( vec_sld( product, product, 8 ), qw );
-    tmp1 = vec_perm( qv, qv, _VECTORMATH_PERM_ZXYW );
-    tmp3 = vec_perm( qv, qv, _VECTORMATH_PERM_YZXW );
-    res = vec_madd( vec_splat( qw, 0 ), qdata, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    res = vec_madd( wwww, qv, res );
-    res = vec_madd( tmp0, tmp1, res );
-    res = vec_nmsub( tmp2, tmp3, res );
-    return Vector3( res );
-}
-
-inline const Quat conj( Quat quat )
-{
-    return Quat( vec_xor( quat.get128(), ((vec_float4)(vec_int4){0x80000000,0x80000000,0x80000000,0}) ) );
-}
-
-inline const Quat select( Quat quat0, Quat quat1, bool select1 )
-{
-    return select( quat0, quat1, boolInVec(select1) );
-}
-
-inline const Quat select( Quat quat0, Quat quat1, boolInVec select1 )
-{
-    return Quat( vec_sel( quat0.get128(), quat1.get128(), select1.get128() ) );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( Quat quat )
-{
-    union { vec_float4 v; float s[4]; } tmp;
-    tmp.v = quat.get128();
-    printf( "( %f %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] );
-}
-
-inline void print( Quat quat, const char * name )
-{
-    union { vec_float4 v; float s[4]; } tmp;
-    tmp.v = quat.get128();
-    printf( "%s: ( %f %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] );
-}
-
-#endif
-
-} // namespace Aos
-} // namespace Vectormath
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_QUAT_AOS_CPP_H
+#define _VECTORMATH_QUAT_AOS_CPP_H
+//-----------------------------------------------------------------------------
+// Definitions
+
+#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
+#define _VECTORMATH_INTERNAL_FUNCTIONS
+
+#endif
+
+namespace Vectormath {
+namespace Aos {
+
+inline Quat::Quat( float _x, float _y, float _z, float _w )
+{
+    if (__builtin_constant_p(_x) & __builtin_constant_p(_y) &
+        __builtin_constant_p(_z) & __builtin_constant_p(_w)) {
+        mVec128 = (vec_float4){_x, _y, _z, _w};
+    } else {
+        float *pf = (float *)&mVec128;
+        pf[0] = _x;
+        pf[1] = _y;
+        pf[2] = _z;
+        pf[3] = _w;
+    }
+}
+
+inline Quat::Quat( floatInVec _x, floatInVec _y, floatInVec _z, floatInVec _w )
+{
+    vec_float4 xz = vec_mergeh( _x.get128(), _z.get128() );
+    vec_float4 yw = vec_mergeh( _y.get128(), _w.get128() );
+    mVec128 = vec_mergeh( xz, yw );
+}
+
+inline Quat::Quat( Vector3 xyz, float _w )
+{
+    mVec128 = xyz.get128();
+    _vmathVfSetElement(mVec128, _w, 3);
+}
+
+inline Quat::Quat( Vector3 xyz, floatInVec _w )
+{
+    mVec128 = xyz.get128();
+    mVec128 = _vmathVfInsert(mVec128, _w.get128(), 3);
+}
+
+inline Quat::Quat( Vector4 vec )
+{
+    mVec128 = vec.get128();
+}
+
+inline Quat::Quat( float scalar )
+{
+    mVec128 = floatInVec(scalar).get128();
+}
+
+inline Quat::Quat( floatInVec scalar )
+{
+    mVec128 = scalar.get128();
+}
+
+inline Quat::Quat( vec_float4 vf4 )
+{
+    mVec128 = vf4;
+}
+
+inline const Quat Quat::identity( )
+{
+    return Quat( _VECTORMATH_UNIT_0001 );
+}
+
+inline const Quat lerp( float t, Quat quat0, Quat quat1 )
+{
+    return lerp( floatInVec(t), quat0, quat1 );
+}
+
+inline const Quat lerp( floatInVec t, Quat quat0, Quat quat1 )
+{
+    return ( quat0 + ( ( quat1 - quat0 ) * t ) );
+}
+
+inline const Quat slerp( float t, Quat unitQuat0, Quat unitQuat1 )
+{
+    return slerp( floatInVec(t), unitQuat0, unitQuat1 );
+}
+
+inline const Quat slerp( floatInVec t, Quat unitQuat0, Quat unitQuat1 )
+{
+    Quat start;
+    vec_float4 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
+    vec_uint4 selectMask;
+    cosAngle = _vmathVfDot4( unitQuat0.get128(), unitQuat1.get128() );
+    cosAngle = vec_splat( cosAngle, 0 );
+    selectMask = (vec_uint4)vec_cmpgt( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), cosAngle );
+    cosAngle = vec_sel( cosAngle, negatef4( cosAngle ), selectMask );
+    start = Quat( vec_sel( unitQuat0.get128(), negatef4( unitQuat0.get128() ), selectMask ) );
+    selectMask = (vec_uint4)vec_cmpgt( ((vec_float4){_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL}), cosAngle );
+    angle = acosf4( cosAngle );
+    tttt = t.get128();
+    oneMinusT = vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), tttt );
+    angles = vec_mergeh( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), tttt );
+    angles = vec_mergeh( angles, oneMinusT );
+    angles = vec_madd( angles, angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    sines = sinf4( angles );
+    scales = divf4( sines, vec_splat( sines, 0 ) );
+    scale0 = vec_sel( oneMinusT, vec_splat( scales, 1 ), selectMask );
+    scale1 = vec_sel( tttt, vec_splat( scales, 2 ), selectMask );
+    return Quat( vec_madd( start.get128(), scale0, vec_madd( unitQuat1.get128(), scale1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ) );
+}
+
+inline const Quat squad( float t, Quat unitQuat0, Quat unitQuat1, Quat unitQuat2, Quat unitQuat3 )
+{
+    return squad( floatInVec(t), unitQuat0, unitQuat1, unitQuat2, unitQuat3 );
+}
+
+inline const Quat squad( floatInVec t, Quat unitQuat0, Quat unitQuat1, Quat unitQuat2, Quat unitQuat3 )
+{
+    Quat tmp0, tmp1;
+    tmp0 = slerp( t, unitQuat0, unitQuat3 );
+    tmp1 = slerp( t, unitQuat1, unitQuat2 );
+    return slerp( ( ( floatInVec(2.0f) * t ) * ( floatInVec(1.0f) - t ) ), tmp0, tmp1 );
+}
+
+inline vec_float4 Quat::get128( ) const
+{
+    return mVec128;
+}
+
+inline Quat & Quat::operator =( Quat quat )
+{
+    mVec128 = quat.mVec128;
+    return *this;
+}
+
+inline Quat & Quat::setXYZ( Vector3 vec )
+{
+    mVec128 = vec_sel( vec.get128(), mVec128, _VECTORMATH_MASK_0x000F );
+    return *this;
+}
+
+inline const Vector3 Quat::getXYZ( ) const
+{
+    return Vector3( mVec128 );
+}
+
+inline Quat & Quat::setX( float _x )
+{
+    _vmathVfSetElement(mVec128, _x, 0);
+    return *this;
+}
+
+inline Quat & Quat::setX( floatInVec _x )
+{
+    mVec128 = _vmathVfInsert(mVec128, _x.get128(), 0);
+    return *this;
+}
+
+inline const floatInVec Quat::getX( ) const
+{
+    return floatInVec( mVec128, 0 );
+}
+
+inline Quat & Quat::setY( float _y )
+{
+    _vmathVfSetElement(mVec128, _y, 1);
+    return *this;
+}
+
+inline Quat & Quat::setY( floatInVec _y )
+{
+    mVec128 = _vmathVfInsert(mVec128, _y.get128(), 1);
+    return *this;
+}
+
+inline const floatInVec Quat::getY( ) const
+{
+    return floatInVec( mVec128, 1 );
+}
+
+inline Quat & Quat::setZ( float _z )
+{
+    _vmathVfSetElement(mVec128, _z, 2);
+    return *this;
+}
+
+inline Quat & Quat::setZ( floatInVec _z )
+{
+    mVec128 = _vmathVfInsert(mVec128, _z.get128(), 2);
+    return *this;
+}
+
+inline const floatInVec Quat::getZ( ) const
+{
+    return floatInVec( mVec128, 2 );
+}
+
+inline Quat & Quat::setW( float _w )
+{
+    _vmathVfSetElement(mVec128, _w, 3);
+    return *this;
+}
+
+inline Quat & Quat::setW( floatInVec _w )
+{
+    mVec128 = _vmathVfInsert(mVec128, _w.get128(), 3);
+    return *this;
+}
+
+inline const floatInVec Quat::getW( ) const
+{
+    return floatInVec( mVec128, 3 );
+}
+
+inline Quat & Quat::setElem( int idx, float value )
+{
+    _vmathVfSetElement(mVec128, value, idx);
+    return *this;
+}
+
+inline Quat & Quat::setElem( int idx, floatInVec value )
+{
+    mVec128 = _vmathVfInsert(mVec128, value.get128(), idx);
+    return *this;
+}
+
+inline const floatInVec Quat::getElem( int idx ) const
+{
+    return floatInVec( mVec128, idx );
+}
+
+inline VecIdx Quat::operator []( int idx )
+{
+    return VecIdx( mVec128, idx );
+}
+
+inline const floatInVec Quat::operator []( int idx ) const
+{
+    return floatInVec( mVec128, idx );
+}
+
+inline const Quat Quat::operator +( Quat quat ) const
+{
+    return Quat( vec_add( mVec128, quat.mVec128 ) );
+}
+
+inline const Quat Quat::operator -( Quat quat ) const
+{
+    return Quat( vec_sub( mVec128, quat.mVec128 ) );
+}
+
+inline const Quat Quat::operator *( float scalar ) const
+{
+    return *this * floatInVec(scalar);
+}
+
+inline const Quat Quat::operator *( floatInVec scalar ) const
+{
+    return Quat( vec_madd( mVec128, scalar.get128(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+}
+
+inline Quat & Quat::operator +=( Quat quat )
+{
+    *this = *this + quat;
+    return *this;
+}
+
+inline Quat & Quat::operator -=( Quat quat )
+{
+    *this = *this - quat;
+    return *this;
+}
+
+inline Quat & Quat::operator *=( float scalar )
+{
+    *this = *this * scalar;
+    return *this;
+}
+
+inline Quat & Quat::operator *=( floatInVec scalar )
+{
+    *this = *this * scalar;
+    return *this;
+}
+
+inline const Quat Quat::operator /( float scalar ) const
+{
+    return *this / floatInVec(scalar);
+}
+
+inline const Quat Quat::operator /( floatInVec scalar ) const
+{
+    return Quat( divf4( mVec128, scalar.get128() ) );
+}
+
+inline Quat & Quat::operator /=( float scalar )
+{
+    *this = *this / scalar;
+    return *this;
+}
+
+inline Quat & Quat::operator /=( floatInVec scalar )
+{
+    *this = *this / scalar;
+    return *this;
+}
+
+inline const Quat Quat::operator -( ) const
+{
+    return Quat( negatef4( mVec128 ) );
+}
+
+inline const Quat operator *( float scalar, Quat quat )
+{
+    return floatInVec(scalar) * quat;
+}
+
+inline const Quat operator *( floatInVec scalar, Quat quat )
+{
+    return quat * scalar;
+}
+
+inline const floatInVec dot( Quat quat0, Quat quat1 )
+{
+    return floatInVec( _vmathVfDot4( quat0.get128(), quat1.get128() ), 0 );
+}
+
+inline const floatInVec norm( Quat quat )
+{
+    return floatInVec(  _vmathVfDot4( quat.get128(), quat.get128() ), 0 );
+}
+
+inline const floatInVec length( Quat quat )
+{
+    return floatInVec(  sqrtf4(_vmathVfDot4( quat.get128(), quat.get128() )), 0 );
+}
+
+inline const Quat normalize( Quat quat )
+{
+    vec_float4 dot = _vmathVfDot4( quat.get128(), quat.get128() );
+    return Quat( vec_madd( quat.get128(), rsqrtf4( dot ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+}
+
+inline const Quat Quat::rotation( Vector3 unitVec0, Vector3 unitVec1 )
+{
+    Vector3 crossVec;
+    vec_float4 cosAngle, cosAngleX2Plus2, recipCosHalfAngleX2, cosHalfAngleX2, res;
+    cosAngle = _vmathVfDot3( unitVec0.get128(), unitVec1.get128() );
+    cosAngle = vec_splat( cosAngle, 0 );
+    cosAngleX2Plus2 = vec_madd( cosAngle, ((vec_float4){2.0f,2.0f,2.0f,2.0f}), ((vec_float4){2.0f,2.0f,2.0f,2.0f}) );
+    recipCosHalfAngleX2 = rsqrtf4( cosAngleX2Plus2 );
+    cosHalfAngleX2 = vec_madd( recipCosHalfAngleX2, cosAngleX2Plus2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    crossVec = cross( unitVec0, unitVec1 );
+    res = vec_madd( crossVec.get128(), recipCosHalfAngleX2, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    res = vec_sel( res, vec_madd( cosHalfAngleX2, ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), _VECTORMATH_MASK_0x000F );
+    return Quat( res );
+}
+
+inline const Quat Quat::rotation( float radians, Vector3 unitVec )
+{
+    return rotation( floatInVec(radians), unitVec );
+}
+
+inline const Quat Quat::rotation( floatInVec radians, Vector3 unitVec )
+{
+    vec_float4 s, c, angle, res;
+    angle = vec_madd( radians.get128(), ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    sincosf4( angle, &s, &c );
+    res = vec_sel( vec_madd( unitVec.get128(), s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), c, _VECTORMATH_MASK_0x000F );
+    return Quat( res );
+}
+
+inline const Quat Quat::rotationX( float radians )
+{
+    return rotationX( floatInVec(radians) );
+}
+
+inline const Quat Quat::rotationX( floatInVec radians )
+{
+    vec_float4 s, c, angle, res;
+    angle = vec_madd( radians.get128(), ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    sincosf4( angle, &s, &c );
+    res = vec_sel( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), s, _VECTORMATH_MASK_0xF000 );
+    res = vec_sel( res, c, _VECTORMATH_MASK_0x000F );
+    return Quat( res );
+}
+
+inline const Quat Quat::rotationY( float radians )
+{
+    return rotationY( floatInVec(radians) );
+}
+
+inline const Quat Quat::rotationY( floatInVec radians )
+{
+    vec_float4 s, c, angle, res;
+    angle = vec_madd( radians.get128(), ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    sincosf4( angle, &s, &c );
+    res = vec_sel( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), s, _VECTORMATH_MASK_0x0F00 );
+    res = vec_sel( res, c, _VECTORMATH_MASK_0x000F );
+    return Quat( res );
+}
+
+inline const Quat Quat::rotationZ( float radians )
+{
+    return rotationZ( floatInVec(radians) );
+}
+
+inline const Quat Quat::rotationZ( floatInVec radians )
+{
+    vec_float4 s, c, angle, res;
+    angle = vec_madd( radians.get128(), ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    sincosf4( angle, &s, &c );
+    res = vec_sel( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), s, _VECTORMATH_MASK_0x00F0 );
+    res = vec_sel( res, c, _VECTORMATH_MASK_0x000F );
+    return Quat( res );
+}
+
+inline const Quat Quat::operator *( Quat quat ) const
+{
+    vec_float4 ldata, rdata, qv, tmp0, tmp1, tmp2, tmp3;
+    vec_float4 product, l_wxyz, r_wxyz, xy, qw;
+    ldata = mVec128;
+    rdata = quat.mVec128;
+    tmp0 = vec_perm( ldata, ldata, _VECTORMATH_PERM_YZXW );
+    tmp1 = vec_perm( rdata, rdata, _VECTORMATH_PERM_ZXYW );
+    tmp2 = vec_perm( ldata, ldata, _VECTORMATH_PERM_ZXYW );
+    tmp3 = vec_perm( rdata, rdata, _VECTORMATH_PERM_YZXW );
+    qv = vec_madd( vec_splat( ldata, 3 ), rdata, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    qv = vec_madd( vec_splat( rdata, 3 ), ldata, qv );
+    qv = vec_madd( tmp0, tmp1, qv );
+    qv = vec_nmsub( tmp2, tmp3, qv );
+    product = vec_madd( ldata, rdata, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    l_wxyz = vec_sld( ldata, ldata, 12 );
+    r_wxyz = vec_sld( rdata, rdata, 12 );
+    qw = vec_nmsub( l_wxyz, r_wxyz, product );
+    xy = vec_madd( l_wxyz, r_wxyz, product );
+    qw = vec_sub( qw, vec_sld( xy, xy, 8 ) );
+    return Quat( vec_sel( qv, qw, _VECTORMATH_MASK_0x000F ) );
+}
+
+inline Quat & Quat::operator *=( Quat quat )
+{
+    *this = *this * quat;
+    return *this;
+}
+
+inline const Vector3 rotate( Quat quat, Vector3 vec )
+{
+    vec_float4 qdata, vdata, product, tmp0, tmp1, tmp2, tmp3, wwww, qv, qw, res;
+    qdata = quat.get128();
+    vdata = vec.get128();
+    tmp0 = vec_perm( qdata, qdata, _VECTORMATH_PERM_YZXW );
+    tmp1 = vec_perm( vdata, vdata, _VECTORMATH_PERM_ZXYW );
+    tmp2 = vec_perm( qdata, qdata, _VECTORMATH_PERM_ZXYW );
+    tmp3 = vec_perm( vdata, vdata, _VECTORMATH_PERM_YZXW );
+    wwww = vec_splat( qdata, 3 );
+    qv = vec_madd( wwww, vdata, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    qv = vec_madd( tmp0, tmp1, qv );
+    qv = vec_nmsub( tmp2, tmp3, qv );
+    product = vec_madd( qdata, vdata, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    qw = vec_madd( vec_sld( qdata, qdata, 4 ), vec_sld( vdata, vdata, 4 ), product );
+    qw = vec_add( vec_sld( product, product, 8 ), qw );
+    tmp1 = vec_perm( qv, qv, _VECTORMATH_PERM_ZXYW );
+    tmp3 = vec_perm( qv, qv, _VECTORMATH_PERM_YZXW );
+    res = vec_madd( vec_splat( qw, 0 ), qdata, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    res = vec_madd( wwww, qv, res );
+    res = vec_madd( tmp0, tmp1, res );
+    res = vec_nmsub( tmp2, tmp3, res );
+    return Vector3( res );
+}
+
+inline const Quat conj( Quat quat )
+{
+    return Quat( vec_xor( quat.get128(), ((vec_float4)(vec_int4){0x80000000,0x80000000,0x80000000,0}) ) );
+}
+
+inline const Quat select( Quat quat0, Quat quat1, bool select1 )
+{
+    return select( quat0, quat1, boolInVec(select1) );
+}
+
+inline const Quat select( Quat quat0, Quat quat1, boolInVec select1 )
+{
+    return Quat( vec_sel( quat0.get128(), quat1.get128(), select1.get128() ) );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+inline void print( Quat quat )
+{
+    union { vec_float4 v; float s[4]; } tmp;
+    tmp.v = quat.get128();
+    printf( "( %f %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] );
+}
+
+inline void print( Quat quat, const char * name )
+{
+    union { vec_float4 v; float s[4]; } tmp;
+    tmp.v = quat.get128();
+    printf( "%s: ( %f %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] );
+}
+
+#endif
+
+} // namespace Aos
+} // namespace Vectormath
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/ppu/cpp/quat_soa.h b/Extras/vectormathlibrary/include/vectormath/ppu/cpp/quat_soa.h
index fb83e8121..082eaed26 100644
--- a/Extras/vectormathlibrary/include/vectormath/ppu/cpp/quat_soa.h
+++ b/Extras/vectormathlibrary/include/vectormath/ppu/cpp/quat_soa.h
@@ -1,479 +1,479 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_QUAT_SOA_CPP_H
-#define _VECTORMATH_QUAT_SOA_CPP_H
-//-----------------------------------------------------------------------------
-// Definitions
-
-#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
-#define _VECTORMATH_INTERNAL_FUNCTIONS
-
-#endif
-
-namespace Vectormath {
-namespace Soa {
-
-inline Quat::Quat( const Quat & quat )
-{
-    mX = quat.mX;
-    mY = quat.mY;
-    mZ = quat.mZ;
-    mW = quat.mW;
-}
-
-inline Quat::Quat( vec_float4 _x, vec_float4 _y, vec_float4 _z, vec_float4 _w )
-{
-    mX = _x;
-    mY = _y;
-    mZ = _z;
-    mW = _w;
-}
-
-inline Quat::Quat( const Vector3 & xyz, vec_float4 _w )
-{
-    this->setXYZ( xyz );
-    this->setW( _w );
-}
-
-inline Quat::Quat( const Vector4 & vec )
-{
-    mX = vec.getX();
-    mY = vec.getY();
-    mZ = vec.getZ();
-    mW = vec.getW();
-}
-
-inline Quat::Quat( vec_float4 scalar )
-{
-    mX = scalar;
-    mY = scalar;
-    mZ = scalar;
-    mW = scalar;
-}
-
-inline Quat::Quat( Aos::Quat quat )
-{
-    vec_float4 vec128 = quat.get128();
-    mX = vec_splat( vec128, 0 );
-    mY = vec_splat( vec128, 1 );
-    mZ = vec_splat( vec128, 2 );
-    mW = vec_splat( vec128, 3 );
-}
-
-inline Quat::Quat( Aos::Quat quat0, Aos::Quat quat1, Aos::Quat quat2, Aos::Quat quat3 )
-{
-    vec_float4 tmp0, tmp1, tmp2, tmp3;
-    tmp0 = vec_mergeh( quat0.get128(), quat2.get128() );
-    tmp1 = vec_mergeh( quat1.get128(), quat3.get128() );
-    tmp2 = vec_mergel( quat0.get128(), quat2.get128() );
-    tmp3 = vec_mergel( quat1.get128(), quat3.get128() );
-    mX = vec_mergeh( tmp0, tmp1 );
-    mY = vec_mergel( tmp0, tmp1 );
-    mZ = vec_mergeh( tmp2, tmp3 );
-    mW = vec_mergel( tmp2, tmp3 );
-}
-
-inline const Quat Quat::identity( )
-{
-    return Quat( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){1.0f,1.0f,1.0f,1.0f}) );
-}
-
-inline const Quat lerp( vec_float4 t, const Quat & quat0, const Quat & quat1 )
-{
-    return ( quat0 + ( ( quat1 - quat0 ) * t ) );
-}
-
-inline const Quat slerp( vec_float4 t, const Quat & unitQuat0, const Quat & unitQuat1 )
-{
-    Quat start;
-    vec_float4 recipSinAngle, scale0, scale1, cosAngle, angle;
-    vec_uint4 selectMask;
-    cosAngle = dot( unitQuat0, unitQuat1 );
-    selectMask = (vec_uint4)vec_cmpgt( (vec_float4){0.0f,0.0f,0.0f,0.0f}, cosAngle );
-    cosAngle = vec_sel( cosAngle, negatef4( cosAngle ), selectMask );
-    start.setX( vec_sel( unitQuat0.getX(), negatef4( unitQuat0.getX() ), selectMask ) );
-    start.setY( vec_sel( unitQuat0.getY(), negatef4( unitQuat0.getY() ), selectMask ) );
-    start.setZ( vec_sel( unitQuat0.getZ(), negatef4( unitQuat0.getZ() ), selectMask ) );
-    start.setW( vec_sel( unitQuat0.getW(), negatef4( unitQuat0.getW() ), selectMask ) );
-    selectMask = (vec_uint4)vec_cmpgt( (vec_float4){_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL}, cosAngle );
-    angle = acosf4( cosAngle );
-    recipSinAngle = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sinf4( angle ) );
-    scale0 = vec_sel( vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), t ), vec_madd( sinf4( vec_madd( vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), t ), angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), recipSinAngle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), selectMask );
-    scale1 = vec_sel( t, vec_madd( sinf4( vec_madd( t, angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), recipSinAngle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), selectMask );
-    return ( ( start * scale0 ) + ( unitQuat1 * scale1 ) );
-}
-
-inline const Quat squad( vec_float4 t, const Quat & unitQuat0, const Quat & unitQuat1, const Quat & unitQuat2, const Quat & unitQuat3 )
-{
-    Quat tmp0, tmp1;
-    tmp0 = slerp( t, unitQuat0, unitQuat3 );
-    tmp1 = slerp( t, unitQuat1, unitQuat2 );
-    return slerp( vec_madd( vec_madd( ((vec_float4){2.0f,2.0f,2.0f,2.0f}), t, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), t ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), tmp0, tmp1 );
-}
-
-inline void Quat::get4Aos( Aos::Quat & result0, Aos::Quat & result1, Aos::Quat & result2, Aos::Quat & result3 ) const
-{
-    vec_float4 tmp0, tmp1, tmp2, tmp3;
-    tmp0 = vec_mergeh( mX, mZ );
-    tmp1 = vec_mergeh( mY, mW );
-    tmp2 = vec_mergel( mX, mZ );
-    tmp3 = vec_mergel( mY, mW );
-    result0 = Aos::Quat( vec_mergeh( tmp0, tmp1 ) );
-    result1 = Aos::Quat( vec_mergel( tmp0, tmp1 ) );
-    result2 = Aos::Quat( vec_mergeh( tmp2, tmp3 ) );
-    result3 = Aos::Quat( vec_mergel( tmp2, tmp3 ) );
-}
-
-inline Quat & Quat::operator =( const Quat & quat )
-{
-    mX = quat.mX;
-    mY = quat.mY;
-    mZ = quat.mZ;
-    mW = quat.mW;
-    return *this;
-}
-
-inline Quat & Quat::setXYZ( const Vector3 & vec )
-{
-    mX = vec.getX();
-    mY = vec.getY();
-    mZ = vec.getZ();
-    return *this;
-}
-
-inline const Vector3 Quat::getXYZ( ) const
-{
-    return Vector3( mX, mY, mZ );
-}
-
-inline Quat & Quat::setX( vec_float4 _x )
-{
-    mX = _x;
-    return *this;
-}
-
-inline vec_float4 Quat::getX( ) const
-{
-    return mX;
-}
-
-inline Quat & Quat::setY( vec_float4 _y )
-{
-    mY = _y;
-    return *this;
-}
-
-inline vec_float4 Quat::getY( ) const
-{
-    return mY;
-}
-
-inline Quat & Quat::setZ( vec_float4 _z )
-{
-    mZ = _z;
-    return *this;
-}
-
-inline vec_float4 Quat::getZ( ) const
-{
-    return mZ;
-}
-
-inline Quat & Quat::setW( vec_float4 _w )
-{
-    mW = _w;
-    return *this;
-}
-
-inline vec_float4 Quat::getW( ) const
-{
-    return mW;
-}
-
-inline Quat & Quat::setElem( int idx, vec_float4 value )
-{
-    *(&mX + idx) = value;
-    return *this;
-}
-
-inline vec_float4 Quat::getElem( int idx ) const
-{
-    return *(&mX + idx);
-}
-
-inline Quat::vec_float4_t & Quat::operator []( int idx )
-{
-    return *(&mX + idx);
-}
-
-inline vec_float4 Quat::operator []( int idx ) const
-{
-    return *(&mX + idx);
-}
-
-inline const Quat Quat::operator +( const Quat & quat ) const
-{
-    return Quat(
-        vec_add( mX, quat.mX ),
-        vec_add( mY, quat.mY ),
-        vec_add( mZ, quat.mZ ),
-        vec_add( mW, quat.mW )
-    );
-}
-
-inline const Quat Quat::operator -( const Quat & quat ) const
-{
-    return Quat(
-        vec_sub( mX, quat.mX ),
-        vec_sub( mY, quat.mY ),
-        vec_sub( mZ, quat.mZ ),
-        vec_sub( mW, quat.mW )
-    );
-}
-
-inline const Quat Quat::operator *( vec_float4 scalar ) const
-{
-    return Quat(
-        vec_madd( mX, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        vec_madd( mY, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        vec_madd( mZ, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        vec_madd( mW, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) )
-    );
-}
-
-inline Quat & Quat::operator +=( const Quat & quat )
-{
-    *this = *this + quat;
-    return *this;
-}
-
-inline Quat & Quat::operator -=( const Quat & quat )
-{
-    *this = *this - quat;
-    return *this;
-}
-
-inline Quat & Quat::operator *=( vec_float4 scalar )
-{
-    *this = *this * scalar;
-    return *this;
-}
-
-inline const Quat Quat::operator /( vec_float4 scalar ) const
-{
-    return Quat(
-        divf4( mX, scalar ),
-        divf4( mY, scalar ),
-        divf4( mZ, scalar ),
-        divf4( mW, scalar )
-    );
-}
-
-inline Quat & Quat::operator /=( vec_float4 scalar )
-{
-    *this = *this / scalar;
-    return *this;
-}
-
-inline const Quat Quat::operator -( ) const
-{
-    return Quat(
-        negatef4( mX ),
-        negatef4( mY ),
-        negatef4( mZ ),
-        negatef4( mW )
-    );
-}
-
-inline const Quat operator *( vec_float4 scalar, const Quat & quat )
-{
-    return quat * scalar;
-}
-
-inline vec_float4 dot( const Quat & quat0, const Quat & quat1 )
-{
-    vec_float4 result;
-    result = vec_madd( quat0.getX(), quat1.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result = vec_add( result, vec_madd( quat0.getY(), quat1.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    result = vec_add( result, vec_madd( quat0.getZ(), quat1.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    result = vec_add( result, vec_madd( quat0.getW(), quat1.getW(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    return result;
-}
-
-inline vec_float4 norm( const Quat & quat )
-{
-    vec_float4 result;
-    result = vec_madd( quat.getX(), quat.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result = vec_add( result, vec_madd( quat.getY(), quat.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    result = vec_add( result, vec_madd( quat.getZ(), quat.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    result = vec_add( result, vec_madd( quat.getW(), quat.getW(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    return result;
-}
-
-inline vec_float4 length( const Quat & quat )
-{
-    return sqrtf4( norm( quat ) );
-}
-
-inline const Quat normalize( const Quat & quat )
-{
-    vec_float4 lenSqr, lenInv;
-    lenSqr = norm( quat );
-    lenInv = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( lenSqr ) );
-    return Quat(
-        vec_madd( quat.getX(), lenInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        vec_madd( quat.getY(), lenInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        vec_madd( quat.getZ(), lenInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        vec_madd( quat.getW(), lenInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) )
-    );
-}
-
-inline const Quat Quat::rotation( const Vector3 & unitVec0, const Vector3 & unitVec1 )
-{
-    vec_float4 cosHalfAngleX2, recipCosHalfAngleX2;
-    cosHalfAngleX2 = sqrtf4( vec_madd( ((vec_float4){2.0f,2.0f,2.0f,2.0f}), vec_add( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), dot( unitVec0, unitVec1 ) ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    recipCosHalfAngleX2 = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), cosHalfAngleX2 );
-    return Quat( ( cross( unitVec0, unitVec1 ) * recipCosHalfAngleX2 ), vec_madd( cosHalfAngleX2, ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-}
-
-inline const Quat Quat::rotation( vec_float4 radians, const Vector3 & unitVec )
-{
-    vec_float4 s, c, angle;
-    angle = vec_madd( radians, ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    sincosf4( angle, &s, &c );
-    return Quat( ( unitVec * s ), c );
-}
-
-inline const Quat Quat::rotationX( vec_float4 radians )
-{
-    vec_float4 s, c, angle;
-    angle = vec_madd( radians, ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    sincosf4( angle, &s, &c );
-    return Quat( s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), c );
-}
-
-inline const Quat Quat::rotationY( vec_float4 radians )
-{
-    vec_float4 s, c, angle;
-    angle = vec_madd( radians, ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    sincosf4( angle, &s, &c );
-    return Quat( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), c );
-}
-
-inline const Quat Quat::rotationZ( vec_float4 radians )
-{
-    vec_float4 s, c, angle;
-    angle = vec_madd( radians, ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    sincosf4( angle, &s, &c );
-    return Quat( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), s, c );
-}
-
-inline const Quat Quat::operator *( const Quat & quat ) const
-{
-    return Quat(
-        vec_sub( vec_add( vec_add( vec_madd( mW, quat.mX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mX, quat.mW, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mY, quat.mZ, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mZ, quat.mY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),
-        vec_sub( vec_add( vec_add( vec_madd( mW, quat.mY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mY, quat.mW, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mZ, quat.mX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mX, quat.mZ, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),
-        vec_sub( vec_add( vec_add( vec_madd( mW, quat.mZ, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mZ, quat.mW, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mX, quat.mY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mY, quat.mX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),
-        vec_sub( vec_sub( vec_sub( vec_madd( mW, quat.mW, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mX, quat.mX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mY, quat.mY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mZ, quat.mZ, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) )
-    );
-}
-
-inline Quat & Quat::operator *=( const Quat & quat )
-{
-    *this = *this * quat;
-    return *this;
-}
-
-inline const Vector3 rotate( const Quat & quat, const Vector3 & vec )
-{
-    vec_float4 tmpX, tmpY, tmpZ, tmpW;
-    tmpX = vec_sub( vec_add( vec_madd( quat.getW(), vec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( quat.getY(), vec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( quat.getZ(), vec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmpY = vec_sub( vec_add( vec_madd( quat.getW(), vec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( quat.getZ(), vec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( quat.getX(), vec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmpZ = vec_sub( vec_add( vec_madd( quat.getW(), vec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( quat.getX(), vec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( quat.getY(), vec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    tmpW = vec_add( vec_add( vec_madd( quat.getX(), vec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( quat.getY(), vec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( quat.getZ(), vec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    return Vector3(
-        vec_add( vec_sub( vec_add( vec_madd( tmpW, quat.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmpX, quat.getW(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( tmpY, quat.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( tmpZ, quat.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),
-        vec_add( vec_sub( vec_add( vec_madd( tmpW, quat.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmpY, quat.getW(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( tmpZ, quat.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( tmpX, quat.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),
-        vec_add( vec_sub( vec_add( vec_madd( tmpW, quat.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmpZ, quat.getW(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( tmpX, quat.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( tmpY, quat.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) )
-    );
-}
-
-inline const Quat conj( const Quat & quat )
-{
-    return Quat( negatef4( quat.getX() ), negatef4( quat.getY() ), negatef4( quat.getZ() ), quat.getW() );
-}
-
-inline const Quat select( const Quat & quat0, const Quat & quat1, vec_uint4 select1 )
-{
-    return Quat(
-        vec_sel( quat0.getX(), quat1.getX(), select1 ),
-        vec_sel( quat0.getY(), quat1.getY(), select1 ),
-        vec_sel( quat0.getZ(), quat1.getZ(), select1 ),
-        vec_sel( quat0.getW(), quat1.getW(), select1 )
-    );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( const Quat & quat )
-{
-    Aos::Quat vec0, vec1, vec2, vec3;
-    quat.get4Aos( vec0, vec1, vec2, vec3 );
-    printf("slot 0:\n");
-    print( vec0 );
-    printf("slot 1:\n");
-    print( vec1 );
-    printf("slot 2:\n");
-    print( vec2 );
-    printf("slot 3:\n");
-    print( vec3 );
-}
-
-inline void print( const Quat & quat, const char * name )
-{
-    Aos::Quat vec0, vec1, vec2, vec3;
-    printf( "%s:\n", name );
-    quat.get4Aos( vec0, vec1, vec2, vec3 );
-    printf("slot 0:\n");
-    print( vec0 );
-    printf("slot 1:\n");
-    print( vec1 );
-    printf("slot 2:\n");
-    print( vec2 );
-    printf("slot 3:\n");
-    print( vec3 );
-}
-
-#endif
-
-} // namespace Soa
-} // namespace Vectormath
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_QUAT_SOA_CPP_H
+#define _VECTORMATH_QUAT_SOA_CPP_H
+//-----------------------------------------------------------------------------
+// Definitions
+
+#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
+#define _VECTORMATH_INTERNAL_FUNCTIONS
+
+#endif
+
+namespace Vectormath {
+namespace Soa {
+
+inline Quat::Quat( const Quat & quat )
+{
+    mX = quat.mX;
+    mY = quat.mY;
+    mZ = quat.mZ;
+    mW = quat.mW;
+}
+
+inline Quat::Quat( vec_float4 _x, vec_float4 _y, vec_float4 _z, vec_float4 _w )
+{
+    mX = _x;
+    mY = _y;
+    mZ = _z;
+    mW = _w;
+}
+
+inline Quat::Quat( const Vector3 & xyz, vec_float4 _w )
+{
+    this->setXYZ( xyz );
+    this->setW( _w );
+}
+
+inline Quat::Quat( const Vector4 & vec )
+{
+    mX = vec.getX();
+    mY = vec.getY();
+    mZ = vec.getZ();
+    mW = vec.getW();
+}
+
+inline Quat::Quat( vec_float4 scalar )
+{
+    mX = scalar;
+    mY = scalar;
+    mZ = scalar;
+    mW = scalar;
+}
+
+inline Quat::Quat( Aos::Quat quat )
+{
+    vec_float4 vec128 = quat.get128();
+    mX = vec_splat( vec128, 0 );
+    mY = vec_splat( vec128, 1 );
+    mZ = vec_splat( vec128, 2 );
+    mW = vec_splat( vec128, 3 );
+}
+
+inline Quat::Quat( Aos::Quat quat0, Aos::Quat quat1, Aos::Quat quat2, Aos::Quat quat3 )
+{
+    vec_float4 tmp0, tmp1, tmp2, tmp3;
+    tmp0 = vec_mergeh( quat0.get128(), quat2.get128() );
+    tmp1 = vec_mergeh( quat1.get128(), quat3.get128() );
+    tmp2 = vec_mergel( quat0.get128(), quat2.get128() );
+    tmp3 = vec_mergel( quat1.get128(), quat3.get128() );
+    mX = vec_mergeh( tmp0, tmp1 );
+    mY = vec_mergel( tmp0, tmp1 );
+    mZ = vec_mergeh( tmp2, tmp3 );
+    mW = vec_mergel( tmp2, tmp3 );
+}
+
+inline const Quat Quat::identity( )
+{
+    return Quat( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){1.0f,1.0f,1.0f,1.0f}) );
+}
+
+inline const Quat lerp( vec_float4 t, const Quat & quat0, const Quat & quat1 )
+{
+    return ( quat0 + ( ( quat1 - quat0 ) * t ) );
+}
+
+inline const Quat slerp( vec_float4 t, const Quat & unitQuat0, const Quat & unitQuat1 )
+{
+    Quat start;
+    vec_float4 recipSinAngle, scale0, scale1, cosAngle, angle;
+    vec_uint4 selectMask;
+    cosAngle = dot( unitQuat0, unitQuat1 );
+    selectMask = (vec_uint4)vec_cmpgt( (vec_float4){0.0f,0.0f,0.0f,0.0f}, cosAngle );
+    cosAngle = vec_sel( cosAngle, negatef4( cosAngle ), selectMask );
+    start.setX( vec_sel( unitQuat0.getX(), negatef4( unitQuat0.getX() ), selectMask ) );
+    start.setY( vec_sel( unitQuat0.getY(), negatef4( unitQuat0.getY() ), selectMask ) );
+    start.setZ( vec_sel( unitQuat0.getZ(), negatef4( unitQuat0.getZ() ), selectMask ) );
+    start.setW( vec_sel( unitQuat0.getW(), negatef4( unitQuat0.getW() ), selectMask ) );
+    selectMask = (vec_uint4)vec_cmpgt( (vec_float4){_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL}, cosAngle );
+    angle = acosf4( cosAngle );
+    recipSinAngle = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sinf4( angle ) );
+    scale0 = vec_sel( vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), t ), vec_madd( sinf4( vec_madd( vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), t ), angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), recipSinAngle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), selectMask );
+    scale1 = vec_sel( t, vec_madd( sinf4( vec_madd( t, angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), recipSinAngle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), selectMask );
+    return ( ( start * scale0 ) + ( unitQuat1 * scale1 ) );
+}
+
+inline const Quat squad( vec_float4 t, const Quat & unitQuat0, const Quat & unitQuat1, const Quat & unitQuat2, const Quat & unitQuat3 )
+{
+    Quat tmp0, tmp1;
+    tmp0 = slerp( t, unitQuat0, unitQuat3 );
+    tmp1 = slerp( t, unitQuat1, unitQuat2 );
+    return slerp( vec_madd( vec_madd( ((vec_float4){2.0f,2.0f,2.0f,2.0f}), t, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), t ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), tmp0, tmp1 );
+}
+
+inline void Quat::get4Aos( Aos::Quat & result0, Aos::Quat & result1, Aos::Quat & result2, Aos::Quat & result3 ) const
+{
+    vec_float4 tmp0, tmp1, tmp2, tmp3;
+    tmp0 = vec_mergeh( mX, mZ );
+    tmp1 = vec_mergeh( mY, mW );
+    tmp2 = vec_mergel( mX, mZ );
+    tmp3 = vec_mergel( mY, mW );
+    result0 = Aos::Quat( vec_mergeh( tmp0, tmp1 ) );
+    result1 = Aos::Quat( vec_mergel( tmp0, tmp1 ) );
+    result2 = Aos::Quat( vec_mergeh( tmp2, tmp3 ) );
+    result3 = Aos::Quat( vec_mergel( tmp2, tmp3 ) );
+}
+
+inline Quat & Quat::operator =( const Quat & quat )
+{
+    mX = quat.mX;
+    mY = quat.mY;
+    mZ = quat.mZ;
+    mW = quat.mW;
+    return *this;
+}
+
+inline Quat & Quat::setXYZ( const Vector3 & vec )
+{
+    mX = vec.getX();
+    mY = vec.getY();
+    mZ = vec.getZ();
+    return *this;
+}
+
+inline const Vector3 Quat::getXYZ( ) const
+{
+    return Vector3( mX, mY, mZ );
+}
+
+inline Quat & Quat::setX( vec_float4 _x )
+{
+    mX = _x;
+    return *this;
+}
+
+inline vec_float4 Quat::getX( ) const
+{
+    return mX;
+}
+
+inline Quat & Quat::setY( vec_float4 _y )
+{
+    mY = _y;
+    return *this;
+}
+
+inline vec_float4 Quat::getY( ) const
+{
+    return mY;
+}
+
+inline Quat & Quat::setZ( vec_float4 _z )
+{
+    mZ = _z;
+    return *this;
+}
+
+inline vec_float4 Quat::getZ( ) const
+{
+    return mZ;
+}
+
+inline Quat & Quat::setW( vec_float4 _w )
+{
+    mW = _w;
+    return *this;
+}
+
+inline vec_float4 Quat::getW( ) const
+{
+    return mW;
+}
+
+inline Quat & Quat::setElem( int idx, vec_float4 value )
+{
+    *(&mX + idx) = value;
+    return *this;
+}
+
+inline vec_float4 Quat::getElem( int idx ) const
+{
+    return *(&mX + idx);
+}
+
+inline Quat::vec_float4_t & Quat::operator []( int idx )
+{
+    return *(&mX + idx);
+}
+
+inline vec_float4 Quat::operator []( int idx ) const
+{
+    return *(&mX + idx);
+}
+
+inline const Quat Quat::operator +( const Quat & quat ) const
+{
+    return Quat(
+        vec_add( mX, quat.mX ),
+        vec_add( mY, quat.mY ),
+        vec_add( mZ, quat.mZ ),
+        vec_add( mW, quat.mW )
+    );
+}
+
+inline const Quat Quat::operator -( const Quat & quat ) const
+{
+    return Quat(
+        vec_sub( mX, quat.mX ),
+        vec_sub( mY, quat.mY ),
+        vec_sub( mZ, quat.mZ ),
+        vec_sub( mW, quat.mW )
+    );
+}
+
+inline const Quat Quat::operator *( vec_float4 scalar ) const
+{
+    return Quat(
+        vec_madd( mX, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
+        vec_madd( mY, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
+        vec_madd( mZ, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
+        vec_madd( mW, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) )
+    );
+}
+
+inline Quat & Quat::operator +=( const Quat & quat )
+{
+    *this = *this + quat;
+    return *this;
+}
+
+inline Quat & Quat::operator -=( const Quat & quat )
+{
+    *this = *this - quat;
+    return *this;
+}
+
+inline Quat & Quat::operator *=( vec_float4 scalar )
+{
+    *this = *this * scalar;
+    return *this;
+}
+
+inline const Quat Quat::operator /( vec_float4 scalar ) const
+{
+    return Quat(
+        divf4( mX, scalar ),
+        divf4( mY, scalar ),
+        divf4( mZ, scalar ),
+        divf4( mW, scalar )
+    );
+}
+
+inline Quat & Quat::operator /=( vec_float4 scalar )
+{
+    *this = *this / scalar;
+    return *this;
+}
+
+inline const Quat Quat::operator -( ) const
+{
+    return Quat(
+        negatef4( mX ),
+        negatef4( mY ),
+        negatef4( mZ ),
+        negatef4( mW )
+    );
+}
+
+inline const Quat operator *( vec_float4 scalar, const Quat & quat )
+{
+    return quat * scalar;
+}
+
+inline vec_float4 dot( const Quat & quat0, const Quat & quat1 )
+{
+    vec_float4 result;
+    result = vec_madd( quat0.getX(), quat1.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    result = vec_add( result, vec_madd( quat0.getY(), quat1.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    result = vec_add( result, vec_madd( quat0.getZ(), quat1.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    result = vec_add( result, vec_madd( quat0.getW(), quat1.getW(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    return result;
+}
+
+inline vec_float4 norm( const Quat & quat )
+{
+    vec_float4 result;
+    result = vec_madd( quat.getX(), quat.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    result = vec_add( result, vec_madd( quat.getY(), quat.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    result = vec_add( result, vec_madd( quat.getZ(), quat.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    result = vec_add( result, vec_madd( quat.getW(), quat.getW(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    return result;
+}
+
+inline vec_float4 length( const Quat & quat )
+{
+    return sqrtf4( norm( quat ) );
+}
+
+inline const Quat normalize( const Quat & quat )
+{
+    vec_float4 lenSqr, lenInv;
+    lenSqr = norm( quat );
+    lenInv = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( lenSqr ) );
+    return Quat(
+        vec_madd( quat.getX(), lenInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
+        vec_madd( quat.getY(), lenInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
+        vec_madd( quat.getZ(), lenInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
+        vec_madd( quat.getW(), lenInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) )
+    );
+}
+
+inline const Quat Quat::rotation( const Vector3 & unitVec0, const Vector3 & unitVec1 )
+{
+    vec_float4 cosHalfAngleX2, recipCosHalfAngleX2;
+    cosHalfAngleX2 = sqrtf4( vec_madd( ((vec_float4){2.0f,2.0f,2.0f,2.0f}), vec_add( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), dot( unitVec0, unitVec1 ) ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    recipCosHalfAngleX2 = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), cosHalfAngleX2 );
+    return Quat( ( cross( unitVec0, unitVec1 ) * recipCosHalfAngleX2 ), vec_madd( cosHalfAngleX2, ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+}
+
+inline const Quat Quat::rotation( vec_float4 radians, const Vector3 & unitVec )
+{
+    vec_float4 s, c, angle;
+    angle = vec_madd( radians, ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    sincosf4( angle, &s, &c );
+    return Quat( ( unitVec * s ), c );
+}
+
+inline const Quat Quat::rotationX( vec_float4 radians )
+{
+    vec_float4 s, c, angle;
+    angle = vec_madd( radians, ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    sincosf4( angle, &s, &c );
+    return Quat( s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), c );
+}
+
+inline const Quat Quat::rotationY( vec_float4 radians )
+{
+    vec_float4 s, c, angle;
+    angle = vec_madd( radians, ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    sincosf4( angle, &s, &c );
+    return Quat( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), s, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), c );
+}
+
+inline const Quat Quat::rotationZ( vec_float4 radians )
+{
+    vec_float4 s, c, angle;
+    angle = vec_madd( radians, ((vec_float4){0.5f,0.5f,0.5f,0.5f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
+    sincosf4( angle, &s, &c );
+    return Quat( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), s, c );
+}
+
+inline const Quat Quat::operator *( const Quat & quat ) const
+{
+    return Quat(
+        vec_sub( vec_add( vec_add( vec_madd( mW, quat.mX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mX, quat.mW, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mY, quat.mZ, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mZ, quat.mY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),
+        vec_sub( vec_add( vec_add( vec_madd( mW, quat.mY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mY, quat.mW, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mZ, quat.mX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mX, quat.mZ, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),
+        vec_sub( vec_add( vec_add( vec_madd( mW, quat.mZ, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mZ, quat.mW, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mX, quat.mY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mY, quat.mX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),
+        vec_sub( vec_sub( vec_sub( vec_madd( mW, quat.mW, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( mX, quat.mX, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mY, quat.mY, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( mZ, quat.mZ, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) )
+    );
+}
+
+inline Quat & Quat::operator *=( const Quat & quat )
+{
+    *this = *this * quat;
+    return *this;
+}
+
+inline const Vector3 rotate( const Quat & quat, const Vector3 & vec )
+{
+    vec_float4 tmpX, tmpY, tmpZ, tmpW;
+    tmpX = vec_sub( vec_add( vec_madd( quat.getW(), vec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( quat.getY(), vec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( quat.getZ(), vec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    tmpY = vec_sub( vec_add( vec_madd( quat.getW(), vec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( quat.getZ(), vec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( quat.getX(), vec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    tmpZ = vec_sub( vec_add( vec_madd( quat.getW(), vec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( quat.getX(), vec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( quat.getY(), vec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    tmpW = vec_add( vec_add( vec_madd( quat.getX(), vec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( quat.getY(), vec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( quat.getZ(), vec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+    return Vector3(
+        vec_add( vec_sub( vec_add( vec_madd( tmpW, quat.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmpX, quat.getW(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( tmpY, quat.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( tmpZ, quat.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),
+        vec_add( vec_sub( vec_add( vec_madd( tmpW, quat.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmpY, quat.getW(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( tmpZ, quat.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( tmpX, quat.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),
+        vec_add( vec_sub( vec_add( vec_madd( tmpW, quat.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( tmpZ, quat.getW(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( tmpX, quat.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), vec_madd( tmpY, quat.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) )
+    );
+}
+
+inline const Quat conj( const Quat & quat )
+{
+    return Quat( negatef4( quat.getX() ), negatef4( quat.getY() ), negatef4( quat.getZ() ), quat.getW() );
+}
+
+inline const Quat select( const Quat & quat0, const Quat & quat1, vec_uint4 select1 )
+{
+    return Quat(
+        vec_sel( quat0.getX(), quat1.getX(), select1 ),
+        vec_sel( quat0.getY(), quat1.getY(), select1 ),
+        vec_sel( quat0.getZ(), quat1.getZ(), select1 ),
+        vec_sel( quat0.getW(), quat1.getW(), select1 )
+    );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+inline void print( const Quat & quat )
+{
+    Aos::Quat vec0, vec1, vec2, vec3;
+    quat.get4Aos( vec0, vec1, vec2, vec3 );
+    printf("slot 0:\n");
+    print( vec0 );
+    printf("slot 1:\n");
+    print( vec1 );
+    printf("slot 2:\n");
+    print( vec2 );
+    printf("slot 3:\n");
+    print( vec3 );
+}
+
+inline void print( const Quat & quat, const char * name )
+{
+    Aos::Quat vec0, vec1, vec2, vec3;
+    printf( "%s:\n", name );
+    quat.get4Aos( vec0, vec1, vec2, vec3 );
+    printf("slot 0:\n");
+    print( vec0 );
+    printf("slot 1:\n");
+    print( vec1 );
+    printf("slot 2:\n");
+    print( vec2 );
+    printf("slot 3:\n");
+    print( vec3 );
+}
+
+#endif
+
+} // namespace Soa
+} // namespace Vectormath
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/ppu/cpp/vec_aos.h b/Extras/vectormathlibrary/include/vectormath/ppu/cpp/vec_aos.h
index dc1f9849b..ef5689e99 100644
--- a/Extras/vectormathlibrary/include/vectormath/ppu/cpp/vec_aos.h
+++ b/Extras/vectormathlibrary/include/vectormath/ppu/cpp/vec_aos.h
@@ -1,1492 +1,1492 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_VEC_AOS_CPP_H
-#define _VECTORMATH_VEC_AOS_CPP_H
-//-----------------------------------------------------------------------------
-// Constants
-// for permutes words are labeled [x,y,z,w] [a,b,c,d]
-
-#define _VECTORMATH_PERM_X 0x00010203
-#define _VECTORMATH_PERM_Y 0x04050607
-#define _VECTORMATH_PERM_Z 0x08090a0b
-#define _VECTORMATH_PERM_W 0x0c0d0e0f
-#define _VECTORMATH_PERM_A 0x10111213
-#define _VECTORMATH_PERM_B 0x14151617
-#define _VECTORMATH_PERM_C 0x18191a1b
-#define _VECTORMATH_PERM_D 0x1c1d1e1f
-#define _VECTORMATH_PERM_XYZA (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A }
-#define _VECTORMATH_PERM_ZXYW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_W }
-#define _VECTORMATH_PERM_YZXW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_W }
-#define _VECTORMATH_PERM_YZAB (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A, _VECTORMATH_PERM_B }
-#define _VECTORMATH_PERM_ZABC (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A, _VECTORMATH_PERM_B, _VECTORMATH_PERM_C }
-#define _VECTORMATH_PERM_XYAW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A, _VECTORMATH_PERM_W }
-#define _VECTORMATH_PERM_XAZW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_A, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_W }
-#define _VECTORMATH_MASK_0xF000 (vec_uint4){ 0xffffffff, 0, 0, 0 }
-#define _VECTORMATH_MASK_0x0F00 (vec_uint4){ 0, 0xffffffff, 0, 0 }
-#define _VECTORMATH_MASK_0x00F0 (vec_uint4){ 0, 0, 0xffffffff, 0 }
-#define _VECTORMATH_MASK_0x000F (vec_uint4){ 0, 0, 0, 0xffffffff }
-#define _VECTORMATH_UNIT_1000 (vec_float4){ 1.0f, 0.0f, 0.0f, 0.0f }
-#define _VECTORMATH_UNIT_0100 (vec_float4){ 0.0f, 1.0f, 0.0f, 0.0f }
-#define _VECTORMATH_UNIT_0010 (vec_float4){ 0.0f, 0.0f, 1.0f, 0.0f }
-#define _VECTORMATH_UNIT_0001 (vec_float4){ 0.0f, 0.0f, 0.0f, 1.0f }
-#define _VECTORMATH_SLERP_TOL 0.999f
-
-//-----------------------------------------------------------------------------
-// Definitions
-
-#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
-#define _VECTORMATH_INTERNAL_FUNCTIONS
-
-static inline vec_float4 _vmathVfDot3( vec_float4 vec0, vec_float4 vec1 )
-{
-    vec_float4 result;
-    result = vec_madd( vec0, vec1, (vec_float4){0.0f,0.0f,0.0f,0.0f} );
-    result = vec_madd( vec_sld( vec0, vec0, 4 ), vec_sld( vec1, vec1, 4 ), result );
-    return vec_madd( vec_sld( vec0, vec0, 8 ), vec_sld( vec1, vec1, 8 ), result );
-}
-
-static inline vec_float4 _vmathVfDot4( vec_float4 vec0, vec_float4 vec1 )
-{
-    vec_float4 result;
-    result = vec_madd( vec0, vec1, (vec_float4){0.0f,0.0f,0.0f,0.0f} );
-    result = vec_madd( vec_sld( vec0, vec0, 4 ), vec_sld( vec1, vec1, 4 ), result );
-    return vec_add( vec_sld( result, result, 8 ), result );
-}
-
-static inline vec_float4 _vmathVfCross( vec_float4 vec0, vec_float4 vec1 )
-{
-    vec_float4 tmp0, tmp1, tmp2, tmp3, result;
-    tmp0 = vec_perm( vec0, vec0, _VECTORMATH_PERM_YZXW );
-    tmp1 = vec_perm( vec1, vec1, _VECTORMATH_PERM_ZXYW );
-    tmp2 = vec_perm( vec0, vec0, _VECTORMATH_PERM_ZXYW );
-    tmp3 = vec_perm( vec1, vec1, _VECTORMATH_PERM_YZXW );
-    result = vec_madd( tmp0, tmp1, (vec_float4){0.0f,0.0f,0.0f,0.0f} );
-    result = vec_nmsub( tmp2, tmp3, result );
-    return result;
-}
-
-static inline vec_uint4 _vmathVfToHalfFloatsUnpacked(vec_float4 v)
-{
-    vec_int4 bexp;
-    vec_uint4 mant, sign, hfloat;
-    vec_uint4 notZero, isInf;
-    const vec_uint4 hfloatInf = (vec_uint4){0x00007c00u,0x00007c00u,0x00007c00u,0x00007c00u};
-    const vec_uint4 mergeMant = (vec_uint4){0x000003ffu,0x000003ffu,0x000003ffu,0x000003ffu};
-    const vec_uint4 mergeSign = (vec_uint4){0x00008000u,0x00008000u,0x00008000u,0x00008000u};
-
-    sign = vec_sr((vec_uint4)v, (vec_uint4){16,16,16,16});
-    mant = vec_sr((vec_uint4)v, (vec_uint4){13,13,13,13});
-    bexp = vec_and(vec_sr((vec_int4)v, (vec_uint4){23,23,23,23}), (vec_int4){0xff,0xff,0xff,0xff});
-
-    notZero = (vec_uint4)vec_cmpgt(bexp, (vec_int4){112,112,112,112});
-    isInf = (vec_uint4)vec_cmpgt(bexp, (vec_int4){142,142,142,142});
-
-    bexp = vec_add(bexp, (vec_int4){-112,-112,-112,-112});
-    bexp = vec_sl(bexp, (vec_uint4){10,10,10,10});
-
-    hfloat = vec_sel((vec_uint4)bexp, mant, mergeMant);
-    hfloat = vec_sel((vec_uint4){0,0,0,0}, hfloat, notZero);
-    hfloat = vec_sel(hfloat, hfloatInf, isInf);
-    hfloat = vec_sel(hfloat, sign, mergeSign);
-
-    return hfloat;
-}
-
-static inline vec_ushort8 _vmath2VfToHalfFloats(vec_float4 u, vec_float4 v)
-{
-    vec_uint4 hfloat_u, hfloat_v;
-    const vec_uchar16 pack = (vec_uchar16){2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31};
-    hfloat_u = _vmathVfToHalfFloatsUnpacked(u);
-    hfloat_v = _vmathVfToHalfFloatsUnpacked(v);
-    return (vec_ushort8)vec_perm(hfloat_u, hfloat_v, pack);
-}
-
-#ifndef __GNUC__
-#define __builtin_constant_p(x) 0
-#endif
-
-static inline vec_float4 _vmathVfInsert(vec_float4 dst, vec_float4 src, int slot)
-{
-#ifdef __GNUC__
-    if (__builtin_constant_p(slot)) {
-        dst = vec_sld(dst, dst, slot<<2);
-        dst = vec_sld(dst, src, 4);
-        if (slot != 3) dst = vec_sld(dst, dst, (3-slot)<<2);
-        return dst;
-    } else
-#endif
-    {
-        vec_uchar16 shiftpattern = vec_lvsr( 0, (float *)(size_t)(slot<<2) );
-        vec_uint4 selectmask = (vec_uint4)vec_perm( (vec_uint4){0,0,0,0}, _VECTORMATH_MASK_0xF000, shiftpattern );
-        return vec_sel( dst, src, selectmask );
-    }
-}
-
-#define _vmathVfGetElement(vec, slot) ((float *)&(vec))[slot]
-#ifdef _VECTORMATH_SET_CONSTS_IN_MEM
-#define _vmathVfSetElement(vec, scalar, slot) ((float *)&(vec))[slot] = scalar
-#else
-#define _vmathVfSetElement(vec, scalar, slot)                                            \
-{                                                                                        \
-    if (__builtin_constant_p(scalar)) {                                                  \
-        (vec) = _vmathVfInsert(vec, (vec_float4){scalar, scalar, scalar, scalar}, slot); \
-    } else {                                                                             \
-        ((float *)&(vec))[slot] = scalar;                                                \
-    }                                                                                    \
-}
-#endif
-
-static inline vec_float4 _vmathVfSplatScalar(float scalar)
-{
-    vec_float4 result;
-    if (__builtin_constant_p(scalar)) {
-        result = (vec_float4){scalar, scalar, scalar, scalar};
-    } else {
-        result = vec_ld(0, &scalar);
-        result = vec_splat(vec_perm(result, result, vec_lvsl(0, &scalar)), 0);
-    } 
-    return result;
-}
-
-static inline vec_uint4 _vmathVuiSplatScalar(unsigned int scalar)
-{
-    vec_uint4 result;
-    if (__builtin_constant_p(scalar)) {
-        result = (vec_uint4){scalar, scalar, scalar, scalar};
-    } else {
-        result = vec_ld(0, &scalar);
-        result = vec_splat(vec_perm(result, result, vec_lvsl(0, &scalar)), 0);
-    } 
-    return result;
-}
-
-#endif
-
-namespace Vectormath {
-namespace Aos {
-
-#ifdef _VECTORMATH_NO_SCALAR_CAST
-inline VecIdx::operator floatInVec() const
-{
-    return floatInVec(ref, i);
-}
-
-inline float VecIdx::getAsFloat() const
-#else
-inline VecIdx::operator float() const
-#endif
-{
-    return _vmathVfGetElement(ref, i);
-}
-
-inline float VecIdx::operator =( float scalar )
-{
-    _vmathVfSetElement(ref, scalar, i);
-    return scalar;
-}
-
-inline floatInVec VecIdx::operator =( floatInVec scalar )
-{
-    ref = _vmathVfInsert(ref, scalar.get128(), i);
-    return scalar;
-}
-
-inline floatInVec VecIdx::operator =( const VecIdx& scalar )
-{
-    return *this = floatInVec(scalar.ref, scalar.i);
-}
-
-inline floatInVec VecIdx::operator *=( float scalar )
-{
-    return *this *= floatInVec(scalar);
-}
-
-inline floatInVec VecIdx::operator *=( floatInVec scalar )
-{
-    return *this = floatInVec(ref, i) * scalar;
-}
-
-inline floatInVec VecIdx::operator /=( float scalar )
-{
-    return *this /= floatInVec(scalar);
-}
-
-inline floatInVec VecIdx::operator /=( floatInVec scalar )
-{
-    return *this = floatInVec(ref, i) / scalar;
-}
-
-inline floatInVec VecIdx::operator +=( float scalar )
-{
-    return *this += floatInVec(scalar);
-}
-
-inline floatInVec VecIdx::operator +=( floatInVec scalar )
-{
-    return *this = floatInVec(ref, i) + scalar;
-}
-
-inline floatInVec VecIdx::operator -=( float scalar )
-{
-    return *this -= floatInVec(scalar);
-}
-
-inline floatInVec VecIdx::operator -=( floatInVec scalar )
-{
-    return *this = floatInVec(ref, i) - scalar;
-}
-
-inline Vector3::Vector3( float _x, float _y, float _z )
-{
-    if (__builtin_constant_p(_x) & __builtin_constant_p(_y) & __builtin_constant_p(_z)) {
-        mVec128 = (vec_float4){_x, _y, _z, 0.0f};
-    } else {
-        float *pf = (float *)&mVec128;
-        pf[0] = _x;
-        pf[1] = _y;
-        pf[2] = _z;
-    }
-}
-
-inline Vector3::Vector3( floatInVec _x, floatInVec _y, floatInVec _z )
-{
-    vec_float4 xz = vec_mergeh( _x.get128(), _z.get128() );
-    mVec128 = vec_mergeh( xz, _y.get128() );
-}
-
-inline Vector3::Vector3( Point3 pnt )
-{
-    mVec128 = pnt.get128();
-}
-
-inline Vector3::Vector3( float scalar )
-{
-    mVec128 = floatInVec(scalar).get128();
-}
-
-inline Vector3::Vector3( floatInVec scalar )
-{
-    mVec128 = scalar.get128();
-}
-
-inline Vector3::Vector3( vec_float4 vf4 )
-{
-    mVec128 = vf4;
-}
-
-inline const Vector3 Vector3::xAxis( )
-{
-    return Vector3( _VECTORMATH_UNIT_1000 );
-}
-
-inline const Vector3 Vector3::yAxis( )
-{
-    return Vector3( _VECTORMATH_UNIT_0100 );
-}
-
-inline const Vector3 Vector3::zAxis( )
-{
-    return Vector3( _VECTORMATH_UNIT_0010 );
-}
-
-inline const Vector3 lerp( float t, Vector3 vec0, Vector3 vec1 )
-{
-    return lerp( floatInVec(t), vec0, vec1 );
-}
-
-inline const Vector3 lerp( floatInVec t, Vector3 vec0, Vector3 vec1 )
-{
-    return ( vec0 + ( ( vec1 - vec0 ) * t ) );
-}
-
-inline const Vector3 slerp( float t, Vector3 unitVec0, Vector3 unitVec1 )
-{
-    return slerp( floatInVec(t), unitVec0, unitVec1 );
-}
-
-inline const Vector3 slerp( floatInVec t, Vector3 unitVec0, Vector3 unitVec1 )
-{
-    vec_float4 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
-    vec_uint4 selectMask;
-    cosAngle = _vmathVfDot3( unitVec0.get128(), unitVec1.get128() );
-    cosAngle = vec_splat( cosAngle, 0 );
-    selectMask = (vec_uint4)vec_cmpgt( ((vec_float4){_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL}), cosAngle );
-    angle = acosf4( cosAngle );
-    tttt = t.get128();
-    oneMinusT = vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), tttt );
-    angles = vec_mergeh( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), tttt );
-    angles = vec_mergeh( angles, oneMinusT );
-    angles = vec_madd( angles, angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    sines = sinf4( angles );
-    scales = divf4( sines, vec_splat( sines, 0 ) );
-    scale0 = vec_sel( oneMinusT, vec_splat( scales, 1 ), selectMask );
-    scale1 = vec_sel( tttt, vec_splat( scales, 2 ), selectMask );
-    return Vector3( vec_madd( unitVec0.get128(), scale0, vec_madd( unitVec1.get128(), scale1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ) );
-}
-
-inline vec_float4 Vector3::get128( ) const
-{
-    return mVec128;
-}
-
-inline void storeXYZ( Vector3 vec, vec_float4 * quad )
-{
-    vec_float4 dstVec = *quad;
-    vec_uint4 mask = _VECTORMATH_MASK_0x000F;
-    dstVec = vec_sel(vec.get128(), dstVec, mask);
-    *quad = dstVec;
-}
-
-inline void loadXYZArray( Vector3 & vec0, Vector3 & vec1, Vector3 & vec2, Vector3 & vec3, const vec_float4 * threeQuads )
-{
-    vec_float4 xyzx, yzxy, zxyz, xyz1, xyz2, xyz3;
-    xyzx = threeQuads[0];
-    yzxy = threeQuads[1];
-    zxyz = threeQuads[2];
-    xyz1 = vec_sld( xyzx, yzxy, 12 );
-    xyz2 = vec_sld( yzxy, zxyz, 8 );
-    xyz3 = vec_sld( zxyz, zxyz, 4 );
-    vec0 = Vector3( xyzx );
-    vec1 = Vector3( xyz1 );
-    vec2 = Vector3( xyz2 );
-    vec3 = Vector3( xyz3 );
-}
-
-inline void storeXYZArray( Vector3 vec0, Vector3 vec1, Vector3 vec2, Vector3 vec3, vec_float4 * threeQuads )
-{
-    vec_float4 xyzx, yzxy, zxyz;
-    xyzx = vec_perm( vec0.get128(), vec1.get128(), _VECTORMATH_PERM_XYZA );
-    yzxy = vec_perm( vec1.get128(), vec2.get128(), _VECTORMATH_PERM_YZAB );
-    zxyz = vec_perm( vec2.get128(), vec3.get128(), _VECTORMATH_PERM_ZABC );
-    threeQuads[0] = xyzx;
-    threeQuads[1] = yzxy;
-    threeQuads[2] = zxyz;
-}
-
-inline void storeHalfFloats( Vector3 vec0, Vector3 vec1, Vector3 vec2, Vector3 vec3, Vector3 vec4, Vector3 vec5, Vector3 vec6, Vector3 vec7, vec_ushort8 * threeQuads )
-{
-    vec_float4 xyz0[3];
-    vec_float4 xyz1[3];
-    storeXYZArray( vec0, vec1, vec2, vec3, xyz0 );
-    storeXYZArray( vec4, vec5, vec6, vec7, xyz1 );
-    threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
-    threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
-    threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
-}
-
-inline Vector3 & Vector3::operator =( Vector3 vec )
-{
-    mVec128 = vec.mVec128;
-    return *this;
-}
-
-inline Vector3 & Vector3::setX( float _x )
-{
-    _vmathVfSetElement(mVec128, _x, 0);
-    return *this;
-}
-
-inline Vector3 & Vector3::setX( floatInVec _x )
-{
-    mVec128 = _vmathVfInsert(mVec128, _x.get128(), 0);
-    return *this;
-}
-
-inline const floatInVec Vector3::getX( ) const
-{
-    return floatInVec( mVec128, 0 );
-}
-
-inline Vector3 & Vector3::setY( float _y )
-{
-    _vmathVfSetElement(mVec128, _y, 1);
-    return *this;
-}
-
-inline Vector3 & Vector3::setY( floatInVec _y )
-{
-    mVec128 = _vmathVfInsert(mVec128, _y.get128(), 1);
-    return *this;
-}
-
-inline const floatInVec Vector3::getY( ) const
-{
-    return floatInVec( mVec128, 1 );
-}
-
-inline Vector3 & Vector3::setZ( float _z )
-{
-    _vmathVfSetElement(mVec128, _z, 2);
-    return *this;
-}
-
-inline Vector3 & Vector3::setZ( floatInVec _z )
-{
-    mVec128 = _vmathVfInsert(mVec128, _z.get128(), 2);
-    return *this;
-}
-
-inline const floatInVec Vector3::getZ( ) const
-{
-    return floatInVec( mVec128, 2 );
-}
-
-inline Vector3 & Vector3::setElem( int idx, float value )
-{
-    _vmathVfSetElement(mVec128, value, idx);
-    return *this;
-}
-
-inline Vector3 & Vector3::setElem( int idx, floatInVec value )
-{
-    mVec128 = _vmathVfInsert(mVec128, value.get128(), idx);
-    return *this;
-}
-
-inline const floatInVec Vector3::getElem( int idx ) const
-{
-    return floatInVec( mVec128, idx );
-}
-
-inline VecIdx Vector3::operator []( int idx )
-{
-    return VecIdx( mVec128, idx );
-}
-
-inline const floatInVec Vector3::operator []( int idx ) const
-{
-    return floatInVec( mVec128, idx );
-}
-
-inline const Vector3 Vector3::operator +( Vector3 vec ) const
-{
-    return Vector3( vec_add( mVec128, vec.mVec128 ) );
-}
-
-inline const Vector3 Vector3::operator -( Vector3 vec ) const
-{
-    return Vector3( vec_sub( mVec128, vec.mVec128 ) );
-}
-
-inline const Point3 Vector3::operator +( Point3 pnt ) const
-{
-    return Point3( vec_add( mVec128, pnt.get128() ) );
-}
-
-inline const Vector3 Vector3::operator *( float scalar ) const
-{
-    return *this * floatInVec(scalar);
-}
-
-inline const Vector3 Vector3::operator *( floatInVec scalar ) const
-{
-    return Vector3( vec_madd( mVec128, scalar.get128(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-}
-
-inline Vector3 & Vector3::operator +=( Vector3 vec )
-{
-    *this = *this + vec;
-    return *this;
-}
-
-inline Vector3 & Vector3::operator -=( Vector3 vec )
-{
-    *this = *this - vec;
-    return *this;
-}
-
-inline Vector3 & Vector3::operator *=( float scalar )
-{
-    *this = *this * scalar;
-    return *this;
-}
-
-inline Vector3 & Vector3::operator *=( floatInVec scalar )
-{
-    *this = *this * scalar;
-    return *this;
-}
-
-inline const Vector3 Vector3::operator /( float scalar ) const
-{
-    return *this / floatInVec(scalar);
-}
-
-inline const Vector3 Vector3::operator /( floatInVec scalar ) const
-{
-    return Vector3( divf4( mVec128, scalar.get128() ) );
-}
-
-inline Vector3 & Vector3::operator /=( float scalar )
-{
-    *this = *this / scalar;
-    return *this;
-}
-
-inline Vector3 & Vector3::operator /=( floatInVec scalar )
-{
-    *this = *this / scalar;
-    return *this;
-}
-
-inline const Vector3 Vector3::operator -( ) const
-{
-    return Vector3( negatef4( mVec128 ) );
-}
-
-inline const Vector3 operator *( float scalar, Vector3 vec )
-{
-    return floatInVec(scalar) * vec;
-}
-
-inline const Vector3 operator *( floatInVec scalar, Vector3 vec )
-{
-    return vec * scalar;
-}
-
-inline const Vector3 mulPerElem( Vector3 vec0, Vector3 vec1 )
-{
-    return Vector3( vec_madd( vec0.get128(), vec1.get128(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-}
-
-inline const Vector3 divPerElem( Vector3 vec0, Vector3 vec1 )
-{
-    return Vector3( divf4( vec0.get128(), vec1.get128() ) );
-}
-
-inline const Vector3 recipPerElem( Vector3 vec )
-{
-    return Vector3( recipf4( vec.get128() ) );
-}
-
-inline const Vector3 sqrtPerElem( Vector3 vec )
-{
-    return Vector3( sqrtf4( vec.get128() ) );
-}
-
-inline const Vector3 rsqrtPerElem( Vector3 vec )
-{
-    return Vector3( rsqrtf4( vec.get128() ) );
-}
-
-inline const Vector3 absPerElem( Vector3 vec )
-{
-    return Vector3( fabsf4( vec.get128() ) );
-}
-
-inline const Vector3 copySignPerElem( Vector3 vec0, Vector3 vec1 )
-{
-    return Vector3( copysignf4( vec0.get128(), vec1.get128() ) );
-}
-
-inline const Vector3 maxPerElem( Vector3 vec0, Vector3 vec1 )
-{
-    return Vector3( fmaxf4( vec0.get128(), vec1.get128() ) );
-}
-
-inline const floatInVec maxElem( Vector3 vec )
-{
-    vec_float4 result;
-    result = fmaxf4( vec_splat( vec.get128(), 1 ), vec.get128() );
-    result = fmaxf4( vec_splat( vec.get128(), 2 ), result );
-    return floatInVec( result, 0 );
-}
-
-inline const Vector3 minPerElem( Vector3 vec0, Vector3 vec1 )
-{
-    return Vector3( fminf4( vec0.get128(), vec1.get128() ) );
-}
-
-inline const floatInVec minElem( Vector3 vec )
-{
-    vec_float4 result;
-    result = fminf4( vec_splat( vec.get128(), 1 ), vec.get128() );
-    result = fminf4( vec_splat( vec.get128(), 2 ), result );
-    return floatInVec( result, 0 );
-}
-
-inline const floatInVec sum( Vector3 vec )
-{
-    vec_float4 result;
-    result = vec_add( vec_splat( vec.get128(), 1 ), vec.get128() );
-    result = vec_add( vec_splat( vec.get128(), 2 ), result );
-    return floatInVec( result, 0 );
-}
-
-inline const floatInVec dot( Vector3 vec0, Vector3 vec1 )
-{
-    return floatInVec( _vmathVfDot3( vec0.get128(), vec1.get128() ), 0 );
-}
-
-inline const floatInVec lengthSqr( Vector3 vec )
-{
-    return floatInVec(  _vmathVfDot3( vec.get128(), vec.get128() ), 0 );
-}
-
-inline const floatInVec length( Vector3 vec )
-{
-    return floatInVec(  sqrtf4(_vmathVfDot3( vec.get128(), vec.get128() )), 0 );
-}
-
-inline const Vector3 normalize( Vector3 vec )
-{
-    vec_float4 dot = _vmathVfDot3( vec.get128(), vec.get128() );
-    dot = vec_splat( dot, 0 );
-    return Vector3( vec_madd( vec.get128(), rsqrtf4( dot ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-}
-
-inline const Vector3 cross( Vector3 vec0, Vector3 vec1 )
-{
-    return Vector3( _vmathVfCross( vec0.get128(), vec1.get128() ) );
-}
-
-inline const Vector3 select( Vector3 vec0, Vector3 vec1, bool select1 )
-{
-    return select( vec0, vec1, boolInVec(select1) );
-}
-
-inline const Vector3 select( Vector3 vec0, Vector3 vec1, boolInVec select1 )
-{
-    return Vector3( vec_sel( vec0.get128(), vec1.get128(), select1.get128() ) );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( Vector3 vec )
-{
-    union { vec_float4 v; float s[4]; } tmp;
-    tmp.v = vec.get128();
-    printf( "( %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2] );
-}
-
-inline void print( Vector3 vec, const char * name )
-{
-    union { vec_float4 v; float s[4]; } tmp;
-    tmp.v = vec.get128();
-    printf( "%s: ( %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2] );
-}
-
-#endif
-
-inline Vector4::Vector4( float _x, float _y, float _z, float _w )
-{
-    if (__builtin_constant_p(_x) & __builtin_constant_p(_y) &
-        __builtin_constant_p(_z) & __builtin_constant_p(_w)) {
-        mVec128 = (vec_float4){_x, _y, _z, _w};
-    } else {
-        float *pf = (float *)&mVec128;
-        pf[0] = _x;
-        pf[1] = _y;
-        pf[2] = _z;
-        pf[3] = _w;
-    }
-}
-
-inline Vector4::Vector4( floatInVec _x, floatInVec _y, floatInVec _z, floatInVec _w )
-{
-    vec_float4 xz = vec_mergeh( _x.get128(), _z.get128() );
-    vec_float4 yw = vec_mergeh( _y.get128(), _w.get128() );
-    mVec128 = vec_mergeh( xz, yw );
-}
-
-inline Vector4::Vector4( Vector3 xyz, float _w )
-{
-    mVec128 = xyz.get128();
-    _vmathVfSetElement(mVec128, _w, 3);
-}
-
-inline Vector4::Vector4( Vector3 xyz, floatInVec _w )
-{
-    mVec128 = xyz.get128();
-    mVec128 = _vmathVfInsert(mVec128, _w.get128(), 3);
-}
-
-inline Vector4::Vector4( Vector3 vec )
-{
-    mVec128 = vec.get128();
-    mVec128 = _vmathVfInsert(mVec128, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), 3);
-}
-
-inline Vector4::Vector4( Point3 pnt )
-{
-    mVec128 = pnt.get128();
-    mVec128 = _vmathVfInsert(mVec128, ((vec_float4){1.0f,1.0f,1.0f,1.0f}), 3);
-}
-
-inline Vector4::Vector4( Quat quat )
-{
-    mVec128 = quat.get128();
-}
-
-inline Vector4::Vector4( float scalar )
-{
-    mVec128 = floatInVec(scalar).get128();
-}
-
-inline Vector4::Vector4( floatInVec scalar )
-{
-    mVec128 = scalar.get128();
-}
-
-inline Vector4::Vector4( vec_float4 vf4 )
-{
-    mVec128 = vf4;
-}
-
-inline const Vector4 Vector4::xAxis( )
-{
-    return Vector4( _VECTORMATH_UNIT_1000 );
-}
-
-inline const Vector4 Vector4::yAxis( )
-{
-    return Vector4( _VECTORMATH_UNIT_0100 );
-}
-
-inline const Vector4 Vector4::zAxis( )
-{
-    return Vector4( _VECTORMATH_UNIT_0010 );
-}
-
-inline const Vector4 Vector4::wAxis( )
-{
-    return Vector4( _VECTORMATH_UNIT_0001 );
-}
-
-inline const Vector4 lerp( float t, Vector4 vec0, Vector4 vec1 )
-{
-    return lerp( floatInVec(t), vec0, vec1 );
-}
-
-inline const Vector4 lerp( floatInVec t, Vector4 vec0, Vector4 vec1 )
-{
-    return ( vec0 + ( ( vec1 - vec0 ) * t ) );
-}
-
-inline const Vector4 slerp( float t, Vector4 unitVec0, Vector4 unitVec1 )
-{
-    return slerp( floatInVec(t), unitVec0, unitVec1 );
-}
-
-inline const Vector4 slerp( floatInVec t, Vector4 unitVec0, Vector4 unitVec1 )
-{
-    vec_float4 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
-    vec_uint4 selectMask;
-    cosAngle = _vmathVfDot4( unitVec0.get128(), unitVec1.get128() );
-    cosAngle = vec_splat( cosAngle, 0 );
-    selectMask = (vec_uint4)vec_cmpgt( ((vec_float4){_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL}), cosAngle );
-    angle = acosf4( cosAngle );
-    tttt = t.get128();
-    oneMinusT = vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), tttt );
-    angles = vec_mergeh( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), tttt );
-    angles = vec_mergeh( angles, oneMinusT );
-    angles = vec_madd( angles, angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    sines = sinf4( angles );
-    scales = divf4( sines, vec_splat( sines, 0 ) );
-    scale0 = vec_sel( oneMinusT, vec_splat( scales, 1 ), selectMask );
-    scale1 = vec_sel( tttt, vec_splat( scales, 2 ), selectMask );
-    return Vector4( vec_madd( unitVec0.get128(), scale0, vec_madd( unitVec1.get128(), scale1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ) );
-}
-
-inline vec_float4 Vector4::get128( ) const
-{
-    return mVec128;
-}
-
-inline void storeHalfFloats( Vector4 vec0, Vector4 vec1, Vector4 vec2, Vector4 vec3, vec_ushort8 * twoQuads )
-{
-    twoQuads[0] = _vmath2VfToHalfFloats(vec0.get128(), vec1.get128());
-    twoQuads[1] = _vmath2VfToHalfFloats(vec2.get128(), vec3.get128());
-}
-
-inline Vector4 & Vector4::operator =( Vector4 vec )
-{
-    mVec128 = vec.mVec128;
-    return *this;
-}
-
-inline Vector4 & Vector4::setXYZ( Vector3 vec )
-{
-    mVec128 = vec_sel( vec.get128(), mVec128, _VECTORMATH_MASK_0x000F );
-    return *this;
-}
-
-inline const Vector3 Vector4::getXYZ( ) const
-{
-    return Vector3( mVec128 );
-}
-
-inline Vector4 & Vector4::setX( float _x )
-{
-    _vmathVfSetElement(mVec128, _x, 0);
-    return *this;
-}
-
-inline Vector4 & Vector4::setX( floatInVec _x )
-{
-    mVec128 = _vmathVfInsert(mVec128, _x.get128(), 0);
-    return *this;
-}
-
-inline const floatInVec Vector4::getX( ) const
-{
-    return floatInVec( mVec128, 0 );
-}
-
-inline Vector4 & Vector4::setY( float _y )
-{
-    _vmathVfSetElement(mVec128, _y, 1);
-    return *this;
-}
-
-inline Vector4 & Vector4::setY( floatInVec _y )
-{
-    mVec128 = _vmathVfInsert(mVec128, _y.get128(), 1);
-    return *this;
-}
-
-inline const floatInVec Vector4::getY( ) const
-{
-    return floatInVec( mVec128, 1 );
-}
-
-inline Vector4 & Vector4::setZ( float _z )
-{
-    _vmathVfSetElement(mVec128, _z, 2);
-    return *this;
-}
-
-inline Vector4 & Vector4::setZ( floatInVec _z )
-{
-    mVec128 = _vmathVfInsert(mVec128, _z.get128(), 2);
-    return *this;
-}
-
-inline const floatInVec Vector4::getZ( ) const
-{
-    return floatInVec( mVec128, 2 );
-}
-
-inline Vector4 & Vector4::setW( float _w )
-{
-    _vmathVfSetElement(mVec128, _w, 3);
-    return *this;
-}
-
-inline Vector4 & Vector4::setW( floatInVec _w )
-{
-    mVec128 = _vmathVfInsert(mVec128, _w.get128(), 3);
-    return *this;
-}
-
-inline const floatInVec Vector4::getW( ) const
-{
-    return floatInVec( mVec128, 3 );
-}
-
-inline Vector4 & Vector4::setElem( int idx, float value )
-{
-    _vmathVfSetElement(mVec128, value, idx);
-    return *this;
-}
-
-inline Vector4 & Vector4::setElem( int idx, floatInVec value )
-{
-    mVec128 = _vmathVfInsert(mVec128, value.get128(), idx);
-    return *this;
-}
-
-inline const floatInVec Vector4::getElem( int idx ) const
-{
-    return floatInVec( mVec128, idx );
-}
-
-inline VecIdx Vector4::operator []( int idx )
-{
-    return VecIdx( mVec128, idx );
-}
-
-inline const floatInVec Vector4::operator []( int idx ) const
-{
-    return floatInVec( mVec128, idx );
-}
-
-inline const Vector4 Vector4::operator +( Vector4 vec ) const
-{
-    return Vector4( vec_add( mVec128, vec.mVec128 ) );
-}
-
-inline const Vector4 Vector4::operator -( Vector4 vec ) const
-{
-    return Vector4( vec_sub( mVec128, vec.mVec128 ) );
-}
-
-inline const Vector4 Vector4::operator *( float scalar ) const
-{
-    return *this * floatInVec(scalar);
-}
-
-inline const Vector4 Vector4::operator *( floatInVec scalar ) const
-{
-    return Vector4( vec_madd( mVec128, scalar.get128(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-}
-
-inline Vector4 & Vector4::operator +=( Vector4 vec )
-{
-    *this = *this + vec;
-    return *this;
-}
-
-inline Vector4 & Vector4::operator -=( Vector4 vec )
-{
-    *this = *this - vec;
-    return *this;
-}
-
-inline Vector4 & Vector4::operator *=( float scalar )
-{
-    *this = *this * scalar;
-    return *this;
-}
-
-inline Vector4 & Vector4::operator *=( floatInVec scalar )
-{
-    *this = *this * scalar;
-    return *this;
-}
-
-inline const Vector4 Vector4::operator /( float scalar ) const
-{
-    return *this / floatInVec(scalar);
-}
-
-inline const Vector4 Vector4::operator /( floatInVec scalar ) const
-{
-    return Vector4( divf4( mVec128, scalar.get128() ) );
-}
-
-inline Vector4 & Vector4::operator /=( float scalar )
-{
-    *this = *this / scalar;
-    return *this;
-}
-
-inline Vector4 & Vector4::operator /=( floatInVec scalar )
-{
-    *this = *this / scalar;
-    return *this;
-}
-
-inline const Vector4 Vector4::operator -( ) const
-{
-    return Vector4( negatef4( mVec128 ) );
-}
-
-inline const Vector4 operator *( float scalar, Vector4 vec )
-{
-    return floatInVec(scalar) * vec;
-}
-
-inline const Vector4 operator *( floatInVec scalar, Vector4 vec )
-{
-    return vec * scalar;
-}
-
-inline const Vector4 mulPerElem( Vector4 vec0, Vector4 vec1 )
-{
-    return Vector4( vec_madd( vec0.get128(), vec1.get128(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-}
-
-inline const Vector4 divPerElem( Vector4 vec0, Vector4 vec1 )
-{
-    return Vector4( divf4( vec0.get128(), vec1.get128() ) );
-}
-
-inline const Vector4 recipPerElem( Vector4 vec )
-{
-    return Vector4( recipf4( vec.get128() ) );
-}
-
-inline const Vector4 sqrtPerElem( Vector4 vec )
-{
-    return Vector4( sqrtf4( vec.get128() ) );
-}
-
-inline const Vector4 rsqrtPerElem( Vector4 vec )
-{
-    return Vector4( rsqrtf4( vec.get128() ) );
-}
-
-inline const Vector4 absPerElem( Vector4 vec )
-{
-    return Vector4( fabsf4( vec.get128() ) );
-}
-
-inline const Vector4 copySignPerElem( Vector4 vec0, Vector4 vec1 )
-{
-    return Vector4( copysignf4( vec0.get128(), vec1.get128() ) );
-}
-
-inline const Vector4 maxPerElem( Vector4 vec0, Vector4 vec1 )
-{
-    return Vector4( fmaxf4( vec0.get128(), vec1.get128() ) );
-}
-
-inline const floatInVec maxElem( Vector4 vec )
-{
-    vec_float4 result;
-    result = fmaxf4( vec_splat( vec.get128(), 1 ), vec.get128() );
-    result = fmaxf4( vec_splat( vec.get128(), 2 ), result );
-    result = fmaxf4( vec_splat( vec.get128(), 3 ), result );
-    return floatInVec( result, 0 );
-}
-
-inline const Vector4 minPerElem( Vector4 vec0, Vector4 vec1 )
-{
-    return Vector4( fminf4( vec0.get128(), vec1.get128() ) );
-}
-
-inline const floatInVec minElem( Vector4 vec )
-{
-    vec_float4 result;
-    result = fminf4( vec_splat( vec.get128(), 1 ), vec.get128() );
-    result = fminf4( vec_splat( vec.get128(), 2 ), result );
-    result = fminf4( vec_splat( vec.get128(), 3 ), result );
-    return floatInVec( result, 0 );
-}
-
-inline const floatInVec sum( Vector4 vec )
-{
-    vec_float4 result;
-    result = vec_add( vec_splat( vec.get128(), 1 ), vec.get128() );
-    result = vec_add( vec_splat( vec.get128(), 2 ), result );
-    result = vec_add( vec_splat( vec.get128(), 3 ), result );
-    return floatInVec( result, 0 );
-}
-
-inline const floatInVec dot( Vector4 vec0, Vector4 vec1 )
-{
-    return floatInVec( _vmathVfDot4( vec0.get128(), vec1.get128() ), 0 );
-}
-
-inline const floatInVec lengthSqr( Vector4 vec )
-{
-    return floatInVec(  _vmathVfDot4( vec.get128(), vec.get128() ), 0 );
-}
-
-inline const floatInVec length( Vector4 vec )
-{
-    return floatInVec(  sqrtf4(_vmathVfDot4( vec.get128(), vec.get128() )), 0 );
-}
-
-inline const Vector4 normalize( Vector4 vec )
-{
-    vec_float4 dot = _vmathVfDot4( vec.get128(), vec.get128() );
-    return Vector4( vec_madd( vec.get128(), rsqrtf4( dot ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-}
-
-inline const Vector4 select( Vector4 vec0, Vector4 vec1, bool select1 )
-{
-    return select( vec0, vec1, boolInVec(select1) );
-}
-
-inline const Vector4 select( Vector4 vec0, Vector4 vec1, boolInVec select1 )
-{
-    return Vector4( vec_sel( vec0.get128(), vec1.get128(), select1.get128() ) );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( Vector4 vec )
-{
-    union { vec_float4 v; float s[4]; } tmp;
-    tmp.v = vec.get128();
-    printf( "( %f %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] );
-}
-
-inline void print( Vector4 vec, const char * name )
-{
-    union { vec_float4 v; float s[4]; } tmp;
-    tmp.v = vec.get128();
-    printf( "%s: ( %f %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] );
-}
-
-#endif
-
-inline Point3::Point3( float _x, float _y, float _z )
-{
-    if (__builtin_constant_p(_x) & __builtin_constant_p(_y) & __builtin_constant_p(_z)) {
-        mVec128 = (vec_float4){_x, _y, _z, 0.0f};
-    } else {
-        float *pf = (float *)&mVec128;
-        pf[0] = _x;
-        pf[1] = _y;
-        pf[2] = _z;
-    }
-}
-
-inline Point3::Point3( floatInVec _x, floatInVec _y, floatInVec _z )
-{
-    vec_float4 xz = vec_mergeh( _x.get128(), _z.get128() );
-    mVec128 = vec_mergeh( xz, _y.get128() );
-}
-
-inline Point3::Point3( Vector3 vec )
-{
-    mVec128 = vec.get128();
-}
-
-inline Point3::Point3( float scalar )
-{
-    mVec128 = floatInVec(scalar).get128();
-}
-
-inline Point3::Point3( floatInVec scalar )
-{
-    mVec128 = scalar.get128();
-}
-
-inline Point3::Point3( vec_float4 vf4 )
-{
-    mVec128 = vf4;
-}
-
-inline const Point3 lerp( float t, Point3 pnt0, Point3 pnt1 )
-{
-    return lerp( floatInVec(t), pnt0, pnt1 );
-}
-
-inline const Point3 lerp( floatInVec t, Point3 pnt0, Point3 pnt1 )
-{
-    return ( pnt0 + ( ( pnt1 - pnt0 ) * t ) );
-}
-
-inline vec_float4 Point3::get128( ) const
-{
-    return mVec128;
-}
-
-inline void storeXYZ( Point3 pnt, vec_float4 * quad )
-{
-    vec_float4 dstVec = *quad;
-    vec_uint4 mask = _VECTORMATH_MASK_0x000F;
-    dstVec = vec_sel(pnt.get128(), dstVec, mask);
-    *quad = dstVec;
-}
-
-inline void loadXYZArray( Point3 & pnt0, Point3 & pnt1, Point3 & pnt2, Point3 & pnt3, const vec_float4 * threeQuads )
-{
-    vec_float4 xyzx, yzxy, zxyz, xyz1, xyz2, xyz3;
-    xyzx = threeQuads[0];
-    yzxy = threeQuads[1];
-    zxyz = threeQuads[2];
-    xyz1 = vec_sld( xyzx, yzxy, 12 );
-    xyz2 = vec_sld( yzxy, zxyz, 8 );
-    xyz3 = vec_sld( zxyz, zxyz, 4 );
-    pnt0 = Point3( xyzx );
-    pnt1 = Point3( xyz1 );
-    pnt2 = Point3( xyz2 );
-    pnt3 = Point3( xyz3 );
-}
-
-inline void storeXYZArray( Point3 pnt0, Point3 pnt1, Point3 pnt2, Point3 pnt3, vec_float4 * threeQuads )
-{
-    vec_float4 xyzx, yzxy, zxyz;
-    xyzx = vec_perm( pnt0.get128(), pnt1.get128(), _VECTORMATH_PERM_XYZA );
-    yzxy = vec_perm( pnt1.get128(), pnt2.get128(), _VECTORMATH_PERM_YZAB );
-    zxyz = vec_perm( pnt2.get128(), pnt3.get128(), _VECTORMATH_PERM_ZABC );
-    threeQuads[0] = xyzx;
-    threeQuads[1] = yzxy;
-    threeQuads[2] = zxyz;
-}
-
-inline void storeHalfFloats( Point3 pnt0, Point3 pnt1, Point3 pnt2, Point3 pnt3, Point3 pnt4, Point3 pnt5, Point3 pnt6, Point3 pnt7, vec_ushort8 * threeQuads )
-{
-    vec_float4 xyz0[3];
-    vec_float4 xyz1[3];
-    storeXYZArray( pnt0, pnt1, pnt2, pnt3, xyz0 );
-    storeXYZArray( pnt4, pnt5, pnt6, pnt7, xyz1 );
-    threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
-    threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
-    threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
-}
-
-inline Point3 & Point3::operator =( Point3 pnt )
-{
-    mVec128 = pnt.mVec128;
-    return *this;
-}
-
-inline Point3 & Point3::setX( float _x )
-{
-    _vmathVfSetElement(mVec128, _x, 0);
-    return *this;
-}
-
-inline Point3 & Point3::setX( floatInVec _x )
-{
-    mVec128 = _vmathVfInsert(mVec128, _x.get128(), 0);
-    return *this;
-}
-
-inline const floatInVec Point3::getX( ) const
-{
-    return floatInVec( mVec128, 0 );
-}
-
-inline Point3 & Point3::setY( float _y )
-{
-    _vmathVfSetElement(mVec128, _y, 1);
-    return *this;
-}
-
-inline Point3 & Point3::setY( floatInVec _y )
-{
-    mVec128 = _vmathVfInsert(mVec128, _y.get128(), 1);
-    return *this;
-}
-
-inline const floatInVec Point3::getY( ) const
-{
-    return floatInVec( mVec128, 1 );
-}
-
-inline Point3 & Point3::setZ( float _z )
-{
-    _vmathVfSetElement(mVec128, _z, 2);
-    return *this;
-}
-
-inline Point3 & Point3::setZ( floatInVec _z )
-{
-    mVec128 = _vmathVfInsert(mVec128, _z.get128(), 2);
-    return *this;
-}
-
-inline const floatInVec Point3::getZ( ) const
-{
-    return floatInVec( mVec128, 2 );
-}
-
-inline Point3 & Point3::setElem( int idx, float value )
-{
-    _vmathVfSetElement(mVec128, value, idx);
-    return *this;
-}
-
-inline Point3 & Point3::setElem( int idx, floatInVec value )
-{
-    mVec128 = _vmathVfInsert(mVec128, value.get128(), idx);
-    return *this;
-}
-
-inline const floatInVec Point3::getElem( int idx ) const
-{
-    return floatInVec( mVec128, idx );
-}
-
-inline VecIdx Point3::operator []( int idx )
-{
-    return VecIdx( mVec128, idx );
-}
-
-inline const floatInVec Point3::operator []( int idx ) const
-{
-    return floatInVec( mVec128, idx );
-}
-
-inline const Vector3 Point3::operator -( Point3 pnt ) const
-{
-    return Vector3( vec_sub( mVec128, pnt.mVec128 ) );
-}
-
-inline const Point3 Point3::operator +( Vector3 vec ) const
-{
-    return Point3( vec_add( mVec128, vec.get128() ) );
-}
-
-inline const Point3 Point3::operator -( Vector3 vec ) const
-{
-    return Point3( vec_sub( mVec128, vec.get128() ) );
-}
-
-inline Point3 & Point3::operator +=( Vector3 vec )
-{
-    *this = *this + vec;
-    return *this;
-}
-
-inline Point3 & Point3::operator -=( Vector3 vec )
-{
-    *this = *this - vec;
-    return *this;
-}
-
-inline const Point3 mulPerElem( Point3 pnt0, Point3 pnt1 )
-{
-    return Point3( vec_madd( pnt0.get128(), pnt1.get128(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-}
-
-inline const Point3 divPerElem( Point3 pnt0, Point3 pnt1 )
-{
-    return Point3( divf4( pnt0.get128(), pnt1.get128() ) );
-}
-
-inline const Point3 recipPerElem( Point3 pnt )
-{
-    return Point3( recipf4( pnt.get128() ) );
-}
-
-inline const Point3 sqrtPerElem( Point3 pnt )
-{
-    return Point3( sqrtf4( pnt.get128() ) );
-}
-
-inline const Point3 rsqrtPerElem( Point3 pnt )
-{
-    return Point3( rsqrtf4( pnt.get128() ) );
-}
-
-inline const Point3 absPerElem( Point3 pnt )
-{
-    return Point3( fabsf4( pnt.get128() ) );
-}
-
-inline const Point3 copySignPerElem( Point3 pnt0, Point3 pnt1 )
-{
-    return Point3( copysignf4( pnt0.get128(), pnt1.get128() ) );
-}
-
-inline const Point3 maxPerElem( Point3 pnt0, Point3 pnt1 )
-{
-    return Point3( fmaxf4( pnt0.get128(), pnt1.get128() ) );
-}
-
-inline const floatInVec maxElem( Point3 pnt )
-{
-    vec_float4 result;
-    result = fmaxf4( vec_splat( pnt.get128(), 1 ), pnt.get128() );
-    result = fmaxf4( vec_splat( pnt.get128(), 2 ), result );
-    return floatInVec( result, 0 );
-}
-
-inline const Point3 minPerElem( Point3 pnt0, Point3 pnt1 )
-{
-    return Point3( fminf4( pnt0.get128(), pnt1.get128() ) );
-}
-
-inline const floatInVec minElem( Point3 pnt )
-{
-    vec_float4 result;
-    result = fminf4( vec_splat( pnt.get128(), 1 ), pnt.get128() );
-    result = fminf4( vec_splat( pnt.get128(), 2 ), result );
-    return floatInVec( result, 0 );
-}
-
-inline const floatInVec sum( Point3 pnt )
-{
-    vec_float4 result;
-    result = vec_add( vec_splat( pnt.get128(), 1 ), pnt.get128() );
-    result = vec_add( vec_splat( pnt.get128(), 2 ), result );
-    return floatInVec( result, 0 );
-}
-
-inline const Point3 scale( Point3 pnt, float scaleVal )
-{
-    return scale( pnt, floatInVec( scaleVal ) );
-}
-
-inline const Point3 scale( Point3 pnt, floatInVec scaleVal )
-{
-    return mulPerElem( pnt, Point3( scaleVal ) );
-}
-
-inline const Point3 scale( Point3 pnt, Vector3 scaleVec )
-{
-    return mulPerElem( pnt, Point3( scaleVec ) );
-}
-
-inline const floatInVec projection( Point3 pnt, Vector3 unitVec )
-{
-    return floatInVec( _vmathVfDot3( pnt.get128(), unitVec.get128() ), 0 );
-}
-
-inline const floatInVec distSqrFromOrigin( Point3 pnt )
-{
-    return lengthSqr( Vector3( pnt ) );
-}
-
-inline const floatInVec distFromOrigin( Point3 pnt )
-{
-    return length( Vector3( pnt ) );
-}
-
-inline const floatInVec distSqr( Point3 pnt0, Point3 pnt1 )
-{
-    return lengthSqr( ( pnt1 - pnt0 ) );
-}
-
-inline const floatInVec dist( Point3 pnt0, Point3 pnt1 )
-{
-    return length( ( pnt1 - pnt0 ) );
-}
-
-inline const Point3 select( Point3 pnt0, Point3 pnt1, bool select1 )
-{
-    return select( pnt0, pnt1, boolInVec(select1) );
-}
-
-inline const Point3 select( Point3 pnt0, Point3 pnt1, boolInVec select1 )
-{
-    return Point3( vec_sel( pnt0.get128(), pnt1.get128(), select1.get128() ) );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( Point3 pnt )
-{
-    union { vec_float4 v; float s[4]; } tmp;
-    tmp.v = pnt.get128();
-    printf( "( %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2] );
-}
-
-inline void print( Point3 pnt, const char * name )
-{
-    union { vec_float4 v; float s[4]; } tmp;
-    tmp.v = pnt.get128();
-    printf( "%s: ( %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2] );
-}
-
-#endif
-
-} // namespace Aos
-} // namespace Vectormath
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_VEC_AOS_CPP_H
+#define _VECTORMATH_VEC_AOS_CPP_H
+//-----------------------------------------------------------------------------
+// Constants
+// For permutes, the words of the first operand are labeled [x,y,z,w] and those of the second operand [a,b,c,d].
+
+#define _VECTORMATH_PERM_X 0x00010203  // byte indices 0-3: word x of the first vec_perm operand
+#define _VECTORMATH_PERM_Y 0x04050607
+#define _VECTORMATH_PERM_Z 0x08090a0b
+#define _VECTORMATH_PERM_W 0x0c0d0e0f
+#define _VECTORMATH_PERM_A 0x10111213  // byte indices 16-19: word a (first word) of the second vec_perm operand
+#define _VECTORMATH_PERM_B 0x14151617
+#define _VECTORMATH_PERM_C 0x18191a1b
+#define _VECTORMATH_PERM_D 0x1c1d1e1f
+#define _VECTORMATH_PERM_XYZA (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A }
+#define _VECTORMATH_PERM_ZXYW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_W }
+#define _VECTORMATH_PERM_YZXW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_W }
+#define _VECTORMATH_PERM_YZAB (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A, _VECTORMATH_PERM_B }
+#define _VECTORMATH_PERM_ZABC (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A, _VECTORMATH_PERM_B, _VECTORMATH_PERM_C }
+#define _VECTORMATH_PERM_XYAW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A, _VECTORMATH_PERM_W }
+#define _VECTORMATH_PERM_XAZW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_A, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_W }
+#define _VECTORMATH_MASK_0xF000 (vec_uint4){ 0xffffffff, 0, 0, 0 }  // select masks: all bits set in word 0 only
+#define _VECTORMATH_MASK_0x0F00 (vec_uint4){ 0, 0xffffffff, 0, 0 }
+#define _VECTORMATH_MASK_0x00F0 (vec_uint4){ 0, 0, 0xffffffff, 0 }
+#define _VECTORMATH_MASK_0x000F (vec_uint4){ 0, 0, 0, 0xffffffff }  // all bits set in word 3 only
+#define _VECTORMATH_UNIT_1000 (vec_float4){ 1.0f, 0.0f, 0.0f, 0.0f }  // unit vectors: 1.0f in one word, 0.0f elsewhere
+#define _VECTORMATH_UNIT_0100 (vec_float4){ 0.0f, 1.0f, 0.0f, 0.0f }
+#define _VECTORMATH_UNIT_0010 (vec_float4){ 0.0f, 0.0f, 1.0f, 0.0f }
+#define _VECTORMATH_UNIT_0001 (vec_float4){ 0.0f, 0.0f, 0.0f, 1.0f }
+#define _VECTORMATH_SLERP_TOL 0.999f  // slerp() uses linear weights when the cosine of the angle is not below this
+
+//-----------------------------------------------------------------------------
+// Definitions
+
+#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
+#define _VECTORMATH_INTERNAL_FUNCTIONS
+
+static inline vec_float4 _vmathVfDot3( vec_float4 vec0, vec_float4 vec1 )
+{   // 3-component dot product; only word 0 of the returned vector holds x0*x1 + y0*y1 + z0*z1.
+    vec_float4 result;
+    result = vec_madd( vec0, vec1, (vec_float4){0.0f,0.0f,0.0f,0.0f} );  // per-word products
+    result = vec_madd( vec_sld( vec0, vec0, 4 ), vec_sld( vec1, vec1, 4 ), result );  // rotate left one word: adds the y product into word 0
+    return vec_madd( vec_sld( vec0, vec0, 8 ), vec_sld( vec1, vec1, 8 ), result );  // rotate left two words: adds the z product into word 0
+}
+
+static inline vec_float4 _vmathVfDot4( vec_float4 vec0, vec_float4 vec1 )
+{   // 4-component dot product; every word of the returned vector holds the full sum.
+    vec_float4 result;
+    result = vec_madd( vec0, vec1, (vec_float4){0.0f,0.0f,0.0f,0.0f} );  // per-word products
+    result = vec_madd( vec_sld( vec0, vec0, 4 ), vec_sld( vec1, vec1, 4 ), result );  // each word += the next word's product
+    return vec_add( vec_sld( result, result, 8 ), result );  // add the pair sums that sit two words apart
+}
+
+static inline vec_float4 _vmathVfCross( vec_float4 vec0, vec_float4 vec1 )
+{   // 3-component cross product vec0 x vec1 in words 0-2; word 3 is computed as w0*w1 - w0*w1.
+    vec_float4 tmp0, tmp1, tmp2, tmp3, result;
+    tmp0 = vec_perm( vec0, vec0, _VECTORMATH_PERM_YZXW );  // (y0, z0, x0, w0)
+    tmp1 = vec_perm( vec1, vec1, _VECTORMATH_PERM_ZXYW );  // (z1, x1, y1, w1)
+    tmp2 = vec_perm( vec0, vec0, _VECTORMATH_PERM_ZXYW );  // (z0, x0, y0, w0)
+    tmp3 = vec_perm( vec1, vec1, _VECTORMATH_PERM_YZXW );  // (y1, z1, x1, w1)
+    result = vec_madd( tmp0, tmp1, (vec_float4){0.0f,0.0f,0.0f,0.0f} );  // tmp0 * tmp1
+    result = vec_nmsub( tmp2, tmp3, result );  // result - tmp2 * tmp3
+    return result;
+}
+
+static inline vec_uint4 _vmathVfToHalfFloatsUnpacked(vec_float4 v)
+{   // Converts four floats to 16-bit half-float bit patterns, one per 32-bit word (in the low 16 bits).
+    vec_int4 bexp;
+    vec_uint4 mant, sign, hfloat;
+    vec_uint4 notZero, isInf;
+    const vec_uint4 hfloatInf = (vec_uint4){0x00007c00u,0x00007c00u,0x00007c00u,0x00007c00u};  // bits 10-14 set, mantissa zero
+    const vec_uint4 mergeMant = (vec_uint4){0x000003ffu,0x000003ffu,0x000003ffu,0x000003ffu};  // low 10 bits: mantissa field
+    const vec_uint4 mergeSign = (vec_uint4){0x00008000u,0x00008000u,0x00008000u,0x00008000u};  // bit 15: sign field
+
+    sign = vec_sr((vec_uint4)v, (vec_uint4){16,16,16,16});  // float sign bit 31 -> bit 15
+    mant = vec_sr((vec_uint4)v, (vec_uint4){13,13,13,13});  // top 10 mantissa bits -> bits 0-9 (low 13 bits dropped, no rounding)
+    bexp = vec_and(vec_sr((vec_int4)v, (vec_uint4){23,23,23,23}), (vec_int4){0xff,0xff,0xff,0xff});  // 8-bit biased float exponent
+
+    notZero = (vec_uint4)vec_cmpgt(bexp, (vec_int4){112,112,112,112});  // exponents <= 112 are forced to zero below
+    isInf = (vec_uint4)vec_cmpgt(bexp, (vec_int4){142,142,142,142});  // exponents > 142 (including 255) are forced to hfloatInf below
+
+    bexp = vec_add(bexp, (vec_int4){-112,-112,-112,-112});  // rebias by 112 (float bias 127 minus half bias 15)
+    bexp = vec_sl(bexp, (vec_uint4){10,10,10,10});  // move the exponent above the 10-bit mantissa
+
+    hfloat = vec_sel((vec_uint4)bexp, mant, mergeMant);  // exponent bits combined with mantissa bits
+    hfloat = vec_sel((vec_uint4){0,0,0,0}, hfloat, notZero);  // too-small inputs -> 0
+    hfloat = vec_sel(hfloat, hfloatInf, isInf);  // too-large inputs -> 0x7c00
+    hfloat = vec_sel(hfloat, sign, mergeSign);  // sign bit is merged last, so it is kept in every case
+
+    return hfloat;
+}
+
+static inline vec_ushort8 _vmath2VfToHalfFloats(vec_float4 u, vec_float4 v)
+{   // Converts u and v to half floats and packs the eight 16-bit results into one vector (u's four first, then v's).
+    vec_uint4 hfloat_u, hfloat_v;
+    const vec_uchar16 pack = (vec_uchar16){2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31};  // bytes 2-3 of every 32-bit word of the two operands
+    hfloat_u = _vmathVfToHalfFloatsUnpacked(u);
+    hfloat_v = _vmathVfToHalfFloatsUnpacked(v);
+    return (vec_ushort8)vec_perm(hfloat_u, hfloat_v, pack);
+}
+
+#ifndef __GNUC__  // compilers that do not define __GNUC__ get a stub for the builtin
+#define __builtin_constant_p(x) 0  // always 0, so every __builtin_constant_p test below takes its run-time branch
+#endif
+
+static inline vec_float4 _vmathVfInsert(vec_float4 dst, vec_float4 src, int slot)
+{   // Returns dst with word `slot` replaced from src. NOTE(review): the two paths read different words of src (word 0 vs word `slot`), so src presumably holds the same value in every word -- confirm at callers.
+#ifdef __GNUC__
+    if (__builtin_constant_p(slot)) {
+        dst = vec_sld(dst, dst, slot<<2);  // rotate so the target word sits in word 0
+        dst = vec_sld(dst, src, 4);  // drop word 0 and shift word 0 of src in at word 3
+        if (slot != 3) dst = vec_sld(dst, dst, (3-slot)<<2);  // rotate the new word back to position `slot`
+        return dst;
+    } else
+#endif
+    {
+        vec_uchar16 shiftpattern = vec_lvsr( 0, (float *)(size_t)(slot<<2) );  // permute control derived from the byte offset slot*4; the pointer is only used as an offset here
+        vec_uint4 selectmask = (vec_uint4)vec_perm( (vec_uint4){0,0,0,0}, _VECTORMATH_MASK_0xF000, shiftpattern );  // all-ones word moved to word `slot`
+        return vec_sel( dst, src, selectmask );  // word `slot` from src, everything else from dst
+    }
+}
+
+#define _vmathVfGetElement(vec, slot) ((float *)&(vec))[slot]
+#ifdef _VECTORMATH_SET_CONSTS_IN_MEM
+#define _vmathVfSetElement(vec, scalar, slot) ((float *)&(vec))[slot] = scalar
+#else
+#define _vmathVfSetElement(vec, scalar, slot)                                            \
+{                                                                                        \
+    if (__builtin_constant_p(scalar)) {                                                  \
+        (vec) = _vmathVfInsert(vec, (vec_float4){scalar, scalar, scalar, scalar}, slot); \
+    } else {                                                                             \
+        ((float *)&(vec))[slot] = scalar;                                                \
+    }                                                                                    \
+}
+#endif
+
+static inline vec_float4 _vmathVfSplatScalar(float scalar)
+{   // Returns a vector whose four words all hold `scalar`.
+    vec_float4 result;
+    if (__builtin_constant_p(scalar)) {
+        result = (vec_float4){scalar, scalar, scalar, scalar};  // compile-time constant: plain vector literal
+    } else {
+        result = vec_ld(0, &scalar);  // load the quadword that contains the scalar
+        result = vec_splat(vec_perm(result, result, vec_lvsl(0, &scalar)), 0);  // shift the scalar to word 0 using its address offset, then replicate word 0
+    } 
+    return result;
+}
+
+static inline vec_uint4 _vmathVuiSplatScalar(unsigned int scalar)
+{   // Unsigned-int counterpart of _vmathVfSplatScalar: all four words hold `scalar`.
+    vec_uint4 result;
+    if (__builtin_constant_p(scalar)) {
+        result = (vec_uint4){scalar, scalar, scalar, scalar};  // compile-time constant: plain vector literal
+    } else {
+        result = vec_ld(0, &scalar);  // load the quadword that contains the scalar
+        result = vec_splat(vec_perm(result, result, vec_lvsl(0, &scalar)), 0);  // shift the scalar to word 0 using its address offset, then replicate word 0
+    } 
+    return result;
+}
+
+#endif
+
+namespace Vectormath {
+namespace Aos {
+
+#ifdef _VECTORMATH_NO_SCALAR_CAST
+inline VecIdx::operator floatInVec() const
+{   // With _VECTORMATH_NO_SCALAR_CAST the conversion operator yields floatInVec(ref, i) instead of a float.
+    return floatInVec(ref, i);
+}
+
+inline float VecIdx::getAsFloat() const
+#else
+inline VecIdx::operator float() const
+#endif
+{   // Shared body for getAsFloat() / operator float(), whichever the #ifdef above selected: reads float i of ref.
+    return _vmathVfGetElement(ref, i);
+}
+
+inline float VecIdx::operator =( float scalar )
+{   // Writes scalar into element i of ref and returns the scalar that was passed in.
+    _vmathVfSetElement(ref, scalar, i);
+    return scalar;
+}
+
+inline floatInVec VecIdx::operator =( floatInVec scalar )
+{   // Inserts scalar's 128-bit value into slot i of ref via _vmathVfInsert and returns scalar.
+    ref = _vmathVfInsert(ref, scalar.get128(), i);
+    return scalar;
+}
+
+inline floatInVec VecIdx::operator =( const VecIdx& scalar )
+{   // Assigns the element referenced by another VecIdx; forwards to the floatInVec overload.
+    return *this = floatInVec(scalar.ref, scalar.i);
+}
+
+inline floatInVec VecIdx::operator *=( float scalar )
+{   // Wraps the float in a floatInVec and forwards to the floatInVec overload.
+    return *this *= floatInVec(scalar);
+}
+
+inline floatInVec VecIdx::operator *=( floatInVec scalar )
+{   // Multiplies floatInVec(ref, i) by scalar and assigns the product back to the element.
+    return *this = floatInVec(ref, i) * scalar;
+}
+
+inline floatInVec VecIdx::operator /=( float scalar )
+{   // Wraps the float in a floatInVec and forwards to the floatInVec overload.
+    return *this /= floatInVec(scalar);
+}
+
+inline floatInVec VecIdx::operator /=( floatInVec scalar )
+{   // Divides floatInVec(ref, i) by scalar and assigns the quotient back to the element.
+    return *this = floatInVec(ref, i) / scalar;
+}
+
+inline floatInVec VecIdx::operator +=( float scalar )
+{   // Wraps the float in a floatInVec and forwards to the floatInVec overload.
+    return *this += floatInVec(scalar);
+}
+
+inline floatInVec VecIdx::operator +=( floatInVec scalar )
+{   // Adds scalar to floatInVec(ref, i) and assigns the sum back to the element.
+    return *this = floatInVec(ref, i) + scalar;
+}
+
+inline floatInVec VecIdx::operator -=( float scalar )
+{   // Wraps the float in a floatInVec and forwards to the floatInVec overload.
+    return *this -= floatInVec(scalar);
+}
+
+inline floatInVec VecIdx::operator -=( floatInVec scalar )
+{   // Subtracts scalar from floatInVec(ref, i) and assigns the difference back to the element.
+    return *this = floatInVec(ref, i) - scalar;
+}
+
+inline Vector3::Vector3( float _x, float _y, float _z )
+{   // Builds (x, y, z, ?): word 3 is 0.0f on the constant path and is not written on the run-time path.
+    if (__builtin_constant_p(_x) & __builtin_constant_p(_y) & __builtin_constant_p(_z)) {
+        mVec128 = (vec_float4){_x, _y, _z, 0.0f};  // all three known at compile time: use a vector literal
+    } else {
+        float *pf = (float *)&mVec128;  // otherwise store the floats through memory
+        pf[0] = _x;
+        pf[1] = _y;
+        pf[2] = _z;
+    }
+}
+
+inline Vector3::Vector3( floatInVec _x, floatInVec _y, floatInVec _z )
+{   // Interleaves word 0 of each argument into words 0-2; word 3 ends up as word 1 of _y.
+    vec_float4 xz = vec_mergeh( _x.get128(), _z.get128() );  // (x0, z0, x1, z1)
+    mVec128 = vec_mergeh( xz, _y.get128() );  // (x0, y0, z0, y1)
+}
+
+inline Vector3::Vector3( Point3 pnt )
+{   // Copies the point's 128-bit value unchanged.
+    mVec128 = pnt.get128();
+}
+
+inline Vector3::Vector3( float scalar )
+{   // Takes the 128-bit value of floatInVec(scalar).
+    mVec128 = floatInVec(scalar).get128();
+}
+
+inline Vector3::Vector3( floatInVec scalar )
+{   // Takes the floatInVec's 128-bit value unchanged.
+    mVec128 = scalar.get128();
+}
+
+inline Vector3::Vector3( vec_float4 vf4 )
+{   // Wraps a raw vec_float4 without modifying it.
+    mVec128 = vf4;
+}
+
+inline const Vector3 Vector3::xAxis( )
+{   // Returns the vector (1, 0, 0, 0).
+    return Vector3( _VECTORMATH_UNIT_1000 );
+}
+
+inline const Vector3 Vector3::yAxis( )
+{   // Returns the vector (0, 1, 0, 0).
+    return Vector3( _VECTORMATH_UNIT_0100 );
+}
+
+inline const Vector3 Vector3::zAxis( )
+{   // Returns the vector (0, 0, 1, 0).
+    return Vector3( _VECTORMATH_UNIT_0010 );
+}
+
+inline const Vector3 lerp( float t, Vector3 vec0, Vector3 vec1 )
+{   // Wraps t in a floatInVec and forwards to the floatInVec overload.
+    return lerp( floatInVec(t), vec0, vec1 );
+}
+
+inline const Vector3 lerp( floatInVec t, Vector3 vec0, Vector3 vec1 )
+{   // Linear interpolation: vec0 + (vec1 - vec0) * t; t is not clamped here.
+    return ( vec0 + ( ( vec1 - vec0 ) * t ) );
+}
+
+inline const Vector3 slerp( float t, Vector3 unitVec0, Vector3 unitVec1 )
+{   // Wraps t in a floatInVec and forwards to the floatInVec overload.
+    return slerp( floatInVec(t), unitVec0, unitVec1 );
+}
+
+inline const Vector3 slerp( floatInVec t, Vector3 unitVec0, Vector3 unitVec1 )
+{   // Spherical interpolation between two vectors (named as unit vectors; not normalized here). Uses linear weights when the dot product is not below _VECTORMATH_SLERP_TOL.
+    vec_float4 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
+    vec_uint4 selectMask;
+    cosAngle = _vmathVfDot3( unitVec0.get128(), unitVec1.get128() );
+    cosAngle = vec_splat( cosAngle, 0 );  // the dot product is only valid in word 0, so replicate it
+    selectMask = (vec_uint4)vec_cmpgt( ((vec_float4){_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL}), cosAngle );  // set where cosAngle < tolerance
+    angle = acosf4( cosAngle );
+    tttt = t.get128();
+    oneMinusT = vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), tttt );
+    angles = vec_mergeh( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), tttt );  // (1, t, 1, t)
+    angles = vec_mergeh( angles, oneMinusT );  // words 0-2: (1, 1-t, t)
+    angles = vec_madd( angles, angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );  // words 0-2: (angle, (1-t)*angle, t*angle)
+    sines = sinf4( angles );
+    scales = divf4( sines, vec_splat( sines, 0 ) );  // divide every sine by sin(angle)
+    scale0 = vec_sel( oneMinusT, vec_splat( scales, 1 ), selectMask );  // sin((1-t)*angle)/sin(angle) where the mask is set, else 1-t
+    scale1 = vec_sel( tttt, vec_splat( scales, 2 ), selectMask );  // sin(t*angle)/sin(angle) where the mask is set, else t
+    return Vector3( vec_madd( unitVec0.get128(), scale0, vec_madd( unitVec1.get128(), scale1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ) );  // unitVec0*scale0 + unitVec1*scale1
+}
+
+inline vec_float4 Vector3::get128( ) const
+{   // Returns the underlying 128-bit vector.
+    return mVec128;
+}
+
+inline void storeXYZ( Vector3 vec, vec_float4 * quad )
+{   // Writes vec's words 0-2 into *quad while keeping the word 3 already stored there.
+    vec_float4 dstVec = *quad;
+    vec_uint4 mask = _VECTORMATH_MASK_0x000F;  // only word 3 selected
+    dstVec = vec_sel(vec.get128(), dstVec, mask);  // word 3 from the old *quad, words 0-2 from vec
+    *quad = dstVec;
+}
+
+inline void loadXYZArray( Vector3 & vec0, Vector3 & vec1, Vector3 & vec2, Vector3 & vec3, const vec_float4 * threeQuads )
+{   // Unpacks four vectors from three quadwords; per the local names these are 12 packed floats x0 y0 z0 x1 | y1 z1 x2 y2 | z2 x3 y3 z3.
+    vec_float4 xyzx, yzxy, zxyz, xyz1, xyz2, xyz3;
+    xyzx = threeQuads[0];
+    yzxy = threeQuads[1];
+    zxyz = threeQuads[2];
+    xyz1 = vec_sld( xyzx, yzxy, 12 );  // last word of quad 0 followed by the first three words of quad 1
+    xyz2 = vec_sld( yzxy, zxyz, 8 );  // last two words of quad 1 followed by the first two words of quad 2
+    xyz3 = vec_sld( zxyz, zxyz, 4 );  // quad 2 rotated left by one word
+    vec0 = Vector3( xyzx );  // word 3 of each output holds a neighbouring packed float, not a cleared value
+    vec1 = Vector3( xyz1 );
+    vec2 = Vector3( xyz2 );
+    vec3 = Vector3( xyz3 );
+}
+
+inline void storeXYZArray( Vector3 vec0, Vector3 vec1, Vector3 vec2, Vector3 vec3, vec_float4 * threeQuads )
+{   // Packs words 0-2 of four vectors into three quadwords (12 consecutive floats); word 3 of each input is dropped.
+    vec_float4 xyzx, yzxy, zxyz;
+    xyzx = vec_perm( vec0.get128(), vec1.get128(), _VECTORMATH_PERM_XYZA );  // (x0, y0, z0, x1)
+    yzxy = vec_perm( vec1.get128(), vec2.get128(), _VECTORMATH_PERM_YZAB );  // (y1, z1, x2, y2)
+    zxyz = vec_perm( vec2.get128(), vec3.get128(), _VECTORMATH_PERM_ZABC );  // (z2, x3, y3, z3)
+    threeQuads[0] = xyzx;
+    threeQuads[1] = yzxy;
+    threeQuads[2] = zxyz;
+}
+
+inline void storeHalfFloats( Vector3 vec0, Vector3 vec1, Vector3 vec2, Vector3 vec3, Vector3 vec4, Vector3 vec5, Vector3 vec6, Vector3 vec7, vec_ushort8 * threeQuads )
+{   // Packs the xyz of eight vectors (24 floats) as 24 half floats into threeQuads[0..2].
+    vec_float4 xyz0[3];
+    vec_float4 xyz1[3];
+    storeXYZArray( vec0, vec1, vec2, vec3, xyz0 );  // first four vectors -> 12 packed floats
+    storeXYZArray( vec4, vec5, vec6, vec7, xyz1 );  // last four vectors -> 12 packed floats
+    threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);  // each call converts 8 floats to 8 half floats
+    threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
+    threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
+}
+
+inline Vector3 & Vector3::operator =( Vector3 vec )
+{   // Copies the 128-bit value and returns *this.
+    mVec128 = vec.mVec128;
+    return *this;
+}
+
+inline Vector3 & Vector3::setX( float _x )
+{   // Writes _x into element 0 and returns *this.
+    _vmathVfSetElement(mVec128, _x, 0);
+    return *this;
+}
+
+inline Vector3 & Vector3::setX( floatInVec _x )
+{   // Inserts _x into slot 0 via _vmathVfInsert and returns *this.
+    mVec128 = _vmathVfInsert(mVec128, _x.get128(), 0);
+    return *this;
+}
+
+inline const floatInVec Vector3::getX( ) const
+{   // Returns floatInVec built from slot 0 of the vector.
+    return floatInVec( mVec128, 0 );
+}
+
+inline Vector3 & Vector3::setY( float _y )
+{   // Writes _y into element 1 and returns *this.
+    _vmathVfSetElement(mVec128, _y, 1);
+    return *this;
+}
+
+inline Vector3 & Vector3::setY( floatInVec _y )
+{   // Inserts _y into slot 1 via _vmathVfInsert and returns *this.
+    mVec128 = _vmathVfInsert(mVec128, _y.get128(), 1);
+    return *this;
+}
+
+inline const floatInVec Vector3::getY( ) const
+{   // Returns floatInVec built from slot 1 of the vector.
+    return floatInVec( mVec128, 1 );
+}
+
+inline Vector3 & Vector3::setZ( float _z )
+{   // Writes _z into element 2 and returns *this.
+    _vmathVfSetElement(mVec128, _z, 2);
+    return *this;
+}
+
+inline Vector3 & Vector3::setZ( floatInVec _z )
+{   // Inserts _z into slot 2 via _vmathVfInsert and returns *this.
+    mVec128 = _vmathVfInsert(mVec128, _z.get128(), 2);
+    return *this;
+}
+
+inline const floatInVec Vector3::getZ( ) const
+{   // Returns floatInVec built from slot 2 of the vector.
+    return floatInVec( mVec128, 2 );
+}
+
+inline Vector3 & Vector3::setElem( int idx, float value )
+{   // Writes value into element idx and returns *this; idx is not range-checked here.
+    _vmathVfSetElement(mVec128, value, idx);
+    return *this;
+}
+
+inline Vector3 & Vector3::setElem( int idx, floatInVec value )
+{   // Inserts value into slot idx via _vmathVfInsert and returns *this; idx is not range-checked here.
+    mVec128 = _vmathVfInsert(mVec128, value.get128(), idx);
+    return *this;
+}
+
+inline const floatInVec Vector3::getElem( int idx ) const
+{   // Returns floatInVec built from slot idx of the vector; idx is not range-checked here.
+    return floatInVec( mVec128, idx );
+}
+
+inline VecIdx Vector3::operator []( int idx )
+{   // Non-const subscript: returns a VecIdx built from (mVec128, idx), through which the element can be read or assigned.
+    return VecIdx( mVec128, idx );
+}
+
+inline const floatInVec Vector3::operator []( int idx ) const
+{   // Const subscript: returns floatInVec built from slot idx (same expression as getElem).
+    return floatInVec( mVec128, idx );
+}
+
+inline const Vector3 Vector3::operator +( Vector3 vec ) const
+{   // Sum of the two 128-bit vectors via vec_add (all four words).
+    return Vector3( vec_add( mVec128, vec.mVec128 ) );
+}
+
+inline const Vector3 Vector3::operator -( Vector3 vec ) const
+{   // Difference of the two 128-bit vectors via vec_sub (all four words).
+    return Vector3( vec_sub( mVec128, vec.mVec128 ) );
+}
+
+inline const Point3 Vector3::operator +( Point3 pnt ) const
+{   // Vector + point gives a Point3; the sum is computed with vec_add.
+    return Point3( vec_add( mVec128, pnt.get128() ) );
+}
+
+inline const Vector3 Vector3::operator *( float scalar ) const
+{   // Wraps the float in a floatInVec and forwards to the floatInVec overload.
+    return *this * floatInVec(scalar);
+}
+
+inline const Vector3 Vector3::operator *( floatInVec scalar ) const
+{   // Multiplies mVec128 by scalar's 128-bit value word by word (vec_madd with a zero addend).
+    return Vector3( vec_madd( mVec128, scalar.get128(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+}
+
+inline Vector3 & Vector3::operator +=( Vector3 vec )
+{   // In-place add, implemented through operator+; returns *this.
+    *this = *this + vec;
+    return *this;
+}
+
+inline Vector3 & Vector3::operator -=( Vector3 vec )
+{   // In-place subtract, implemented through operator-; returns *this.
+    *this = *this - vec;
+    return *this;
+}
+
+inline Vector3 & Vector3::operator *=( float scalar )
+{   // In-place scale by a float, implemented through operator*; returns *this.
+    *this = *this * scalar;
+    return *this;
+}
+
+inline Vector3 & Vector3::operator *=( floatInVec scalar )
+{   // In-place scale by a floatInVec, implemented through operator*; returns *this.
+    *this = *this * scalar;
+    return *this;
+}
+
+inline const Vector3 Vector3::operator /( float scalar ) const
+{   // Wraps the float in a floatInVec and forwards to the floatInVec overload.
+    return *this / floatInVec(scalar);
+}
+
+inline const Vector3 Vector3::operator /( floatInVec scalar ) const
+{   // Divides mVec128 by scalar's 128-bit value using divf4; no zero check is made here.
+    return Vector3( divf4( mVec128, scalar.get128() ) );
+}
+
+inline Vector3 & Vector3::operator /=( float scalar )
+{   // In-place divide by a float, implemented through operator/; returns *this.
+    *this = *this / scalar;
+    return *this;
+}
+
+inline Vector3 & Vector3::operator /=( floatInVec scalar )
+{   // In-place divide by a floatInVec, implemented through operator/; returns *this.
+    *this = *this / scalar;
+    return *this;
+}
+
+inline const Vector3 Vector3::operator -( ) const
+{   // Unary minus: applies negatef4 to the whole 128-bit vector.
+    return Vector3( negatef4( mVec128 ) );
+}
+
+inline const Vector3 operator *( float scalar, Vector3 vec )
+{   // scalar * vector with the scalar on the left; forwards to the floatInVec overload.
+    return floatInVec(scalar) * vec;
+}
+
+inline const Vector3 operator *( floatInVec scalar, Vector3 vec )
+{   // scalar * vector with the scalar on the left; same result as vec * scalar.
+    return vec * scalar;
+}
+
+inline const Vector3 mulPerElem( Vector3 vec0, Vector3 vec1 )
+{   // Word-by-word product of the two vectors (vec_madd with a zero addend).
+    return Vector3( vec_madd( vec0.get128(), vec1.get128(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+}
+
+inline const Vector3 divPerElem( Vector3 vec0, Vector3 vec1 )
+{   // Applies divf4 to the two 128-bit vectors (vec0 / vec1); no zero check is made here.
+    return Vector3( divf4( vec0.get128(), vec1.get128() ) );
+}
+
+inline const Vector3 recipPerElem( Vector3 vec )
+{   // Applies recipf4 to the whole 128-bit vector.
+    return Vector3( recipf4( vec.get128() ) );
+}
+
+inline const Vector3 sqrtPerElem( Vector3 vec )
+{   // Applies sqrtf4 to the whole 128-bit vector.
+    return Vector3( sqrtf4( vec.get128() ) );
+}
+
+inline const Vector3 rsqrtPerElem( Vector3 vec )
+{   // Applies rsqrtf4 to the whole 128-bit vector.
+    return Vector3( rsqrtf4( vec.get128() ) );
+}
+
+inline const Vector3 absPerElem( Vector3 vec )
+{   // Applies fabsf4 to the whole 128-bit vector.
+    return Vector3( fabsf4( vec.get128() ) );
+}
+
+inline const Vector3 copySignPerElem( Vector3 vec0, Vector3 vec1 )
+{   // Applies copysignf4( vec0, vec1 ) to the 128-bit vectors; presumably magnitude from vec0 and sign from vec1 -- confirm against copysignf4.
+    return Vector3( copysignf4( vec0.get128(), vec1.get128() ) );
+}
+
+inline const Vector3 maxPerElem( Vector3 vec0, Vector3 vec1 )
+{   // Applies fmaxf4 to the two 128-bit vectors.
+    return Vector3( fmaxf4( vec0.get128(), vec1.get128() ) );
+}
+
+inline const floatInVec maxElem( Vector3 vec )
+{   // Largest of x, y, z; word 3 is never consulted.
+    vec_float4 result;
+    result = fmaxf4( vec_splat( vec.get128(), 1 ), vec.get128() );  // word 0 = fmax(y, x)
+    result = fmaxf4( vec_splat( vec.get128(), 2 ), result );  // word 0 = fmax(z, previous)
+    return floatInVec( result, 0 );  // only word 0 holds the reduction
+}
+
+inline const Vector3 minPerElem( Vector3 vec0, Vector3 vec1 )
+{   // Applies fminf4 to the two 128-bit vectors.
+    return Vector3( fminf4( vec0.get128(), vec1.get128() ) );
+}
+
+inline const floatInVec minElem( Vector3 vec )
+{   // Smallest of x, y, z; word 3 is never consulted.
+    vec_float4 result;
+    result = fminf4( vec_splat( vec.get128(), 1 ), vec.get128() );  // word 0 = fmin(y, x)
+    result = fminf4( vec_splat( vec.get128(), 2 ), result );  // word 0 = fmin(z, previous)
+    return floatInVec( result, 0 );  // only word 0 holds the reduction
+}
+
+inline const floatInVec sum( Vector3 vec )
+{   // x + y + z; word 3 is never consulted.
+    vec_float4 result;
+    result = vec_add( vec_splat( vec.get128(), 1 ), vec.get128() );  // word 0 = y + x
+    result = vec_add( vec_splat( vec.get128(), 2 ), result );  // word 0 = z + (y + x)
+    return floatInVec( result, 0 );  // only word 0 holds the sum
+}
+
+inline const floatInVec dot( Vector3 vec0, Vector3 vec1 )
+{   // 3-component dot product; slot 0 of _vmathVfDot3's result is passed to floatInVec.
+    return floatInVec( _vmathVfDot3( vec0.get128(), vec1.get128() ), 0 );
+}
+
+inline const floatInVec lengthSqr( Vector3 vec )
+{   // Squared length: the 3-component dot product of vec with itself.
+    return floatInVec(  _vmathVfDot3( vec.get128(), vec.get128() ), 0 );
+}
+
+inline const floatInVec length( Vector3 vec )
+{   // Length: sqrtf4 of the 3-component dot product of vec with itself, taken from slot 0.
+    return floatInVec(  sqrtf4(_vmathVfDot3( vec.get128(), vec.get128() )), 0 );
+}
+
+inline const Vector3 normalize( Vector3 vec )
+{   // Scales vec by rsqrtf4 of its squared length; there is no guard for a zero-length input.
+    vec_float4 dot = _vmathVfDot3( vec.get128(), vec.get128() );
+    dot = vec_splat( dot, 0 );  // _vmathVfDot3 is only valid in word 0, so replicate it to all words
+    return Vector3( vec_madd( vec.get128(), rsqrtf4( dot ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
+}
+
+inline const Vector3 cross( Vector3 vec0, Vector3 vec1 )
+{   // Cross product vec0 x vec1, computed by _vmathVfCross.
+    return Vector3( _vmathVfCross( vec0.get128(), vec1.get128() ) );
+}
+
+inline const Vector3 select( Vector3 vec0, Vector3 vec1, bool select1 )
+{   // Wraps the bool in a boolInVec and forwards to the boolInVec overload.
+    return select( vec0, vec1, boolInVec(select1) );
+}
+
+inline const Vector3 select( Vector3 vec0, Vector3 vec1, boolInVec select1 )
+{   // Bitwise select via vec_sel: bits come from vec1 where select1's 128-bit mask is set, otherwise from vec0.
+    return Vector3( vec_sel( vec0.get128(), vec1.get128(), select1.get128() ) );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+inline void print( Vector3 vec )
+{   // Debug helper (only compiled with _VECTORMATH_DEBUG): prints x, y, z with printf.
+    union { vec_float4 v; float s[4]; } tmp;  // union gives float access to the vector's words
+    tmp.v = vec.get128();
+    printf( "( %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2] );
+}
+
+inline void print( Vector3 vec, const char * name )
+{
+    union { vec_float4 v; float s[4]; } tmp;
+    tmp.v = vec.get128();
+    printf( "%s: ( %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2] );
+}
+
+#endif
+
+inline Vector4::Vector4( float _x, float _y, float _z, float _w ) // build from four scalar floats
+{
+    if (__builtin_constant_p(_x) & __builtin_constant_p(_y) &
+        __builtin_constant_p(_z) & __builtin_constant_p(_w)) { // taken only when all four are compile-time constants
+        mVec128 = (vec_float4){_x, _y, _z, _w}; // single vector literal
+    } else {
+        float *pf = (float *)&mVec128; // otherwise write each element through a float view of the member
+        pf[0] = _x;
+        pf[1] = _y;
+        pf[2] = _z;
+        pf[3] = _w;
+    }
+}
+
+inline Vector4::Vector4( floatInVec _x, floatInVec _y, floatInVec _z, floatInVec _w )
+{
+    const vec_float4 evenLanes = vec_mergeh( _x.get128(), _z.get128() ); // starts with x, z
+    const vec_float4 oddLanes = vec_mergeh( _y.get128(), _w.get128() );  // starts with y, w
+    mVec128 = vec_mergeh( evenLanes, oddLanes );                         // interleaved: x, y, z, w
+}
+
+inline Vector4::Vector4( Vector3 xyz, float _w ) // x, y, z copied from a Vector3; w from a scalar float
+{
+    mVec128 = xyz.get128();
+    _vmathVfSetElement(mVec128, _w, 3); // writes element 3 (w); result is not assigned, so it presumably updates mVec128 in place
+}
+
+inline Vector4::Vector4( Vector3 xyz, floatInVec _w ) // same, with w supplied as a floatInVec
+{
+    mVec128 = xyz.get128();
+    mVec128 = _vmathVfInsert(mVec128, _w.get128(), 3); // element 3 (w) replaced by _w
+}
+
+inline Vector4::Vector4( Vector3 vec ) // Vector3 -> Vector4 with w set to 0
+{
+    mVec128 = vec.get128();
+    mVec128 = _vmathVfInsert(mVec128, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), 3); // insert 0.0f at element 3
+}
+
+inline Vector4::Vector4( Point3 pnt ) // Point3 -> Vector4 with w set to 1
+{
+    mVec128 = pnt.get128();
+    mVec128 = _vmathVfInsert(mVec128, ((vec_float4){1.0f,1.0f,1.0f,1.0f}), 3); // insert 1.0f at element 3
+}
+
+inline Vector4::Vector4( Quat quat ) // reuse the quaternion's 128-bit value unchanged
+{
+    mVec128 = quat.get128();
+}
+
+inline Vector4::Vector4( float scalar ) // build from one scalar, routed through floatInVec
+{
+    mVec128 = floatInVec(scalar).get128(); // presumably the same value in every element - confirm against floatInVec
+}
+
+inline Vector4::Vector4( floatInVec scalar ) // take the floatInVec's 128-bit value as is
+{
+    mVec128 = scalar.get128();
+}
+
+inline Vector4::Vector4( vec_float4 vf4 ) // wrap a raw vec_float4
+{
+    mVec128 = vf4;
+}
+
+inline const Vector4 Vector4::xAxis( ) // constant vector built from _VECTORMATH_UNIT_1000
+{
+    return Vector4( _VECTORMATH_UNIT_1000 ); // presumably (1,0,0,0), going by the macro name
+}
+
+inline const Vector4 Vector4::yAxis( ) // constant vector built from _VECTORMATH_UNIT_0100
+{
+    return Vector4( _VECTORMATH_UNIT_0100 ); // presumably (0,1,0,0)
+}
+
+inline const Vector4 Vector4::zAxis( ) // constant vector built from _VECTORMATH_UNIT_0010
+{
+    return Vector4( _VECTORMATH_UNIT_0010 ); // presumably (0,0,1,0)
+}
+
+inline const Vector4 Vector4::wAxis( ) // constant vector built from _VECTORMATH_UNIT_0001
+{
+    return Vector4( _VECTORMATH_UNIT_0001 ); // presumably (0,0,0,1)
+}
+
+inline const Vector4 lerp( float t, Vector4 vec0, Vector4 vec1 ) // scalar-t overload
+{
+    return lerp( floatInVec(t), vec0, vec1 ); // wrap t and forward to the floatInVec overload
+}
+
+inline const Vector4 lerp( floatInVec t, Vector4 vec0, Vector4 vec1 ) // linear interpolation between vec0 and vec1
+{
+    return ( vec0 + ( ( vec1 - vec0 ) * t ) ); // vec0 + (vec1 - vec0) * t; t is not clamped here
+}
+
+inline const Vector4 slerp( float t, Vector4 unitVec0, Vector4 unitVec1 ) // scalar-t overload
+{
+    return slerp( floatInVec(t), unitVec0, unitVec1 ); // wrap t and forward to the floatInVec overload
+}
+
+inline const Vector4 slerp( floatInVec t, Vector4 unitVec0, Vector4 unitVec1 ) // spherical interpolation; inputs are expected to be unit length (per the parameter names)
+{
+    vec_float4 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
+    vec_uint4 selectMask;
+    cosAngle = _vmathVfDot4( unitVec0.get128(), unitVec1.get128() ); // dot of the two inputs, used as cos(angle)
+    cosAngle = vec_splat( cosAngle, 0 ); // copy slot 0 into every lane
+    selectMask = (vec_uint4)vec_cmpgt( ((vec_float4){_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL}), cosAngle ); // all ones where cosAngle < tolerance
+    angle = acosf4( cosAngle );
+    tttt = t.get128();
+    oneMinusT = vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), tttt ); // 1 - t
+    angles = vec_mergeh( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), tttt ); // ( 1, t[0], 1, t[1] )
+    angles = vec_mergeh( angles, oneMinusT ); // ( 1, 1-t[0], t[0], 1-t[1] )
+    angles = vec_madd( angles, angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ); // slots 0..2: angle, (1-t)*angle, t*angle
+    sines = sinf4( angles );
+    scales = divf4( sines, vec_splat( sines, 0 ) ); // divide every lane by sin(angle)
+    scale0 = vec_sel( oneMinusT, vec_splat( scales, 1 ), selectMask ); // sin((1-t)*angle)/sin(angle) where the mask is set, plain 1-t elsewhere
+    scale1 = vec_sel( tttt, vec_splat( scales, 2 ), selectMask ); // sin(t*angle)/sin(angle) where the mask is set, plain t elsewhere
+    return Vector4( vec_madd( unitVec0.get128(), scale0, vec_madd( unitVec1.get128(), scale1, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ) ); // unitVec0*scale0 + unitVec1*scale1
+}
+
+inline vec_float4 Vector4::get128( ) const // raw 128-bit value held by this vector
+{
+    return mVec128;
+}
+
+inline void storeHalfFloats( Vector4 vec0, Vector4 vec1, Vector4 vec2, Vector4 vec3, vec_ushort8 * twoQuads ) // converts four vectors and writes them to twoQuads[0..1]
+{
+    twoQuads[0] = _vmath2VfToHalfFloats(vec0.get128(), vec1.get128()); // vec0 and vec1 packed into one vec_ushort8
+    twoQuads[1] = _vmath2VfToHalfFloats(vec2.get128(), vec3.get128()); // vec2 and vec3 packed into one vec_ushort8
+}
+
+inline Vector4 & Vector4::operator =( Vector4 vec ) // copy assignment of the 128-bit value
+{
+    mVec128 = vec.mVec128;
+    return *this;
+}
+
+inline Vector4 & Vector4::setXYZ( Vector3 vec ) // replace x, y, z from vec while keeping part of the current value
+{
+    mVec128 = vec_sel( vec.get128(), mVec128, _VECTORMATH_MASK_0x000F ); // bits under the mask stay from mVec128 (presumably the w element), the rest come from vec
+    return *this;
+}
+
+inline const Vector3 Vector4::getXYZ( ) const // same 128-bit value rewrapped as a Vector3; element 3 is not cleared
+{
+    return Vector3( mVec128 );
+}
+
+inline Vector4 & Vector4::setX( float _x ) // store a scalar float into element 0 (x)
+{
+    _vmathVfSetElement(mVec128, _x, 0);
+    return *this; // returning *this allows chained calls
+}
+
+inline Vector4 & Vector4::setX( floatInVec _x ) // same, taking the value as a floatInVec
+{
+    mVec128 = _vmathVfInsert(mVec128, _x.get128(), 0);
+    return *this;
+}
+
+inline const floatInVec Vector4::getX( ) const // read element 0 (x)
+{
+    return floatInVec( mVec128, 0 );
+}
+
+inline Vector4 & Vector4::setY( float _y ) // store a scalar float into element 1 (y)
+{
+    _vmathVfSetElement(mVec128, _y, 1);
+    return *this;
+}
+
+inline Vector4 & Vector4::setY( floatInVec _y ) // same, taking the value as a floatInVec
+{
+    mVec128 = _vmathVfInsert(mVec128, _y.get128(), 1);
+    return *this;
+}
+
+inline const floatInVec Vector4::getY( ) const // read element 1 (y)
+{
+    return floatInVec( mVec128, 1 );
+}
+
+inline Vector4 & Vector4::setZ( float _z ) // store a scalar float into element 2 (z)
+{
+    _vmathVfSetElement(mVec128, _z, 2);
+    return *this; // returning *this allows chained calls
+}
+
+inline Vector4 & Vector4::setZ( floatInVec _z ) // same, taking the value as a floatInVec
+{
+    mVec128 = _vmathVfInsert(mVec128, _z.get128(), 2);
+    return *this;
+}
+
+inline const floatInVec Vector4::getZ( ) const // read element 2 (z)
+{
+    return floatInVec( mVec128, 2 );
+}
+
+inline Vector4 & Vector4::setW( float _w ) // store a scalar float into element 3 (w)
+{
+    _vmathVfSetElement(mVec128, _w, 3);
+    return *this;
+}
+
+inline Vector4 & Vector4::setW( floatInVec _w ) // same, taking the value as a floatInVec
+{
+    mVec128 = _vmathVfInsert(mVec128, _w.get128(), 3);
+    return *this;
+}
+
+inline const floatInVec Vector4::getW( ) const // read element 3 (w)
+{
+    return floatInVec( mVec128, 3 );
+}
+
+inline Vector4 & Vector4::setElem( int idx, float value ) // store a scalar float at a run-time element index
+{
+    _vmathVfSetElement(mVec128, value, idx); // idx is passed through unchecked
+    return *this;
+}
+
+inline Vector4 & Vector4::setElem( int idx, floatInVec value ) // same, taking the value as a floatInVec
+{
+    mVec128 = _vmathVfInsert(mVec128, value.get128(), idx);
+    return *this;
+}
+
+inline const floatInVec Vector4::getElem( int idx ) const // read the element at a run-time index
+{
+    return floatInVec( mVec128, idx );
+}
+
+inline VecIdx Vector4::operator []( int idx ) // non-const subscript: hands back a VecIdx built from mVec128 and idx
+{
+    return VecIdx( mVec128, idx ); // presumably a proxy that allows assignment to the element - confirm against VecIdx
+}
+
+inline const floatInVec Vector4::operator []( int idx ) const // const subscript: same result as getElem( idx )
+{
+    return floatInVec( mVec128, idx );
+}
+
+inline const Vector4 Vector4::operator +( Vector4 vec ) const // element-wise sum of all four lanes
+{
+    return Vector4( vec_add( mVec128, vec.mVec128 ) );
+}
+
+inline const Vector4 Vector4::operator -( Vector4 vec ) const // element-wise difference of all four lanes
+{
+    return Vector4( vec_sub( mVec128, vec.mVec128 ) );
+}
+
+inline const Vector4 Vector4::operator *( float scalar ) const // scalar-float overload
+{
+    return *this * floatInVec(scalar); // wrap the scalar and use the floatInVec overload
+}
+
+inline const Vector4 Vector4::operator *( floatInVec scalar ) const // multiply by the floatInVec's 128-bit value
+{
+    return Vector4( vec_madd( mVec128, scalar.get128(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ); // multiply-add with a zero addend, i.e. a plain multiply
+}
+
+// In-place addition: this vector becomes ( *this + vec ).
+inline Vector4 & Vector4::operator +=( Vector4 vec )
+{
+    return ( *this = *this + vec );
+}
+
+// In-place subtraction: this vector becomes ( *this - vec ).
+inline Vector4 & Vector4::operator -=( Vector4 vec )
+{
+    return ( *this = *this - vec );
+}
+
+// In-place multiply by a scalar float.
+inline Vector4 & Vector4::operator *=( float scalar )
+{
+    return ( *this = *this * scalar );
+}
+
+// In-place multiply by a floatInVec.
+inline Vector4 & Vector4::operator *=( floatInVec scalar )
+{
+    return ( *this = *this * scalar );
+}
+
+inline const Vector4 Vector4::operator /( float scalar ) const // scalar-float overload
+{
+    return *this / floatInVec(scalar); // wrap the scalar and use the floatInVec overload
+}
+
+inline const Vector4 Vector4::operator /( floatInVec scalar ) const // divide by the floatInVec's 128-bit value
+{
+    return Vector4( divf4( mVec128, scalar.get128() ) ); // no check for a zero divisor is made here
+}
+
+// In-place divide by a scalar float.
+inline Vector4 & Vector4::operator /=( float scalar )
+{
+    return ( *this = *this / scalar );
+}
+
+// In-place divide by a floatInVec.
+inline Vector4 & Vector4::operator /=( floatInVec scalar )
+{
+    return ( *this = *this / scalar );
+}
+
+inline const Vector4 Vector4::operator -( ) const // unary minus: negatef4 applied to the 128-bit value
+{
+    return Vector4( negatef4( mVec128 ) );
+}
+
+inline const Vector4 operator *( float scalar, Vector4 vec ) // scalar on the left-hand side, float form
+{
+    return floatInVec(scalar) * vec; // wrap the scalar and use the floatInVec * Vector4 overload
+}
+
+inline const Vector4 operator *( floatInVec scalar, Vector4 vec ) // scalar on the left-hand side, floatInVec form
+{
+    return vec * scalar; // swap operands and reuse Vector4::operator *
+}
+
+inline const Vector4 mulPerElem( Vector4 vec0, Vector4 vec1 ) // element-wise product
+{
+    return Vector4( vec_madd( vec0.get128(), vec1.get128(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ); // multiply-add with a zero addend
+}
+
+inline const Vector4 divPerElem( Vector4 vec0, Vector4 vec1 ) // vec0 divided by vec1 via divf4
+{
+    return Vector4( divf4( vec0.get128(), vec1.get128() ) );
+}
+
+inline const Vector4 recipPerElem( Vector4 vec ) // recipf4 applied to the 128-bit value
+{
+    return Vector4( recipf4( vec.get128() ) );
+}
+
+inline const Vector4 sqrtPerElem( Vector4 vec ) // sqrtf4 applied to the 128-bit value
+{
+    return Vector4( sqrtf4( vec.get128() ) );
+}
+
+inline const Vector4 rsqrtPerElem( Vector4 vec ) // rsqrtf4 applied to the 128-bit value
+{
+    return Vector4( rsqrtf4( vec.get128() ) );
+}
+
+inline const Vector4 absPerElem( Vector4 vec ) // fabsf4 applied to the 128-bit value
+{
+    return Vector4( fabsf4( vec.get128() ) );
+}
+
+inline const Vector4 copySignPerElem( Vector4 vec0, Vector4 vec1 ) // copysignf4( vec0, vec1 )
+{
+    return Vector4( copysignf4( vec0.get128(), vec1.get128() ) ); // presumably magnitude from vec0, sign from vec1 - confirm against copysignf4
+}
+
+inline const Vector4 maxPerElem( Vector4 vec0, Vector4 vec1 ) // fmaxf4 of the two 128-bit values
+{
+    return Vector4( fmaxf4( vec0.get128(), vec1.get128() ) );
+}
+
+// Largest of the x, y, z and w elements of vec, handed back as a floatInVec.
+inline const floatInVec maxElem( Vector4 vec )
+{
+    vec_float4 highest = fmaxf4( vec_splat( vec.get128(), 1 ), vec.get128() ); // slot 0 = max( y, x )
+    highest = fmaxf4( vec_splat( vec.get128(), 2 ), highest );                 // ... then z
+    highest = fmaxf4( vec_splat( vec.get128(), 3 ), highest );                 // ... then w
+    return floatInVec( highest, 0 );
+}
+
+inline const Vector4 minPerElem( Vector4 vec0, Vector4 vec1 ) // minimum of two 4-D vectors, taken per element
+{
+    return Vector4( fminf4( vec0.get128(), vec1.get128() ) ); // fminf4 runs over the full 128-bit values of both operands
+}
+
+// Smallest of the x, y, z and w elements of vec, handed back as a floatInVec.
+inline const floatInVec minElem( Vector4 vec )
+{
+    vec_float4 lowest = fminf4( vec_splat( vec.get128(), 1 ), vec.get128() ); // slot 0 = min( y, x )
+    lowest = fminf4( vec_splat( vec.get128(), 2 ), lowest );                  // ... then z
+    lowest = fminf4( vec_splat( vec.get128(), 3 ), lowest );                  // ... then w
+    return floatInVec( lowest, 0 );
+}
+
+// Adds the x, y, z and w elements of vec together; the total is handed back as a floatInVec.
+inline const floatInVec sum( Vector4 vec )
+{
+    vec_float4 total = vec_add( vec_splat( vec.get128(), 1 ), vec.get128() ); // slot 0 = y + x
+    total = vec_add( vec_splat( vec.get128(), 2 ), total );                   // ... + z
+    total = vec_add( vec_splat( vec.get128(), 3 ), total );                   // ... + w
+    return floatInVec( total, 0 );
+}
+
+inline const floatInVec dot( Vector4 vec0, Vector4 vec1 ) // 4-element dot product of vec0 and vec1
+{
+    return floatInVec( _vmathVfDot4( vec0.get128(), vec1.get128() ), 0 ); // the result is read from slot 0 of the helper's output
+}
+
+inline const floatInVec lengthSqr( Vector4 vec ) // dot product of vec with itself
+{
+    return floatInVec(  _vmathVfDot4( vec.get128(), vec.get128() ), 0 );
+}
+
+inline const floatInVec length( Vector4 vec ) // square root (sqrtf4) of the dot product of vec with itself
+{
+    return floatInVec(  sqrtf4(_vmathVfDot4( vec.get128(), vec.get128() )), 0 );
+}
+
+inline const Vector4 normalize( Vector4 vec )
+{
+    const vec_float4 lenSqr = _vmathVfDot4( vec.get128(), vec.get128() ); // dot of vec with itself; no zero-length check is made
+    return Vector4( vec_madd( vec.get128(), rsqrtf4( lenSqr ), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ); // vec * rsqrtf4( lenSqr ) + 0
+}
+
+inline const Vector4 select( Vector4 vec0, Vector4 vec1, bool select1 ) // scalar-bool overload
+{
+    return select( vec0, vec1, boolInVec(select1) ); // wrap the flag in a boolInVec and forward to the boolInVec overload
+}
+
+inline const Vector4 select( Vector4 vec0, Vector4 vec1, boolInVec select1 ) // pick between vec0 and vec1 using a vector mask
+{
+    return Vector4( vec_sel( vec0.get128(), vec1.get128(), select1.get128() ) ); // vec_sel takes vec1's bits where the mask bits are 1, vec0's elsewhere
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+inline void print( Vector4 vec )
+{
+    union { vec_float4 quad; float lane[4]; } view; // lets printf read the vector one float at a time
+    view.quad = vec.get128();
+    printf( "( %f %f %f %f )\n", view.lane[0], view.lane[1], view.lane[2], view.lane[3] );
+}
+
+inline void print( Vector4 vec, const char * name )
+{
+    union { vec_float4 quad; float lane[4]; } view; // same float-by-float view, output prefixed with the caller's label
+    view.quad = vec.get128();
+    printf( "%s: ( %f %f %f %f )\n", name, view.lane[0], view.lane[1], view.lane[2], view.lane[3] );
+}
+
+#endif
+
+inline Point3::Point3( float _x, float _y, float _z ) // build from three scalar floats
+{
+    if (__builtin_constant_p(_x) & __builtin_constant_p(_y) & __builtin_constant_p(_z)) { // taken only when all three are compile-time constants
+        mVec128 = (vec_float4){_x, _y, _z, 0.0f}; // vector literal; the fourth element is 0.0f on this path
+    } else {
+        float *pf = (float *)&mVec128; // otherwise write each element through a float view of the member
+        pf[0] = _x;
+        pf[1] = _y;
+        pf[2] = _z; // the fourth element is not written on this path
+    }
+}
+
+inline Point3::Point3( floatInVec _x, floatInVec _y, floatInVec _z )
+{
+    const vec_float4 evenLanes = vec_mergeh( _x.get128(), _z.get128() ); // starts with x, z
+    mVec128 = vec_mergeh( evenLanes, _y.get128() );                       // interleaved: x, y, z, then a second copy from _y
+}
+
+inline Point3::Point3( Vector3 vec ) // reuse the Vector3's 128-bit value unchanged
+{
+    mVec128 = vec.get128();
+}
+
+inline Point3::Point3( float scalar ) // build from one scalar, routed through floatInVec
+{
+    mVec128 = floatInVec(scalar).get128(); // presumably the same value in every element - confirm against floatInVec
+}
+
+inline Point3::Point3( floatInVec scalar ) // take the floatInVec's 128-bit value as is
+{
+    mVec128 = scalar.get128();
+}
+
+inline Point3::Point3( vec_float4 vf4 ) // wrap a raw vec_float4
+{
+    mVec128 = vf4;
+}
+
+inline const Point3 lerp( float t, Point3 pnt0, Point3 pnt1 ) // scalar-t overload
+{
+    return lerp( floatInVec(t), pnt0, pnt1 ); // wrap t and forward to the floatInVec overload
+}
+
+inline const Point3 lerp( floatInVec t, Point3 pnt0, Point3 pnt1 ) // linear interpolation between two points
+{
+    return ( pnt0 + ( ( pnt1 - pnt0 ) * t ) ); // point + (point - point, a Vector3) * t; t is not clamped here
+}
+
+inline vec_float4 Point3::get128( ) const // raw 128-bit value held by this point
+{
+    return mVec128;
+}
+
+inline void storeXYZ( Point3 pnt, vec_float4 * quad )
+{
+    // Blend pnt into *quad: vec_sel keeps the old contents' bits wherever
+    // _VECTORMATH_MASK_0x000F is set and takes pnt's bits everywhere else.
+    const vec_float4 previous = *quad;
+    *quad = vec_sel( pnt.get128(), previous, _VECTORMATH_MASK_0x000F );
+}
+
+inline void loadXYZArray( Point3 & pnt0, Point3 & pnt1, Point3 & pnt2, Point3 & pnt3, const vec_float4 * threeQuads ) // unpack four points from threeQuads[0..2]
+{
+    vec_float4 xyzx, yzxy, zxyz, xyz1, xyz2, xyz3;
+    xyzx = threeQuads[0]; // all three quadwords are read before any output is written
+    yzxy = threeQuads[1];
+    zxyz = threeQuads[2];
+    xyz1 = vec_sld( xyzx, yzxy, 12 ); // last float of quad 0 followed by the first three of quad 1
+    xyz2 = vec_sld( yzxy, zxyz, 8 ); // last two floats of quad 1 followed by the first two of quad 2
+    xyz3 = vec_sld( zxyz, zxyz, 4 ); // quad 2 rotated left by one float
+    pnt0 = Point3( xyzx ); // first three floats of quad 0; the fourth element is left as loaded
+    pnt1 = Point3( xyz1 );
+    pnt2 = Point3( xyz2 );
+    pnt3 = Point3( xyz3 );
+}
+
+// Writes four points into threeQuads[0..2], one vec_perm of two neighbouring
+// points per quadword. The lane layouts noted below are read off the names of
+// the permute constants, which are defined elsewhere; presumably the result is
+// the twelve x/y/z floats laid out back to back.
+inline void storeXYZArray( Point3 pnt0, Point3 pnt1, Point3 pnt2, Point3 pnt3, vec_float4 * threeQuads )
+{
+    threeQuads[0] = vec_perm( pnt0.get128(), pnt1.get128(), _VECTORMATH_PERM_XYZA ); // x0 y0 z0 x1
+    threeQuads[1] = vec_perm( pnt1.get128(), pnt2.get128(), _VECTORMATH_PERM_YZAB ); // y1 z1 x2 y2
+    threeQuads[2] = vec_perm( pnt2.get128(), pnt3.get128(), _VECTORMATH_PERM_ZABC ); // z2 x3 y3 z3
+}
+
+inline void storeHalfFloats( Point3 pnt0, Point3 pnt1, Point3 pnt2, Point3 pnt3, Point3 pnt4, Point3 pnt5, Point3 pnt6, Point3 pnt7, vec_ushort8 * threeQuads ) // converts eight points and writes them to threeQuads[0..2]
+{
+    vec_float4 xyz0[3]; // packed form of pnt0..pnt3
+    vec_float4 xyz1[3]; // packed form of pnt4..pnt7
+    storeXYZArray( pnt0, pnt1, pnt2, pnt3, xyz0 );
+    storeXYZArray( pnt4, pnt5, pnt6, pnt7, xyz1 );
+    threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]); // each call turns two float quadwords into one vec_ushort8
+    threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]); // this output straddles the two groups of four points
+    threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
+}
+
+inline Point3 & Point3::operator =( Point3 pnt ) // copy assignment of the 128-bit value
+{
+    mVec128 = pnt.mVec128;
+    return *this;
+}
+
+inline Point3 & Point3::setX( float _x ) // store a scalar float into element 0 (x)
+{
+    _vmathVfSetElement(mVec128, _x, 0);
+    return *this; // returning *this allows chained calls
+}
+
+inline Point3 & Point3::setX( floatInVec _x ) // same, taking the value as a floatInVec
+{
+    mVec128 = _vmathVfInsert(mVec128, _x.get128(), 0);
+    return *this;
+}
+
+inline const floatInVec Point3::getX( ) const // read element 0 (x)
+{
+    return floatInVec( mVec128, 0 );
+}
+
+inline Point3 & Point3::setY( float _y ) // store a scalar float into element 1 (y)
+{
+    _vmathVfSetElement(mVec128, _y, 1);
+    return *this; // returning *this allows chained calls
+}
+
+inline Point3 & Point3::setY( floatInVec _y ) // same, taking the value as a floatInVec
+{
+    mVec128 = _vmathVfInsert(mVec128, _y.get128(), 1);
+    return *this;
+}
+
+inline const floatInVec Point3::getY( ) const // read element 1 (y)
+{
+    return floatInVec( mVec128, 1 );
+}
+
+inline Point3 & Point3::setZ( float _z ) // store a scalar float into element 2 (z)
+{
+    _vmathVfSetElement(mVec128, _z, 2);
+    return *this;
+}
+
+inline Point3 & Point3::setZ( floatInVec _z ) // same, taking the value as a floatInVec
+{
+    mVec128 = _vmathVfInsert(mVec128, _z.get128(), 2);
+    return *this;
+}
+
+inline const floatInVec Point3::getZ( ) const // read element 2 (z)
+{
+    return floatInVec( mVec128, 2 );
+}
+
+inline Point3 & Point3::setElem( int idx, float value ) // store a scalar float at a run-time element index
+{
+    _vmathVfSetElement(mVec128, value, idx); // idx is passed through unchecked
+    return *this;
+}
+
+inline Point3 & Point3::setElem( int idx, floatInVec value ) // same, taking the value as a floatInVec
+{
+    mVec128 = _vmathVfInsert(mVec128, value.get128(), idx);
+    return *this;
+}
+
+inline const floatInVec Point3::getElem( int idx ) const // read the element at a run-time index
+{
+    return floatInVec( mVec128, idx );
+}
+
+inline VecIdx Point3::operator []( int idx ) // non-const subscript: hands back a VecIdx built from mVec128 and idx
+{
+    return VecIdx( mVec128, idx ); // presumably a proxy that allows assignment to the element - confirm against VecIdx
+}
+
+inline const floatInVec Point3::operator []( int idx ) const // const subscript: same result as getElem( idx )
+{
+    return floatInVec( mVec128, idx );
+}
+
+inline const Vector3 Point3::operator -( Point3 pnt ) const // point - point yields a Vector3
+{
+    return Vector3( vec_sub( mVec128, pnt.mVec128 ) );
+}
+
+inline const Point3 Point3::operator +( Vector3 vec ) const // point + vector yields a Point3
+{
+    return Point3( vec_add( mVec128, vec.get128() ) );
+}
+
+inline const Point3 Point3::operator -( Vector3 vec ) const // point - vector yields a Point3
+{
+    return Point3( vec_sub( mVec128, vec.get128() ) );
+}
+
+// In-place translation: this point becomes ( *this + vec ).
+inline Point3 & Point3::operator +=( Vector3 vec )
+{
+    return ( *this = *this + vec );
+}
+
+// In-place translation: this point becomes ( *this - vec ).
+inline Point3 & Point3::operator -=( Vector3 vec )
+{
+    return ( *this = *this - vec );
+}
+
+inline const Point3 mulPerElem( Point3 pnt0, Point3 pnt1 ) // element-wise product
+{
+    return Point3( vec_madd( pnt0.get128(), pnt1.get128(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ); // multiply-add with a zero addend
+}
+
+inline const Point3 divPerElem( Point3 pnt0, Point3 pnt1 ) // pnt0 divided by pnt1 via divf4
+{
+    return Point3( divf4( pnt0.get128(), pnt1.get128() ) );
+}
+
+inline const Point3 recipPerElem( Point3 pnt ) // recipf4 applied to the 128-bit value
+{
+    return Point3( recipf4( pnt.get128() ) );
+}
+
+inline const Point3 sqrtPerElem( Point3 pnt ) // sqrtf4 applied to the 128-bit value
+{
+    return Point3( sqrtf4( pnt.get128() ) );
+}
+
+inline const Point3 rsqrtPerElem( Point3 pnt ) // rsqrtf4 applied to the 128-bit value
+{
+    return Point3( rsqrtf4( pnt.get128() ) );
+}
+
+inline const Point3 absPerElem( Point3 pnt ) // fabsf4 applied to the 128-bit value
+{
+    return Point3( fabsf4( pnt.get128() ) );
+}
+
+inline const Point3 copySignPerElem( Point3 pnt0, Point3 pnt1 ) // copysignf4( pnt0, pnt1 )
+{
+    return Point3( copysignf4( pnt0.get128(), pnt1.get128() ) ); // presumably magnitude from pnt0, sign from pnt1 - confirm against copysignf4
+}
+
+inline const Point3 maxPerElem( Point3 pnt0, Point3 pnt1 ) // fmaxf4 of the two 128-bit values
+{
+    return Point3( fmaxf4( pnt0.get128(), pnt1.get128() ) );
+}
+
+// Largest of the x, y and z elements of pnt, handed back as a floatInVec.
+inline const floatInVec maxElem( Point3 pnt )
+{
+    vec_float4 highest = fmaxf4( vec_splat( pnt.get128(), 1 ), pnt.get128() ); // slot 0 = max( y, x )
+    highest = fmaxf4( vec_splat( pnt.get128(), 2 ), highest );                 // slot 0 = max( z, y, x )
+    return floatInVec( highest, 0 );
+}
+
+inline const Point3 minPerElem( Point3 pnt0, Point3 pnt1 ) // minimum of two points, taken per element
+{
+    return Point3( fminf4( pnt0.get128(), pnt1.get128() ) ); // fminf4 runs over the full 128-bit values of both operands
+}
+
+// Smallest of the x, y and z elements of pnt, handed back as a floatInVec.
+inline const floatInVec minElem( Point3 pnt )
+{
+    vec_float4 lowest = fminf4( vec_splat( pnt.get128(), 1 ), pnt.get128() ); // slot 0 = min( y, x )
+    lowest = fminf4( vec_splat( pnt.get128(), 2 ), lowest );                  // slot 0 = min( z, y, x )
+    return floatInVec( lowest, 0 );
+}
+
+// Adds the x, y and z elements of pnt together; the total is handed back as a floatInVec.
+inline const floatInVec sum( Point3 pnt )
+{
+    vec_float4 total = vec_add( vec_splat( pnt.get128(), 1 ), pnt.get128() ); // slot 0 = y + x
+    total = vec_add( vec_splat( pnt.get128(), 2 ), total );                   // slot 0 = z + ( y + x )
+    return floatInVec( total, 0 );
+}
+
+inline const Point3 scale( Point3 pnt, float scaleVal ) // scalar-float overload
+{
+    return scale( pnt, floatInVec( scaleVal ) ); // wrap the scalar and forward to the floatInVec overload
+}
+
+inline const Point3 scale( Point3 pnt, floatInVec scaleVal ) // scale by one factor
+{
+    return mulPerElem( pnt, Point3( scaleVal ) ); // the factor is turned into a Point3, then multiplied element-wise
+}
+
+inline const Point3 scale( Point3 pnt, Vector3 scaleVec ) // scale by a separate factor per element
+{
+    return mulPerElem( pnt, Point3( scaleVec ) ); // scaleVec is rewrapped as a Point3, then multiplied element-wise
+}
+
+inline const floatInVec projection( Point3 pnt, Vector3 unitVec ) // 3-element dot product of pnt with unitVec
+{
+    return floatInVec( _vmathVfDot3( pnt.get128(), unitVec.get128() ), 0 ); // unitVec is expected to be unit length (per its name); this is not checked
+}
+
+inline const floatInVec distSqrFromOrigin( Point3 pnt ) // lengthSqr of the point treated as a Vector3
+{
+    return lengthSqr( Vector3( pnt ) );
+}
+
+inline const floatInVec distFromOrigin( Point3 pnt ) // length of the point treated as a Vector3
+{
+    return length( Vector3( pnt ) );
+}
+
+inline const floatInVec distSqr( Point3 pnt0, Point3 pnt1 ) // lengthSqr of the difference pnt1 - pnt0
+{
+    return lengthSqr( ( pnt1 - pnt0 ) );
+}
+
+inline const floatInVec dist( Point3 pnt0, Point3 pnt1 ) // length of the difference pnt1 - pnt0
+{
+    return length( ( pnt1 - pnt0 ) );
+}
+
+inline const Point3 select( Point3 pnt0, Point3 pnt1, bool select1 ) // scalar-bool overload
+{
+    return select( pnt0, pnt1, boolInVec(select1) ); // wrap the flag in a boolInVec and forward to the boolInVec overload
+}
+
+inline const Point3 select( Point3 pnt0, Point3 pnt1, boolInVec select1 ) // pick between pnt0 and pnt1 using a vector mask
+{
+    return Point3( vec_sel( pnt0.get128(), pnt1.get128(), select1.get128() ) ); // vec_sel takes pnt1's bits where the mask bits are 1, pnt0's elsewhere
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+inline void print( Point3 pnt )
+{
+    union { vec_float4 quad; float lane[4]; } view; // lets printf read the point one float at a time
+    view.quad = pnt.get128();
+    printf( "( %f %f %f )\n", view.lane[0], view.lane[1], view.lane[2] );
+}
+
+inline void print( Point3 pnt, const char * name )
+{
+    union { vec_float4 quad; float lane[4]; } view; // same float-by-float view, output prefixed with the caller's label
+    view.quad = pnt.get128();
+    printf( "%s: ( %f %f %f )\n", name, view.lane[0], view.lane[1], view.lane[2] );
+}
+
+#endif
+
+} // namespace Aos
+} // namespace Vectormath
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/ppu/cpp/vec_soa.h b/Extras/vectormathlibrary/include/vectormath/ppu/cpp/vec_soa.h
index 89fdfcf7f..3f26c9060 100644
--- a/Extras/vectormathlibrary/include/vectormath/ppu/cpp/vec_soa.h
+++ b/Extras/vectormathlibrary/include/vectormath/ppu/cpp/vec_soa.h
@@ -1,1425 +1,1425 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_VEC_SOA_CPP_H
-#define _VECTORMATH_VEC_SOA_CPP_H
-//-----------------------------------------------------------------------------
-// Constants
-// for permutes, words are labeled [x,y,z,w] [a,b,c,d]
-
-#define _VECTORMATH_PERM_X 0x00010203
-#define _VECTORMATH_PERM_Y 0x04050607
-#define _VECTORMATH_PERM_Z 0x08090a0b
-#define _VECTORMATH_PERM_W 0x0c0d0e0f
-#define _VECTORMATH_PERM_A 0x10111213
-#define _VECTORMATH_PERM_B 0x14151617
-#define _VECTORMATH_PERM_C 0x18191a1b
-#define _VECTORMATH_PERM_D 0x1c1d1e1f
-#define _VECTORMATH_PERM_ZBWX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B, _VECTORMATH_PERM_W, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PERM_XCYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_C, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PERM_ZDWX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_D, _VECTORMATH_PERM_W, _VECTORMATH_PERM_X })
-#define _VECTORMATH_PERM_ZCXA ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_A })
-#define _VECTORMATH_PERM_XBZD ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_B, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_D })
-#define _VECTORMATH_PERM_WDYB ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_W, _VECTORMATH_PERM_D, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_B })
-#define _VECTORMATH_PERM_ZBXD ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B, _VECTORMATH_PERM_X, _VECTORMATH_PERM_D })
-#define _VECTORMATH_PERM_WCYA ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_W, _VECTORMATH_PERM_C, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A })
-#define _VECTORMATH_PERM_XDZB ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_D, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B })
-#define _VECTORMATH_SLERP_TOL 0.999f
-
-//-----------------------------------------------------------------------------
-// Definitions
-
-#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
-#define _VECTORMATH_INTERNAL_FUNCTIONS
-
-#endif
-
-namespace Vectormath {
-namespace Soa {
-
-inline Vector3::Vector3( const Vector3 & vec )
-{
-    mX = vec.mX;
-    mY = vec.mY;
-    mZ = vec.mZ;
-}
-
-inline Vector3::Vector3( vec_float4 _x, vec_float4 _y, vec_float4 _z )
-{
-    mX = _x;
-    mY = _y;
-    mZ = _z;
-}
-
-inline Vector3::Vector3( const Point3 & pnt )
-{
-    mX = pnt.getX();
-    mY = pnt.getY();
-    mZ = pnt.getZ();
-}
-
-inline Vector3::Vector3( vec_float4 scalar )
-{
-    mX = scalar;
-    mY = scalar;
-    mZ = scalar;
-}
-
-inline Vector3::Vector3( Aos::Vector3 vec )
-{
-    vec_float4 vec128 = vec.get128();
-    mX = vec_splat( vec128, 0 );
-    mY = vec_splat( vec128, 1 );
-    mZ = vec_splat( vec128, 2 );
-}
-
-inline Vector3::Vector3( Aos::Vector3 vec0, Aos::Vector3 vec1, Aos::Vector3 vec2, Aos::Vector3 vec3 )
-{
-    vec_float4 tmp0, tmp1, tmp2, tmp3;
-    tmp0 = vec_mergeh( vec0.get128(), vec2.get128() );
-    tmp1 = vec_mergeh( vec1.get128(), vec3.get128() );
-    tmp2 = vec_mergel( vec0.get128(), vec2.get128() );
-    tmp3 = vec_mergel( vec1.get128(), vec3.get128() );
-    mX = vec_mergeh( tmp0, tmp1 );
-    mY = vec_mergel( tmp0, tmp1 );
-    mZ = vec_mergeh( tmp2, tmp3 );
-}
-
-inline const Vector3 Vector3::xAxis( )
-{
-    return Vector3( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-inline const Vector3 Vector3::yAxis( )
-{
-    return Vector3( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){1.0f,1.0f,1.0f,1.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-inline const Vector3 Vector3::zAxis( )
-{
-    return Vector3( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){1.0f,1.0f,1.0f,1.0f}) );
-}
-
-inline const Vector3 lerp( vec_float4 t, const Vector3 & vec0, const Vector3 & vec1 )
-{
-    return ( vec0 + ( ( vec1 - vec0 ) * t ) );
-}
-
-inline const Vector3 slerp( vec_float4 t, const Vector3 & unitVec0, const Vector3 & unitVec1 )
-{
-    vec_float4 recipSinAngle, scale0, scale1, cosAngle, angle;
-    vec_uint4 selectMask;
-    cosAngle = dot( unitVec0, unitVec1 );
-    selectMask = (vec_uint4)vec_cmpgt( (vec_float4){_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL}, cosAngle );
-    angle = acosf4( cosAngle );
-    recipSinAngle = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sinf4( angle ) );
-    scale0 = vec_sel( vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), t ), vec_madd( sinf4( vec_madd( vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), t ), angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), recipSinAngle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), selectMask );
-    scale1 = vec_sel( t, vec_madd( sinf4( vec_madd( t, angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), recipSinAngle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), selectMask );
-    return ( ( unitVec0 * scale0 ) + ( unitVec1 * scale1 ) );
-}
-
-inline void Vector3::get4Aos( Aos::Vector3 & result0, Aos::Vector3 & result1, Aos::Vector3 & result2, Aos::Vector3 & result3 ) const
-{
-    vec_float4 tmp0, tmp1;
-    tmp0 = vec_mergeh( mX, mZ );
-    tmp1 = vec_mergel( mX, mZ );
-    result0 = Aos::Vector3( vec_mergeh( tmp0, mY ) );
-    result1 = Aos::Vector3( vec_perm( tmp0, mY, _VECTORMATH_PERM_ZBWX ) );
-    result2 = Aos::Vector3( vec_perm( tmp1, mY, _VECTORMATH_PERM_XCYX ) );
-    result3 = Aos::Vector3( vec_perm( tmp1, mY, _VECTORMATH_PERM_ZDWX ) );
-}
-
-inline void loadXYZArray( Vector3 & vec, const vec_float4 * threeQuads )
-{
-    vec_float4 xyxy, yzyz, zxzx, xyzx, yzxy, zxyz;
-    xyzx = threeQuads[0];
-    yzxy = threeQuads[1];
-    zxyz = threeQuads[2];
-    xyxy = vec_sld( yzxy, xyzx, 8 );
-    zxzx = vec_sld( xyzx, zxyz, 8 );
-    yzyz = vec_sld( zxyz, yzxy, 8 );
-    vec.setX( vec_perm( xyxy, zxzx, _VECTORMATH_PERM_ZBXD ) );
-    vec.setY( vec_perm( xyxy, yzyz, _VECTORMATH_PERM_WCYA ) );
-    vec.setZ( vec_perm( zxzx, yzyz, _VECTORMATH_PERM_XDZB ) );
-}
-
-inline void storeXYZArray( const Vector3 & vec, vec_float4 * threeQuads )
-{
-    vec_float4 xyzx, yzxy, zxyz, xyxy, zxzx, yzyz;
-    xyxy = vec_perm( vec.getX(), vec.getY(), _VECTORMATH_PERM_ZCXA );
-    zxzx = vec_perm( vec.getZ(), vec.getX(), _VECTORMATH_PERM_XBZD );
-    yzyz = vec_perm( vec.getY(), vec.getZ(), _VECTORMATH_PERM_WDYB );
-    xyzx = vec_sld( xyxy, zxzx, 8 );
-    yzxy = vec_sld( yzyz, xyxy, 8 );
-    zxyz = vec_sld( zxzx, yzyz, 8 );
-    threeQuads[0] = xyzx;
-    threeQuads[1] = yzxy;
-    threeQuads[2] = zxyz;
-}
-
-inline void storeHalfFloats( const Vector3 & vec0, const Vector3 & vec1, vec_ushort8 * threeQuads )
-{
-    vec_float4 xyz0[3];
-    vec_float4 xyz1[3];
-    storeXYZArray( vec0, xyz0 );
-    storeXYZArray( vec1, xyz1 );
-    threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
-    threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
-    threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
-}
-
-inline Vector3 & Vector3::operator =( const Vector3 & vec )
-{
-    mX = vec.mX;
-    mY = vec.mY;
-    mZ = vec.mZ;
-    return *this;
-}
-
-inline Vector3 & Vector3::setX( vec_float4 _x )
-{
-    mX = _x;
-    return *this;
-}
-
-inline vec_float4 Vector3::getX( ) const
-{
-    return mX;
-}
-
-inline Vector3 & Vector3::setY( vec_float4 _y )
-{
-    mY = _y;
-    return *this;
-}
-
-inline vec_float4 Vector3::getY( ) const
-{
-    return mY;
-}
-
-inline Vector3 & Vector3::setZ( vec_float4 _z )
-{
-    mZ = _z;
-    return *this;
-}
-
-inline vec_float4 Vector3::getZ( ) const
-{
-    return mZ;
-}
-
-inline Vector3 & Vector3::setElem( int idx, vec_float4 value )
-{
-    *(&mX + idx) = value;
-    return *this;
-}
-
-inline vec_float4 Vector3::getElem( int idx ) const
-{
-    return *(&mX + idx);
-}
-
-inline Vector3::vec_float4_t & Vector3::operator []( int idx )
-{
-    return *(&mX + idx);
-}
-
-inline vec_float4 Vector3::operator []( int idx ) const
-{
-    return *(&mX + idx);
-}
-
-inline const Vector3 Vector3::operator +( const Vector3 & vec ) const
-{
-    return Vector3(
-        vec_add( mX, vec.mX ),
-        vec_add( mY, vec.mY ),
-        vec_add( mZ, vec.mZ )
-    );
-}
-
-inline const Vector3 Vector3::operator -( const Vector3 & vec ) const
-{
-    return Vector3(
-        vec_sub( mX, vec.mX ),
-        vec_sub( mY, vec.mY ),
-        vec_sub( mZ, vec.mZ )
-    );
-}
-
-inline const Point3 Vector3::operator +( const Point3 & pnt ) const
-{
-    return Point3(
-        vec_add( mX, pnt.getX() ),
-        vec_add( mY, pnt.getY() ),
-        vec_add( mZ, pnt.getZ() )
-    );
-}
-
-inline const Vector3 Vector3::operator *( vec_float4 scalar ) const
-{
-    return Vector3(
-        vec_madd( mX, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        vec_madd( mY, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        vec_madd( mZ, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) )
-    );
-}
-
-inline Vector3 & Vector3::operator +=( const Vector3 & vec )
-{
-    *this = *this + vec;
-    return *this;
-}
-
-inline Vector3 & Vector3::operator -=( const Vector3 & vec )
-{
-    *this = *this - vec;
-    return *this;
-}
-
-inline Vector3 & Vector3::operator *=( vec_float4 scalar )
-{
-    *this = *this * scalar;
-    return *this;
-}
-
-inline const Vector3 Vector3::operator /( vec_float4 scalar ) const
-{
-    return Vector3(
-        divf4( mX, scalar ),
-        divf4( mY, scalar ),
-        divf4( mZ, scalar )
-    );
-}
-
-inline Vector3 & Vector3::operator /=( vec_float4 scalar )
-{
-    *this = *this / scalar;
-    return *this;
-}
-
-inline const Vector3 Vector3::operator -( ) const
-{
-    return Vector3(
-        negatef4( mX ),
-        negatef4( mY ),
-        negatef4( mZ )
-    );
-}
-
-inline const Vector3 operator *( vec_float4 scalar, const Vector3 & vec )
-{
-    return vec * scalar;
-}
-
-inline const Vector3 mulPerElem( const Vector3 & vec0, const Vector3 & vec1 )
-{
-    return Vector3(
-        vec_madd( vec0.getX(), vec1.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        vec_madd( vec0.getY(), vec1.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        vec_madd( vec0.getZ(), vec1.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) )
-    );
-}
-
-inline const Vector3 divPerElem( const Vector3 & vec0, const Vector3 & vec1 )
-{
-    return Vector3(
-        divf4( vec0.getX(), vec1.getX() ),
-        divf4( vec0.getY(), vec1.getY() ),
-        divf4( vec0.getZ(), vec1.getZ() )
-    );
-}
-
-inline const Vector3 recipPerElem( const Vector3 & vec )
-{
-    return Vector3(
-        divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec.getX() ),
-        divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec.getY() ),
-        divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec.getZ() )
-    );
-}
-
-inline const Vector3 sqrtPerElem( const Vector3 & vec )
-{
-    return Vector3(
-        sqrtf4( vec.getX() ),
-        sqrtf4( vec.getY() ),
-        sqrtf4( vec.getZ() )
-    );
-}
-
-inline const Vector3 rsqrtPerElem( const Vector3 & vec )
-{
-    return Vector3(
-        divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( vec.getX() ) ),
-        divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( vec.getY() ) ),
-        divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( vec.getZ() ) )
-    );
-}
-
-inline const Vector3 absPerElem( const Vector3 & vec )
-{
-    return Vector3(
-        fabsf4( vec.getX() ),
-        fabsf4( vec.getY() ),
-        fabsf4( vec.getZ() )
-    );
-}
-
-inline const Vector3 copySignPerElem( const Vector3 & vec0, const Vector3 & vec1 )
-{
-    return Vector3(
-        copysignf4( vec0.getX(), vec1.getX() ),
-        copysignf4( vec0.getY(), vec1.getY() ),
-        copysignf4( vec0.getZ(), vec1.getZ() )
-    );
-}
-
-inline const Vector3 maxPerElem( const Vector3 & vec0, const Vector3 & vec1 )
-{
-    return Vector3(
-        fmaxf4( vec0.getX(), vec1.getX() ),
-        fmaxf4( vec0.getY(), vec1.getY() ),
-        fmaxf4( vec0.getZ(), vec1.getZ() )
-    );
-}
-
-inline vec_float4 maxElem( const Vector3 & vec )
-{
-    vec_float4 result;
-    result = fmaxf4( vec.getX(), vec.getY() );
-    result = fmaxf4( vec.getZ(), result );
-    return result;
-}
-
-inline const Vector3 minPerElem( const Vector3 & vec0, const Vector3 & vec1 )
-{
-    return Vector3(
-        fminf4( vec0.getX(), vec1.getX() ),
-        fminf4( vec0.getY(), vec1.getY() ),
-        fminf4( vec0.getZ(), vec1.getZ() )
-    );
-}
-
-inline vec_float4 minElem( const Vector3 & vec )
-{
-    vec_float4 result;
-    result = fminf4( vec.getX(), vec.getY() );
-    result = fminf4( vec.getZ(), result );
-    return result;
-}
-
-inline vec_float4 sum( const Vector3 & vec )
-{
-    vec_float4 result;
-    result = vec_add( vec.getX(), vec.getY() );
-    result = vec_add( result, vec.getZ() );
-    return result;
-}
-
-inline vec_float4 dot( const Vector3 & vec0, const Vector3 & vec1 )
-{
-    vec_float4 result;
-    result = vec_madd( vec0.getX(), vec1.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result = vec_add( result, vec_madd( vec0.getY(), vec1.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    result = vec_add( result, vec_madd( vec0.getZ(), vec1.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    return result;
-}
-
-inline vec_float4 lengthSqr( const Vector3 & vec )
-{
-    vec_float4 result;
-    result = vec_madd( vec.getX(), vec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result = vec_add( result, vec_madd( vec.getY(), vec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    result = vec_add( result, vec_madd( vec.getZ(), vec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    return result;
-}
-
-inline vec_float4 length( const Vector3 & vec )
-{
-    return sqrtf4( lengthSqr( vec ) );
-}
-
-inline const Vector3 normalize( const Vector3 & vec )
-{
-    vec_float4 lenSqr, lenInv;
-    lenSqr = lengthSqr( vec );
-    lenInv = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( lenSqr ) );
-    return Vector3(
-        vec_madd( vec.getX(), lenInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        vec_madd( vec.getY(), lenInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        vec_madd( vec.getZ(), lenInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) )
-    );
-}
-
-inline const Vector3 cross( const Vector3 & vec0, const Vector3 & vec1 )
-{
-    return Vector3(
-        vec_sub( vec_madd( vec0.getY(), vec1.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( vec0.getZ(), vec1.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),
-        vec_sub( vec_madd( vec0.getZ(), vec1.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( vec0.getX(), vec1.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ),
-        vec_sub( vec_madd( vec0.getX(), vec1.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), vec_madd( vec0.getY(), vec1.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) )
-    );
-}
-
-inline const Vector3 select( const Vector3 & vec0, const Vector3 & vec1, vec_uint4 select1 )
-{
-    return Vector3(
-        vec_sel( vec0.getX(), vec1.getX(), select1 ),
-        vec_sel( vec0.getY(), vec1.getY(), select1 ),
-        vec_sel( vec0.getZ(), vec1.getZ(), select1 )
-    );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( const Vector3 & vec )
-{
-    Aos::Vector3 vec0, vec1, vec2, vec3;
-    vec.get4Aos( vec0, vec1, vec2, vec3 );
-    printf("slot 0:\n");
-    print( vec0 );
-    printf("slot 1:\n");
-    print( vec1 );
-    printf("slot 2:\n");
-    print( vec2 );
-    printf("slot 3:\n");
-    print( vec3 );
-}
-
-inline void print( const Vector3 & vec, const char * name )
-{
-    Aos::Vector3 vec0, vec1, vec2, vec3;
-    printf( "%s:\n", name );
-    vec.get4Aos( vec0, vec1, vec2, vec3 );
-    printf("slot 0:\n");
-    print( vec0 );
-    printf("slot 1:\n");
-    print( vec1 );
-    printf("slot 2:\n");
-    print( vec2 );
-    printf("slot 3:\n");
-    print( vec3 );
-}
-
-#endif
-
-inline Vector4::Vector4( const Vector4 & vec )
-{
-    mX = vec.mX;
-    mY = vec.mY;
-    mZ = vec.mZ;
-    mW = vec.mW;
-}
-
-inline Vector4::Vector4( vec_float4 _x, vec_float4 _y, vec_float4 _z, vec_float4 _w )
-{
-    mX = _x;
-    mY = _y;
-    mZ = _z;
-    mW = _w;
-}
-
-inline Vector4::Vector4( const Vector3 & xyz, vec_float4 _w )
-{
-    this->setXYZ( xyz );
-    this->setW( _w );
-}
-
-inline Vector4::Vector4( const Vector3 & vec )
-{
-    mX = vec.getX();
-    mY = vec.getY();
-    mZ = vec.getZ();
-    mW = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
-}
-
-inline Vector4::Vector4( const Point3 & pnt )
-{
-    mX = pnt.getX();
-    mY = pnt.getY();
-    mZ = pnt.getZ();
-    mW = ((vec_float4){1.0f,1.0f,1.0f,1.0f});
-}
-
-inline Vector4::Vector4( const Quat & quat )
-{
-    mX = quat.getX();
-    mY = quat.getY();
-    mZ = quat.getZ();
-    mW = quat.getW();
-}
-
-inline Vector4::Vector4( vec_float4 scalar )
-{
-    mX = scalar;
-    mY = scalar;
-    mZ = scalar;
-    mW = scalar;
-}
-
-inline Vector4::Vector4( Aos::Vector4 vec )
-{
-    vec_float4 vec128 = vec.get128();
-    mX = vec_splat( vec128, 0 );
-    mY = vec_splat( vec128, 1 );
-    mZ = vec_splat( vec128, 2 );
-    mW = vec_splat( vec128, 3 );
-}
-
-inline Vector4::Vector4( Aos::Vector4 vec0, Aos::Vector4 vec1, Aos::Vector4 vec2, Aos::Vector4 vec3 )
-{
-    vec_float4 tmp0, tmp1, tmp2, tmp3;
-    tmp0 = vec_mergeh( vec0.get128(), vec2.get128() );
-    tmp1 = vec_mergeh( vec1.get128(), vec3.get128() );
-    tmp2 = vec_mergel( vec0.get128(), vec2.get128() );
-    tmp3 = vec_mergel( vec1.get128(), vec3.get128() );
-    mX = vec_mergeh( tmp0, tmp1 );
-    mY = vec_mergel( tmp0, tmp1 );
-    mZ = vec_mergeh( tmp2, tmp3 );
-    mW = vec_mergel( tmp2, tmp3 );
-}
-
-inline const Vector4 Vector4::xAxis( )
-{
-    return Vector4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-inline const Vector4 Vector4::yAxis( )
-{
-    return Vector4( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){1.0f,1.0f,1.0f,1.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-inline const Vector4 Vector4::zAxis( )
-{
-    return Vector4( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){1.0f,1.0f,1.0f,1.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-}
-
-inline const Vector4 Vector4::wAxis( )
-{
-    return Vector4( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){1.0f,1.0f,1.0f,1.0f}) );
-}
-
-inline const Vector4 lerp( vec_float4 t, const Vector4 & vec0, const Vector4 & vec1 )
-{
-    return ( vec0 + ( ( vec1 - vec0 ) * t ) );
-}
-
-inline const Vector4 slerp( vec_float4 t, const Vector4 & unitVec0, const Vector4 & unitVec1 )
-{
-    vec_float4 recipSinAngle, scale0, scale1, cosAngle, angle;
-    vec_uint4 selectMask;
-    cosAngle = dot( unitVec0, unitVec1 );
-    selectMask = (vec_uint4)vec_cmpgt( (vec_float4){_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL}, cosAngle );
-    angle = acosf4( cosAngle );
-    recipSinAngle = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sinf4( angle ) );
-    scale0 = vec_sel( vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), t ), vec_madd( sinf4( vec_madd( vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), t ), angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), recipSinAngle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), selectMask );
-    scale1 = vec_sel( t, vec_madd( sinf4( vec_madd( t, angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), recipSinAngle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), selectMask );
-    return ( ( unitVec0 * scale0 ) + ( unitVec1 * scale1 ) );
-}
-
-inline void Vector4::get4Aos( Aos::Vector4 & result0, Aos::Vector4 & result1, Aos::Vector4 & result2, Aos::Vector4 & result3 ) const
-{
-    vec_float4 tmp0, tmp1, tmp2, tmp3;
-    tmp0 = vec_mergeh( mX, mZ );
-    tmp1 = vec_mergeh( mY, mW );
-    tmp2 = vec_mergel( mX, mZ );
-    tmp3 = vec_mergel( mY, mW );
-    result0 = Aos::Vector4( vec_mergeh( tmp0, tmp1 ) );
-    result1 = Aos::Vector4( vec_mergel( tmp0, tmp1 ) );
-    result2 = Aos::Vector4( vec_mergeh( tmp2, tmp3 ) );
-    result3 = Aos::Vector4( vec_mergel( tmp2, tmp3 ) );
-}
-
-inline void storeHalfFloats( const Vector4 & vec, vec_ushort8 * twoQuads )
-{
-    Aos::Vector4 v0, v1, v2, v3;
-    vec.get4Aos( v0, v1, v2, v3 );
-    twoQuads[0] = _vmath2VfToHalfFloats(v0.get128(), v1.get128());
-    twoQuads[1] = _vmath2VfToHalfFloats(v2.get128(), v3.get128());
-}
-
-inline Vector4 & Vector4::operator =( const Vector4 & vec )
-{
-    mX = vec.mX;
-    mY = vec.mY;
-    mZ = vec.mZ;
-    mW = vec.mW;
-    return *this;
-}
-
-inline Vector4 & Vector4::setXYZ( const Vector3 & vec )
-{
-    mX = vec.getX();
-    mY = vec.getY();
-    mZ = vec.getZ();
-    return *this;
-}
-
-inline const Vector3 Vector4::getXYZ( ) const
-{
-    return Vector3( mX, mY, mZ );
-}
-
-inline Vector4 & Vector4::setX( vec_float4 _x )
-{
-    mX = _x;
-    return *this;
-}
-
-inline vec_float4 Vector4::getX( ) const
-{
-    return mX;
-}
-
-inline Vector4 & Vector4::setY( vec_float4 _y )
-{
-    mY = _y;
-    return *this;
-}
-
-inline vec_float4 Vector4::getY( ) const
-{
-    return mY;
-}
-
-inline Vector4 & Vector4::setZ( vec_float4 _z )
-{
-    mZ = _z;
-    return *this;
-}
-
-inline vec_float4 Vector4::getZ( ) const
-{
-    return mZ;
-}
-
-inline Vector4 & Vector4::setW( vec_float4 _w )
-{
-    mW = _w;
-    return *this;
-}
-
-inline vec_float4 Vector4::getW( ) const
-{
-    return mW;
-}
-
-inline Vector4 & Vector4::setElem( int idx, vec_float4 value )
-{
-    *(&mX + idx) = value;
-    return *this;
-}
-
-inline vec_float4 Vector4::getElem( int idx ) const
-{
-    return *(&mX + idx);
-}
-
-inline Vector4::vec_float4_t & Vector4::operator []( int idx )
-{
-    return *(&mX + idx);
-}
-
-inline vec_float4 Vector4::operator []( int idx ) const
-{
-    return *(&mX + idx);
-}
-
-inline const Vector4 Vector4::operator +( const Vector4 & vec ) const
-{
-    return Vector4(
-        vec_add( mX, vec.mX ),
-        vec_add( mY, vec.mY ),
-        vec_add( mZ, vec.mZ ),
-        vec_add( mW, vec.mW )
-    );
-}
-
-inline const Vector4 Vector4::operator -( const Vector4 & vec ) const
-{
-    return Vector4(
-        vec_sub( mX, vec.mX ),
-        vec_sub( mY, vec.mY ),
-        vec_sub( mZ, vec.mZ ),
-        vec_sub( mW, vec.mW )
-    );
-}
-
-inline const Vector4 Vector4::operator *( vec_float4 scalar ) const
-{
-    return Vector4(
-        vec_madd( mX, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        vec_madd( mY, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        vec_madd( mZ, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        vec_madd( mW, scalar, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) )
-    );
-}
-
-inline Vector4 & Vector4::operator +=( const Vector4 & vec )
-{
-    *this = *this + vec;
-    return *this;
-}
-
-inline Vector4 & Vector4::operator -=( const Vector4 & vec )
-{
-    *this = *this - vec;
-    return *this;
-}
-
-inline Vector4 & Vector4::operator *=( vec_float4 scalar )
-{
-    *this = *this * scalar;
-    return *this;
-}
-
-inline const Vector4 Vector4::operator /( vec_float4 scalar ) const
-{
-    return Vector4(
-        divf4( mX, scalar ),
-        divf4( mY, scalar ),
-        divf4( mZ, scalar ),
-        divf4( mW, scalar )
-    );
-}
-
-inline Vector4 & Vector4::operator /=( vec_float4 scalar )
-{
-    *this = *this / scalar;
-    return *this;
-}
-
-inline const Vector4 Vector4::operator -( ) const
-{
-    return Vector4(
-        negatef4( mX ),
-        negatef4( mY ),
-        negatef4( mZ ),
-        negatef4( mW )
-    );
-}
-
-inline const Vector4 operator *( vec_float4 scalar, const Vector4 & vec )
-{
-    return vec * scalar;
-}
-
-inline const Vector4 mulPerElem( const Vector4 & vec0, const Vector4 & vec1 )
-{
-    return Vector4(
-        vec_madd( vec0.getX(), vec1.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        vec_madd( vec0.getY(), vec1.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        vec_madd( vec0.getZ(), vec1.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        vec_madd( vec0.getW(), vec1.getW(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) )
-    );
-}
-
-inline const Vector4 divPerElem( const Vector4 & vec0, const Vector4 & vec1 )
-{
-    return Vector4(
-        divf4( vec0.getX(), vec1.getX() ),
-        divf4( vec0.getY(), vec1.getY() ),
-        divf4( vec0.getZ(), vec1.getZ() ),
-        divf4( vec0.getW(), vec1.getW() )
-    );
-}
-
-inline const Vector4 recipPerElem( const Vector4 & vec )
-{
-    return Vector4(
-        divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec.getX() ),
-        divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec.getY() ),
-        divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec.getZ() ),
-        divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), vec.getW() )
-    );
-}
-
-inline const Vector4 sqrtPerElem( const Vector4 & vec )
-{
-    return Vector4(
-        sqrtf4( vec.getX() ),
-        sqrtf4( vec.getY() ),
-        sqrtf4( vec.getZ() ),
-        sqrtf4( vec.getW() )
-    );
-}
-
-inline const Vector4 rsqrtPerElem( const Vector4 & vec )
-{
-    return Vector4(
-        divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( vec.getX() ) ),
-        divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( vec.getY() ) ),
-        divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( vec.getZ() ) ),
-        divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( vec.getW() ) )
-    );
-}
-
-inline const Vector4 absPerElem( const Vector4 & vec )
-{
-    return Vector4(
-        fabsf4( vec.getX() ),
-        fabsf4( vec.getY() ),
-        fabsf4( vec.getZ() ),
-        fabsf4( vec.getW() )
-    );
-}
-
-inline const Vector4 copySignPerElem( const Vector4 & vec0, const Vector4 & vec1 )
-{
-    return Vector4(
-        copysignf4( vec0.getX(), vec1.getX() ),
-        copysignf4( vec0.getY(), vec1.getY() ),
-        copysignf4( vec0.getZ(), vec1.getZ() ),
-        copysignf4( vec0.getW(), vec1.getW() )
-    );
-}
-
-inline const Vector4 maxPerElem( const Vector4 & vec0, const Vector4 & vec1 )
-{
-    return Vector4(
-        fmaxf4( vec0.getX(), vec1.getX() ),
-        fmaxf4( vec0.getY(), vec1.getY() ),
-        fmaxf4( vec0.getZ(), vec1.getZ() ),
-        fmaxf4( vec0.getW(), vec1.getW() )
-    );
-}
-
-inline vec_float4 maxElem( const Vector4 & vec )
-{
-    vec_float4 result;
-    result = fmaxf4( vec.getX(), vec.getY() );
-    result = fmaxf4( vec.getZ(), result );
-    result = fmaxf4( vec.getW(), result );
-    return result;
-}
-
-inline const Vector4 minPerElem( const Vector4 & vec0, const Vector4 & vec1 )
-{
-    return Vector4(
-        fminf4( vec0.getX(), vec1.getX() ),
-        fminf4( vec0.getY(), vec1.getY() ),
-        fminf4( vec0.getZ(), vec1.getZ() ),
-        fminf4( vec0.getW(), vec1.getW() )
-    );
-}
-
-inline vec_float4 minElem( const Vector4 & vec )
-{
-    vec_float4 result;
-    result = fminf4( vec.getX(), vec.getY() );
-    result = fminf4( vec.getZ(), result );
-    result = fminf4( vec.getW(), result );
-    return result;
-}
-
-inline vec_float4 sum( const Vector4 & vec )
-{
-    vec_float4 result;
-    result = vec_add( vec.getX(), vec.getY() );
-    result = vec_add( result, vec.getZ() );
-    result = vec_add( result, vec.getW() );
-    return result;
-}
-
-inline vec_float4 dot( const Vector4 & vec0, const Vector4 & vec1 )
-{
-    vec_float4 result;
-    result = vec_madd( vec0.getX(), vec1.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result = vec_add( result, vec_madd( vec0.getY(), vec1.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    result = vec_add( result, vec_madd( vec0.getZ(), vec1.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    result = vec_add( result, vec_madd( vec0.getW(), vec1.getW(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    return result;
-}
-
-inline vec_float4 lengthSqr( const Vector4 & vec )
-{
-    vec_float4 result;
-    result = vec_madd( vec.getX(), vec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result = vec_add( result, vec_madd( vec.getY(), vec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    result = vec_add( result, vec_madd( vec.getZ(), vec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    result = vec_add( result, vec_madd( vec.getW(), vec.getW(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    return result;
-}
-
-inline vec_float4 length( const Vector4 & vec )
-{
-    return sqrtf4( lengthSqr( vec ) );
-}
-
-inline const Vector4 normalize( const Vector4 & vec )
-{
-    vec_float4 lenSqr, lenInv;
-    lenSqr = lengthSqr( vec );
-    lenInv = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( lenSqr ) );
-    return Vector4(
-        vec_madd( vec.getX(), lenInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        vec_madd( vec.getY(), lenInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        vec_madd( vec.getZ(), lenInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        vec_madd( vec.getW(), lenInv, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) )
-    );
-}
-
-inline const Vector4 select( const Vector4 & vec0, const Vector4 & vec1, vec_uint4 select1 )
-{
-    return Vector4(
-        vec_sel( vec0.getX(), vec1.getX(), select1 ),
-        vec_sel( vec0.getY(), vec1.getY(), select1 ),
-        vec_sel( vec0.getZ(), vec1.getZ(), select1 ),
-        vec_sel( vec0.getW(), vec1.getW(), select1 )
-    );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( const Vector4 & vec )
-{
-    Aos::Vector4 vec0, vec1, vec2, vec3;
-    vec.get4Aos( vec0, vec1, vec2, vec3 );
-    printf("slot 0:\n");
-    print( vec0 );
-    printf("slot 1:\n");
-    print( vec1 );
-    printf("slot 2:\n");
-    print( vec2 );
-    printf("slot 3:\n");
-    print( vec3 );
-}
-
-inline void print( const Vector4 & vec, const char * name )
-{
-    Aos::Vector4 vec0, vec1, vec2, vec3;
-    printf( "%s:\n", name );
-    vec.get4Aos( vec0, vec1, vec2, vec3 );
-    printf("slot 0:\n");
-    print( vec0 );
-    printf("slot 1:\n");
-    print( vec1 );
-    printf("slot 2:\n");
-    print( vec2 );
-    printf("slot 3:\n");
-    print( vec3 );
-}
-
-#endif
-
-inline Point3::Point3( const Point3 & pnt )
-{
-    mX = pnt.mX;
-    mY = pnt.mY;
-    mZ = pnt.mZ;
-}
-
-inline Point3::Point3( vec_float4 _x, vec_float4 _y, vec_float4 _z )
-{
-    mX = _x;
-    mY = _y;
-    mZ = _z;
-}
-
-inline Point3::Point3( const Vector3 & vec )
-{
-    mX = vec.getX();
-    mY = vec.getY();
-    mZ = vec.getZ();
-}
-
-inline Point3::Point3( vec_float4 scalar )
-{
-    mX = scalar;
-    mY = scalar;
-    mZ = scalar;
-}
-
-inline Point3::Point3( Aos::Point3 pnt )
-{
-    vec_float4 vec128 = pnt.get128();
-    mX = vec_splat( vec128, 0 );
-    mY = vec_splat( vec128, 1 );
-    mZ = vec_splat( vec128, 2 );
-}
-
-inline Point3::Point3( Aos::Point3 pnt0, Aos::Point3 pnt1, Aos::Point3 pnt2, Aos::Point3 pnt3 )
-{
-    vec_float4 tmp0, tmp1, tmp2, tmp3;
-    tmp0 = vec_mergeh( pnt0.get128(), pnt2.get128() );
-    tmp1 = vec_mergeh( pnt1.get128(), pnt3.get128() );
-    tmp2 = vec_mergel( pnt0.get128(), pnt2.get128() );
-    tmp3 = vec_mergel( pnt1.get128(), pnt3.get128() );
-    mX = vec_mergeh( tmp0, tmp1 );
-    mY = vec_mergel( tmp0, tmp1 );
-    mZ = vec_mergeh( tmp2, tmp3 );
-}
-
-inline const Point3 lerp( vec_float4 t, const Point3 & pnt0, const Point3 & pnt1 )
-{
-    return ( pnt0 + ( ( pnt1 - pnt0 ) * t ) );
-}
-
-inline void Point3::get4Aos( Aos::Point3 & result0, Aos::Point3 & result1, Aos::Point3 & result2, Aos::Point3 & result3 ) const
-{
-    vec_float4 tmp0, tmp1;
-    tmp0 = vec_mergeh( mX, mZ );
-    tmp1 = vec_mergel( mX, mZ );
-    result0 = Aos::Point3( vec_mergeh( tmp0, mY ) );
-    result1 = Aos::Point3( vec_perm( tmp0, mY, _VECTORMATH_PERM_ZBWX ) );
-    result2 = Aos::Point3( vec_perm( tmp1, mY, _VECTORMATH_PERM_XCYX ) );
-    result3 = Aos::Point3( vec_perm( tmp1, mY, _VECTORMATH_PERM_ZDWX ) );
-}
-
-inline void loadXYZArray( Point3 & vec, const vec_float4 * threeQuads )
-{
-    vec_float4 xyxy, yzyz, zxzx, xyzx, yzxy, zxyz;
-    xyzx = threeQuads[0];
-    yzxy = threeQuads[1];
-    zxyz = threeQuads[2];
-    xyxy = vec_sld( yzxy, xyzx, 8 );
-    zxzx = vec_sld( xyzx, zxyz, 8 );
-    yzyz = vec_sld( zxyz, yzxy, 8 );
-    vec.setX( vec_perm( xyxy, zxzx, _VECTORMATH_PERM_ZBXD ) );
-    vec.setY( vec_perm( xyxy, yzyz, _VECTORMATH_PERM_WCYA ) );
-    vec.setZ( vec_perm( zxzx, yzyz, _VECTORMATH_PERM_XDZB ) );
-}
-
-inline void storeXYZArray( const Point3 & vec, vec_float4 * threeQuads )
-{
-    vec_float4 xyzx, yzxy, zxyz, xyxy, zxzx, yzyz;
-    xyxy = vec_perm( vec.getX(), vec.getY(), _VECTORMATH_PERM_ZCXA );
-    zxzx = vec_perm( vec.getZ(), vec.getX(), _VECTORMATH_PERM_XBZD );
-    yzyz = vec_perm( vec.getY(), vec.getZ(), _VECTORMATH_PERM_WDYB );
-    xyzx = vec_sld( xyxy, zxzx, 8 );
-    yzxy = vec_sld( yzyz, xyxy, 8 );
-    zxyz = vec_sld( zxzx, yzyz, 8 );
-    threeQuads[0] = xyzx;
-    threeQuads[1] = yzxy;
-    threeQuads[2] = zxyz;
-}
-
-inline void storeHalfFloats( const Point3 & pnt0, const Point3 & pnt1, vec_ushort8 * threeQuads )
-{
-    vec_float4 xyz0[3];
-    vec_float4 xyz1[3];
-    storeXYZArray( pnt0, xyz0 );
-    storeXYZArray( pnt1, xyz1 );
-    threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
-    threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
-    threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
-}
-
-inline Point3 & Point3::operator =( const Point3 & pnt )
-{
-    mX = pnt.mX;
-    mY = pnt.mY;
-    mZ = pnt.mZ;
-    return *this;
-}
-
-inline Point3 & Point3::setX( vec_float4 _x )
-{
-    mX = _x;
-    return *this;
-}
-
-inline vec_float4 Point3::getX( ) const
-{
-    return mX;
-}
-
-inline Point3 & Point3::setY( vec_float4 _y )
-{
-    mY = _y;
-    return *this;
-}
-
-inline vec_float4 Point3::getY( ) const
-{
-    return mY;
-}
-
-inline Point3 & Point3::setZ( vec_float4 _z )
-{
-    mZ = _z;
-    return *this;
-}
-
-inline vec_float4 Point3::getZ( ) const
-{
-    return mZ;
-}
-
-inline Point3 & Point3::setElem( int idx, vec_float4 value )
-{
-    *(&mX + idx) = value;
-    return *this;
-}
-
-inline vec_float4 Point3::getElem( int idx ) const
-{
-    return *(&mX + idx);
-}
-
-inline Point3::vec_float4_t & Point3::operator []( int idx )
-{
-    return *(&mX + idx);
-}
-
-inline vec_float4 Point3::operator []( int idx ) const
-{
-    return *(&mX + idx);
-}
-
-inline const Vector3 Point3::operator -( const Point3 & pnt ) const
-{
-    return Vector3(
-        vec_sub( mX, pnt.mX ),
-        vec_sub( mY, pnt.mY ),
-        vec_sub( mZ, pnt.mZ )
-    );
-}
-
-inline const Point3 Point3::operator +( const Vector3 & vec ) const
-{
-    return Point3(
-        vec_add( mX, vec.getX() ),
-        vec_add( mY, vec.getY() ),
-        vec_add( mZ, vec.getZ() )
-    );
-}
-
-inline const Point3 Point3::operator -( const Vector3 & vec ) const
-{
-    return Point3(
-        vec_sub( mX, vec.getX() ),
-        vec_sub( mY, vec.getY() ),
-        vec_sub( mZ, vec.getZ() )
-    );
-}
-
-inline Point3 & Point3::operator +=( const Vector3 & vec )
-{
-    *this = *this + vec;
-    return *this;
-}
-
-inline Point3 & Point3::operator -=( const Vector3 & vec )
-{
-    *this = *this - vec;
-    return *this;
-}
-
-inline const Point3 mulPerElem( const Point3 & pnt0, const Point3 & pnt1 )
-{
-    return Point3(
-        vec_madd( pnt0.getX(), pnt1.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        vec_madd( pnt0.getY(), pnt1.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ),
-        vec_madd( pnt0.getZ(), pnt1.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) )
-    );
-}
-
-inline const Point3 divPerElem( const Point3 & pnt0, const Point3 & pnt1 )
-{
-    return Point3(
-        divf4( pnt0.getX(), pnt1.getX() ),
-        divf4( pnt0.getY(), pnt1.getY() ),
-        divf4( pnt0.getZ(), pnt1.getZ() )
-    );
-}
-
-inline const Point3 recipPerElem( const Point3 & pnt )
-{
-    return Point3(
-        divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), pnt.getX() ),
-        divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), pnt.getY() ),
-        divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), pnt.getZ() )
-    );
-}
-
-inline const Point3 sqrtPerElem( const Point3 & pnt )
-{
-    return Point3(
-        sqrtf4( pnt.getX() ),
-        sqrtf4( pnt.getY() ),
-        sqrtf4( pnt.getZ() )
-    );
-}
-
-inline const Point3 rsqrtPerElem( const Point3 & pnt )
-{
-    return Point3(
-        divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( pnt.getX() ) ),
-        divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( pnt.getY() ) ),
-        divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sqrtf4( pnt.getZ() ) )
-    );
-}
-
-inline const Point3 absPerElem( const Point3 & pnt )
-{
-    return Point3(
-        fabsf4( pnt.getX() ),
-        fabsf4( pnt.getY() ),
-        fabsf4( pnt.getZ() )
-    );
-}
-
-inline const Point3 copySignPerElem( const Point3 & pnt0, const Point3 & pnt1 )
-{
-    return Point3(
-        copysignf4( pnt0.getX(), pnt1.getX() ),
-        copysignf4( pnt0.getY(), pnt1.getY() ),
-        copysignf4( pnt0.getZ(), pnt1.getZ() )
-    );
-}
-
-inline const Point3 maxPerElem( const Point3 & pnt0, const Point3 & pnt1 )
-{
-    return Point3(
-        fmaxf4( pnt0.getX(), pnt1.getX() ),
-        fmaxf4( pnt0.getY(), pnt1.getY() ),
-        fmaxf4( pnt0.getZ(), pnt1.getZ() )
-    );
-}
-
-inline vec_float4 maxElem( const Point3 & pnt )
-{
-    vec_float4 result;
-    result = fmaxf4( pnt.getX(), pnt.getY() );
-    result = fmaxf4( pnt.getZ(), result );
-    return result;
-}
-
-inline const Point3 minPerElem( const Point3 & pnt0, const Point3 & pnt1 )
-{
-    return Point3(
-        fminf4( pnt0.getX(), pnt1.getX() ),
-        fminf4( pnt0.getY(), pnt1.getY() ),
-        fminf4( pnt0.getZ(), pnt1.getZ() )
-    );
-}
-
-inline vec_float4 minElem( const Point3 & pnt )
-{
-    vec_float4 result;
-    result = fminf4( pnt.getX(), pnt.getY() );
-    result = fminf4( pnt.getZ(), result );
-    return result;
-}
-
-inline vec_float4 sum( const Point3 & pnt )
-{
-    vec_float4 result;
-    result = vec_add( pnt.getX(), pnt.getY() );
-    result = vec_add( result, pnt.getZ() );
-    return result;
-}
-
-inline const Point3 scale( const Point3 & pnt, vec_float4 scaleVal )
-{
-    return mulPerElem( pnt, Point3( scaleVal ) );
-}
-
-inline const Point3 scale( const Point3 & pnt, const Vector3 & scaleVec )
-{
-    return mulPerElem( pnt, Point3( scaleVec ) );
-}
-
-inline vec_float4 projection( const Point3 & pnt, const Vector3 & unitVec )
-{
-    vec_float4 result;
-    result = vec_madd( pnt.getX(), unitVec.getX(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) );
-    result = vec_add( result, vec_madd( pnt.getY(), unitVec.getY(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    result = vec_add( result, vec_madd( pnt.getZ(), unitVec.getZ(), ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) );
-    return result;
-}
-
-inline vec_float4 distSqrFromOrigin( const Point3 & pnt )
-{
-    return lengthSqr( Vector3( pnt ) );
-}
-
-inline vec_float4 distFromOrigin( const Point3 & pnt )
-{
-    return length( Vector3( pnt ) );
-}
-
-inline vec_float4 distSqr( const Point3 & pnt0, const Point3 & pnt1 )
-{
-    return lengthSqr( ( pnt1 - pnt0 ) );
-}
-
-inline vec_float4 dist( const Point3 & pnt0, const Point3 & pnt1 )
-{
-    return length( ( pnt1 - pnt0 ) );
-}
-
-inline const Point3 select( const Point3 & pnt0, const Point3 & pnt1, vec_uint4 select1 )
-{
-    return Point3(
-        vec_sel( pnt0.getX(), pnt1.getX(), select1 ),
-        vec_sel( pnt0.getY(), pnt1.getY(), select1 ),
-        vec_sel( pnt0.getZ(), pnt1.getZ(), select1 )
-    );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( const Point3 & pnt )
-{
-    Aos::Point3 vec0, vec1, vec2, vec3;
-    pnt.get4Aos( vec0, vec1, vec2, vec3 );
-    printf("slot 0:\n");
-    print( vec0 );
-    printf("slot 1:\n");
-    print( vec1 );
-    printf("slot 2:\n");
-    print( vec2 );
-    printf("slot 3:\n");
-    print( vec3 );
-}
-
-inline void print( const Point3 & pnt, const char * name )
-{
-    Aos::Point3 vec0, vec1, vec2, vec3;
-    printf( "%s:\n", name );
-    pnt.get4Aos( vec0, vec1, vec2, vec3 );
-    printf("slot 0:\n");
-    print( vec0 );
-    printf("slot 1:\n");
-    print( vec1 );
-    printf("slot 2:\n");
-    print( vec2 );
-    printf("slot 3:\n");
-    print( vec3 );
-}
-
-#endif
-
-} // namespace Soa
-} // namespace Vectormath
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_VEC_SOA_CPP_H
+#define _VECTORMATH_VEC_SOA_CPP_H
+//-----------------------------------------------------------------------------
+// Constants
+// For permutes, the four 32-bit words of the first data operand are labeled
+// [x,y,z,w] and the four words of the second data operand are labeled [a,b,c,d].
+
+// Single-word selectors: each value holds the four consecutive byte indices of
+// one source word (0x00-0x0f address the first operand, 0x10-0x1f the second).
+#define _VECTORMATH_PERM_X 0x00010203
+#define _VECTORMATH_PERM_Y 0x04050607
+#define _VECTORMATH_PERM_Z 0x08090a0b
+#define _VECTORMATH_PERM_W 0x0c0d0e0f
+#define _VECTORMATH_PERM_A 0x10111213
+#define _VECTORMATH_PERM_B 0x14151617
+#define _VECTORMATH_PERM_C 0x18191a1b
+#define _VECTORMATH_PERM_D 0x1c1d1e1f
+// Full permute masks: the suffix names the source word picked for each of the
+// four result words, in order.
+#define _VECTORMATH_PERM_ZBWX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B, _VECTORMATH_PERM_W, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_XCYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_C, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_ZDWX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_D, _VECTORMATH_PERM_W, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_ZCXA ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_A })
+#define _VECTORMATH_PERM_XBZD ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_B, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_D })
+#define _VECTORMATH_PERM_WDYB ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_W, _VECTORMATH_PERM_D, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_B })
+#define _VECTORMATH_PERM_ZBXD ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B, _VECTORMATH_PERM_X, _VECTORMATH_PERM_D })
+#define _VECTORMATH_PERM_WCYA ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_W, _VECTORMATH_PERM_C, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A })
+#define _VECTORMATH_PERM_XDZB ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_D, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B })
+// Cosine threshold used by slerp(): lanes whose cosine is not below this value
+// use plain linear weights instead of the sine-based weights.
+#define _VECTORMATH_SLERP_TOL 0.999f
+
+//-----------------------------------------------------------------------------
+// Definitions
+
+#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
+#define _VECTORMATH_INTERNAL_FUNCTIONS
+
+#endif
+
+namespace Vectormath {
+namespace Soa {
+
+// Copy constructor: duplicates the three component quads of vec.
+inline Vector3::Vector3( const Vector3 & vec )
+    : mX( vec.mX ), mY( vec.mY ), mZ( vec.mZ )
+{
+}
+
+// Builds a vector directly from its X, Y and Z quads.
+inline Vector3::Vector3( vec_float4 _x, vec_float4 _y, vec_float4 _z )
+    : mX( _x ), mY( _y ), mZ( _z )
+{
+}
+
+// Converts a point to a vector by copying its X, Y and Z quads.
+inline Vector3::Vector3( const Point3 & pnt )
+    : mX( pnt.getX() ), mY( pnt.getY() ), mZ( pnt.getZ() )
+{
+}
+
+// Sets all three components to the same quad.
+inline Vector3::Vector3( vec_float4 scalar )
+    : mX( scalar ), mY( scalar ), mZ( scalar )
+{
+}
+
+// Broadcast constructor: words 0, 1 and 2 of the AoS vector's 128-bit value
+// are replicated across every lane of mX, mY and mZ respectively.
+inline Vector3::Vector3( Aos::Vector3 vec )
+{
+    const vec_float4 packed = vec.get128();
+    mX = vec_splat( packed, 0 );
+    mY = vec_splat( packed, 1 );
+    mZ = vec_splat( packed, 2 );
+}
+
+// Builds an SoA vector from four AoS vectors by transposing their 128-bit
+// values with merges; vecN supplies lane N of each component.
+inline Vector3::Vector3( Aos::Vector3 vec0, Aos::Vector3 vec1, Aos::Vector3 vec2, Aos::Vector3 vec3 )
+{
+    // Pair inputs 0/2 and 1/3: the high merges carry words 0-1, the low merges words 2-3.
+    const vec_float4 hi02 = vec_mergeh( vec0.get128(), vec2.get128() );
+    const vec_float4 hi13 = vec_mergeh( vec1.get128(), vec3.get128() );
+    const vec_float4 lo02 = vec_mergel( vec0.get128(), vec2.get128() );
+    const vec_float4 lo13 = vec_mergel( vec1.get128(), vec3.get128() );
+    mX = vec_mergeh( hi02, hi13 );
+    mY = vec_mergel( hi02, hi13 );
+    mZ = vec_mergeh( lo02, lo13 );
+}
+
+// Returns (1,0,0) in every lane.
+inline const Vector3 Vector3::xAxis( )
+{
+    const vec_float4 ones = ((vec_float4){1.0f,1.0f,1.0f,1.0f});
+    const vec_float4 zeros = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    return Vector3( ones, zeros, zeros );
+}
+
+// Returns (0,1,0) in every lane.
+inline const Vector3 Vector3::yAxis( )
+{
+    const vec_float4 ones = ((vec_float4){1.0f,1.0f,1.0f,1.0f});
+    const vec_float4 zeros = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    return Vector3( zeros, ones, zeros );
+}
+
+// Returns (0,0,1) in every lane.
+inline const Vector3 Vector3::zAxis( )
+{
+    const vec_float4 ones = ((vec_float4){1.0f,1.0f,1.0f,1.0f});
+    const vec_float4 zeros = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    return Vector3( zeros, zeros, ones );
+}
+
+// Linear interpolation: vec0 + (vec1 - vec0) * t, with t applied per lane.
+inline const Vector3 lerp( vec_float4 t, const Vector3 & vec0, const Vector3 & vec1 )
+{
+    const Vector3 delta = vec1 - vec0;
+    return vec0 + ( delta * t );
+}
+
+// Spherical linear interpolation between unitVec0 and unitVec1, evaluated
+// independently per lane with interpolation parameter t. The parameter names
+// indicate the inputs are expected to be unit length; this is not checked.
+inline const Vector3 slerp( vec_float4 t, const Vector3 & unitVec0, const Vector3 & unitVec1 )
+{
+    vec_float4 recipSinAngle, scale0, scale1, cosAngle, angle;
+    vec_uint4 selectMask;
+    cosAngle = dot( unitVec0, unitVec1 );
+    // Mask is set in lanes where the cosine is below the tolerance; only those
+    // lanes take the sine-based weights computed below.
+    selectMask = (vec_uint4)vec_cmpgt( (vec_float4){_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL}, cosAngle );
+    angle = acosf4( cosAngle );
+    recipSinAngle = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sinf4( angle ) );
+    // scale0: sin((1-t)*angle)/sin(angle) where masked, otherwise the linear weight (1-t).
+    scale0 = vec_sel( vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), t ), vec_madd( sinf4( vec_madd( vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), t ), angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), recipSinAngle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), selectMask );
+    // scale1: sin(t*angle)/sin(angle) where masked, otherwise the linear weight t.
+    scale1 = vec_sel( t, vec_madd( sinf4( vec_madd( t, angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), recipSinAngle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), selectMask );
+    return ( ( unitVec0 * scale0 ) + ( unitVec1 * scale1 ) );
+}
+
+// Converts the SoA data into four AoS vectors; resultN receives lane N of the
+// X, Y and Z quads in its first three words.
+inline void Vector3::get4Aos( Aos::Vector3 & result0, Aos::Vector3 & result1, Aos::Vector3 & result2, Aos::Vector3 & result3 ) const
+{
+    vec_float4 tmp0, tmp1;
+    // Interleave X with Z: tmp0 holds lanes 0-1, tmp1 holds lanes 2-3.
+    tmp0 = vec_mergeh( mX, mZ );
+    tmp1 = vec_mergel( mX, mZ );
+    // Weave the matching Y word between each X/Z pair. The fourth word of every
+    // result is whatever the shuffle leaves there, not a meaningful component.
+    result0 = Aos::Vector3( vec_mergeh( tmp0, mY ) );
+    result1 = Aos::Vector3( vec_perm( tmp0, mY, _VECTORMATH_PERM_ZBWX ) );
+    result2 = Aos::Vector3( vec_perm( tmp1, mY, _VECTORMATH_PERM_XCYX ) );
+    result3 = Aos::Vector3( vec_perm( tmp1, mY, _VECTORMATH_PERM_ZDWX ) );
+}
+
+// Reads three quads from threeQuads and de-interleaves them into the X, Y and
+// Z quads of vec. As the local names indicate, the input is treated as four
+// tightly packed xyz triples: [x y z x] [y z x y] [z x y z].
+inline void loadXYZArray( Vector3 & vec, const vec_float4 * threeQuads )
+{
+    vec_float4 xyxy, yzyz, zxzx, xyzx, yzxy, zxyz;
+    xyzx = threeQuads[0];
+    yzxy = threeQuads[1];
+    zxyz = threeQuads[2];
+    // Shift adjacent quads by 8 bytes so each temporary holds only two of the
+    // three component kinds.
+    xyxy = vec_sld( yzxy, xyzx, 8 );
+    zxzx = vec_sld( xyzx, zxyz, 8 );
+    yzyz = vec_sld( zxyz, yzxy, 8 );
+    // Gather the four words of each component, in lane order, from the pairs.
+    vec.setX( vec_perm( xyxy, zxzx, _VECTORMATH_PERM_ZBXD ) );
+    vec.setY( vec_perm( xyxy, yzyz, _VECTORMATH_PERM_WCYA ) );
+    vec.setZ( vec_perm( zxzx, yzyz, _VECTORMATH_PERM_XDZB ) );
+}
+
+// Interleaves the X, Y and Z quads of vec and writes them to threeQuads as
+// three quads. As the local names indicate, the output is four tightly packed
+// xyz triples: [x y z x] [y z x y] [z x y z].
+inline void storeXYZArray( const Vector3 & vec, vec_float4 * threeQuads )
+{
+    vec_float4 xyzx, yzxy, zxyz, xyxy, zxzx, yzyz;
+    // Build pairwise-interleaved temporaries from two components each.
+    xyxy = vec_perm( vec.getX(), vec.getY(), _VECTORMATH_PERM_ZCXA );
+    zxzx = vec_perm( vec.getZ(), vec.getX(), _VECTORMATH_PERM_XBZD );
+    yzyz = vec_perm( vec.getY(), vec.getZ(), _VECTORMATH_PERM_WDYB );
+    // Combine halves of adjacent temporaries (8-byte shifts) into the packed order.
+    xyzx = vec_sld( xyxy, zxzx, 8 );
+    yzxy = vec_sld( yzyz, xyxy, 8 );
+    zxyz = vec_sld( zxzx, yzyz, 8 );
+    threeQuads[0] = xyzx;
+    threeQuads[1] = yzxy;
+    threeQuads[2] = zxyz;
+}
+
+// Packs vec0 and vec1 with storeXYZArray (three quads each), then converts
+// the six quads pairwise with _vmath2VfToHalfFloats into threeQuads[0..2].
+inline void storeHalfFloats( const Vector3 & vec0, const Vector3 & vec1, vec_ushort8 * threeQuads )
+{
+    vec_float4 packed0[3];
+    vec_float4 packed1[3];
+    storeXYZArray( vec0, packed0 );
+    storeXYZArray( vec1, packed1 );
+    // Consecutive quads of the combined six-quad stream are converted two at a time.
+    threeQuads[0] = _vmath2VfToHalfFloats( packed0[0], packed0[1] );
+    threeQuads[1] = _vmath2VfToHalfFloats( packed0[2], packed1[0] );
+    threeQuads[2] = _vmath2VfToHalfFloats( packed1[1], packed1[2] );
+}
+
+// Copy assignment: overwrites all three component quads and returns *this.
+inline Vector3 & Vector3::operator =( const Vector3 & vec )
+{
+    this->mX = vec.mX;
+    this->mY = vec.mY;
+    this->mZ = vec.mZ;
+    return *this;
+}
+
+// Replaces the X quad; returns *this so calls can be chained.
+inline Vector3 & Vector3::setX( vec_float4 _x )
+{
+    this->mX = _x;
+    return *this;
+}
+
+// Returns the X quad.
+inline vec_float4 Vector3::getX( ) const
+{
+    return this->mX;
+}
+
+// Replaces the Y quad; returns *this so calls can be chained.
+inline Vector3 & Vector3::setY( vec_float4 _y )
+{
+    this->mY = _y;
+    return *this;
+}
+
+// Returns the Y quad.
+inline vec_float4 Vector3::getY( ) const
+{
+    return this->mY;
+}
+
+// Replaces the Z quad; returns *this so calls can be chained.
+inline Vector3 & Vector3::setZ( vec_float4 _z )
+{
+    this->mZ = _z;
+    return *this;
+}
+
+// Returns the Z quad.
+inline vec_float4 Vector3::getZ( ) const
+{
+    return this->mZ;
+}
+
+// Writes the component quad selected by idx, counted from mX. The index is
+// not range-checked and the members are addressed as a contiguous array.
+inline Vector3 & Vector3::setElem( int idx, vec_float4 value )
+{
+    ( &mX )[ idx ] = value;
+    return *this;
+}
+
+// Reads the component quad selected by idx, counted from mX (no range check).
+inline vec_float4 Vector3::getElem( int idx ) const
+{
+    return ( &mX )[ idx ];
+}
+
+// Mutable access to the component quad selected by idx, counted from mX
+// (no range check).
+inline Vector3::vec_float4_t & Vector3::operator []( int idx )
+{
+    return ( &mX )[ idx ];
+}
+
+// Read-only access to the component quad selected by idx, counted from mX
+// (no range check).
+inline vec_float4 Vector3::operator []( int idx ) const
+{
+    return ( &mX )[ idx ];
+}
+
+// Component-wise sum of two vectors.
+inline const Vector3 Vector3::operator +( const Vector3 & vec ) const
+{
+    const vec_float4 sumX = vec_add( mX, vec.mX );
+    const vec_float4 sumY = vec_add( mY, vec.mY );
+    const vec_float4 sumZ = vec_add( mZ, vec.mZ );
+    return Vector3( sumX, sumY, sumZ );
+}
+
+// Component-wise difference of two vectors (this minus vec).
+inline const Vector3 Vector3::operator -( const Vector3 & vec ) const
+{
+    const vec_float4 diffX = vec_sub( mX, vec.mX );
+    const vec_float4 diffY = vec_sub( mY, vec.mY );
+    const vec_float4 diffZ = vec_sub( mZ, vec.mZ );
+    return Vector3( diffX, diffY, diffZ );
+}
+
+// Adds this vector to a point component-wise, yielding a point.
+inline const Point3 Vector3::operator +( const Point3 & pnt ) const
+{
+    const vec_float4 sumX = vec_add( mX, pnt.getX() );
+    const vec_float4 sumY = vec_add( mY, pnt.getY() );
+    const vec_float4 sumZ = vec_add( mZ, pnt.getZ() );
+    return Point3( sumX, sumY, sumZ );
+}
+
+// Multiplies every component by scalar, lane by lane (multiply-add with a
+// zero addend).
+inline const Vector3 Vector3::operator *( vec_float4 scalar ) const
+{
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    return Vector3(
+        vec_madd( mX, scalar, zero ),
+        vec_madd( mY, scalar, zero ),
+        vec_madd( mZ, scalar, zero )
+    );
+}
+
+// In-place addition, implemented through operator + and assignment.
+inline Vector3 & Vector3::operator +=( const Vector3 & vec )
+{
+    return ( *this = *this + vec );
+}
+
+// In-place subtraction, implemented through operator - and assignment.
+inline Vector3 & Vector3::operator -=( const Vector3 & vec )
+{
+    return ( *this = *this - vec );
+}
+
+// In-place scaling, implemented through operator * and assignment.
+inline Vector3 & Vector3::operator *=( vec_float4 scalar )
+{
+    return ( *this = *this * scalar );
+}
+
+// Divides every component by scalar, lane by lane, using divf4.
+inline const Vector3 Vector3::operator /( vec_float4 scalar ) const
+{
+    const vec_float4 quotX = divf4( mX, scalar );
+    const vec_float4 quotY = divf4( mY, scalar );
+    const vec_float4 quotZ = divf4( mZ, scalar );
+    return Vector3( quotX, quotY, quotZ );
+}
+
+// In-place division, implemented through operator / and assignment.
+inline Vector3 & Vector3::operator /=( vec_float4 scalar )
+{
+    return ( *this = *this / scalar );
+}
+
+// Unary minus: applies negatef4 to each component.
+inline const Vector3 Vector3::operator -( ) const
+{
+    const vec_float4 negX = negatef4( mX );
+    const vec_float4 negY = negatef4( mY );
+    const vec_float4 negZ = negatef4( mZ );
+    return Vector3( negX, negY, negZ );
+}
+
+// Scalar-on-the-left multiplication; forwards to Vector3::operator *.
+inline const Vector3 operator *( vec_float4 scalar, const Vector3 & vec )
+{
+    return vec.operator *( scalar );
+}
+
+// Component-wise product of two vectors (multiply-add with a zero addend).
+inline const Vector3 mulPerElem( const Vector3 & vec0, const Vector3 & vec1 )
+{
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    return Vector3(
+        vec_madd( vec0.getX(), vec1.getX(), zero ),
+        vec_madd( vec0.getY(), vec1.getY(), zero ),
+        vec_madd( vec0.getZ(), vec1.getZ(), zero )
+    );
+}
+
+// Component-wise quotient vec0 / vec1, computed with divf4.
+inline const Vector3 divPerElem( const Vector3 & vec0, const Vector3 & vec1 )
+{
+    const vec_float4 quotX = divf4( vec0.getX(), vec1.getX() );
+    const vec_float4 quotY = divf4( vec0.getY(), vec1.getY() );
+    const vec_float4 quotZ = divf4( vec0.getZ(), vec1.getZ() );
+    return Vector3( quotX, quotY, quotZ );
+}
+
+// Component-wise reciprocal, computed as 1 / component with divf4.
+inline const Vector3 recipPerElem( const Vector3 & vec )
+{
+    const vec_float4 one = ((vec_float4){1.0f,1.0f,1.0f,1.0f});
+    return Vector3(
+        divf4( one, vec.getX() ),
+        divf4( one, vec.getY() ),
+        divf4( one, vec.getZ() )
+    );
+}
+
+// Component-wise square root via sqrtf4.
+inline const Vector3 sqrtPerElem( const Vector3 & vec )
+{
+    const vec_float4 rootX = sqrtf4( vec.getX() );
+    const vec_float4 rootY = sqrtf4( vec.getY() );
+    const vec_float4 rootZ = sqrtf4( vec.getZ() );
+    return Vector3( rootX, rootY, rootZ );
+}
+
+// Component-wise reciprocal square root, computed as 1 / sqrtf4(component).
+inline const Vector3 rsqrtPerElem( const Vector3 & vec )
+{
+    const vec_float4 one = ((vec_float4){1.0f,1.0f,1.0f,1.0f});
+    return Vector3(
+        divf4( one, sqrtf4( vec.getX() ) ),
+        divf4( one, sqrtf4( vec.getY() ) ),
+        divf4( one, sqrtf4( vec.getZ() ) )
+    );
+}
+
+// Component-wise absolute value via fabsf4.
+inline const Vector3 absPerElem( const Vector3 & vec )
+{
+    const vec_float4 absX = fabsf4( vec.getX() );
+    const vec_float4 absY = fabsf4( vec.getY() );
+    const vec_float4 absZ = fabsf4( vec.getZ() );
+    return Vector3( absX, absY, absZ );
+}
+
+// Applies copysignf4 per component, passing vec0's component first and
+// vec1's component second.
+inline const Vector3 copySignPerElem( const Vector3 & vec0, const Vector3 & vec1 )
+{
+    const vec_float4 outX = copysignf4( vec0.getX(), vec1.getX() );
+    const vec_float4 outY = copysignf4( vec0.getY(), vec1.getY() );
+    const vec_float4 outZ = copysignf4( vec0.getZ(), vec1.getZ() );
+    return Vector3( outX, outY, outZ );
+}
+
+// Component-wise maximum of two vectors via fmaxf4.
+inline const Vector3 maxPerElem( const Vector3 & vec0, const Vector3 & vec1 )
+{
+    const vec_float4 maxX = fmaxf4( vec0.getX(), vec1.getX() );
+    const vec_float4 maxY = fmaxf4( vec0.getY(), vec1.getY() );
+    const vec_float4 maxZ = fmaxf4( vec0.getZ(), vec1.getZ() );
+    return Vector3( maxX, maxY, maxZ );
+}
+
+// Per lane, the largest of the X, Y and Z components.
+inline vec_float4 maxElem( const Vector3 & vec )
+{
+    const vec_float4 maxXY = fmaxf4( vec.getX(), vec.getY() );
+    return fmaxf4( vec.getZ(), maxXY );
+}
+
+// Component-wise minimum of two vectors via fminf4.
+inline const Vector3 minPerElem( const Vector3 & vec0, const Vector3 & vec1 )
+{
+    const vec_float4 minX = fminf4( vec0.getX(), vec1.getX() );
+    const vec_float4 minY = fminf4( vec0.getY(), vec1.getY() );
+    const vec_float4 minZ = fminf4( vec0.getZ(), vec1.getZ() );
+    return Vector3( minX, minY, minZ );
+}
+
+// Per lane, the smallest of the X, Y and Z components.
+inline vec_float4 minElem( const Vector3 & vec )
+{
+    const vec_float4 minXY = fminf4( vec.getX(), vec.getY() );
+    return fminf4( vec.getZ(), minXY );
+}
+
+// Per lane, X + Y + Z (added in that order).
+inline vec_float4 sum( const Vector3 & vec )
+{
+    const vec_float4 sumXY = vec_add( vec.getX(), vec.getY() );
+    return vec_add( sumXY, vec.getZ() );
+}
+
+// Per-lane dot product: the three component products are formed separately
+// and accumulated in X, Y, Z order.
+inline vec_float4 dot( const Vector3 & vec0, const Vector3 & vec1 )
+{
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    const vec_float4 prodX = vec_madd( vec0.getX(), vec1.getX(), zero );
+    const vec_float4 prodY = vec_madd( vec0.getY(), vec1.getY(), zero );
+    const vec_float4 prodZ = vec_madd( vec0.getZ(), vec1.getZ(), zero );
+    return vec_add( vec_add( prodX, prodY ), prodZ );
+}
+
+// Per-lane squared length: the component squares are formed separately and
+// accumulated in X, Y, Z order.
+inline vec_float4 lengthSqr( const Vector3 & vec )
+{
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    const vec_float4 sqrX = vec_madd( vec.getX(), vec.getX(), zero );
+    const vec_float4 sqrY = vec_madd( vec.getY(), vec.getY(), zero );
+    const vec_float4 sqrZ = vec_madd( vec.getZ(), vec.getZ(), zero );
+    return vec_add( vec_add( sqrX, sqrY ), sqrZ );
+}
+
+// Per-lane length: square root of lengthSqr.
+inline vec_float4 length( const Vector3 & vec )
+{
+    const vec_float4 lenSqr = lengthSqr( vec );
+    return sqrtf4( lenSqr );
+}
+
+// Scales vec by 1 / sqrt(lengthSqr(vec)) per lane. There is no guard against
+// a zero length.
+inline const Vector3 normalize( const Vector3 & vec )
+{
+    const vec_float4 one = ((vec_float4){1.0f,1.0f,1.0f,1.0f});
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    const vec_float4 invLen = divf4( one, sqrtf4( lengthSqr( vec ) ) );
+    return Vector3(
+        vec_madd( vec.getX(), invLen, zero ),
+        vec_madd( vec.getY(), invLen, zero ),
+        vec_madd( vec.getZ(), invLen, zero )
+    );
+}
+
+// Per-lane cross product vec0 x vec1. Each term is a separate product and the
+// two terms of every component are subtracted afterwards.
+inline const Vector3 cross( const Vector3 & vec0, const Vector3 & vec1 )
+{
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    const vec_float4 yz = vec_madd( vec0.getY(), vec1.getZ(), zero );
+    const vec_float4 zy = vec_madd( vec0.getZ(), vec1.getY(), zero );
+    const vec_float4 zx = vec_madd( vec0.getZ(), vec1.getX(), zero );
+    const vec_float4 xz = vec_madd( vec0.getX(), vec1.getZ(), zero );
+    const vec_float4 xy = vec_madd( vec0.getX(), vec1.getY(), zero );
+    const vec_float4 yx = vec_madd( vec0.getY(), vec1.getX(), zero );
+    return Vector3( vec_sub( yz, zy ), vec_sub( zx, xz ), vec_sub( xy, yx ) );
+}
+
+// Applies vec_sel with the same mask (select1) to each component pair of
+// vec0 and vec1.
+inline const Vector3 select( const Vector3 & vec0, const Vector3 & vec1, vec_uint4 select1 )
+{
+    const vec_float4 pickX = vec_sel( vec0.getX(), vec1.getX(), select1 );
+    const vec_float4 pickY = vec_sel( vec0.getY(), vec1.getY(), select1 );
+    const vec_float4 pickZ = vec_sel( vec0.getZ(), vec1.getZ(), select1 );
+    return Vector3( pickX, pickY, pickZ );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+// Debug helper: unpacks the four lanes into AoS vectors and prints each one
+// under a "slot N:" heading.
+inline void print( const Vector3 & vec )
+{
+    Aos::Vector3 slot0, slot1, slot2, slot3;
+    vec.get4Aos( slot0, slot1, slot2, slot3 );
+    printf("slot 0:\n");
+    print( slot0 );
+    printf("slot 1:\n");
+    print( slot1 );
+    printf("slot 2:\n");
+    print( slot2 );
+    printf("slot 3:\n");
+    print( slot3 );
+}
+
+// Debug helper: prints name as a heading, then each of the four lanes as an
+// AoS vector under a "slot N:" heading.
+inline void print( const Vector3 & vec, const char * name )
+{
+    Aos::Vector3 slot0, slot1, slot2, slot3;
+    printf( "%s:\n", name );
+    vec.get4Aos( slot0, slot1, slot2, slot3 );
+    printf("slot 0:\n");
+    print( slot0 );
+    printf("slot 1:\n");
+    print( slot1 );
+    printf("slot 2:\n");
+    print( slot2 );
+    printf("slot 3:\n");
+    print( slot3 );
+}
+
+#endif
+
+// Copy constructor: duplicates the four component quads of vec.
+inline Vector4::Vector4( const Vector4 & vec )
+    : mX( vec.mX ), mY( vec.mY ), mZ( vec.mZ ), mW( vec.mW )
+{
+}
+
+// Builds a vector directly from its X, Y, Z and W quads.
+inline Vector4::Vector4( vec_float4 _x, vec_float4 _y, vec_float4 _z, vec_float4 _w )
+    : mX( _x ), mY( _y ), mZ( _z ), mW( _w )
+{
+}
+
+// Takes X, Y and Z from a 3-D vector and W from a separate quad.
+inline Vector4::Vector4( const Vector3 & xyz, vec_float4 _w )
+{
+    mX = xyz.getX();
+    mY = xyz.getY();
+    mZ = xyz.getZ();
+    mW = _w;
+}
+
+inline Vector4::Vector4( const Vector3 & vec )
+{
+    mX = vec.getX();
+    mY = vec.getY();
+    mZ = vec.getZ();
+    mW = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+}
+
+inline Vector4::Vector4( const Point3 & pnt )
+{
+    mX = pnt.getX();
+    mY = pnt.getY();
+    mZ = pnt.getZ();
+    mW = ((vec_float4){1.0f,1.0f,1.0f,1.0f});
+}
+
+// Copies the X, Y, Z and W quads of a quaternion.
+inline Vector4::Vector4( const Quat & quat )
+    : mX( quat.getX() ), mY( quat.getY() ), mZ( quat.getZ() ), mW( quat.getW() )
+{
+}
+
+// Sets all four components to the same quad.
+inline Vector4::Vector4( vec_float4 scalar )
+    : mX( scalar ), mY( scalar ), mZ( scalar ), mW( scalar )
+{
+}
+
+// Broadcast constructor: words 0..3 of the AoS vector's 128-bit value are
+// replicated across every lane of mX, mY, mZ and mW respectively.
+inline Vector4::Vector4( Aos::Vector4 vec )
+{
+    const vec_float4 packed = vec.get128();
+    mX = vec_splat( packed, 0 );
+    mY = vec_splat( packed, 1 );
+    mZ = vec_splat( packed, 2 );
+    mW = vec_splat( packed, 3 );
+}
+
+// Builds an SoA vector from four AoS vectors with a 4x4 transpose of their
+// 128-bit values; vecN supplies lane N of each component.
+inline Vector4::Vector4( Aos::Vector4 vec0, Aos::Vector4 vec1, Aos::Vector4 vec2, Aos::Vector4 vec3 )
+{
+    // Pair inputs 0/2 and 1/3: the high merges carry words 0-1, the low merges words 2-3.
+    const vec_float4 hi02 = vec_mergeh( vec0.get128(), vec2.get128() );
+    const vec_float4 hi13 = vec_mergeh( vec1.get128(), vec3.get128() );
+    const vec_float4 lo02 = vec_mergel( vec0.get128(), vec2.get128() );
+    const vec_float4 lo13 = vec_mergel( vec1.get128(), vec3.get128() );
+    mX = vec_mergeh( hi02, hi13 );
+    mY = vec_mergel( hi02, hi13 );
+    mZ = vec_mergeh( lo02, lo13 );
+    mW = vec_mergel( lo02, lo13 );
+}
+
+// Returns (1,0,0,0) in every lane.
+inline const Vector4 Vector4::xAxis( )
+{
+    const vec_float4 ones = ((vec_float4){1.0f,1.0f,1.0f,1.0f});
+    const vec_float4 zeros = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    return Vector4( ones, zeros, zeros, zeros );
+}
+
+// Returns (0,1,0,0) in every lane.
+inline const Vector4 Vector4::yAxis( )
+{
+    const vec_float4 ones = ((vec_float4){1.0f,1.0f,1.0f,1.0f});
+    const vec_float4 zeros = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    return Vector4( zeros, ones, zeros, zeros );
+}
+
+// Returns (0,0,1,0) in every lane.
+inline const Vector4 Vector4::zAxis( )
+{
+    const vec_float4 ones = ((vec_float4){1.0f,1.0f,1.0f,1.0f});
+    const vec_float4 zeros = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    return Vector4( zeros, zeros, ones, zeros );
+}
+
+// Returns (0,0,0,1) in every lane.
+inline const Vector4 Vector4::wAxis( )
+{
+    const vec_float4 ones = ((vec_float4){1.0f,1.0f,1.0f,1.0f});
+    const vec_float4 zeros = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    return Vector4( zeros, zeros, zeros, ones );
+}
+
+// Linear interpolation: vec0 + (vec1 - vec0) * t, with t applied per lane.
+inline const Vector4 lerp( vec_float4 t, const Vector4 & vec0, const Vector4 & vec1 )
+{
+    const Vector4 delta = vec1 - vec0;
+    return vec0 + ( delta * t );
+}
+
+// Spherical linear interpolation between unitVec0 and unitVec1, evaluated
+// independently per lane with interpolation parameter t. The parameter names
+// indicate the inputs are expected to be unit length; this is not checked.
+inline const Vector4 slerp( vec_float4 t, const Vector4 & unitVec0, const Vector4 & unitVec1 )
+{
+    vec_float4 recipSinAngle, scale0, scale1, cosAngle, angle;
+    vec_uint4 selectMask;
+    cosAngle = dot( unitVec0, unitVec1 );
+    // Mask is set in lanes where the cosine is below the tolerance; only those
+    // lanes take the sine-based weights computed below.
+    selectMask = (vec_uint4)vec_cmpgt( (vec_float4){_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL,_VECTORMATH_SLERP_TOL}, cosAngle );
+    angle = acosf4( cosAngle );
+    recipSinAngle = divf4( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), sinf4( angle ) );
+    // scale0: sin((1-t)*angle)/sin(angle) where masked, otherwise the linear weight (1-t).
+    scale0 = vec_sel( vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), t ), vec_madd( sinf4( vec_madd( vec_sub( ((vec_float4){1.0f,1.0f,1.0f,1.0f}), t ), angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), recipSinAngle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), selectMask );
+    // scale1: sin(t*angle)/sin(angle) where masked, otherwise the linear weight t.
+    scale1 = vec_sel( t, vec_madd( sinf4( vec_madd( t, angle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ) ), recipSinAngle, ((vec_float4){0.0f,0.0f,0.0f,0.0f}) ), selectMask );
+    return ( ( unitVec0 * scale0 ) + ( unitVec1 * scale1 ) );
+}
+
+// Converts the SoA data into four AoS vectors with a 4x4 transpose; resultN
+// receives lane N of the X, Y, Z and W quads.
+inline void Vector4::get4Aos( Aos::Vector4 & result0, Aos::Vector4 & result1, Aos::Vector4 & result2, Aos::Vector4 & result3 ) const
+{
+    // Interleave X with Z and Y with W; the high merges cover lanes 0-1, the low merges lanes 2-3.
+    const vec_float4 xzHi = vec_mergeh( mX, mZ );
+    const vec_float4 ywHi = vec_mergeh( mY, mW );
+    const vec_float4 xzLo = vec_mergel( mX, mZ );
+    const vec_float4 ywLo = vec_mergel( mY, mW );
+    result0 = Aos::Vector4( vec_mergeh( xzHi, ywHi ) );
+    result1 = Aos::Vector4( vec_mergel( xzHi, ywHi ) );
+    result2 = Aos::Vector4( vec_mergeh( xzLo, ywLo ) );
+    result3 = Aos::Vector4( vec_mergel( xzLo, ywLo ) );
+}
+
+// Unpacks vec into four AoS vectors and converts them pairwise with
+// _vmath2VfToHalfFloats into twoQuads[0] and twoQuads[1].
+inline void storeHalfFloats( const Vector4 & vec, vec_ushort8 * twoQuads )
+{
+    Aos::Vector4 lane0, lane1, lane2, lane3;
+    vec.get4Aos( lane0, lane1, lane2, lane3 );
+    twoQuads[0] = _vmath2VfToHalfFloats( lane0.get128(), lane1.get128() );
+    twoQuads[1] = _vmath2VfToHalfFloats( lane2.get128(), lane3.get128() );
+}
+
+// Copy assignment: overwrites all four component quads and returns *this.
+inline Vector4 & Vector4::operator =( const Vector4 & vec )
+{
+    this->mX = vec.mX;
+    this->mY = vec.mY;
+    this->mZ = vec.mZ;
+    this->mW = vec.mW;
+    return *this;
+}
+
+// Replaces the X, Y and Z quads from a 3-D vector; W is left untouched.
+inline Vector4 & Vector4::setXYZ( const Vector3 & vec )
+{
+    this->mX = vec.getX();
+    this->mY = vec.getY();
+    this->mZ = vec.getZ();
+    return *this;
+}
+
+// Returns the X, Y and Z quads as a 3-D vector (W is dropped).
+inline const Vector3 Vector4::getXYZ( ) const
+{
+    const Vector3 xyz( mX, mY, mZ );
+    return xyz;
+}
+
+// Replaces the X quad; returns *this so calls can be chained.
+inline Vector4 & Vector4::setX( vec_float4 _x )
+{
+    this->mX = _x;
+    return *this;
+}
+
+// Returns the X quad.
+inline vec_float4 Vector4::getX( ) const
+{
+    return this->mX;
+}
+
+// Replaces the Y quad; returns *this so calls can be chained.
+inline Vector4 & Vector4::setY( vec_float4 _y )
+{
+    this->mY = _y;
+    return *this;
+}
+
+// Returns the Y quad.
+inline vec_float4 Vector4::getY( ) const
+{
+    return this->mY;
+}
+
+// Replaces the Z quad; returns *this so calls can be chained.
+inline Vector4 & Vector4::setZ( vec_float4 _z )
+{
+    this->mZ = _z;
+    return *this;
+}
+
+// Returns the Z quad.
+inline vec_float4 Vector4::getZ( ) const
+{
+    return this->mZ;
+}
+
+// Replaces the W quad; returns *this so calls can be chained.
+inline Vector4 & Vector4::setW( vec_float4 _w )
+{
+    this->mW = _w;
+    return *this;
+}
+
+// Returns the W quad.
+inline vec_float4 Vector4::getW( ) const
+{
+    return this->mW;
+}
+
+// Writes the component quad selected by idx, counted from mX. The index is
+// not range-checked and the members are addressed as a contiguous array.
+inline Vector4 & Vector4::setElem( int idx, vec_float4 value )
+{
+    ( &mX )[ idx ] = value;
+    return *this;
+}
+
+// Reads the component quad selected by idx, counted from mX (no range check).
+inline vec_float4 Vector4::getElem( int idx ) const
+{
+    return ( &mX )[ idx ];
+}
+
+// Mutable access to the component quad selected by idx, counted from mX
+// (no range check).
+inline Vector4::vec_float4_t & Vector4::operator []( int idx )
+{
+    return ( &mX )[ idx ];
+}
+
+// Read-only access to the component quad selected by idx, counted from mX
+// (no range check).
+inline vec_float4 Vector4::operator []( int idx ) const
+{
+    return ( &mX )[ idx ];
+}
+
+// Component-wise sum of two vectors.
+inline const Vector4 Vector4::operator +( const Vector4 & vec ) const
+{
+    const vec_float4 sumX = vec_add( mX, vec.mX );
+    const vec_float4 sumY = vec_add( mY, vec.mY );
+    const vec_float4 sumZ = vec_add( mZ, vec.mZ );
+    const vec_float4 sumW = vec_add( mW, vec.mW );
+    return Vector4( sumX, sumY, sumZ, sumW );
+}
+
+// Component-wise difference of two vectors (this minus vec).
+inline const Vector4 Vector4::operator -( const Vector4 & vec ) const
+{
+    const vec_float4 diffX = vec_sub( mX, vec.mX );
+    const vec_float4 diffY = vec_sub( mY, vec.mY );
+    const vec_float4 diffZ = vec_sub( mZ, vec.mZ );
+    const vec_float4 diffW = vec_sub( mW, vec.mW );
+    return Vector4( diffX, diffY, diffZ, diffW );
+}
+
+// Multiplies every component by scalar, lane by lane (multiply-add with a
+// zero addend).
+inline const Vector4 Vector4::operator *( vec_float4 scalar ) const
+{
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    return Vector4(
+        vec_madd( mX, scalar, zero ),
+        vec_madd( mY, scalar, zero ),
+        vec_madd( mZ, scalar, zero ),
+        vec_madd( mW, scalar, zero )
+    );
+}
+
+// In-place addition, implemented through operator + and assignment.
+inline Vector4 & Vector4::operator +=( const Vector4 & vec )
+{
+    return ( *this = *this + vec );
+}
+
+// In-place subtraction, implemented through operator - and assignment.
+inline Vector4 & Vector4::operator -=( const Vector4 & vec )
+{
+    return ( *this = *this - vec );
+}
+
+// In-place scaling, implemented through operator * and assignment.
+inline Vector4 & Vector4::operator *=( vec_float4 scalar )
+{
+    return ( *this = *this * scalar );
+}
+
+// Divides every component by scalar, lane by lane, using divf4.
+inline const Vector4 Vector4::operator /( vec_float4 scalar ) const
+{
+    const vec_float4 quotX = divf4( mX, scalar );
+    const vec_float4 quotY = divf4( mY, scalar );
+    const vec_float4 quotZ = divf4( mZ, scalar );
+    const vec_float4 quotW = divf4( mW, scalar );
+    return Vector4( quotX, quotY, quotZ, quotW );
+}
+
+// In-place division, implemented through operator / and assignment.
+inline Vector4 & Vector4::operator /=( vec_float4 scalar )
+{
+    return ( *this = *this / scalar );
+}
+
+// Unary minus: applies negatef4 to each component.
+inline const Vector4 Vector4::operator -( ) const
+{
+    const vec_float4 negX = negatef4( mX );
+    const vec_float4 negY = negatef4( mY );
+    const vec_float4 negZ = negatef4( mZ );
+    const vec_float4 negW = negatef4( mW );
+    return Vector4( negX, negY, negZ, negW );
+}
+
+// Scalar-on-the-left multiplication; forwards to Vector4::operator *.
+inline const Vector4 operator *( vec_float4 scalar, const Vector4 & vec )
+{
+    return vec.operator *( scalar );
+}
+
+// Component-wise product of two vectors (multiply-add with a zero addend).
+inline const Vector4 mulPerElem( const Vector4 & vec0, const Vector4 & vec1 )
+{
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    return Vector4(
+        vec_madd( vec0.getX(), vec1.getX(), zero ),
+        vec_madd( vec0.getY(), vec1.getY(), zero ),
+        vec_madd( vec0.getZ(), vec1.getZ(), zero ),
+        vec_madd( vec0.getW(), vec1.getW(), zero )
+    );
+}
+
+// Component-wise quotient vec0 / vec1, computed with divf4.
+inline const Vector4 divPerElem( const Vector4 & vec0, const Vector4 & vec1 )
+{
+    const vec_float4 quotX = divf4( vec0.getX(), vec1.getX() );
+    const vec_float4 quotY = divf4( vec0.getY(), vec1.getY() );
+    const vec_float4 quotZ = divf4( vec0.getZ(), vec1.getZ() );
+    const vec_float4 quotW = divf4( vec0.getW(), vec1.getW() );
+    return Vector4( quotX, quotY, quotZ, quotW );
+}
+
+// Component-wise reciprocal, computed as 1 / component with divf4.
+inline const Vector4 recipPerElem( const Vector4 & vec )
+{
+    const vec_float4 one = ((vec_float4){1.0f,1.0f,1.0f,1.0f});
+    return Vector4(
+        divf4( one, vec.getX() ),
+        divf4( one, vec.getY() ),
+        divf4( one, vec.getZ() ),
+        divf4( one, vec.getW() )
+    );
+}
+
+// Component-wise square root via sqrtf4.
+inline const Vector4 sqrtPerElem( const Vector4 & vec )
+{
+    const vec_float4 rootX = sqrtf4( vec.getX() );
+    const vec_float4 rootY = sqrtf4( vec.getY() );
+    const vec_float4 rootZ = sqrtf4( vec.getZ() );
+    const vec_float4 rootW = sqrtf4( vec.getW() );
+    return Vector4( rootX, rootY, rootZ, rootW );
+}
+
+// Component-wise reciprocal square root, computed as 1 / sqrtf4(component).
+inline const Vector4 rsqrtPerElem( const Vector4 & vec )
+{
+    const vec_float4 one = ((vec_float4){1.0f,1.0f,1.0f,1.0f});
+    return Vector4(
+        divf4( one, sqrtf4( vec.getX() ) ),
+        divf4( one, sqrtf4( vec.getY() ) ),
+        divf4( one, sqrtf4( vec.getZ() ) ),
+        divf4( one, sqrtf4( vec.getW() ) )
+    );
+}
+
+// Component-wise absolute value via fabsf4.
+inline const Vector4 absPerElem( const Vector4 & vec )
+{
+    const vec_float4 absX = fabsf4( vec.getX() );
+    const vec_float4 absY = fabsf4( vec.getY() );
+    const vec_float4 absZ = fabsf4( vec.getZ() );
+    const vec_float4 absW = fabsf4( vec.getW() );
+    return Vector4( absX, absY, absZ, absW );
+}
+
+// Applies copysignf4 per component, passing vec0's component first and
+// vec1's component second.
+inline const Vector4 copySignPerElem( const Vector4 & vec0, const Vector4 & vec1 )
+{
+    const vec_float4 outX = copysignf4( vec0.getX(), vec1.getX() );
+    const vec_float4 outY = copysignf4( vec0.getY(), vec1.getY() );
+    const vec_float4 outZ = copysignf4( vec0.getZ(), vec1.getZ() );
+    const vec_float4 outW = copysignf4( vec0.getW(), vec1.getW() );
+    return Vector4( outX, outY, outZ, outW );
+}
+
+// Component-wise maximum of two vectors via fmaxf4.
+inline const Vector4 maxPerElem( const Vector4 & vec0, const Vector4 & vec1 )
+{
+    const vec_float4 maxX = fmaxf4( vec0.getX(), vec1.getX() );
+    const vec_float4 maxY = fmaxf4( vec0.getY(), vec1.getY() );
+    const vec_float4 maxZ = fmaxf4( vec0.getZ(), vec1.getZ() );
+    const vec_float4 maxW = fmaxf4( vec0.getW(), vec1.getW() );
+    return Vector4( maxX, maxY, maxZ, maxW );
+}
+
+// Per lane, the largest of the X, Y, Z and W components.
+inline vec_float4 maxElem( const Vector4 & vec )
+{
+    const vec_float4 maxXY = fmaxf4( vec.getX(), vec.getY() );
+    const vec_float4 maxXYZ = fmaxf4( vec.getZ(), maxXY );
+    return fmaxf4( vec.getW(), maxXYZ );
+}
+
+// Component-wise minimum of two vectors via fminf4.
+inline const Vector4 minPerElem( const Vector4 & vec0, const Vector4 & vec1 )
+{
+    const vec_float4 minX = fminf4( vec0.getX(), vec1.getX() );
+    const vec_float4 minY = fminf4( vec0.getY(), vec1.getY() );
+    const vec_float4 minZ = fminf4( vec0.getZ(), vec1.getZ() );
+    const vec_float4 minW = fminf4( vec0.getW(), vec1.getW() );
+    return Vector4( minX, minY, minZ, minW );
+}
+
+// Per lane, the smallest of the X, Y, Z and W components.
+inline vec_float4 minElem( const Vector4 & vec )
+{
+    const vec_float4 minXY = fminf4( vec.getX(), vec.getY() );
+    const vec_float4 minXYZ = fminf4( vec.getZ(), minXY );
+    return fminf4( vec.getW(), minXYZ );
+}
+
+// Per lane, X + Y + Z + W (added in that order).
+inline vec_float4 sum( const Vector4 & vec )
+{
+    const vec_float4 sumXY = vec_add( vec.getX(), vec.getY() );
+    const vec_float4 sumXYZ = vec_add( sumXY, vec.getZ() );
+    return vec_add( sumXYZ, vec.getW() );
+}
+
+// Per-lane dot product: the four component products are formed separately
+// and accumulated in X, Y, Z, W order.
+inline vec_float4 dot( const Vector4 & vec0, const Vector4 & vec1 )
+{
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    const vec_float4 prodX = vec_madd( vec0.getX(), vec1.getX(), zero );
+    const vec_float4 prodY = vec_madd( vec0.getY(), vec1.getY(), zero );
+    const vec_float4 prodZ = vec_madd( vec0.getZ(), vec1.getZ(), zero );
+    const vec_float4 prodW = vec_madd( vec0.getW(), vec1.getW(), zero );
+    return vec_add( vec_add( vec_add( prodX, prodY ), prodZ ), prodW );
+}
+
+// Per-lane squared length: the component squares are formed separately and
+// accumulated in X, Y, Z, W order.
+inline vec_float4 lengthSqr( const Vector4 & vec )
+{
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    const vec_float4 sqrX = vec_madd( vec.getX(), vec.getX(), zero );
+    const vec_float4 sqrY = vec_madd( vec.getY(), vec.getY(), zero );
+    const vec_float4 sqrZ = vec_madd( vec.getZ(), vec.getZ(), zero );
+    const vec_float4 sqrW = vec_madd( vec.getW(), vec.getW(), zero );
+    return vec_add( vec_add( vec_add( sqrX, sqrY ), sqrZ ), sqrW );
+}
+
+// Per-lane length: square root of lengthSqr.
+inline vec_float4 length( const Vector4 & vec )
+{
+    const vec_float4 lenSqr = lengthSqr( vec );
+    return sqrtf4( lenSqr );
+}
+
+// Scales vec by 1 / sqrt(lengthSqr(vec)) per lane. There is no guard against
+// a zero length.
+inline const Vector4 normalize( const Vector4 & vec )
+{
+    const vec_float4 one = ((vec_float4){1.0f,1.0f,1.0f,1.0f});
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    const vec_float4 invLen = divf4( one, sqrtf4( lengthSqr( vec ) ) );
+    return Vector4(
+        vec_madd( vec.getX(), invLen, zero ),
+        vec_madd( vec.getY(), invLen, zero ),
+        vec_madd( vec.getZ(), invLen, zero ),
+        vec_madd( vec.getW(), invLen, zero )
+    );
+}
+
+// Applies vec_sel with the same mask (select1) to each component pair of
+// vec0 and vec1.
+inline const Vector4 select( const Vector4 & vec0, const Vector4 & vec1, vec_uint4 select1 )
+{
+    const vec_float4 pickX = vec_sel( vec0.getX(), vec1.getX(), select1 );
+    const vec_float4 pickY = vec_sel( vec0.getY(), vec1.getY(), select1 );
+    const vec_float4 pickZ = vec_sel( vec0.getZ(), vec1.getZ(), select1 );
+    const vec_float4 pickW = vec_sel( vec0.getW(), vec1.getW(), select1 );
+    return Vector4( pickX, pickY, pickZ, pickW );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+// Debug helper: unpacks the four lanes into AoS vectors and prints each one
+// under a "slot N:" heading.
+inline void print( const Vector4 & vec )
+{
+    Aos::Vector4 slot0, slot1, slot2, slot3;
+    vec.get4Aos( slot0, slot1, slot2, slot3 );
+    printf("slot 0:\n");
+    print( slot0 );
+    printf("slot 1:\n");
+    print( slot1 );
+    printf("slot 2:\n");
+    print( slot2 );
+    printf("slot 3:\n");
+    print( slot3 );
+}
+
+// Debug helper: prints name as a heading, then each of the four lanes as an
+// AoS vector under a "slot N:" heading.
+inline void print( const Vector4 & vec, const char * name )
+{
+    Aos::Vector4 slot0, slot1, slot2, slot3;
+    printf( "%s:\n", name );
+    vec.get4Aos( slot0, slot1, slot2, slot3 );
+    printf("slot 0:\n");
+    print( slot0 );
+    printf("slot 1:\n");
+    print( slot1 );
+    printf("slot 2:\n");
+    print( slot2 );
+    printf("slot 3:\n");
+    print( slot3 );
+}
+
+#endif
+
+// Copy constructor: duplicates the three component quads of pnt.
+inline Point3::Point3( const Point3 & pnt )
+    : mX( pnt.mX ), mY( pnt.mY ), mZ( pnt.mZ )
+{
+}
+
+// Builds a point directly from its X, Y and Z quads.
+inline Point3::Point3( vec_float4 _x, vec_float4 _y, vec_float4 _z )
+    : mX( _x ), mY( _y ), mZ( _z )
+{
+}
+
+// Build a point from the x, y, and z vectors of a Vector3.
+inline Point3::Point3( const Vector3 & vec )
+    : mX( vec.getX() ),
+      mY( vec.getY() ),
+      mZ( vec.getZ() )
+{ }
+
+// Use the same vector for all three components.
+inline Point3::Point3( vec_float4 scalar )
+    : mX( scalar ),
+      mY( scalar ),
+      mZ( scalar )
+{ }
+
+// Replicate one AoS point: words 0, 1, 2 of its quadword are splatted into x, y, z.
+inline Point3::Point3( Aos::Point3 pnt ) {
+    const vec_float4 quad = pnt.get128();
+    this->mX = vec_splat( quad, 0 );
+    this->mY = vec_splat( quad, 1 );
+    this->mZ = vec_splat( quad, 2 );
+}
+
+// Transpose four AoS points (one per slot) into SoA form with two rounds of merges.
+inline Point3::Point3( Aos::Point3 pnt0, Aos::Point3 pnt1, Aos::Point3 pnt2, Aos::Point3 pnt3 ) {
+    const vec_float4 q0 = pnt0.get128(), q1 = pnt1.get128(), q2 = pnt2.get128(), q3 = pnt3.get128();
+    const vec_float4 xy02 = vec_mergeh( q0, q2 );  // x0 x2 y0 y2
+    const vec_float4 xy13 = vec_mergeh( q1, q3 );  // x1 x3 y1 y3
+    const vec_float4 zw02 = vec_mergel( q0, q2 );  // z0 z2 (4th words)
+    const vec_float4 zw13 = vec_mergel( q1, q3 );  // z1 z3 (4th words)
+    mX = vec_mergeh( xy02, xy13 );                 // x0 x1 x2 x3
+    mY = vec_mergel( xy02, xy13 );                 // y0 y1 y2 y3
+    mZ = vec_mergeh( zw02, zw13 );                 // z0 z1 z2 z3
+}
+
+inline const Point3 lerp( vec_float4 t, const Point3 & pnt0, const Point3 & pnt1 ) {
+    const Vector3 offset = ( pnt1 - pnt0 ) * t;  // pnt0 + (pnt1 - pnt0) * t; t is not clamped here
+    return ( pnt0 + offset );
+}
+
+// Unpack this SoA point into four AoS points, one per slot.
+inline void Point3::get4Aos( Aos::Point3 & result0, Aos::Point3 & result1, Aos::Point3 & result2, Aos::Point3 & result3 ) const {
+    const vec_float4 xz01 = vec_mergeh( mX, mZ );  // x0 z0 x1 z1
+    const vec_float4 xz23 = vec_mergel( mX, mZ );  // x2 z2 x3 z3
+    // Weave y in between x and z; the 4th word of each result is leftover lane data.
+    result0 = Aos::Point3( vec_mergeh( xz01, mY ) );
+    result1 = Aos::Point3( vec_perm( xz01, mY, _VECTORMATH_PERM_ZBWX ) );
+    result2 = Aos::Point3( vec_perm( xz23, mY, _VECTORMATH_PERM_XCYX ) );
+    result3 = Aos::Point3( vec_perm( xz23, mY, _VECTORMATH_PERM_ZDWX ) );
+}
+
+// Load four tightly packed xyz triples (three quadwords) into SoA form.
+inline void loadXYZArray( Point3 & vec, const vec_float4 * threeQuads ) {
+    const vec_float4 q0 = threeQuads[0];  // x0 y0 z0 x1
+    const vec_float4 q1 = threeQuads[1];  // y1 z1 x2 y2
+    const vec_float4 q2 = threeQuads[2];  // z2 x3 y3 z3
+    // Shift neighbouring quadwords by 8 bytes so each temporary holds two component pairs.
+    const vec_float4 xyPairs = vec_sld( q1, q0, 8 );  // x2 y2 x0 y0
+    const vec_float4 zxPairs = vec_sld( q0, q2, 8 );  // z0 x1 z2 x3
+    const vec_float4 yzPairs = vec_sld( q2, q1, 8 );  // y3 z3 y1 z1
+    vec.setX( vec_perm( xyPairs, zxPairs, _VECTORMATH_PERM_ZBXD ) );
+    vec.setY( vec_perm( xyPairs, yzPairs, _VECTORMATH_PERM_WCYA ) );
+    vec.setZ( vec_perm( zxPairs, yzPairs, _VECTORMATH_PERM_XDZB ) );
+}
+
+// Store the SoA point as four tightly packed xyz triples in three quadwords.
+inline void storeXYZArray( const Point3 & vec, vec_float4 * threeQuads )
+{
+    const vec_float4 px = vec.getX(), py = vec.getY(), pz = vec.getZ();
+    // Pair up components first (lane layouts as implied by the permute-mask names) ...
+    const vec_float4 xyPairs = vec_perm( px, py, _VECTORMATH_PERM_ZCXA );  // x2 y2 x0 y0
+    const vec_float4 zxPairs = vec_perm( pz, px, _VECTORMATH_PERM_XBZD );  // z0 x1 z2 x3
+    const vec_float4 yzPairs = vec_perm( py, pz, _VECTORMATH_PERM_WDYB );  // y3 z3 y1 z1
+    // ... then splice halves of neighbouring pairs into memory order.
+    threeQuads[0] = vec_sld( xyPairs, zxPairs, 8 );  // x0 y0 z0 x1
+    threeQuads[1] = vec_sld( yzPairs, xyPairs, 8 );  // y1 z1 x2 y2
+    threeQuads[2] = vec_sld( zxPairs, yzPairs, 8 );  // z2 x3 y3 z3
+}
+
+// Pack two SoA points with storeXYZArray (three float quadwords each), then pass
+// the six quadwords pairwise to _vmath2VfToHalfFloats to fill three vec_ushort8.
+inline void storeHalfFloats( const Point3 & pnt0, const Point3 & pnt1, vec_ushort8 * threeQuads ) {
+    vec_float4 packed0[3], packed1[3];
+    storeXYZArray( pnt0, packed0 );
+    storeXYZArray( pnt1, packed1 );
+    threeQuads[0] = _vmath2VfToHalfFloats(packed0[0], packed0[1]);
+    threeQuads[1] = _vmath2VfToHalfFloats(packed0[2], packed1[0]);
+    threeQuads[2] = _vmath2VfToHalfFloats(packed1[1], packed1[2]);
+}
+
+// Assignment: copy the x, y, and z vectors of pnt and return *this.
+inline Point3 & Point3::operator =( const Point3 & pnt ) {
+    this->mX = pnt.mX;
+    this->mY = pnt.mY;
+    this->mZ = pnt.mZ;
+    return *this;
+}
+
+// Replace the x vector; returns *this so calls can be chained.
+inline Point3 & Point3::setX( vec_float4 _x ) {
+    this->mX = _x;
+    return *this;
+}
+
+// Return the stored x vector by value.
+inline vec_float4 Point3::getX( ) const {
+    return this->mX;
+}
+
+// Replace the y vector; returns *this so calls can be chained.
+inline Point3 & Point3::setY( vec_float4 _y ) {
+    this->mY = _y;
+    return *this;
+}
+
+// Return the stored y vector by value.
+inline vec_float4 Point3::getY( ) const {
+    return this->mY;
+}
+
+// Replace the z vector; returns *this so calls can be chained.
+inline Point3 & Point3::setZ( vec_float4 _z ) {
+    this->mZ = _z;
+    return *this;
+}
+
+// Return the stored z vector by value.
+inline vec_float4 Point3::getZ( ) const {
+    return this->mZ;
+}
+
+// Write value at offset idx from mX (idx 0 is mX itself); idx is not range-checked.
+inline Point3 & Point3::setElem( int idx, vec_float4 value ) {
+    ( &mX )[idx] = value;
+    return *this;
+}
+
+// Read the vector at offset idx from mX (idx 0 is mX itself); idx is not range-checked.
+inline vec_float4 Point3::getElem( int idx ) const {
+    return ( &mX )[idx];
+}
+
+// Mutable access to the vector at offset idx from mX; idx is not range-checked.
+inline Point3::vec_float4_t & Point3::operator []( int idx ) {
+    return ( &mX )[idx];
+}
+
+// Read-only access (by value) to the vector at offset idx from mX; not range-checked.
+inline vec_float4 Point3::operator []( int idx ) const {
+    return ( &mX )[idx];
+}
+
+// Point minus point yields a Vector3: component-wise vec_sub.
+inline const Vector3 Point3::operator -( const Point3 & pnt ) const
+{
+    const vec_float4 dx = vec_sub( this->mX, pnt.mX );
+    const vec_float4 dy = vec_sub( this->mY, pnt.mY );
+    const vec_float4 dz = vec_sub( this->mZ, pnt.mZ );
+    return Vector3( dx, dy, dz );
+}
+
+// Point plus vector yields a Point3: component-wise vec_add.
+inline const Point3 Point3::operator +( const Vector3 & vec ) const
+{
+    const vec_float4 sx = vec_add( this->mX, vec.getX() );
+    const vec_float4 sy = vec_add( this->mY, vec.getY() );
+    const vec_float4 sz = vec_add( this->mZ, vec.getZ() );
+    return Point3( sx, sy, sz );
+}
+
+// Point minus vector yields a Point3: component-wise vec_sub.
+inline const Point3 Point3::operator -( const Vector3 & vec ) const
+{
+    const vec_float4 dx = vec_sub( this->mX, vec.getX() );
+    const vec_float4 dy = vec_sub( this->mY, vec.getY() );
+    const vec_float4 dz = vec_sub( this->mZ, vec.getZ() );
+    return Point3( dx, dy, dz );
+}
+
+// Compound form of operator +: assigns ( *this + vec ) back to this point.
+inline Point3 & Point3::operator +=( const Vector3 & vec )
+{
+    return ( *this = *this + vec );
+}
+
+// Compound form of operator -: assigns ( *this - vec ) back to this point.
+inline Point3 & Point3::operator -=( const Vector3 & vec )
+{
+    return ( *this = *this - vec );
+}
+
+// Component-wise product, formed with vec_madd and a zero addend.
+inline const Point3 mulPerElem( const Point3 & pnt0, const Point3 & pnt1 ) {
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    const vec_float4 prodX = vec_madd( pnt0.getX(), pnt1.getX(), zero );
+    const vec_float4 prodY = vec_madd( pnt0.getY(), pnt1.getY(), zero );
+    const vec_float4 prodZ = vec_madd( pnt0.getZ(), pnt1.getZ(), zero );
+    return Point3( prodX, prodY, prodZ );
+}
+
+// Component-wise quotient pnt0 / pnt1, computed with divf4.
+inline const Point3 divPerElem( const Point3 & pnt0, const Point3 & pnt1 )
+{
+    const vec_float4 quotX = divf4( pnt0.getX(), pnt1.getX() );
+    const vec_float4 quotY = divf4( pnt0.getY(), pnt1.getY() );
+    const vec_float4 quotZ = divf4( pnt0.getZ(), pnt1.getZ() );
+    return Point3( quotX, quotY, quotZ );
+}
+
+// Component-wise reciprocal, computed as divf4( 1, component ).
+inline const Point3 recipPerElem( const Point3 & pnt ) {
+    const vec_float4 one = ((vec_float4){1.0f,1.0f,1.0f,1.0f});
+    const vec_float4 recipX = divf4( one, pnt.getX() );
+    const vec_float4 recipY = divf4( one, pnt.getY() );
+    const vec_float4 recipZ = divf4( one, pnt.getZ() );
+    return Point3( recipX, recipY, recipZ );
+}
+
+// Component-wise square root, computed with sqrtf4.
+inline const Point3 sqrtPerElem( const Point3 & pnt )
+{
+    const vec_float4 rootX = sqrtf4( pnt.getX() );
+    const vec_float4 rootY = sqrtf4( pnt.getY() );
+    const vec_float4 rootZ = sqrtf4( pnt.getZ() );
+    return Point3( rootX, rootY, rootZ );
+}
+
+// Component-wise reciprocal square root, computed as divf4( 1, sqrtf4( component ) ).
+inline const Point3 rsqrtPerElem( const Point3 & pnt ) {
+    const vec_float4 one = ((vec_float4){1.0f,1.0f,1.0f,1.0f});
+    const vec_float4 rsqrtX = divf4( one, sqrtf4( pnt.getX() ) );
+    const vec_float4 rsqrtY = divf4( one, sqrtf4( pnt.getY() ) );
+    const vec_float4 rsqrtZ = divf4( one, sqrtf4( pnt.getZ() ) );
+    return Point3( rsqrtX, rsqrtY, rsqrtZ );
+}
+
+// Component-wise absolute value, computed with fabsf4.
+inline const Point3 absPerElem( const Point3 & pnt )
+{
+    const vec_float4 absX = fabsf4( pnt.getX() );
+    const vec_float4 absY = fabsf4( pnt.getY() );
+    const vec_float4 absZ = fabsf4( pnt.getZ() );
+    return Point3( absX, absY, absZ );
+}
+
+// Component-wise copysignf4( pnt0 component, pnt1 component ).
+inline const Point3 copySignPerElem( const Point3 & pnt0, const Point3 & pnt1 )
+{
+    const vec_float4 signedX = copysignf4( pnt0.getX(), pnt1.getX() );
+    const vec_float4 signedY = copysignf4( pnt0.getY(), pnt1.getY() );
+    const vec_float4 signedZ = copysignf4( pnt0.getZ(), pnt1.getZ() );
+    return Point3( signedX, signedY, signedZ );
+}
+
+// Component-wise maximum of two points, computed with fmaxf4.
+inline const Point3 maxPerElem( const Point3 & pnt0, const Point3 & pnt1 )
+{
+    const vec_float4 maxX = fmaxf4( pnt0.getX(), pnt1.getX() );
+    const vec_float4 maxY = fmaxf4( pnt0.getY(), pnt1.getY() );
+    const vec_float4 maxZ = fmaxf4( pnt0.getZ(), pnt1.getZ() );
+    return Point3( maxX, maxY, maxZ );
+}
+
+// Largest of the x, y, and z components,
+// evaluated as fmaxf4( z, fmaxf4( x, y ) ).
+inline vec_float4 maxElem( const Point3 & pnt )
+{
+    const vec_float4 maxXY = fmaxf4( pnt.getX(), pnt.getY() );
+    return fmaxf4( pnt.getZ(), maxXY );
+}
+
+// Component-wise minimum of two points, computed with fminf4.
+inline const Point3 minPerElem( const Point3 & pnt0, const Point3 & pnt1 )
+{
+    const vec_float4 minX = fminf4( pnt0.getX(), pnt1.getX() );
+    const vec_float4 minY = fminf4( pnt0.getY(), pnt1.getY() );
+    const vec_float4 minZ = fminf4( pnt0.getZ(), pnt1.getZ() );
+    return Point3( minX, minY, minZ );
+}
+
+// Smallest of the x, y, and z components,
+// evaluated as fminf4( z, fminf4( x, y ) ).
+inline vec_float4 minElem( const Point3 & pnt )
+{
+    const vec_float4 minXY = fminf4( pnt.getX(), pnt.getY() );
+    return fminf4( pnt.getZ(), minXY );
+}
+
+// Sum of the x, y, and z components,
+// evaluated as ( x + y ) + z with vec_add.
+inline vec_float4 sum( const Point3 & pnt )
+{
+    const vec_float4 sumXY = vec_add( pnt.getX(), pnt.getY() );
+    return vec_add( sumXY, pnt.getZ() );
+}
+
+inline const Point3 scale( const Point3 & pnt, vec_float4 scaleVal ) {
+    const Point3 factors( scaleVal );  // scaleVal used for x, y, and z alike
+    return mulPerElem( pnt, factors );
+}
+
+inline const Point3 scale( const Point3 & pnt, const Vector3 & scaleVec ) {
+    const Point3 factors( scaleVec );  // separate factors for x, y, and z
+    return mulPerElem( pnt, factors );
+}
+
+// Sum of component products of pnt and unitVec: ( x*ux + y*uy ) + z*uz.
+inline vec_float4 projection( const Point3 & pnt, const Vector3 & unitVec ) {
+    const vec_float4 zero = ((vec_float4){0.0f,0.0f,0.0f,0.0f});
+    const vec_float4 termX = vec_madd( pnt.getX(), unitVec.getX(), zero );
+    const vec_float4 termY = vec_madd( pnt.getY(), unitVec.getY(), zero );
+    const vec_float4 termZ = vec_madd( pnt.getZ(), unitVec.getZ(), zero );
+    return vec_add( vec_add( termX, termY ), termZ );
+}
+
+inline vec_float4 distSqrFromOrigin( const Point3 & pnt ) {
+    const Vector3 fromOrigin( pnt );  // the point's coordinates taken as a vector
+    return lengthSqr( fromOrigin );
+}
+
+inline vec_float4 distFromOrigin( const Point3 & pnt ) {
+    const Vector3 fromOrigin( pnt );  // the point's coordinates taken as a vector
+    return length( fromOrigin );
+}
+
+inline vec_float4 distSqr( const Point3 & pnt0, const Point3 & pnt1 ) {
+    const Vector3 delta = pnt1 - pnt0;  // difference vector pnt1 - pnt0
+    return lengthSqr( delta );
+}
+
+inline vec_float4 dist( const Point3 & pnt0, const Point3 & pnt1 ) {
+    const Vector3 delta = pnt1 - pnt0;  // difference vector pnt1 - pnt0
+    return length( delta );
+}
+
+// Component-wise vec_sel between pnt0 and pnt1; select1 is applied to x, y, and z.
+inline const Point3 select( const Point3 & pnt0, const Point3 & pnt1, vec_uint4 select1 )
+{
+    const vec_float4 pickedX = vec_sel( pnt0.getX(), pnt1.getX(), select1 );
+    const vec_float4 pickedY = vec_sel( pnt0.getY(), pnt1.getY(), select1 );
+    const vec_float4 pickedZ = vec_sel( pnt0.getZ(), pnt1.getZ(), select1 );
+    return Point3( pickedX, pickedY, pickedZ );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+// Debug helper: unpack the SoA point with get4Aos and print each AoS slot.
+inline void print( const Point3 & pnt ) {
+    Aos::Point3 slot[4];
+    pnt.get4Aos( slot[0], slot[1], slot[2], slot[3] );
+    printf("slot 0:\n");
+    print( slot[0] );
+    printf("slot 1:\n");
+    print( slot[1] );
+    printf("slot 2:\n");
+    print( slot[2] );
+    printf("slot 3:\n");
+    print( slot[3] );
+}
+
+// Debug helper: print the label "name:" and then each AoS slot of pnt.
+inline void print( const Point3 & pnt, const char * name ) {
+    Aos::Point3 slot[4];
+    printf( "%s:\n", name );
+    pnt.get4Aos( slot[0], slot[1], slot[2], slot[3] );
+    printf("slot 0:\n");
+    print( slot[0] );
+    printf("slot 1:\n");
+    print( slot[1] );
+    printf("slot 2:\n");
+    print( slot[2] );
+    printf("slot 3:\n");
+    print( slot[3] );
+}
+
+#endif
+
+} // namespace Soa
+} // namespace Vectormath
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/ppu/cpp/vecidx_aos.h b/Extras/vectormathlibrary/include/vectormath/ppu/cpp/vecidx_aos.h
index df3357570..86ddf84fd 100644
--- a/Extras/vectormathlibrary/include/vectormath/ppu/cpp/vecidx_aos.h
+++ b/Extras/vectormathlibrary/include/vectormath/ppu/cpp/vecidx_aos.h
@@ -1,80 +1,80 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_VECIDX_AOS_H
-#define _VECTORMATH_VECIDX_AOS_H
-
-#include "floatInVec.h"
-
-namespace Vectormath {
-namespace Aos {
-
-//-----------------------------------------------------------------------------
-// VecIdx 
-// Used in setting elements of Vector3, Vector4, Point3, or Quat with the 
-// subscripting operator.
-//
-
-class VecIdx
-{
-private:
-    typedef vec_float4 vec_float4_t;
-    vec_float4_t &ref __attribute__ ((aligned(16)));
-    int i __attribute__ ((aligned(16)));
-public:
-    inline VecIdx( vec_float4_t& vec, int idx ): ref(vec) { i = idx; }
-
-    // implicitly casts to float unless _VECTORMATH_NO_SCALAR_CAST defined
-    // in which case, implicitly casts to floatInVec, and one must call
-    // getAsFloat to convert to float.
-    //
-#ifdef _VECTORMATH_NO_SCALAR_CAST
-    inline operator floatInVec() const;
-    inline float getAsFloat() const;
-#else
-    inline operator float() const;
-#endif
-
-    inline float operator =( float scalar );
-    inline floatInVec operator =( floatInVec scalar );
-    inline floatInVec operator =( const VecIdx& scalar );
-    inline floatInVec operator *=( float scalar );
-    inline floatInVec operator *=( floatInVec scalar );
-    inline floatInVec operator /=( float scalar );
-    inline floatInVec operator /=( floatInVec scalar );
-    inline floatInVec operator +=( float scalar );
-    inline floatInVec operator +=( floatInVec scalar );
-    inline floatInVec operator -=( float scalar );
-    inline floatInVec operator -=( floatInVec scalar );
-};
-
-} // namespace Aos
-} // namespace Vectormath
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_VECIDX_AOS_H
+#define _VECTORMATH_VECIDX_AOS_H
+
+#include "floatInVec.h"
+
+namespace Vectormath {
+namespace Aos {
+
+//-----------------------------------------------------------------------------
+// VecIdx 
+// Used in setting elements of Vector3, Vector4, Point3, or Quat with the 
+// subscripting operator.
+//
+
+class VecIdx
+{
+private:
+    typedef vec_float4 vec_float4_t;
+    vec_float4_t &ref __attribute__ ((aligned(16)));  // reference to the quadword being indexed; bound in the constructor
+    int i __attribute__ ((aligned(16)));  // element index stored by the constructor
+public:
+    inline VecIdx( vec_float4_t& vec, int idx ): ref(vec) { i = idx; }  // keeps a reference to vec (no copy) together with idx
+
+    // implicitly casts to float unless _VECTORMATH_NO_SCALAR_CAST defined
+    // in which case, implicitly casts to floatInVec, and one must call
+    // getAsFloat to convert to float.
+    //
+#ifdef _VECTORMATH_NO_SCALAR_CAST
+    inline operator floatInVec() const;  // only declared here; definition is elsewhere
+    inline float getAsFloat() const;  // explicit conversion to float for this configuration
+#else
+    inline operator float() const;  // only declared here; definition is elsewhere
+#endif
+
+    inline float operator =( float scalar );  // assignment from float; declared to return float
+    inline floatInVec operator =( floatInVec scalar );  // assignment from floatInVec
+    inline floatInVec operator =( const VecIdx& scalar );  // assignment from another VecIdx proxy
+    inline floatInVec operator *=( float scalar );  // compound operators below all return floatInVec by value
+    inline floatInVec operator *=( floatInVec scalar );
+    inline floatInVec operator /=( float scalar );
+    inline floatInVec operator /=( floatInVec scalar );
+    inline floatInVec operator +=( float scalar );
+    inline floatInVec operator +=( floatInVec scalar );
+    inline floatInVec operator -=( float scalar );
+    inline floatInVec operator -=( floatInVec scalar );
+};
+
+} // namespace Aos
+} // namespace Vectormath
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/ppu/cpp/vectormath_aos.h b/Extras/vectormathlibrary/include/vectormath/ppu/cpp/vectormath_aos.h
index fdd039b8a..dfa041999 100644
--- a/Extras/vectormathlibrary/include/vectormath/ppu/cpp/vectormath_aos.h
+++ b/Extras/vectormathlibrary/include/vectormath/ppu/cpp/vectormath_aos.h
@@ -1,2244 +1,2244 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_AOS_CPP_H
-#define _VECTORMATH_AOS_CPP_H
-
-#include <math.h>
-#include <altivec.h>
-#include "vecidx_aos.h"
-#include "floatInVec.h"
-#include "boolInVec.h"
-
-#ifdef _VECTORMATH_DEBUG
-#include <stdio.h>
-#endif
-
-namespace Vectormath {
-
-namespace Aos {
-
-//-----------------------------------------------------------------------------
-// Forward Declarations
-//
-
-class Vector3;
-class Vector4;
-class Point3;
-class Quat;
-class Matrix3;
-class Matrix4;
-class Transform3;
-
-// A 3-D vector in array-of-structures format
-//
-class Vector3
-{
-    vec_float4 mVec128;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Vector3( ) { };
-
-    // Construct a 3-D vector from x, y, and z elements
-    // 
-    inline Vector3( float x, float y, float z );
-
-    // Construct a 3-D vector from x, y, and z elements (scalar data contained in vector data type)
-    // 
-    inline Vector3( floatInVec x, floatInVec y, floatInVec z );
-
-    // Copy elements from a 3-D point into a 3-D vector
-    // 
-    explicit inline Vector3( Point3 pnt );
-
-    // Set all elements of a 3-D vector to the same scalar value
-    // 
-    explicit inline Vector3( float scalar );
-
-    // Set all elements of a 3-D vector to the same scalar value (scalar data contained in vector data type)
-    // 
-    explicit inline Vector3( floatInVec scalar );
-
-    // Set vector float data in a 3-D vector
-    // 
-    explicit inline Vector3( vec_float4 vf4 );
-
-    // Get vector float data from a 3-D vector
-    // 
-    inline vec_float4 get128( ) const;
-
-    // Assign one 3-D vector to another
-    // 
-    inline Vector3 & operator =( Vector3 vec );
-
-    // Set the x element of a 3-D vector
-    // 
-    inline Vector3 & setX( float x );
-
-    // Set the y element of a 3-D vector
-    // 
-    inline Vector3 & setY( float y );
-
-    // Set the z element of a 3-D vector
-    // 
-    inline Vector3 & setZ( float z );
-
-    // Set the x element of a 3-D vector (scalar data contained in vector data type)
-    // 
-    inline Vector3 & setX( floatInVec x );
-
-    // Set the y element of a 3-D vector (scalar data contained in vector data type)
-    // 
-    inline Vector3 & setY( floatInVec y );
-
-    // Set the z element of a 3-D vector (scalar data contained in vector data type)
-    // 
-    inline Vector3 & setZ( floatInVec z );
-
-    // Get the x element of a 3-D vector
-    // 
-    inline const floatInVec getX( ) const;
-
-    // Get the y element of a 3-D vector
-    // 
-    inline const floatInVec getY( ) const;
-
-    // Get the z element of a 3-D vector
-    // 
-    inline const floatInVec getZ( ) const;
-
-    // Set an x, y, or z element of a 3-D vector by index
-    // 
-    inline Vector3 & setElem( int idx, float value );
-
-    // Set an x, y, or z element of a 3-D vector by index (scalar data contained in vector data type)
-    // 
-    inline Vector3 & setElem( int idx, floatInVec value );
-
-    // Get an x, y, or z element of a 3-D vector by index
-    // 
-    inline const floatInVec getElem( int idx ) const;
-
-    // Subscripting operator to set or get an element
-    // 
-    inline VecIdx operator []( int idx );
-
-    // Subscripting operator to get an element
-    // 
-    inline const floatInVec operator []( int idx ) const;
-
-    // Add two 3-D vectors
-    // 
-    inline const Vector3 operator +( Vector3 vec ) const;
-
-    // Subtract a 3-D vector from another 3-D vector
-    // 
-    inline const Vector3 operator -( Vector3 vec ) const;
-
-    // Add a 3-D vector to a 3-D point
-    // 
-    inline const Point3 operator +( Point3 pnt ) const;
-
-    // Multiply a 3-D vector by a scalar
-    // 
-    inline const Vector3 operator *( float scalar ) const;
-
-    // Divide a 3-D vector by a scalar
-    // 
-    inline const Vector3 operator /( float scalar ) const;
-
-    // Multiply a 3-D vector by a scalar (scalar data contained in vector data type)
-    // 
-    inline const Vector3 operator *( floatInVec scalar ) const;
-
-    // Divide a 3-D vector by a scalar (scalar data contained in vector data type)
-    // 
-    inline const Vector3 operator /( floatInVec scalar ) const;
-
-    // Perform compound assignment and addition with a 3-D vector
-    // 
-    inline Vector3 & operator +=( Vector3 vec );
-
-    // Perform compound assignment and subtraction by a 3-D vector
-    // 
-    inline Vector3 & operator -=( Vector3 vec );
-
-    // Perform compound assignment and multiplication by a scalar
-    // 
-    inline Vector3 & operator *=( float scalar );
-
-    // Perform compound assignment and division by a scalar
-    // 
-    inline Vector3 & operator /=( float scalar );
-
-    // Perform compound assignment and multiplication by a scalar (scalar data contained in vector data type)
-    // 
-    inline Vector3 & operator *=( floatInVec scalar );
-
-    // Perform compound assignment and division by a scalar (scalar data contained in vector data type)
-    // 
-    inline Vector3 & operator /=( floatInVec scalar );
-
-    // Negate all elements of a 3-D vector
-    // 
-    inline const Vector3 operator -( ) const;
-
-    // Construct x axis
-    // 
-    static inline const Vector3 xAxis( );
-
-    // Construct y axis
-    // 
-    static inline const Vector3 yAxis( );
-
-    // Construct z axis
-    // 
-    static inline const Vector3 zAxis( );
-
-};
-
-// Multiply a 3-D vector by a scalar
-// 
-inline const Vector3 operator *( float scalar, Vector3 vec );
-
-// Multiply a 3-D vector by a scalar (scalar data contained in vector data type)
-// 
-inline const Vector3 operator *( floatInVec scalar, Vector3 vec );
-
-// Multiply two 3-D vectors per element
-// 
-inline const Vector3 mulPerElem( Vector3 vec0, Vector3 vec1 );
-
-// Divide two 3-D vectors per element
-// NOTE: 
-// Floating-point behavior matches standard library function divf4.
-// 
-inline const Vector3 divPerElem( Vector3 vec0, Vector3 vec1 );
-
-// Compute the reciprocal of a 3-D vector per element
-// NOTE: 
-// Floating-point behavior matches standard library function recipf4.
-// 
-inline const Vector3 recipPerElem( Vector3 vec );
-
-// Compute the square root of a 3-D vector per element
-// NOTE: 
-// Floating-point behavior matches standard library function sqrtf4.
-// 
-inline const Vector3 sqrtPerElem( Vector3 vec );
-
-// Compute the reciprocal square root of a 3-D vector per element
-// NOTE: 
-// Floating-point behavior matches standard library function rsqrtf4.
-// 
-inline const Vector3 rsqrtPerElem( Vector3 vec );
-
-// Compute the absolute value of a 3-D vector per element
-// 
-inline const Vector3 absPerElem( Vector3 vec );
-
-// Copy sign from one 3-D vector to another, per element
-// 
-inline const Vector3 copySignPerElem( Vector3 vec0, Vector3 vec1 );
-
-// Maximum of two 3-D vectors per element
-// 
-inline const Vector3 maxPerElem( Vector3 vec0, Vector3 vec1 );
-
-// Minimum of two 3-D vectors per element
-// 
-inline const Vector3 minPerElem( Vector3 vec0, Vector3 vec1 );
-
-// Maximum element of a 3-D vector
-// 
-inline const floatInVec maxElem( Vector3 vec );
-
-// Minimum element of a 3-D vector
-// 
-inline const floatInVec minElem( Vector3 vec );
-
-// Compute the sum of all elements of a 3-D vector
-// 
-inline const floatInVec sum( Vector3 vec );
-
-// Compute the dot product of two 3-D vectors
-// 
-inline const floatInVec dot( Vector3 vec0, Vector3 vec1 );
-
-// Compute the square of the length of a 3-D vector
-// 
-inline const floatInVec lengthSqr( Vector3 vec );
-
-// Compute the length of a 3-D vector
-// 
-inline const floatInVec length( Vector3 vec );
-
-// Normalize a 3-D vector
-// NOTE: 
-// The result is unpredictable when all elements of vec are at or near zero.
-// 
-inline const Vector3 normalize( Vector3 vec );
-
-// Compute cross product of two 3-D vectors
-// 
-inline const Vector3 cross( Vector3 vec0, Vector3 vec1 );
-
-// Outer product of two 3-D vectors
-// 
-inline const Matrix3 outer( Vector3 vec0, Vector3 vec1 );
-
-// Pre-multiply a row vector by a 3x3 matrix
-// NOTE: 
-// Slower than column post-multiply.
-// 
-inline const Vector3 rowMul( Vector3 vec, const Matrix3 & mat );
-
-// Cross-product matrix of a 3-D vector
-// 
-inline const Matrix3 crossMatrix( Vector3 vec );
-
-// Create cross-product matrix and multiply
-// NOTE: 
-// Faster than separately creating a cross-product matrix and multiplying.
-// 
-inline const Matrix3 crossMatrixMul( Vector3 vec, const Matrix3 & mat );
-
-// Linear interpolation between two 3-D vectors
-// NOTE: 
-// Does not clamp t between 0 and 1.
-// 
-inline const Vector3 lerp( float t, Vector3 vec0, Vector3 vec1 );
-
-// Linear interpolation between two 3-D vectors (scalar data contained in vector data type)
-// NOTE: 
-// Does not clamp t between 0 and 1.
-// 
-inline const Vector3 lerp( floatInVec t, Vector3 vec0, Vector3 vec1 );
-
-// Spherical linear interpolation between two 3-D vectors
-// NOTE: 
-// The result is unpredictable if the vectors point in opposite directions.
-// Does not clamp t between 0 and 1.
-// 
-inline const Vector3 slerp( float t, Vector3 unitVec0, Vector3 unitVec1 );
-
-// Spherical linear interpolation between two 3-D vectors (scalar data contained in vector data type)
-// NOTE: 
-// The result is unpredictable if the vectors point in opposite directions.
-// Does not clamp t between 0 and 1.
-// 
-inline const Vector3 slerp( floatInVec t, Vector3 unitVec0, Vector3 unitVec1 );
-
-// Conditionally select between two 3-D vectors
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// However, the transfer of select1 to a VMX register may use more processing time than a branch.
-// Use the boolInVec version for better performance.
-// 
-inline const Vector3 select( Vector3 vec0, Vector3 vec1, bool select1 );
-
-// Conditionally select between two 3-D vectors (scalar data contained in vector data type)
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// 
-inline const Vector3 select( Vector3 vec0, Vector3 vec1, boolInVec select1 );
-
-// Store x, y, and z elements of a 3-D vector in the first three words of a quadword.
-// The value of the fourth word (the word with the highest address) remains unchanged
-// 
-inline void storeXYZ( Vector3 vec, vec_float4 * quad );
-
-// Load four three-float 3-D vectors, stored in three quadwords
-// 
-inline void loadXYZArray( Vector3 & vec0, Vector3 & vec1, Vector3 & vec2, Vector3 & vec3, const vec_float4 * threeQuads );
-
-// Store four 3-D vectors in three quadwords
-// 
-inline void storeXYZArray( Vector3 vec0, Vector3 vec1, Vector3 vec2, Vector3 vec3, vec_float4 * threeQuads );
-
-// Store eight 3-D vectors as half-floats
-// 
-inline void storeHalfFloats( Vector3 vec0, Vector3 vec1, Vector3 vec2, Vector3 vec3, Vector3 vec4, Vector3 vec5, Vector3 vec6, Vector3 vec7, vec_ushort8 * threeQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a 3-D vector
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( Vector3 vec );
-
-// Print a 3-D vector and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( Vector3 vec, const char * name );
-
-#endif
-
-// A 4-D vector in array-of-structures format
-//
-class Vector4
-{
-    vec_float4 mVec128;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Vector4( ) { };
-
-    // Construct a 4-D vector from x, y, z, and w elements
-    // 
-    inline Vector4( float x, float y, float z, float w );
-
-    // Construct a 4-D vector from x, y, z, and w elements (scalar data contained in vector data type)
-    // 
-    inline Vector4( floatInVec x, floatInVec y, floatInVec z, floatInVec w );
-
-    // Construct a 4-D vector from a 3-D vector and a scalar
-    // 
-    inline Vector4( Vector3 xyz, float w );
-
-    // Construct a 4-D vector from a 3-D vector and a scalar (scalar data contained in vector data type)
-    // 
-    inline Vector4( Vector3 xyz, floatInVec w );
-
-    // Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0
-    // 
-    explicit inline Vector4( Vector3 vec );
-
-    // Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1
-    // 
-    explicit inline Vector4( Point3 pnt );
-
-    // Copy elements from a quaternion into a 4-D vector
-    // 
-    explicit inline Vector4( Quat quat );
-
-    // Set all elements of a 4-D vector to the same scalar value
-    // 
-    explicit inline Vector4( float scalar );
-
-    // Set all elements of a 4-D vector to the same scalar value (scalar data contained in vector data type)
-    // 
-    explicit inline Vector4( floatInVec scalar );
-
-    // Set vector float data in a 4-D vector
-    // 
-    explicit inline Vector4( vec_float4 vf4 );
-
-    // Get vector float data from a 4-D vector
-    // 
-    inline vec_float4 get128( ) const;
-
-    // Assign one 4-D vector to another
-    // 
-    inline Vector4 & operator =( Vector4 vec );
-
-    // Set the x, y, and z elements of a 4-D vector
-    // NOTE: 
-    // This function does not change the w element.
-    // 
-    inline Vector4 & setXYZ( Vector3 vec );
-
-    // Get the x, y, and z elements of a 4-D vector
-    // 
-    inline const Vector3 getXYZ( ) const;
-
-    // Set the x element of a 4-D vector
-    // 
-    inline Vector4 & setX( float x );
-
-    // Set the y element of a 4-D vector
-    // 
-    inline Vector4 & setY( float y );
-
-    // Set the z element of a 4-D vector
-    // 
-    inline Vector4 & setZ( float z );
-
-    // Set the w element of a 4-D vector
-    // 
-    inline Vector4 & setW( float w );
-
-    // Set the x element of a 4-D vector (scalar data contained in vector data type)
-    // 
-    inline Vector4 & setX( floatInVec x );
-
-    // Set the y element of a 4-D vector (scalar data contained in vector data type)
-    // 
-    inline Vector4 & setY( floatInVec y );
-
-    // Set the z element of a 4-D vector (scalar data contained in vector data type)
-    // 
-    inline Vector4 & setZ( floatInVec z );
-
-    // Set the w element of a 4-D vector (scalar data contained in vector data type)
-    // 
-    inline Vector4 & setW( floatInVec w );
-
-    // Get the x element of a 4-D vector
-    // 
-    inline const floatInVec getX( ) const;
-
-    // Get the y element of a 4-D vector
-    // 
-    inline const floatInVec getY( ) const;
-
-    // Get the z element of a 4-D vector
-    // 
-    inline const floatInVec getZ( ) const;
-
-    // Get the w element of a 4-D vector
-    // 
-    inline const floatInVec getW( ) const;
-
-    // Set an x, y, z, or w element of a 4-D vector by index
-    // 
-    inline Vector4 & setElem( int idx, float value );
-
-    // Set an x, y, z, or w element of a 4-D vector by index (scalar data contained in vector data type)
-    // 
-    inline Vector4 & setElem( int idx, floatInVec value );
-
-    // Get an x, y, z, or w element of a 4-D vector by index
-    // 
-    inline const floatInVec getElem( int idx ) const;
-
-    // Subscripting operator to set or get an element
-    // 
-    inline VecIdx operator []( int idx );
-
-    // Subscripting operator to get an element
-    // 
-    inline const floatInVec operator []( int idx ) const;
-
-    // Add two 4-D vectors
-    // 
-    inline const Vector4 operator +( Vector4 vec ) const;
-
-    // Subtract a 4-D vector from another 4-D vector
-    // 
-    inline const Vector4 operator -( Vector4 vec ) const;
-
-    // Multiply a 4-D vector by a scalar
-    // 
-    inline const Vector4 operator *( float scalar ) const;
-
-    // Divide a 4-D vector by a scalar
-    // 
-    inline const Vector4 operator /( float scalar ) const;
-
-    // Multiply a 4-D vector by a scalar (scalar data contained in vector data type)
-    // 
-    inline const Vector4 operator *( floatInVec scalar ) const;
-
-    // Divide a 4-D vector by a scalar (scalar data contained in vector data type)
-    // 
-    inline const Vector4 operator /( floatInVec scalar ) const;
-
-    // Perform compound assignment and addition with a 4-D vector
-    // 
-    inline Vector4 & operator +=( Vector4 vec );
-
-    // Perform compound assignment and subtraction by a 4-D vector
-    // 
-    inline Vector4 & operator -=( Vector4 vec );
-
-    // Perform compound assignment and multiplication by a scalar
-    // 
-    inline Vector4 & operator *=( float scalar );
-
-    // Perform compound assignment and division by a scalar
-    // 
-    inline Vector4 & operator /=( float scalar );
-
-    // Perform compound assignment and multiplication by a scalar (scalar data contained in vector data type)
-    // 
-    inline Vector4 & operator *=( floatInVec scalar );
-
-    // Perform compound assignment and division by a scalar (scalar data contained in vector data type)
-    // 
-    inline Vector4 & operator /=( floatInVec scalar );
-
-    // Negate all elements of a 4-D vector
-    // 
-    inline const Vector4 operator -( ) const;
-
-    // Construct x axis
-    // 
-    static inline const Vector4 xAxis( );
-
-    // Construct y axis
-    // 
-    static inline const Vector4 yAxis( );
-
-    // Construct z axis
-    // 
-    static inline const Vector4 zAxis( );
-
-    // Construct w axis
-    // 
-    static inline const Vector4 wAxis( );
-
-};
-
-// Multiply a 4-D vector by a scalar
-// 
-inline const Vector4 operator *( float scalar, Vector4 vec );
-
-// Multiply a 4-D vector by a scalar (scalar data contained in vector data type)
-// 
-inline const Vector4 operator *( floatInVec scalar, Vector4 vec );
-
-// Multiply two 4-D vectors per element
-// 
-inline const Vector4 mulPerElem( Vector4 vec0, Vector4 vec1 );
-
-// Divide two 4-D vectors per element
-// NOTE: 
-// Floating-point behavior matches standard library function divf4.
-// 
-inline const Vector4 divPerElem( Vector4 vec0, Vector4 vec1 );
-
-// Compute the reciprocal of a 4-D vector per element
-// NOTE: 
-// Floating-point behavior matches standard library function recipf4.
-// 
-inline const Vector4 recipPerElem( Vector4 vec );
-
-// Compute the square root of a 4-D vector per element
-// NOTE: 
-// Floating-point behavior matches standard library function sqrtf4.
-// 
-inline const Vector4 sqrtPerElem( Vector4 vec );
-
-// Compute the reciprocal square root of a 4-D vector per element
-// NOTE: 
-// Floating-point behavior matches standard library function rsqrtf4.
-// 
-inline const Vector4 rsqrtPerElem( Vector4 vec );
-
-// Compute the absolute value of a 4-D vector per element
-// 
-inline const Vector4 absPerElem( Vector4 vec );
-
-// Copy sign from one 4-D vector to another, per element
-// 
-inline const Vector4 copySignPerElem( Vector4 vec0, Vector4 vec1 );
-
-// Maximum of two 4-D vectors per element
-// 
-inline const Vector4 maxPerElem( Vector4 vec0, Vector4 vec1 );
-
-// Minimum of two 4-D vectors per element
-// 
-inline const Vector4 minPerElem( Vector4 vec0, Vector4 vec1 );
-
-// Maximum element of a 4-D vector
-// 
-inline const floatInVec maxElem( Vector4 vec );
-
-// Minimum element of a 4-D vector
-// 
-inline const floatInVec minElem( Vector4 vec );
-
-// Compute the sum of all elements of a 4-D vector
-// 
-inline const floatInVec sum( Vector4 vec );
-
-// Compute the dot product of two 4-D vectors
-// 
-inline const floatInVec dot( Vector4 vec0, Vector4 vec1 );
-
-// Compute the square of the length of a 4-D vector
-// 
-inline const floatInVec lengthSqr( Vector4 vec );
-
-// Compute the length of a 4-D vector
-// 
-inline const floatInVec length( Vector4 vec );
-
-// Normalize a 4-D vector
-// NOTE: 
-// The result is unpredictable when all elements of vec are at or near zero.
-// 
-inline const Vector4 normalize( Vector4 vec );
-
-// Outer product of two 4-D vectors
-// 
-inline const Matrix4 outer( Vector4 vec0, Vector4 vec1 );
-
-// Linear interpolation between two 4-D vectors
-// NOTE: 
-// Does not clamp t between 0 and 1.
-// 
-inline const Vector4 lerp( float t, Vector4 vec0, Vector4 vec1 );
-
-// Linear interpolation between two 4-D vectors (scalar data contained in vector data type)
-// NOTE: 
-// Does not clamp t between 0 and 1.
-// 
-inline const Vector4 lerp( floatInVec t, Vector4 vec0, Vector4 vec1 );
-
-// Spherical linear interpolation between two 4-D vectors
-// NOTE: 
-// The result is unpredictable if the vectors point in opposite directions.
-// Does not clamp t between 0 and 1.
-// 
-inline const Vector4 slerp( float t, Vector4 unitVec0, Vector4 unitVec1 );
-
-// Spherical linear interpolation between two 4-D vectors (scalar data contained in vector data type)
-// NOTE: 
-// The result is unpredictable if the vectors point in opposite directions.
-// Does not clamp t between 0 and 1.
-// 
-inline const Vector4 slerp( floatInVec t, Vector4 unitVec0, Vector4 unitVec1 );
-
-// Conditionally select between two 4-D vectors
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// However, the transfer of select1 to a VMX register may use more processing time than a branch.
-// Use the boolInVec version for better performance.
-// 
-inline const Vector4 select( Vector4 vec0, Vector4 vec1, bool select1 );
-
-// Conditionally select between two 4-D vectors (scalar data contained in vector data type)
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// 
-inline const Vector4 select( Vector4 vec0, Vector4 vec1, boolInVec select1 );
-
-// Store four 4-D vectors as half-floats
-// 
-inline void storeHalfFloats( Vector4 vec0, Vector4 vec1, Vector4 vec2, Vector4 vec3, vec_ushort8 * twoQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a 4-D vector
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( Vector4 vec );
-
-// Print a 4-D vector and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( Vector4 vec, const char * name );
-
-#endif
-
-// A 3-D point in array-of-structures format
-//
-class Point3
-{
-    vec_float4 mVec128;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Point3( ) { };
-
-    // Construct a 3-D point from x, y, and z elements
-    // 
-    inline Point3( float x, float y, float z );
-
-    // Construct a 3-D point from x, y, and z elements (scalar data contained in vector data type)
-    // 
-    inline Point3( floatInVec x, floatInVec y, floatInVec z );
-
-    // Copy elements from a 3-D vector into a 3-D point
-    // 
-    explicit inline Point3( Vector3 vec );
-
-    // Set all elements of a 3-D point to the same scalar value
-    // 
-    explicit inline Point3( float scalar );
-
-    // Set all elements of a 3-D point to the same scalar value (scalar data contained in vector data type)
-    // 
-    explicit inline Point3( floatInVec scalar );
-
-    // Set vector float data in a 3-D point
-    // 
-    explicit inline Point3( vec_float4 vf4 );
-
-    // Get vector float data from a 3-D point
-    // 
-    inline vec_float4 get128( ) const;
-
-    // Assign one 3-D point to another
-    // 
-    inline Point3 & operator =( Point3 pnt );
-
-    // Set the x element of a 3-D point
-    // 
-    inline Point3 & setX( float x );
-
-    // Set the y element of a 3-D point
-    // 
-    inline Point3 & setY( float y );
-
-    // Set the z element of a 3-D point
-    // 
-    inline Point3 & setZ( float z );
-
-    // Set the x element of a 3-D point (scalar data contained in vector data type)
-    // 
-    inline Point3 & setX( floatInVec x );
-
-    // Set the y element of a 3-D point (scalar data contained in vector data type)
-    // 
-    inline Point3 & setY( floatInVec y );
-
-    // Set the z element of a 3-D point (scalar data contained in vector data type)
-    // 
-    inline Point3 & setZ( floatInVec z );
-
-    // Get the x element of a 3-D point
-    // 
-    inline const floatInVec getX( ) const;
-
-    // Get the y element of a 3-D point
-    // 
-    inline const floatInVec getY( ) const;
-
-    // Get the z element of a 3-D point
-    // 
-    inline const floatInVec getZ( ) const;
-
-    // Set an x, y, or z element of a 3-D point by index
-    // 
-    inline Point3 & setElem( int idx, float value );
-
-    // Set an x, y, or z element of a 3-D point by index (scalar data contained in vector data type)
-    // 
-    inline Point3 & setElem( int idx, floatInVec value );
-
-    // Get an x, y, or z element of a 3-D point by index
-    // 
-    inline const floatInVec getElem( int idx ) const;
-
-    // Subscripting operator to set or get an element
-    // 
-    inline VecIdx operator []( int idx );
-
-    // Subscripting operator to get an element
-    // 
-    inline const floatInVec operator []( int idx ) const;
-
-    // Subtract a 3-D point from another 3-D point
-    // 
-    inline const Vector3 operator -( Point3 pnt ) const;
-
-    // Add a 3-D point to a 3-D vector
-    // 
-    inline const Point3 operator +( Vector3 vec ) const;
-
-    // Subtract a 3-D vector from a 3-D point
-    // 
-    inline const Point3 operator -( Vector3 vec ) const;
-
-    // Perform compound assignment and addition with a 3-D vector
-    // 
-    inline Point3 & operator +=( Vector3 vec );
-
-    // Perform compound assignment and subtraction by a 3-D vector
-    // 
-    inline Point3 & operator -=( Vector3 vec );
-
-};
-
-// Multiply two 3-D points per element
-// 
-inline const Point3 mulPerElem( Point3 pnt0, Point3 pnt1 );
-
-// Divide two 3-D points per element
-// NOTE: 
-// Floating-point behavior matches standard library function divf4.
-// 
-inline const Point3 divPerElem( Point3 pnt0, Point3 pnt1 );
-
-// Compute the reciprocal of a 3-D point per element
-// NOTE: 
-// Floating-point behavior matches standard library function recipf4.
-// 
-inline const Point3 recipPerElem( Point3 pnt );
-
-// Compute the square root of a 3-D point per element
-// NOTE: 
-// Floating-point behavior matches standard library function sqrtf4.
-// 
-inline const Point3 sqrtPerElem( Point3 pnt );
-
-// Compute the reciprocal square root of a 3-D point per element
-// NOTE: 
-// Floating-point behavior matches standard library function rsqrtf4.
-// 
-inline const Point3 rsqrtPerElem( Point3 pnt );
-
-// Compute the absolute value of a 3-D point per element
-// 
-inline const Point3 absPerElem( Point3 pnt );
-
-// Copy sign from one 3-D point to another, per element
-// 
-inline const Point3 copySignPerElem( Point3 pnt0, Point3 pnt1 );
-
-// Maximum of two 3-D points per element
-// 
-inline const Point3 maxPerElem( Point3 pnt0, Point3 pnt1 );
-
-// Minimum of two 3-D points per element
-// 
-inline const Point3 minPerElem( Point3 pnt0, Point3 pnt1 );
-
-// Maximum element of a 3-D point
-// 
-inline const floatInVec maxElem( Point3 pnt );
-
-// Minimum element of a 3-D point
-// 
-inline const floatInVec minElem( Point3 pnt );
-
-// Compute the sum of all elements of a 3-D point
-// 
-inline const floatInVec sum( Point3 pnt );
-
-// Apply uniform scale to a 3-D point
-// 
-inline const Point3 scale( Point3 pnt, float scaleVal );
-
-// Apply uniform scale to a 3-D point (scalar data contained in vector data type)
-// 
-inline const Point3 scale( Point3 pnt, floatInVec scaleVal );
-
-// Apply non-uniform scale to a 3-D point
-// 
-inline const Point3 scale( Point3 pnt, Vector3 scaleVec );
-
-// Scalar projection of a 3-D point on a unit-length 3-D vector
-// 
-inline const floatInVec projection( Point3 pnt, Vector3 unitVec );
-
-// Compute the square of the distance of a 3-D point from the coordinate-system origin
-// 
-inline const floatInVec distSqrFromOrigin( Point3 pnt );
-
-// Compute the distance of a 3-D point from the coordinate-system origin
-// 
-inline const floatInVec distFromOrigin( Point3 pnt );
-
-// Compute the square of the distance between two 3-D points
-// 
-inline const floatInVec distSqr( Point3 pnt0, Point3 pnt1 );
-
-// Compute the distance between two 3-D points
-// 
-inline const floatInVec dist( Point3 pnt0, Point3 pnt1 );
-
-// Linear interpolation between two 3-D points
-// NOTE: 
-// Does not clamp t between 0 and 1.
-// 
-inline const Point3 lerp( float t, Point3 pnt0, Point3 pnt1 );
-
-// Linear interpolation between two 3-D points (scalar data contained in vector data type)
-// NOTE: 
-// Does not clamp t between 0 and 1.
-// 
-inline const Point3 lerp( floatInVec t, Point3 pnt0, Point3 pnt1 );
-
-// Conditionally select between two 3-D points
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// However, the transfer of select1 to a VMX register may use more processing time than a branch.
-// Use the boolInVec version for better performance.
-// 
-inline const Point3 select( Point3 pnt0, Point3 pnt1, bool select1 );
-
-// Conditionally select between two 3-D points (scalar data contained in vector data type)
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// 
-inline const Point3 select( Point3 pnt0, Point3 pnt1, boolInVec select1 );
-
-// Store x, y, and z elements of a 3-D point in the first three words of a quadword.
-// The value of the fourth word (the word with the highest address) remains unchanged
-// 
-inline void storeXYZ( Point3 pnt, vec_float4 * quad );
-
-// Load four three-float 3-D points, stored in three quadwords
-// 
-inline void loadXYZArray( Point3 & pnt0, Point3 & pnt1, Point3 & pnt2, Point3 & pnt3, const vec_float4 * threeQuads );
-
-// Store four 3-D points in three quadwords
-// 
-inline void storeXYZArray( Point3 pnt0, Point3 pnt1, Point3 pnt2, Point3 pnt3, vec_float4 * threeQuads );
-
-// Store eight 3-D points as half-floats
-// 
-inline void storeHalfFloats( Point3 pnt0, Point3 pnt1, Point3 pnt2, Point3 pnt3, Point3 pnt4, Point3 pnt5, Point3 pnt6, Point3 pnt7, vec_ushort8 * threeQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a 3-D point
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( Point3 pnt );
-
-// Print a 3-D point and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( Point3 pnt, const char * name );
-
-#endif
-
-// A quaternion in array-of-structures format
-//
-class Quat
-{
-    vec_float4 mVec128;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Quat( ) { };
-
-    // Construct a quaternion from x, y, z, and w elements
-    // 
-    inline Quat( float x, float y, float z, float w );
-
-    // Construct a quaternion from x, y, z, and w elements (scalar data contained in vector data type)
-    // 
-    inline Quat( floatInVec x, floatInVec y, floatInVec z, floatInVec w );
-
-    // Construct a quaternion from a 3-D vector and a scalar
-    // 
-    inline Quat( Vector3 xyz, float w );
-
-    // Construct a quaternion from a 3-D vector and a scalar (scalar data contained in vector data type)
-    // 
-    inline Quat( Vector3 xyz, floatInVec w );
-
-    // Copy elements from a 4-D vector into a quaternion
-    // 
-    explicit inline Quat( Vector4 vec );
-
-    // Convert a rotation matrix to a unit-length quaternion
-    // 
-    explicit inline Quat( const Matrix3 & rotMat );
-
-    // Set all elements of a quaternion to the same scalar value
-    // 
-    explicit inline Quat( float scalar );
-
-    // Set all elements of a quaternion to the same scalar value (scalar data contained in vector data type)
-    // 
-    explicit inline Quat( floatInVec scalar );
-
-    // Set vector float data in a quaternion
-    // 
-    explicit inline Quat( vec_float4 vf4 );
-
-    // Get vector float data from a quaternion
-    // 
-    inline vec_float4 get128( ) const;
-
-    // Assign one quaternion to another
-    // 
-    inline Quat & operator =( Quat quat );
-
-    // Set the x, y, and z elements of a quaternion
-    // NOTE: 
-    // This function does not change the w element.
-    // 
-    inline Quat & setXYZ( Vector3 vec );
-
-    // Get the x, y, and z elements of a quaternion
-    // 
-    inline const Vector3 getXYZ( ) const;
-
-    // Set the x element of a quaternion
-    // 
-    inline Quat & setX( float x );
-
-    // Set the y element of a quaternion
-    // 
-    inline Quat & setY( float y );
-
-    // Set the z element of a quaternion
-    // 
-    inline Quat & setZ( float z );
-
-    // Set the w element of a quaternion
-    // 
-    inline Quat & setW( float w );
-
-    // Set the x element of a quaternion (scalar data contained in vector data type)
-    // 
-    inline Quat & setX( floatInVec x );
-
-    // Set the y element of a quaternion (scalar data contained in vector data type)
-    // 
-    inline Quat & setY( floatInVec y );
-
-    // Set the z element of a quaternion (scalar data contained in vector data type)
-    // 
-    inline Quat & setZ( floatInVec z );
-
-    // Set the w element of a quaternion (scalar data contained in vector data type)
-    // 
-    inline Quat & setW( floatInVec w );
-
-    // Get the x element of a quaternion
-    // 
-    inline const floatInVec getX( ) const;
-
-    // Get the y element of a quaternion
-    // 
-    inline const floatInVec getY( ) const;
-
-    // Get the z element of a quaternion
-    // 
-    inline const floatInVec getZ( ) const;
-
-    // Get the w element of a quaternion
-    // 
-    inline const floatInVec getW( ) const;
-
-    // Set an x, y, z, or w element of a quaternion by index
-    // 
-    inline Quat & setElem( int idx, float value );
-
-    // Set an x, y, z, or w element of a quaternion by index (scalar data contained in vector data type)
-    // 
-    inline Quat & setElem( int idx, floatInVec value );
-
-    // Get an x, y, z, or w element of a quaternion by index
-    // 
-    inline const floatInVec getElem( int idx ) const;
-
-    // Subscripting operator to set or get an element
-    // 
-    inline VecIdx operator []( int idx );
-
-    // Subscripting operator to get an element
-    // 
-    inline const floatInVec operator []( int idx ) const;
-
-    // Add two quaternions
-    // 
-    inline const Quat operator +( Quat quat ) const;
-
-    // Subtract a quaternion from another quaternion
-    // 
-    inline const Quat operator -( Quat quat ) const;
-
-    // Multiply two quaternions
-    // 
-    inline const Quat operator *( Quat quat ) const;
-
-    // Multiply a quaternion by a scalar
-    // 
-    inline const Quat operator *( float scalar ) const;
-
-    // Divide a quaternion by a scalar
-    // 
-    inline const Quat operator /( float scalar ) const;
-
-    // Multiply a quaternion by a scalar (scalar data contained in vector data type)
-    // 
-    inline const Quat operator *( floatInVec scalar ) const;
-
-    // Divide a quaternion by a scalar (scalar data contained in vector data type)
-    // 
-    inline const Quat operator /( floatInVec scalar ) const;
-
-    // Perform compound assignment and addition with a quaternion
-    // 
-    inline Quat & operator +=( Quat quat );
-
-    // Perform compound assignment and subtraction by a quaternion
-    // 
-    inline Quat & operator -=( Quat quat );
-
-    // Perform compound assignment and multiplication by a quaternion
-    // 
-    inline Quat & operator *=( Quat quat );
-
-    // Perform compound assignment and multiplication by a scalar
-    // 
-    inline Quat & operator *=( float scalar );
-
-    // Perform compound assignment and division by a scalar
-    // 
-    inline Quat & operator /=( float scalar );
-
-    // Perform compound assignment and multiplication by a scalar (scalar data contained in vector data type)
-    // 
-    inline Quat & operator *=( floatInVec scalar );
-
-    // Perform compound assignment and division by a scalar (scalar data contained in vector data type)
-    // 
-    inline Quat & operator /=( floatInVec scalar );
-
-    // Negate all elements of a quaternion
-    // 
-    inline const Quat operator -( ) const;
-
-    // Construct an identity quaternion
-    // 
-    static inline const Quat identity( );
-
-    // Construct a quaternion to rotate between two unit-length 3-D vectors
-    // NOTE: 
-    // The result is unpredictable if unitVec0 and unitVec1 point in opposite directions.
-    // 
-    static inline const Quat rotation( Vector3 unitVec0, Vector3 unitVec1 );
-
-    // Construct a quaternion to rotate around a unit-length 3-D vector
-    // 
-    static inline const Quat rotation( float radians, Vector3 unitVec );
-
-    // Construct a quaternion to rotate around a unit-length 3-D vector (scalar data contained in vector data type)
-    // 
-    static inline const Quat rotation( floatInVec radians, Vector3 unitVec );
-
-    // Construct a quaternion to rotate around the x axis
-    // 
-    static inline const Quat rotationX( float radians );
-
-    // Construct a quaternion to rotate around the y axis
-    // 
-    static inline const Quat rotationY( float radians );
-
-    // Construct a quaternion to rotate around the z axis
-    // 
-    static inline const Quat rotationZ( float radians );
-
-    // Construct a quaternion to rotate around the x axis (scalar data contained in vector data type)
-    // 
-    static inline const Quat rotationX( floatInVec radians );
-
-    // Construct a quaternion to rotate around the y axis (scalar data contained in vector data type)
-    // 
-    static inline const Quat rotationY( floatInVec radians );
-
-    // Construct a quaternion to rotate around the z axis (scalar data contained in vector data type)
-    // 
-    static inline const Quat rotationZ( floatInVec radians );
-
-};
-
-// Multiply a quaternion by a scalar
-// 
-inline const Quat operator *( float scalar, Quat quat );
-
-// Multiply a quaternion by a scalar (scalar data contained in vector data type)
-// 
-inline const Quat operator *( floatInVec scalar, Quat quat );
-
-// Compute the conjugate of a quaternion
-// 
-inline const Quat conj( Quat quat );
-
-// Use a unit-length quaternion to rotate a 3-D vector
-// 
-inline const Vector3 rotate( Quat unitQuat, Vector3 vec );
-
-// Compute the dot product of two quaternions
-// 
-inline const floatInVec dot( Quat quat0, Quat quat1 );
-
-// Compute the norm of a quaternion
-// 
-inline const floatInVec norm( Quat quat );
-
-// Compute the length of a quaternion
-// 
-inline const floatInVec length( Quat quat );
-
-// Normalize a quaternion
-// NOTE: 
-// The result is unpredictable when all elements of quat are at or near zero.
-// 
-inline const Quat normalize( Quat quat );
-
-// Linear interpolation between two quaternions
-// NOTE: 
-// Does not clamp t between 0 and 1.
-// 
-inline const Quat lerp( float t, Quat quat0, Quat quat1 );
-
-// Linear interpolation between two quaternions (scalar data contained in vector data type)
-// NOTE: 
-// Does not clamp t between 0 and 1.
-// 
-inline const Quat lerp( floatInVec t, Quat quat0, Quat quat1 );
-
-// Spherical linear interpolation between two quaternions
-// NOTE: 
-// Interpolates along the shortest path between orientations.
-// Does not clamp t between 0 and 1.
-// 
-inline const Quat slerp( float t, Quat unitQuat0, Quat unitQuat1 );
-
-// Spherical linear interpolation between two quaternions (scalar data contained in vector data type)
-// NOTE: 
-// Interpolates along the shortest path between orientations.
-// Does not clamp t between 0 and 1.
-// 
-inline const Quat slerp( floatInVec t, Quat unitQuat0, Quat unitQuat1 );
-
-// Spherical quadrangle interpolation
-// 
-inline const Quat squad( float t, Quat unitQuat0, Quat unitQuat1, Quat unitQuat2, Quat unitQuat3 );
-
-// Spherical quadrangle interpolation (scalar data contained in vector data type)
-// 
-inline const Quat squad( floatInVec t, Quat unitQuat0, Quat unitQuat1, Quat unitQuat2, Quat unitQuat3 );
-
-// Conditionally select between two quaternions
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// However, the transfer of select1 to a VMX register may use more processing time than a branch.
-// Use the boolInVec version for better performance.
-// 
-inline const Quat select( Quat quat0, Quat quat1, bool select1 );
-
-// Conditionally select between two quaternions (scalar data contained in vector data type)
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// 
-inline const Quat select( Quat quat0, Quat quat1, boolInVec select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a quaternion
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( Quat quat );
-
-// Print a quaternion and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( Quat quat, const char * name );
-
-#endif
-
-// A 3x3 matrix in array-of-structures format
-//
-class Matrix3
-{
-    Vector3 mCol0;
-    Vector3 mCol1;
-    Vector3 mCol2;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Matrix3( ) { };
-
-    // Copy a 3x3 matrix
-    // 
-    inline Matrix3( const Matrix3 & mat );
-
-    // Construct a 3x3 matrix containing the specified columns
-    // 
-    inline Matrix3( Vector3 col0, Vector3 col1, Vector3 col2 );
-
-    // Construct a 3x3 rotation matrix from a unit-length quaternion
-    // 
-    explicit inline Matrix3( Quat unitQuat );
-
-    // Set all elements of a 3x3 matrix to the same scalar value
-    // 
-    explicit inline Matrix3( float scalar );
-
-    // Set all elements of a 3x3 matrix to the same scalar value (scalar data contained in vector data type)
-    // 
-    explicit inline Matrix3( floatInVec scalar );
-
-    // Assign one 3x3 matrix to another
-    // 
-    inline Matrix3 & operator =( const Matrix3 & mat );
-
-    // Set column 0 of a 3x3 matrix
-    // 
-    inline Matrix3 & setCol0( Vector3 col0 );
-
-    // Set column 1 of a 3x3 matrix
-    // 
-    inline Matrix3 & setCol1( Vector3 col1 );
-
-    // Set column 2 of a 3x3 matrix
-    // 
-    inline Matrix3 & setCol2( Vector3 col2 );
-
-    // Get column 0 of a 3x3 matrix
-    // 
-    inline const Vector3 getCol0( ) const;
-
-    // Get column 1 of a 3x3 matrix
-    // 
-    inline const Vector3 getCol1( ) const;
-
-    // Get column 2 of a 3x3 matrix
-    // 
-    inline const Vector3 getCol2( ) const;
-
-    // Set the column of a 3x3 matrix referred to by the specified index
-    // 
-    inline Matrix3 & setCol( int col, Vector3 vec );
-
-    // Set the row of a 3x3 matrix referred to by the specified index
-    // 
-    inline Matrix3 & setRow( int row, Vector3 vec );
-
-    // Get the column of a 3x3 matrix referred to by the specified index
-    // 
-    inline const Vector3 getCol( int col ) const;
-
-    // Get the row of a 3x3 matrix referred to by the specified index
-    // 
-    inline const Vector3 getRow( int row ) const;
-
-    // Subscripting operator to set or get a column
-    // 
-    inline Vector3 & operator []( int col );
-
-    // Subscripting operator to get a column
-    // 
-    inline const Vector3 operator []( int col ) const;
-
-    // Set the element of a 3x3 matrix referred to by column and row indices
-    // 
-    inline Matrix3 & setElem( int col, int row, float val );
-
-    // Set the element of a 3x3 matrix referred to by column and row indices (scalar data contained in vector data type)
-    // 
-    inline Matrix3 & setElem( int col, int row, floatInVec val );
-
-    // Get the element of a 3x3 matrix referred to by column and row indices
-    // 
-    inline const floatInVec getElem( int col, int row ) const;
-
-    // Add two 3x3 matrices
-    // 
-    inline const Matrix3 operator +( const Matrix3 & mat ) const;
-
-    // Subtract a 3x3 matrix from another 3x3 matrix
-    // 
-    inline const Matrix3 operator -( const Matrix3 & mat ) const;
-
-    // Negate all elements of a 3x3 matrix
-    // 
-    inline const Matrix3 operator -( ) const;
-
-    // Multiply a 3x3 matrix by a scalar
-    // 
-    inline const Matrix3 operator *( float scalar ) const;
-
-    // Multiply a 3x3 matrix by a scalar (scalar data contained in vector data type)
-    // 
-    inline const Matrix3 operator *( floatInVec scalar ) const;
-
-    // Multiply a 3x3 matrix by a 3-D vector
-    // 
-    inline const Vector3 operator *( Vector3 vec ) const;
-
-    // Multiply two 3x3 matrices
-    // 
-    inline const Matrix3 operator *( const Matrix3 & mat ) const;
-
-    // Perform compound assignment and addition with a 3x3 matrix
-    // 
-    inline Matrix3 & operator +=( const Matrix3 & mat );
-
-    // Perform compound assignment and subtraction by a 3x3 matrix
-    // 
-    inline Matrix3 & operator -=( const Matrix3 & mat );
-
-    // Perform compound assignment and multiplication by a scalar
-    // 
-    inline Matrix3 & operator *=( float scalar );
-
-    // Perform compound assignment and multiplication by a scalar (scalar data contained in vector data type)
-    // 
-    inline Matrix3 & operator *=( floatInVec scalar );
-
-    // Perform compound assignment and multiplication by a 3x3 matrix
-    // 
-    inline Matrix3 & operator *=( const Matrix3 & mat );
-
-    // Construct an identity 3x3 matrix
-    // 
-    static inline const Matrix3 identity( );
-
-    // Construct a 3x3 matrix to rotate around the x axis
-    // 
-    static inline const Matrix3 rotationX( float radians );
-
-    // Construct a 3x3 matrix to rotate around the y axis
-    // 
-    static inline const Matrix3 rotationY( float radians );
-
-    // Construct a 3x3 matrix to rotate around the z axis
-    // 
-    static inline const Matrix3 rotationZ( float radians );
-
-    // Construct a 3x3 matrix to rotate around the x axis (scalar data contained in vector data type)
-    // 
-    static inline const Matrix3 rotationX( floatInVec radians );
-
-    // Construct a 3x3 matrix to rotate around the y axis (scalar data contained in vector data type)
-    // 
-    static inline const Matrix3 rotationY( floatInVec radians );
-
-    // Construct a 3x3 matrix to rotate around the z axis (scalar data contained in vector data type)
-    // 
-    static inline const Matrix3 rotationZ( floatInVec radians );
-
-    // Construct a 3x3 matrix to rotate around the x, y, and z axes
-    // 
-    static inline const Matrix3 rotationZYX( Vector3 radiansXYZ );
-
-    // Construct a 3x3 matrix to rotate around a unit-length 3-D vector
-    // 
-    static inline const Matrix3 rotation( float radians, Vector3 unitVec );
-
-    // Construct a 3x3 matrix to rotate around a unit-length 3-D vector (scalar data contained in vector data type)
-    // 
-    static inline const Matrix3 rotation( floatInVec radians, Vector3 unitVec );
-
-    // Construct a rotation matrix from a unit-length quaternion
-    // 
-    static inline const Matrix3 rotation( Quat unitQuat );
-
-    // Construct a 3x3 matrix to perform scaling
-    // 
-    static inline const Matrix3 scale( Vector3 scaleVec );
-
-};
-// Multiply a 3x3 matrix by a scalar
-// 
-inline const Matrix3 operator *( float scalar, const Matrix3 & mat );
-
-// Multiply a 3x3 matrix by a scalar (scalar data contained in vector data type)
-// 
-inline const Matrix3 operator *( floatInVec scalar, const Matrix3 & mat );
-
-// Append (post-multiply) a scale transformation to a 3x3 matrix
-// NOTE: 
-// Faster than creating and multiplying a scale transformation matrix.
-// 
-inline const Matrix3 appendScale( const Matrix3 & mat, Vector3 scaleVec );
-
-// Prepend (pre-multiply) a scale transformation to a 3x3 matrix
-// NOTE: 
-// Faster than creating and multiplying a scale transformation matrix.
-// 
-inline const Matrix3 prependScale( Vector3 scaleVec, const Matrix3 & mat );
-
-// Multiply two 3x3 matrices per element
-// 
-inline const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 );
-
-// Compute the absolute value of a 3x3 matrix per element
-// 
-inline const Matrix3 absPerElem( const Matrix3 & mat );
-
-// Transpose of a 3x3 matrix
-// 
-inline const Matrix3 transpose( const Matrix3 & mat );
-
-// Compute the inverse of a 3x3 matrix
-// NOTE: 
-// Result is unpredictable when the determinant of mat is equal to or near 0.
-// 
-inline const Matrix3 inverse( const Matrix3 & mat );
-
-// Determinant of a 3x3 matrix
-// 
-inline const floatInVec determinant( const Matrix3 & mat );
-
-// Conditionally select between two 3x3 matrices
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// However, the transfer of select1 to a VMX register may use more processing time than a branch.
-// Use the boolInVec version for better performance.
-// 
-inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, bool select1 );
-
-// Conditionally select between two 3x3 matrices (scalar data contained in vector data type)
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// 
-inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, boolInVec select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a 3x3 matrix
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Matrix3 & mat );
-
-// Print a 3x3 matrix and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Matrix3 & mat, const char * name );
-
-#endif
-
-// A 4x4 matrix in array-of-structures format
-//
-class Matrix4
-{
-    Vector4 mCol0;
-    Vector4 mCol1;
-    Vector4 mCol2;
-    Vector4 mCol3;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Matrix4( ) { };
-
-    // Copy a 4x4 matrix
-    // 
-    inline Matrix4( const Matrix4 & mat );
-
-    // Construct a 4x4 matrix containing the specified columns
-    // 
-    inline Matrix4( Vector4 col0, Vector4 col1, Vector4 col2, Vector4 col3 );
-
-    // Construct a 4x4 matrix from a 3x4 transformation matrix
-    // 
-    explicit inline Matrix4( const Transform3 & mat );
-
-    // Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector
-    // 
-    inline Matrix4( const Matrix3 & mat, Vector3 translateVec );
-
-    // Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector
-    // 
-    inline Matrix4( Quat unitQuat, Vector3 translateVec );
-
-    // Set all elements of a 4x4 matrix to the same scalar value
-    // 
-    explicit inline Matrix4( float scalar );
-
-    // Set all elements of a 4x4 matrix to the same scalar value (scalar data contained in vector data type)
-    // 
-    explicit inline Matrix4( floatInVec scalar );
-
-    // Assign one 4x4 matrix to another
-    // 
-    inline Matrix4 & operator =( const Matrix4 & mat );
-
-    // Set the upper-left 3x3 submatrix
-    // NOTE: 
-    // This function does not change the bottom row elements.
-    // 
-    inline Matrix4 & setUpper3x3( const Matrix3 & mat3 );
-
-    // Get the upper-left 3x3 submatrix of a 4x4 matrix
-    // 
-    inline const Matrix3 getUpper3x3( ) const;
-
-    // Set translation component
-    // NOTE: 
-    // This function does not change the bottom row elements.
-    // 
-    inline Matrix4 & setTranslation( Vector3 translateVec );
-
-    // Get the translation component of a 4x4 matrix
-    // 
-    inline const Vector3 getTranslation( ) const;
-
-    // Set column 0 of a 4x4 matrix
-    // 
-    inline Matrix4 & setCol0( Vector4 col0 );
-
-    // Set column 1 of a 4x4 matrix
-    // 
-    inline Matrix4 & setCol1( Vector4 col1 );
-
-    // Set column 2 of a 4x4 matrix
-    // 
-    inline Matrix4 & setCol2( Vector4 col2 );
-
-    // Set column 3 of a 4x4 matrix
-    // 
-    inline Matrix4 & setCol3( Vector4 col3 );
-
-    // Get column 0 of a 4x4 matrix
-    // 
-    inline const Vector4 getCol0( ) const;
-
-    // Get column 1 of a 4x4 matrix
-    // 
-    inline const Vector4 getCol1( ) const;
-
-    // Get column 2 of a 4x4 matrix
-    // 
-    inline const Vector4 getCol2( ) const;
-
-    // Get column 3 of a 4x4 matrix
-    // 
-    inline const Vector4 getCol3( ) const;
-
-    // Set the column of a 4x4 matrix referred to by the specified index
-    // 
-    inline Matrix4 & setCol( int col, Vector4 vec );
-
-    // Set the row of a 4x4 matrix referred to by the specified index
-    // 
-    inline Matrix4 & setRow( int row, Vector4 vec );
-
-    // Get the column of a 4x4 matrix referred to by the specified index
-    // 
-    inline const Vector4 getCol( int col ) const;
-
-    // Get the row of a 4x4 matrix referred to by the specified index
-    // 
-    inline const Vector4 getRow( int row ) const;
-
-    // Subscripting operator to set or get a column
-    // 
-    inline Vector4 & operator []( int col );
-
-    // Subscripting operator to get a column
-    // 
-    inline const Vector4 operator []( int col ) const;
-
-    // Set the element of a 4x4 matrix referred to by column and row indices
-    // 
-    inline Matrix4 & setElem( int col, int row, float val );
-
-    // Set the element of a 4x4 matrix referred to by column and row indices (scalar data contained in vector data type)
-    // 
-    inline Matrix4 & setElem( int col, int row, floatInVec val );
-
-    // Get the element of a 4x4 matrix referred to by column and row indices
-    // 
-    inline const floatInVec getElem( int col, int row ) const;
-
-    // Add two 4x4 matrices
-    // 
-    inline const Matrix4 operator +( const Matrix4 & mat ) const;
-
-    // Subtract a 4x4 matrix from another 4x4 matrix
-    // 
-    inline const Matrix4 operator -( const Matrix4 & mat ) const;
-
-    // Negate all elements of a 4x4 matrix
-    // 
-    inline const Matrix4 operator -( ) const;
-
-    // Multiply a 4x4 matrix by a scalar
-    // 
-    inline const Matrix4 operator *( float scalar ) const;
-
-    // Multiply a 4x4 matrix by a scalar (scalar data contained in vector data type)
-    // 
-    inline const Matrix4 operator *( floatInVec scalar ) const;
-
-    // Multiply a 4x4 matrix by a 4-D vector
-    // 
-    inline const Vector4 operator *( Vector4 vec ) const;
-
-    // Multiply a 4x4 matrix by a 3-D vector
-    // 
-    inline const Vector4 operator *( Vector3 vec ) const;
-
-    // Multiply a 4x4 matrix by a 3-D point
-    // 
-    inline const Vector4 operator *( Point3 pnt ) const;
-
-    // Multiply two 4x4 matrices
-    // 
-    inline const Matrix4 operator *( const Matrix4 & mat ) const;
-
-    // Multiply a 4x4 matrix by a 3x4 transformation matrix
-    // 
-    inline const Matrix4 operator *( const Transform3 & tfrm ) const;
-
-    // Perform compound assignment and addition with a 4x4 matrix
-    // 
-    inline Matrix4 & operator +=( const Matrix4 & mat );
-
-    // Perform compound assignment and subtraction by a 4x4 matrix
-    // 
-    inline Matrix4 & operator -=( const Matrix4 & mat );
-
-    // Perform compound assignment and multiplication by a scalar
-    // 
-    inline Matrix4 & operator *=( float scalar );
-
-    // Perform compound assignment and multiplication by a scalar (scalar data contained in vector data type)
-    // 
-    inline Matrix4 & operator *=( floatInVec scalar );
-
-    // Perform compound assignment and multiplication by a 4x4 matrix
-    // 
-    inline Matrix4 & operator *=( const Matrix4 & mat );
-
-    // Perform compound assignment and multiplication by a 3x4 transformation matrix
-    // 
-    inline Matrix4 & operator *=( const Transform3 & tfrm );
-
-    // Construct an identity 4x4 matrix
-    // 
-    static inline const Matrix4 identity( );
-
-    // Construct a 4x4 matrix to rotate around the x axis
-    // 
-    static inline const Matrix4 rotationX( float radians );
-
-    // Construct a 4x4 matrix to rotate around the y axis
-    // 
-    static inline const Matrix4 rotationY( float radians );
-
-    // Construct a 4x4 matrix to rotate around the z axis
-    // 
-    static inline const Matrix4 rotationZ( float radians );
-
-    // Construct a 4x4 matrix to rotate around the x axis (scalar data contained in vector data type)
-    // 
-    static inline const Matrix4 rotationX( floatInVec radians );
-
-    // Construct a 4x4 matrix to rotate around the y axis (scalar data contained in vector data type)
-    // 
-    static inline const Matrix4 rotationY( floatInVec radians );
-
-    // Construct a 4x4 matrix to rotate around the z axis (scalar data contained in vector data type)
-    // 
-    static inline const Matrix4 rotationZ( floatInVec radians );
-
-    // Construct a 4x4 matrix to rotate around the x, y, and z axes
-    // 
-    static inline const Matrix4 rotationZYX( Vector3 radiansXYZ );
-
-    // Construct a 4x4 matrix to rotate around a unit-length 3-D vector
-    // 
-    static inline const Matrix4 rotation( float radians, Vector3 unitVec );
-
-    // Construct a 4x4 matrix to rotate around a unit-length 3-D vector (scalar data contained in vector data type)
-    // 
-    static inline const Matrix4 rotation( floatInVec radians, Vector3 unitVec );
-
-    // Construct a rotation matrix from a unit-length quaternion
-    // 
-    static inline const Matrix4 rotation( Quat unitQuat );
-
-    // Construct a 4x4 matrix to perform scaling
-    // 
-    static inline const Matrix4 scale( Vector3 scaleVec );
-
-    // Construct a 4x4 matrix to perform translation
-    // 
-    static inline const Matrix4 translation( Vector3 translateVec );
-
-    // Construct viewing matrix based on eye position, position looked at, and up direction
-    // 
-    static inline const Matrix4 lookAt( Point3 eyePos, Point3 lookAtPos, Vector3 upVec );
-
-    // Construct a perspective projection matrix
-    // 
-    static inline const Matrix4 perspective( float fovyRadians, float aspect, float zNear, float zFar );
-
-    // Construct a perspective projection matrix based on frustum
-    // 
-    static inline const Matrix4 frustum( float left, float right, float bottom, float top, float zNear, float zFar );
-
-    // Construct an orthographic projection matrix
-    // 
-    static inline const Matrix4 orthographic( float left, float right, float bottom, float top, float zNear, float zFar );
-
-};
-// Multiply a 4x4 matrix by a scalar
-// 
-inline const Matrix4 operator *( float scalar, const Matrix4 & mat );
-
-// Multiply a 4x4 matrix by a scalar (scalar data contained in vector data type)
-// 
-inline const Matrix4 operator *( floatInVec scalar, const Matrix4 & mat );
-
-// Append (post-multiply) a scale transformation to a 4x4 matrix
-// NOTE: 
-// Faster than creating and multiplying a scale transformation matrix.
-// 
-inline const Matrix4 appendScale( const Matrix4 & mat, Vector3 scaleVec );
-
-// Prepend (pre-multiply) a scale transformation to a 4x4 matrix
-// NOTE: 
-// Faster than creating and multiplying a scale transformation matrix.
-// 
-inline const Matrix4 prependScale( Vector3 scaleVec, const Matrix4 & mat );
-
-// Multiply two 4x4 matrices per element
-// 
-inline const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 );
-
-// Compute the absolute value of a 4x4 matrix per element
-// 
-inline const Matrix4 absPerElem( const Matrix4 & mat );
-
-// Transpose of a 4x4 matrix
-// 
-inline const Matrix4 transpose( const Matrix4 & mat );
-
-// Compute the inverse of a 4x4 matrix
-// NOTE: 
-// Result is unpredictable when the determinant of mat is equal to or near 0.
-// 
-inline const Matrix4 inverse( const Matrix4 & mat );
-
-// Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix
-// NOTE: 
-// This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.  The result is unpredictable when the determinant of mat is equal to or near 0.
-// 
-inline const Matrix4 affineInverse( const Matrix4 & mat );
-
-// Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix
-// NOTE: 
-// This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.
-// 
-inline const Matrix4 orthoInverse( const Matrix4 & mat );
-
-// Determinant of a 4x4 matrix
-// 
-inline const floatInVec determinant( const Matrix4 & mat );
-
-// Conditionally select between two 4x4 matrices
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// However, the transfer of select1 to a VMX register may use more processing time than a branch.
-// Use the boolInVec version for better performance.
-// 
-inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, bool select1 );
-
-// Conditionally select between two 4x4 matrices (scalar data contained in vector data type)
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// 
-inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, boolInVec select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a 4x4 matrix
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Matrix4 & mat );
-
-// Print a 4x4 matrix and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Matrix4 & mat, const char * name );
-
-#endif
-
-// A 3x4 transformation matrix in array-of-structures format
-//
-class Transform3
-{
-    Vector3 mCol0;
-    Vector3 mCol1;
-    Vector3 mCol2;
-    Vector3 mCol3;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Transform3( ) { };
-
-    // Copy a 3x4 transformation matrix
-    // 
-    inline Transform3( const Transform3 & tfrm );
-
-    // Construct a 3x4 transformation matrix containing the specified columns
-    // 
-    inline Transform3( Vector3 col0, Vector3 col1, Vector3 col2, Vector3 col3 );
-
-    // Construct a 3x4 transformation matrix from a 3x3 matrix and a 3-D vector
-    // 
-    inline Transform3( const Matrix3 & tfrm, Vector3 translateVec );
-
-    // Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector
-    // 
-    inline Transform3( Quat unitQuat, Vector3 translateVec );
-
-    // Set all elements of a 3x4 transformation matrix to the same scalar value
-    // 
-    explicit inline Transform3( float scalar );
-
-    // Set all elements of a 3x4 transformation matrix to the same scalar value (scalar data contained in vector data type)
-    // 
-    explicit inline Transform3( floatInVec scalar );
-
-    // Assign one 3x4 transformation matrix to another
-    // 
-    inline Transform3 & operator =( const Transform3 & tfrm );
-
-    // Set the upper-left 3x3 submatrix
-    // 
-    inline Transform3 & setUpper3x3( const Matrix3 & mat3 );
-
-    // Get the upper-left 3x3 submatrix of a 3x4 transformation matrix
-    // 
-    inline const Matrix3 getUpper3x3( ) const;
-
-    // Set translation component
-    // 
-    inline Transform3 & setTranslation( Vector3 translateVec );
-
-    // Get the translation component of a 3x4 transformation matrix
-    // 
-    inline const Vector3 getTranslation( ) const;
-
-    // Set column 0 of a 3x4 transformation matrix
-    // 
-    inline Transform3 & setCol0( Vector3 col0 );
-
-    // Set column 1 of a 3x4 transformation matrix
-    // 
-    inline Transform3 & setCol1( Vector3 col1 );
-
-    // Set column 2 of a 3x4 transformation matrix
-    // 
-    inline Transform3 & setCol2( Vector3 col2 );
-
-    // Set column 3 of a 3x4 transformation matrix
-    // 
-    inline Transform3 & setCol3( Vector3 col3 );
-
-    // Get column 0 of a 3x4 transformation matrix
-    // 
-    inline const Vector3 getCol0( ) const;
-
-    // Get column 1 of a 3x4 transformation matrix
-    // 
-    inline const Vector3 getCol1( ) const;
-
-    // Get column 2 of a 3x4 transformation matrix
-    // 
-    inline const Vector3 getCol2( ) const;
-
-    // Get column 3 of a 3x4 transformation matrix
-    // 
-    inline const Vector3 getCol3( ) const;
-
-    // Set the column of a 3x4 transformation matrix referred to by the specified index
-    // 
-    inline Transform3 & setCol( int col, Vector3 vec );
-
-    // Set the row of a 3x4 transformation matrix referred to by the specified index
-    // 
-    inline Transform3 & setRow( int row, Vector4 vec );
-
-    // Get the column of a 3x4 transformation matrix referred to by the specified index
-    // 
-    inline const Vector3 getCol( int col ) const;
-
-    // Get the row of a 3x4 transformation matrix referred to by the specified index
-    // 
-    inline const Vector4 getRow( int row ) const;
-
-    // Subscripting operator to set or get a column
-    // 
-    inline Vector3 & operator []( int col );
-
-    // Subscripting operator to get a column
-    // 
-    inline const Vector3 operator []( int col ) const;
-
-    // Set the element of a 3x4 transformation matrix referred to by column and row indices
-    // 
-    inline Transform3 & setElem( int col, int row, float val );
-
-    // Set the element of a 3x4 transformation matrix referred to by column and row indices (scalar data contained in vector data type)
-    // 
-    inline Transform3 & setElem( int col, int row, floatInVec val );
-
-    // Get the element of a 3x4 transformation matrix referred to by column and row indices
-    // 
-    inline const floatInVec getElem( int col, int row ) const;
-
-    // Multiply a 3x4 transformation matrix by a 3-D vector
-    // 
-    inline const Vector3 operator *( Vector3 vec ) const;
-
-    // Multiply a 3x4 transformation matrix by a 3-D point
-    // 
-    inline const Point3 operator *( Point3 pnt ) const;
-
-    // Multiply two 3x4 transformation matrices
-    // 
-    inline const Transform3 operator *( const Transform3 & tfrm ) const;
-
-    // Perform compound assignment and multiplication by a 3x4 transformation matrix
-    // 
-    inline Transform3 & operator *=( const Transform3 & tfrm );
-
-    // Construct an identity 3x4 transformation matrix
-    // 
-    static inline const Transform3 identity( );
-
-    // Construct a 3x4 transformation matrix to rotate around the x axis
-    // 
-    static inline const Transform3 rotationX( float radians );
-
-    // Construct a 3x4 transformation matrix to rotate around the y axis
-    // 
-    static inline const Transform3 rotationY( float radians );
-
-    // Construct a 3x4 transformation matrix to rotate around the z axis
-    // 
-    static inline const Transform3 rotationZ( float radians );
-
-    // Construct a 3x4 transformation matrix to rotate around the x axis (scalar data contained in vector data type)
-    // 
-    static inline const Transform3 rotationX( floatInVec radians );
-
-    // Construct a 3x4 transformation matrix to rotate around the y axis (scalar data contained in vector data type)
-    // 
-    static inline const Transform3 rotationY( floatInVec radians );
-
-    // Construct a 3x4 transformation matrix to rotate around the z axis (scalar data contained in vector data type)
-    // 
-    static inline const Transform3 rotationZ( floatInVec radians );
-
-    // Construct a 3x4 transformation matrix to rotate around the x, y, and z axes
-    // 
-    static inline const Transform3 rotationZYX( Vector3 radiansXYZ );
-
-    // Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector
-    // 
-    static inline const Transform3 rotation( float radians, Vector3 unitVec );
-
-    // Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector (scalar data contained in vector data type)
-    // 
-    static inline const Transform3 rotation( floatInVec radians, Vector3 unitVec );
-
-    // Construct a rotation matrix from a unit-length quaternion
-    // 
-    static inline const Transform3 rotation( Quat unitQuat );
-
-    // Construct a 3x4 transformation matrix to perform scaling
-    // 
-    static inline const Transform3 scale( Vector3 scaleVec );
-
-    // Construct a 3x4 transformation matrix to perform translation
-    // 
-    static inline const Transform3 translation( Vector3 translateVec );
-
-};
-// Append (post-multiply) a scale transformation to a 3x4 transformation matrix
-// NOTE: 
-// Faster than creating and multiplying a scale transformation matrix.
-// 
-inline const Transform3 appendScale( const Transform3 & tfrm, Vector3 scaleVec );
-
-// Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix
-// NOTE: 
-// Faster than creating and multiplying a scale transformation matrix.
-// 
-inline const Transform3 prependScale( Vector3 scaleVec, const Transform3 & tfrm );
-
-// Multiply two 3x4 transformation matrices per element
-// 
-inline const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 );
-
-// Compute the absolute value of a 3x4 transformation matrix per element
-// 
-inline const Transform3 absPerElem( const Transform3 & tfrm );
-
-// Inverse of a 3x4 transformation matrix
-// NOTE: 
-// Result is unpredictable when the determinant of the left 3x3 submatrix is equal to or near 0.
-// 
-inline const Transform3 inverse( const Transform3 & tfrm );
-
-// Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix
-// NOTE: 
-// This can be used to achieve better performance than a general inverse when the specified 3x4 transformation matrix meets the given restrictions.
-// 
-inline const Transform3 orthoInverse( const Transform3 & tfrm );
-
-// Conditionally select between two 3x4 transformation matrices
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// However, the transfer of select1 to a VMX register may use more processing time than a branch.
-// Use the boolInVec version for better performance.
-// 
-inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, bool select1 );
-
-// Conditionally select between two 3x4 transformation matrices (scalar data contained in vector data type)
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// 
-inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, boolInVec select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a 3x4 transformation matrix
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Transform3 & tfrm );
-
-// Print a 3x4 transformation matrix and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Transform3 & tfrm, const char * name );
-
-#endif
-
-} // namespace Aos
-} // namespace Vectormath
-
-#include "vec_aos.h"
-#include "quat_aos.h"
-#include "mat_aos.h"
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_AOS_CPP_PPU_H
+#define _VECTORMATH_AOS_CPP_PPU_H
+
+#include <math.h>
+#include <altivec.h>
+#include "vecidx_aos.h"
+#include "floatInVec.h"
+#include "boolInVec.h"
+
+#ifdef _VECTORMATH_DEBUG
+#include <stdio.h>
+#endif
+
+namespace Vectormath {
+
+namespace Aos {
+
+//-----------------------------------------------------------------------------
+// Forward Declarations
+//
+
+class Vector3;
+class Vector4;
+class Point3;
+class Quat;
+class Matrix3;
+class Matrix4;
+class Transform3;
+
+// A 3-D vector in array-of-structures format
+// Only the default constructor has a body here; every other member is only declared in this class.
+class Vector3
+{
+    vec_float4 mVec128;
+
+public:
+    // Default constructor; does no initialization
+    // NOTE: the body is empty, so nothing is assigned to mVec128.
+    inline Vector3( ) { };
+
+    // Construct a 3-D vector from x, y, and z elements
+    // 
+    inline Vector3( float x, float y, float z );
+
+    // Construct a 3-D vector from x, y, and z elements (scalar data contained in vector data type)
+    // 
+    inline Vector3( floatInVec x, floatInVec y, floatInVec z );
+
+    // Copy elements from a 3-D point into a 3-D vector
+    // Declared explicit, so a Point3 is never converted to a Vector3 implicitly.
+    explicit inline Vector3( Point3 pnt );
+
+    // Set all elements of a 3-D vector to the same scalar value
+    // Declared explicit, so a float is never converted to a Vector3 implicitly.
+    explicit inline Vector3( float scalar );
+
+    // Set all elements of a 3-D vector to the same scalar value (scalar data contained in vector data type)
+    // Declared explicit, so a floatInVec is never converted to a Vector3 implicitly.
+    explicit inline Vector3( floatInVec scalar );
+
+    // Set vector float data in a 3-D vector
+    // Declared explicit, so a vec_float4 is never converted to a Vector3 implicitly.
+    explicit inline Vector3( vec_float4 vf4 );
+
+    // Get vector float data from a 3-D vector
+    // Const member; the vec_float4 is returned by value.
+    inline vec_float4 get128( ) const;
+
+    // Assign one 3-D vector to another
+    // Takes the source by value; returns a Vector3 reference (presumably *this - confirm against the definition).
+    inline Vector3 & operator =( Vector3 vec );
+
+    // Set the x element of a 3-D vector
+    // Returns a Vector3 reference (presumably *this, so calls can be chained - confirm against the definition).
+    inline Vector3 & setX( float x );
+
+    // Set the y element of a 3-D vector
+    // 
+    inline Vector3 & setY( float y );
+
+    // Set the z element of a 3-D vector
+    // 
+    inline Vector3 & setZ( float z );
+
+    // Set the x element of a 3-D vector (scalar data contained in vector data type)
+    // Overload of setX( float ) that takes the new value as a floatInVec.
+    inline Vector3 & setX( floatInVec x );
+
+    // Set the y element of a 3-D vector (scalar data contained in vector data type)
+    // Overload of setY( float ) that takes the new value as a floatInVec.
+    inline Vector3 & setY( floatInVec y );
+
+    // Set the z element of a 3-D vector (scalar data contained in vector data type)
+    // Overload of setZ( float ) that takes the new value as a floatInVec.
+    inline Vector3 & setZ( floatInVec z );
+
+    // Get the x element of a 3-D vector
+    // Const member; the element is returned as a floatInVec by value, not as a plain float.
+    inline const floatInVec getX( ) const;
+
+    // Get the y element of a 3-D vector
+    // 
+    inline const floatInVec getY( ) const;
+
+    // Get the z element of a 3-D vector
+    // 
+    inline const floatInVec getZ( ) const;
+
+    // Set an x, y, or z element of a 3-D vector by index
+    // NOTE(review): idx is presumably 0 for x, 1 for y, 2 for z - confirm against the definition.
+    inline Vector3 & setElem( int idx, float value );
+
+    // Set an x, y, or z element of a 3-D vector by index (scalar data contained in vector data type)
+    // 
+    inline Vector3 & setElem( int idx, floatInVec value );
+
+    // Get an x, y, or z element of a 3-D vector by index
+    // 
+    inline const floatInVec getElem( int idx ) const;
+
+    // Subscripting operator to set or get an element
+    // Returns a VecIdx by value, not a float reference (VecIdx presumably comes from vecidx_aos.h - confirm).
+    inline VecIdx operator []( int idx );
+
+    // Subscripting operator to get an element
+    // Const overload; the element is returned as a floatInVec by value.
+    inline const floatInVec operator []( int idx ) const;
+
+    // Add two 3-D vectors
+    // Const member taking vec by value; the sum is returned as a new Vector3.
+    inline const Vector3 operator +( Vector3 vec ) const;
+
+    // Subtract a 3-D vector from another 3-D vector
+    // 
+    inline const Vector3 operator -( Vector3 vec ) const;
+
+    // Add a 3-D vector to a 3-D point
+    // Note that the result type is Point3, not Vector3.
+    inline const Point3 operator +( Point3 pnt ) const;
+
+    // Multiply a 3-D vector by a scalar
+    // 
+    inline const Vector3 operator *( float scalar ) const;
+
+    // Divide a 3-D vector by a scalar
+    // 
+    inline const Vector3 operator /( float scalar ) const;
+
+    // Multiply a 3-D vector by a scalar (scalar data contained in vector data type)
+    // 
+    inline const Vector3 operator *( floatInVec scalar ) const;
+
+    // Divide a 3-D vector by a scalar (scalar data contained in vector data type)
+    // 
+    inline const Vector3 operator /( floatInVec scalar ) const;
+
+    // Perform compound assignment and addition with a 3-D vector
+    // Non-const member; returns a Vector3 reference (presumably *this - confirm against the definition).
+    inline Vector3 & operator +=( Vector3 vec );
+
+    // Perform compound assignment and subtraction by a 3-D vector
+    // 
+    inline Vector3 & operator -=( Vector3 vec );
+
+    // Perform compound assignment and multiplication by a scalar
+    // 
+    inline Vector3 & operator *=( float scalar );
+
+    // Perform compound assignment and division by a scalar
+    // 
+    inline Vector3 & operator /=( float scalar );
+
+    // Perform compound assignment and multiplication by a scalar (scalar data contained in vector data type)
+    // 
+    inline Vector3 & operator *=( floatInVec scalar );
+
+    // Perform compound assignment and division by a scalar (scalar data contained in vector data type)
+    // 
+    inline Vector3 & operator /=( floatInVec scalar );
+
+    // Negate all elements of a 3-D vector
+    // Unary minus: takes no argument, unlike the binary operator -( Vector3 ) declared in this class.
+    inline const Vector3 operator -( ) const;
+
+    // Construct x axis
+    // Static member returning a Vector3 by value; presumably (1, 0, 0) - confirm against the definition.
+    static inline const Vector3 xAxis( );
+
+    // Construct y axis
+    // Static member returning a Vector3 by value; presumably (0, 1, 0) - confirm against the definition.
+    static inline const Vector3 yAxis( );
+
+    // Construct z axis
+    // Static member returning a Vector3 by value; presumably (0, 0, 1) - confirm against the definition.
+    static inline const Vector3 zAxis( );
+
+};
+
+// Multiply a 3-D vector by a scalar
+// Non-member overload: the scalar is the left-hand operand (scalar * vec).
+inline const Vector3 operator *( float scalar, Vector3 vec );
+
+// Multiply a 3-D vector by a scalar (scalar data contained in vector data type)
+// Non-member overload: the scalar is the left-hand operand (scalar * vec).
+inline const Vector3 operator *( floatInVec scalar, Vector3 vec );
+
+// Multiply two 3-D vectors per element
+//
+inline const Vector3 mulPerElem( Vector3 vec0, Vector3 vec1 );
+
+// Divide two 3-D vectors per element
+// NOTE:
+// Floating-point behavior matches standard library function divf4.
+// NOTE(review): presumably vec0 is the dividend and vec1 the divisor -- confirm in the implementation.
+inline const Vector3 divPerElem( Vector3 vec0, Vector3 vec1 );
+
+// Compute the reciprocal of a 3-D vector per element
+// NOTE:
+// Floating-point behavior matches standard library function recipf4.
+//
+inline const Vector3 recipPerElem( Vector3 vec );
+
+// Compute the square root of a 3-D vector per element
+// NOTE:
+// Floating-point behavior matches standard library function sqrtf4.
+//
+inline const Vector3 sqrtPerElem( Vector3 vec );
+
+// Compute the reciprocal square root of a 3-D vector per element
+// NOTE:
+// Floating-point behavior matches standard library function rsqrtf4.
+//
+inline const Vector3 rsqrtPerElem( Vector3 vec );
+
+// Compute the absolute value of a 3-D vector per element
+//
+inline const Vector3 absPerElem( Vector3 vec );
+
+// Copy sign from one 3-D vector to another, per element
+// NOTE(review): the declaration does not show which argument supplies the sign -- confirm in the implementation.
+inline const Vector3 copySignPerElem( Vector3 vec0, Vector3 vec1 );
+
+// Maximum of two 3-D vectors per element
+//
+inline const Vector3 maxPerElem( Vector3 vec0, Vector3 vec1 );
+
+// Minimum of two 3-D vectors per element
+//
+inline const Vector3 minPerElem( Vector3 vec0, Vector3 vec1 );
+
+// Maximum element of a 3-D vector
+// The result is returned as a floatInVec (scalar data contained in vector data type).
+inline const floatInVec maxElem( Vector3 vec );
+
+// Minimum element of a 3-D vector
+// The result is returned as a floatInVec (scalar data contained in vector data type).
+inline const floatInVec minElem( Vector3 vec );
+
+// Compute the sum of all elements of a 3-D vector
+//
+inline const floatInVec sum( Vector3 vec );
+
+// Compute the dot product of two 3-D vectors
+// The result is returned as a floatInVec (scalar data contained in vector data type).
+inline const floatInVec dot( Vector3 vec0, Vector3 vec1 );
+
+// Compute the square of the length of a 3-D vector
+//
+inline const floatInVec lengthSqr( Vector3 vec );
+
+// Compute the length of a 3-D vector
+//
+inline const floatInVec length( Vector3 vec );
+
+// Normalize a 3-D vector
+// NOTE:
+// The result is unpredictable when all elements of vec are at or near zero.
+//
+inline const Vector3 normalize( Vector3 vec );
+
+// Compute cross product of two 3-D vectors
+//
+inline const Vector3 cross( Vector3 vec0, Vector3 vec1 );
+
+// Outer product of two 3-D vectors
+// The result is a 3x3 matrix (Matrix3).
+inline const Matrix3 outer( Vector3 vec0, Vector3 vec1 );
+
+// Pre-multiply a row vector by a 3x3 matrix
+// NOTE:
+// Slower than column post-multiply.
+// The matrix is taken by const reference; the vector is taken by value.
+inline const Vector3 rowMul( Vector3 vec, const Matrix3 & mat );
+
+// Cross-product matrix of a 3-D vector
+//
+inline const Matrix3 crossMatrix( Vector3 vec );
+
+// Create cross-product matrix and multiply
+// NOTE:
+// Faster than separately creating a cross-product matrix and multiplying.
+// The matrix is taken by const reference; the vector is taken by value.
+inline const Matrix3 crossMatrixMul( Vector3 vec, const Matrix3 & mat );
+
+// Linear interpolation between two 3-D vectors
+// NOTE:
+// Does not clamp t between 0 and 1.
+// NOTE(review): presumably t = 0 yields vec0 and t = 1 yields vec1 -- confirm in the implementation.
+inline const Vector3 lerp( float t, Vector3 vec0, Vector3 vec1 );
+
+// Linear interpolation between two 3-D vectors (scalar data contained in vector data type)
+// NOTE:
+// Does not clamp t between 0 and 1.
+// NOTE(review): presumably t = 0 yields vec0 and t = 1 yields vec1 -- confirm in the implementation.
+inline const Vector3 lerp( floatInVec t, Vector3 vec0, Vector3 vec1 );
+
+// Spherical linear interpolation between two 3-D vectors
+// NOTE:
+// The result is unpredictable if the vectors point in opposite directions.
+// Does not clamp t between 0 and 1.
+// NOTE(review): the parameter names suggest both inputs are expected to be unit-length -- confirm.
+inline const Vector3 slerp( float t, Vector3 unitVec0, Vector3 unitVec1 );
+
+// Spherical linear interpolation between two 3-D vectors (scalar data contained in vector data type)
+// NOTE:
+// The result is unpredictable if the vectors point in opposite directions.
+// Does not clamp t between 0 and 1.
+// NOTE(review): the parameter names suggest both inputs are expected to be unit-length -- confirm.
+inline const Vector3 slerp( floatInVec t, Vector3 unitVec0, Vector3 unitVec1 );
+
+// Conditionally select between two 3-D vectors
+// NOTE:
+// This function uses a conditional select instruction to avoid a branch.
+// However, the transfer of select1 to a VMX register may use more processing time than a branch.
+// Use the boolInVec version for better performance.
+// NOTE(review): presumably yields vec1 when select1 is true and vec0 otherwise -- confirm in the implementation.
+inline const Vector3 select( Vector3 vec0, Vector3 vec1, bool select1 );
+
+// Conditionally select between two 3-D vectors (scalar data contained in vector data type)
+// NOTE:
+// This function uses a conditional select instruction to avoid a branch.
+// NOTE(review): presumably yields vec1 when select1 is true and vec0 otherwise -- confirm in the implementation.
+inline const Vector3 select( Vector3 vec0, Vector3 vec1, boolInVec select1 );
+
+// Store x, y, and z elements of a 3-D vector in the first three words of a quadword.
+// The value of the fourth word (the word with the highest address) remains unchanged.
+// The destination quadword is passed by pointer (quad).
+inline void storeXYZ( Vector3 vec, vec_float4 * quad );
+
+// Load four three-float 3-D vectors, stored in three quadwords
+// The four results are written through the reference parameters vec0..vec3; threeQuads is read-only.
+inline void loadXYZArray( Vector3 & vec0, Vector3 & vec1, Vector3 & vec2, Vector3 & vec3, const vec_float4 * threeQuads );
+
+// Store four 3-D vectors in three quadwords
+// The destination is passed by pointer (threeQuads).
+inline void storeXYZArray( Vector3 vec0, Vector3 vec1, Vector3 vec2, Vector3 vec3, vec_float4 * threeQuads );
+
+// Store eight 3-D vectors as half-floats
+// The destination is passed as a vec_ushort8 pointer (threeQuads).
+inline void storeHalfFloats( Vector3 vec0, Vector3 vec1, Vector3 vec2, Vector3 vec3, Vector3 vec4, Vector3 vec5, Vector3 vec6, Vector3 vec7, vec_ushort8 * threeQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 3-D vector
+// NOTE:
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+//
+inline void print( Vector3 vec );
+
+// Print a 3-D vector and an associated string identifier
+// NOTE:
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// The identifier is passed as a C string (name).
+inline void print( Vector3 vec, const char * name );
+
+#endif
+
+// A 4-D vector in array-of-structures format
+// All state is held in a single vec_float4 data member (mVec128).
+class Vector4
+{
+    vec_float4 mVec128;
+
+public:
+    // Default constructor; does no initialization
+    // The body is empty, so mVec128 is not assigned a value here.
+    inline Vector4( ) { }
+
+    // Construct a 4-D vector from x, y, z, and w elements
+    //
+    inline Vector4( float x, float y, float z, float w );
+
+    // Construct a 4-D vector from x, y, z, and w elements (scalar data contained in vector data type)
+    //
+    inline Vector4( floatInVec x, floatInVec y, floatInVec z, floatInVec w );
+
+    // Construct a 4-D vector from a 3-D vector and a scalar
+    //
+    inline Vector4( Vector3 xyz, float w );
+
+    // Construct a 4-D vector from a 3-D vector and a scalar (scalar data contained in vector data type)
+    //
+    inline Vector4( Vector3 xyz, floatInVec w );
+
+    // Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0
+    // Declared explicit, so a Vector3 is not implicitly converted to a Vector4.
+    explicit inline Vector4( Vector3 vec );
+
+    // Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1
+    // Declared explicit, so a Point3 is not implicitly converted to a Vector4.
+    explicit inline Vector4( Point3 pnt );
+
+    // Copy elements from a quaternion into a 4-D vector
+    // Declared explicit, so a Quat is not implicitly converted to a Vector4.
+    explicit inline Vector4( Quat quat );
+
+    // Set all elements of a 4-D vector to the same scalar value
+    // Declared explicit, so a float is not implicitly converted to a Vector4.
+    explicit inline Vector4( float scalar );
+
+    // Set all elements of a 4-D vector to the same scalar value (scalar data contained in vector data type)
+    // Declared explicit, so a floatInVec is not implicitly converted to a Vector4.
+    explicit inline Vector4( floatInVec scalar );
+
+    // Set vector float data in a 4-D vector
+    // Declared explicit, so a vec_float4 is not implicitly converted to a Vector4.
+    explicit inline Vector4( vec_float4 vf4 );
+
+    // Get vector float data from a 4-D vector
+    // Returns a vec_float4 by value; const member, so the vector is not modified.
+    inline vec_float4 get128( ) const;
+
+    // Assign one 4-D vector to another
+    // The right-hand side is taken by value.
+    inline Vector4 & operator =( Vector4 vec );
+
+    // Set the x, y, and z elements of a 4-D vector
+    // NOTE:
+    // This function does not change the w element.
+    //
+    inline Vector4 & setXYZ( Vector3 vec );
+
+    // Get the x, y, and z elements of a 4-D vector
+    // The elements are returned as a Vector3 by value.
+    inline const Vector3 getXYZ( ) const;
+
+    // Set the x element of a 4-D vector
+    // NOTE(review): returns Vector4 &, presumably *this so calls can be chained -- confirm in the implementation.
+    inline Vector4 & setX( float x );
+
+    // Set the y element of a 4-D vector
+    //
+    inline Vector4 & setY( float y );
+
+    // Set the z element of a 4-D vector
+    //
+    inline Vector4 & setZ( float z );
+
+    // Set the w element of a 4-D vector
+    //
+    inline Vector4 & setW( float w );
+
+    // Set the x element of a 4-D vector (scalar data contained in vector data type)
+    //
+    inline Vector4 & setX( floatInVec x );
+
+    // Set the y element of a 4-D vector (scalar data contained in vector data type)
+    //
+    inline Vector4 & setY( floatInVec y );
+
+    // Set the z element of a 4-D vector (scalar data contained in vector data type)
+    //
+    inline Vector4 & setZ( floatInVec z );
+
+    // Set the w element of a 4-D vector (scalar data contained in vector data type)
+    //
+    inline Vector4 & setW( floatInVec w );
+
+    // Get the x element of a 4-D vector
+    // The element is returned as a floatInVec (scalar data contained in vector data type).
+    inline const floatInVec getX( ) const;
+
+    // Get the y element of a 4-D vector
+    //
+    inline const floatInVec getY( ) const;
+
+    // Get the z element of a 4-D vector
+    //
+    inline const floatInVec getZ( ) const;
+
+    // Get the w element of a 4-D vector
+    //
+    inline const floatInVec getW( ) const;
+
+    // Set an x, y, z, or w element of a 4-D vector by index
+    // NOTE(review): presumably idx 0..3 maps to x, y, z, w -- confirm in the implementation.
+    inline Vector4 & setElem( int idx, float value );
+
+    // Set an x, y, z, or w element of a 4-D vector by index (scalar data contained in vector data type)
+    //
+    inline Vector4 & setElem( int idx, floatInVec value );
+
+    // Get an x, y, z, or w element of a 4-D vector by index
+    //
+    inline const floatInVec getElem( int idx ) const;
+
+    // Subscripting operator to set or get an element
+    // Non-const overload: returns a VecIdx object by value rather than a float reference.
+    inline VecIdx operator []( int idx );
+
+    // Subscripting operator to get an element
+    // Const overload: returns the element by value as a floatInVec.
+    inline const floatInVec operator []( int idx ) const;
+
+    // Add two 4-D vectors
+    //
+    inline const Vector4 operator +( Vector4 vec ) const;
+
+    // Subtract a 4-D vector from another 4-D vector
+    //
+    inline const Vector4 operator -( Vector4 vec ) const;
+
+    // Multiply a 4-D vector by a scalar
+    //
+    inline const Vector4 operator *( float scalar ) const;
+
+    // Divide a 4-D vector by a scalar
+    //
+    inline const Vector4 operator /( float scalar ) const;
+
+    // Multiply a 4-D vector by a scalar (scalar data contained in vector data type)
+    //
+    inline const Vector4 operator *( floatInVec scalar ) const;
+
+    // Divide a 4-D vector by a scalar (scalar data contained in vector data type)
+    //
+    inline const Vector4 operator /( floatInVec scalar ) const;
+
+    // Perform compound assignment and addition with a 4-D vector
+    //
+    inline Vector4 & operator +=( Vector4 vec );
+
+    // Perform compound assignment and subtraction by a 4-D vector
+    //
+    inline Vector4 & operator -=( Vector4 vec );
+
+    // Perform compound assignment and multiplication by a scalar
+    //
+    inline Vector4 & operator *=( float scalar );
+
+    // Perform compound assignment and division by a scalar
+    //
+    inline Vector4 & operator /=( float scalar );
+
+    // Perform compound assignment and multiplication by a scalar (scalar data contained in vector data type)
+    //
+    inline Vector4 & operator *=( floatInVec scalar );
+
+    // Perform compound assignment and division by a scalar (scalar data contained in vector data type)
+    //
+    inline Vector4 & operator /=( floatInVec scalar );
+
+    // Negate all elements of a 4-D vector
+    // Unary minus; returns a new vector and leaves this one unchanged (const member).
+    inline const Vector4 operator -( ) const;
+
+    // Construct x axis
+    // Static factory; NOTE(review): presumably (1,0,0,0) -- confirm in the implementation.
+    static inline const Vector4 xAxis( );
+
+    // Construct y axis
+    // Static factory; NOTE(review): presumably (0,1,0,0) -- confirm in the implementation.
+    static inline const Vector4 yAxis( );
+
+    // Construct z axis
+    // Static factory; NOTE(review): presumably (0,0,1,0) -- confirm in the implementation.
+    static inline const Vector4 zAxis( );
+
+    // Construct w axis
+    // Static factory; NOTE(review): presumably (0,0,0,1) -- confirm in the implementation.
+    static inline const Vector4 wAxis( );
+
+};
+
+// Multiply a 4-D vector by a scalar
+// Non-member overload: the scalar is the left-hand operand (scalar * vec).
+inline const Vector4 operator *( float scalar, Vector4 vec );
+
+// Multiply a 4-D vector by a scalar (scalar data contained in vector data type)
+// Non-member overload: the scalar is the left-hand operand (scalar * vec).
+inline const Vector4 operator *( floatInVec scalar, Vector4 vec );
+
+// Multiply two 4-D vectors per element
+//
+inline const Vector4 mulPerElem( Vector4 vec0, Vector4 vec1 );
+
+// Divide two 4-D vectors per element
+// NOTE:
+// Floating-point behavior matches standard library function divf4.
+// NOTE(review): presumably vec0 is the dividend and vec1 the divisor -- confirm in the implementation.
+inline const Vector4 divPerElem( Vector4 vec0, Vector4 vec1 );
+
+// Compute the reciprocal of a 4-D vector per element
+// NOTE:
+// Floating-point behavior matches standard library function recipf4.
+//
+inline const Vector4 recipPerElem( Vector4 vec );
+
+// Compute the square root of a 4-D vector per element
+// NOTE:
+// Floating-point behavior matches standard library function sqrtf4.
+//
+inline const Vector4 sqrtPerElem( Vector4 vec );
+
+// Compute the reciprocal square root of a 4-D vector per element
+// NOTE:
+// Floating-point behavior matches standard library function rsqrtf4.
+//
+inline const Vector4 rsqrtPerElem( Vector4 vec );
+
+// Compute the absolute value of a 4-D vector per element
+//
+inline const Vector4 absPerElem( Vector4 vec );
+
+// Copy sign from one 4-D vector to another, per element
+// NOTE(review): the declaration does not show which argument supplies the sign -- confirm in the implementation.
+inline const Vector4 copySignPerElem( Vector4 vec0, Vector4 vec1 );
+
+// Maximum of two 4-D vectors per element
+//
+inline const Vector4 maxPerElem( Vector4 vec0, Vector4 vec1 );
+
+// Minimum of two 4-D vectors per element
+//
+inline const Vector4 minPerElem( Vector4 vec0, Vector4 vec1 );
+
+// Maximum element of a 4-D vector
+// The result is returned as a floatInVec (scalar data contained in vector data type).
+inline const floatInVec maxElem( Vector4 vec );
+
+// Minimum element of a 4-D vector
+// The result is returned as a floatInVec (scalar data contained in vector data type).
+inline const floatInVec minElem( Vector4 vec );
+
+// Compute the sum of all elements of a 4-D vector
+//
+inline const floatInVec sum( Vector4 vec );
+
+// Compute the dot product of two 4-D vectors
+// The result is returned as a floatInVec (scalar data contained in vector data type).
+inline const floatInVec dot( Vector4 vec0, Vector4 vec1 );
+
+// Compute the square of the length of a 4-D vector
+//
+inline const floatInVec lengthSqr( Vector4 vec );
+
+// Compute the length of a 4-D vector
+//
+inline const floatInVec length( Vector4 vec );
+
+// Normalize a 4-D vector
+// NOTE:
+// The result is unpredictable when all elements of vec are at or near zero.
+//
+inline const Vector4 normalize( Vector4 vec );
+
+// Outer product of two 4-D vectors
+// The result is a Matrix4.
+inline const Matrix4 outer( Vector4 vec0, Vector4 vec1 );
+
+// Linear interpolation between two 4-D vectors
+// NOTE:
+// Does not clamp t between 0 and 1.
+// NOTE(review): presumably t = 0 yields vec0 and t = 1 yields vec1 -- confirm in the implementation.
+inline const Vector4 lerp( float t, Vector4 vec0, Vector4 vec1 );
+
+// Linear interpolation between two 4-D vectors (scalar data contained in vector data type)
+// NOTE:
+// Does not clamp t between 0 and 1.
+// NOTE(review): presumably t = 0 yields vec0 and t = 1 yields vec1 -- confirm in the implementation.
+inline const Vector4 lerp( floatInVec t, Vector4 vec0, Vector4 vec1 );
+
+// Spherical linear interpolation between two 4-D vectors
+// NOTE:
+// The result is unpredictable if the vectors point in opposite directions.
+// Does not clamp t between 0 and 1.
+// NOTE(review): the parameter names suggest both inputs are expected to be unit-length -- confirm.
+inline const Vector4 slerp( float t, Vector4 unitVec0, Vector4 unitVec1 );
+
+// Spherical linear interpolation between two 4-D vectors (scalar data contained in vector data type)
+// NOTE:
+// The result is unpredictable if the vectors point in opposite directions.
+// Does not clamp t between 0 and 1.
+// NOTE(review): the parameter names suggest both inputs are expected to be unit-length -- confirm.
+inline const Vector4 slerp( floatInVec t, Vector4 unitVec0, Vector4 unitVec1 );
+
+// Conditionally select between two 4-D vectors
+// NOTE:
+// This function uses a conditional select instruction to avoid a branch.
+// However, the transfer of select1 to a VMX register may use more processing time than a branch.
+// Use the boolInVec version for better performance.
+// NOTE(review): presumably yields vec1 when select1 is true and vec0 otherwise -- confirm in the implementation.
+inline const Vector4 select( Vector4 vec0, Vector4 vec1, bool select1 );
+
+// Conditionally select between two 4-D vectors (scalar data contained in vector data type)
+// NOTE:
+// This function uses a conditional select instruction to avoid a branch.
+// NOTE(review): presumably yields vec1 when select1 is true and vec0 otherwise -- confirm in the implementation.
+inline const Vector4 select( Vector4 vec0, Vector4 vec1, boolInVec select1 );
+
+// Store four 4-D vectors as half-floats
+// The destination is passed as a vec_ushort8 pointer (twoQuads).
+inline void storeHalfFloats( Vector4 vec0, Vector4 vec1, Vector4 vec2, Vector4 vec3, vec_ushort8 * twoQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 4-D vector
+// NOTE:
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+//
+inline void print( Vector4 vec );
+
+// Print a 4-D vector and an associated string identifier
+// NOTE:
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// The identifier is passed as a C string (name).
+inline void print( Vector4 vec, const char * name );
+
+#endif
+
+// A 3-D point in array-of-structures format
+// All state is held in a single vec_float4 data member (mVec128).
+class Point3
+{
+    vec_float4 mVec128;
+
+public:
+    // Default constructor; does no initialization
+    // The body is empty, so mVec128 is not assigned a value here.
+    inline Point3( ) { }
+
+    // Construct a 3-D point from x, y, and z elements
+    //
+    inline Point3( float x, float y, float z );
+
+    // Construct a 3-D point from x, y, and z elements (scalar data contained in vector data type)
+    //
+    inline Point3( floatInVec x, floatInVec y, floatInVec z );
+
+    // Copy elements from a 3-D vector into a 3-D point
+    // Declared explicit, so a Vector3 is not implicitly converted to a Point3.
+    explicit inline Point3( Vector3 vec );
+
+    // Set all elements of a 3-D point to the same scalar value
+    // Declared explicit, so a float is not implicitly converted to a Point3.
+    explicit inline Point3( float scalar );
+
+    // Set all elements of a 3-D point to the same scalar value (scalar data contained in vector data type)
+    // Declared explicit, so a floatInVec is not implicitly converted to a Point3.
+    explicit inline Point3( floatInVec scalar );
+
+    // Set vector float data in a 3-D point
+    // Declared explicit, so a vec_float4 is not implicitly converted to a Point3.
+    explicit inline Point3( vec_float4 vf4 );
+
+    // Get vector float data from a 3-D point
+    // Returns a vec_float4 by value; const member, so the point is not modified.
+    inline vec_float4 get128( ) const;
+
+    // Assign one 3-D point to another
+    // The right-hand side is taken by value.
+    inline Point3 & operator =( Point3 pnt );
+
+    // Set the x element of a 3-D point
+    // NOTE(review): returns Point3 &, presumably *this so calls can be chained -- confirm in the implementation.
+    inline Point3 & setX( float x );
+
+    // Set the y element of a 3-D point
+    //
+    inline Point3 & setY( float y );
+
+    // Set the z element of a 3-D point
+    //
+    inline Point3 & setZ( float z );
+
+    // Set the x element of a 3-D point (scalar data contained in vector data type)
+    //
+    inline Point3 & setX( floatInVec x );
+
+    // Set the y element of a 3-D point (scalar data contained in vector data type)
+    //
+    inline Point3 & setY( floatInVec y );
+
+    // Set the z element of a 3-D point (scalar data contained in vector data type)
+    //
+    inline Point3 & setZ( floatInVec z );
+
+    // Get the x element of a 3-D point
+    // The element is returned as a floatInVec (scalar data contained in vector data type).
+    inline const floatInVec getX( ) const;
+
+    // Get the y element of a 3-D point
+    //
+    inline const floatInVec getY( ) const;
+
+    // Get the z element of a 3-D point
+    //
+    inline const floatInVec getZ( ) const;
+
+    // Set an x, y, or z element of a 3-D point by index
+    // NOTE(review): presumably idx 0..2 maps to x, y, z -- confirm in the implementation.
+    inline Point3 & setElem( int idx, float value );
+
+    // Set an x, y, or z element of a 3-D point by index (scalar data contained in vector data type)
+    //
+    inline Point3 & setElem( int idx, floatInVec value );
+
+    // Get an x, y, or z element of a 3-D point by index
+    //
+    inline const floatInVec getElem( int idx ) const;
+
+    // Subscripting operator to set or get an element
+    // Non-const overload: returns a VecIdx object by value rather than a float reference.
+    inline VecIdx operator []( int idx );
+
+    // Subscripting operator to get an element
+    // Const overload: returns the element by value as a floatInVec.
+    inline const floatInVec operator []( int idx ) const;
+
+    // Subtract a 3-D point from another 3-D point
+    // The result is a 3-D vector (Vector3), not a point.
+    inline const Vector3 operator -( Point3 pnt ) const;
+
+    // Add a 3-D vector to a 3-D point
+    // The result is a 3-D point (Point3).
+    inline const Point3 operator +( Vector3 vec ) const;
+
+    // Subtract a 3-D vector from a 3-D point
+    // The result is a 3-D point (Point3).
+    inline const Point3 operator -( Vector3 vec ) const;
+
+    // Perform compound assignment and addition with a 3-D vector
+    //
+    inline Point3 & operator +=( Vector3 vec );
+
+    // Perform compound assignment and subtraction by a 3-D vector
+    //
+    inline Point3 & operator -=( Vector3 vec );
+
+};
+
+// Multiply two 3-D points per element
+//
+inline const Point3 mulPerElem( Point3 pnt0, Point3 pnt1 );
+
+// Divide two 3-D points per element
+// NOTE:
+// Floating-point behavior matches standard library function divf4.
+// NOTE(review): presumably pnt0 is the dividend and pnt1 the divisor -- confirm in the implementation.
+inline const Point3 divPerElem( Point3 pnt0, Point3 pnt1 );
+
+// Compute the reciprocal of a 3-D point per element
+// NOTE:
+// Floating-point behavior matches standard library function recipf4.
+//
+inline const Point3 recipPerElem( Point3 pnt );
+
+// Compute the square root of a 3-D point per element
+// NOTE:
+// Floating-point behavior matches standard library function sqrtf4.
+//
+inline const Point3 sqrtPerElem( Point3 pnt );
+
+// Compute the reciprocal square root of a 3-D point per element
+// NOTE:
+// Floating-point behavior matches standard library function rsqrtf4.
+//
+inline const Point3 rsqrtPerElem( Point3 pnt );
+
+// Compute the absolute value of a 3-D point per element
+//
+inline const Point3 absPerElem( Point3 pnt );
+
+// Copy sign from one 3-D point to another, per element
+// NOTE(review): the declaration does not show which argument supplies the sign -- confirm in the implementation.
+inline const Point3 copySignPerElem( Point3 pnt0, Point3 pnt1 );
+
+// Maximum of two 3-D points per element
+//
+inline const Point3 maxPerElem( Point3 pnt0, Point3 pnt1 );
+
+// Minimum of two 3-D points per element
+//
+inline const Point3 minPerElem( Point3 pnt0, Point3 pnt1 );
+
+// Maximum element of a 3-D point
+// The result is returned as a floatInVec (scalar data contained in vector data type).
+inline const floatInVec maxElem( Point3 pnt );
+
+// Minimum element of a 3-D point
+// The result is returned as a floatInVec (scalar data contained in vector data type).
+inline const floatInVec minElem( Point3 pnt );
+
+// Compute the sum of all elements of a 3-D point
+//
+inline const floatInVec sum( Point3 pnt );
+
+// Apply uniform scale to a 3-D point
+// The scaled point is returned by value; pnt is passed by value.
+inline const Point3 scale( Point3 pnt, float scaleVal );
+
+// Apply uniform scale to a 3-D point (scalar data contained in vector data type)
+//
+inline const Point3 scale( Point3 pnt, floatInVec scaleVal );
+
+// Apply non-uniform scale to a 3-D point
+// The per-axis scale factors are supplied as a Vector3 (scaleVec).
+inline const Point3 scale( Point3 pnt, Vector3 scaleVec );
+
+// Scalar projection of a 3-D point on a unit-length 3-D vector
+//
+inline const floatInVec projection( Point3 pnt, Vector3 unitVec );
+
+// Compute the square of the distance of a 3-D point from the coordinate-system origin
+//
+inline const floatInVec distSqrFromOrigin( Point3 pnt );
+
+// Compute the distance of a 3-D point from the coordinate-system origin
+//
+inline const floatInVec distFromOrigin( Point3 pnt );
+
+// Compute the square of the distance between two 3-D points
+//
+inline const floatInVec distSqr( Point3 pnt0, Point3 pnt1 );
+
+// Compute the distance between two 3-D points
+//
+inline const floatInVec dist( Point3 pnt0, Point3 pnt1 );
+
+// Linear interpolation between two 3-D points
+// NOTE:
+// Does not clamp t between 0 and 1.
+// NOTE(review): presumably t = 0 yields pnt0 and t = 1 yields pnt1 -- confirm in the implementation.
+inline const Point3 lerp( float t, Point3 pnt0, Point3 pnt1 );
+
+// Linear interpolation between two 3-D points (scalar data contained in vector data type)
+// NOTE:
+// Does not clamp t between 0 and 1.
+// NOTE(review): presumably t = 0 yields pnt0 and t = 1 yields pnt1 -- confirm in the implementation.
+inline const Point3 lerp( floatInVec t, Point3 pnt0, Point3 pnt1 );
+
+// Conditionally select between two 3-D points
+// NOTE:
+// This function uses a conditional select instruction to avoid a branch.
+// However, the transfer of select1 to a VMX register may use more processing time than a branch.
+// Use the boolInVec version for better performance.
+// NOTE(review): presumably yields pnt1 when select1 is true and pnt0 otherwise -- confirm in the implementation.
+inline const Point3 select( Point3 pnt0, Point3 pnt1, bool select1 );
+
+// Conditionally select between two 3-D points (scalar data contained in vector data type)
+// NOTE:
+// This function uses a conditional select instruction to avoid a branch.
+// NOTE(review): presumably yields pnt1 when select1 is true and pnt0 otherwise -- confirm in the implementation.
+inline const Point3 select( Point3 pnt0, Point3 pnt1, boolInVec select1 );
+
+// Store x, y, and z elements of a 3-D point in the first three words of a quadword.
+// The value of the fourth word (the word with the highest address) remains unchanged.
+// The destination quadword is passed by pointer (quad).
+inline void storeXYZ( Point3 pnt, vec_float4 * quad );
+
+// Load four three-float 3-D points, stored in three quadwords
+// The four results are written through the reference parameters pnt0..pnt3; threeQuads is read-only.
+inline void loadXYZArray( Point3 & pnt0, Point3 & pnt1, Point3 & pnt2, Point3 & pnt3, const vec_float4 * threeQuads );
+
+// Store four 3-D points in three quadwords
+// The destination is passed by pointer (threeQuads).
+inline void storeXYZArray( Point3 pnt0, Point3 pnt1, Point3 pnt2, Point3 pnt3, vec_float4 * threeQuads );
+
+// Store eight 3-D points as half-floats
+// The destination is passed as a vec_ushort8 pointer (threeQuads).
+inline void storeHalfFloats( Point3 pnt0, Point3 pnt1, Point3 pnt2, Point3 pnt3, Point3 pnt4, Point3 pnt5, Point3 pnt6, Point3 pnt7, vec_ushort8 * threeQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 3-D point
+// NOTE:
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+//
+inline void print( Point3 pnt );
+
+// Print a 3-D point and an associated string identifier
+// NOTE:
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// The identifier is passed as a C string (name).
+inline void print( Point3 pnt, const char * name );
+
+#endif
+
+// A quaternion in array-of-structures format
+//
+class Quat
+{
+    vec_float4 mVec128;
+
+public:
+    // Default constructor; does no initialization
+    // The body is empty, so mVec128 is not assigned a value here.
+    inline Quat( ) { }
+
+    // Construct a quaternion from x, y, z, and w elements
+    // 
+    inline Quat( float x, float y, float z, float w );
+
+    // Construct a quaternion from x, y, z, and w elements (scalar data contained in vector data type)
+    // 
+    inline Quat( floatInVec x, floatInVec y, floatInVec z, floatInVec w );
+
+    // Construct a quaternion from a 3-D vector and a scalar
+    // 
+    inline Quat( Vector3 xyz, float w );
+
+    // Construct a quaternion from a 3-D vector and a scalar (scalar data contained in vector data type)
+    // 
+    inline Quat( Vector3 xyz, floatInVec w );
+
+    // Copy elements from a 4-D vector into a quaternion
+    // 
+    explicit inline Quat( Vector4 vec );
+
+    // Convert a rotation matrix to a unit-length quaternion
+    // 
+    explicit inline Quat( const Matrix3 & rotMat );
+
+    // Set all elements of a quaternion to the same scalar value
+    // 
+    explicit inline Quat( float scalar );
+
+    // Set all elements of a quaternion to the same scalar value (scalar data contained in vector data type)
+    // 
+    explicit inline Quat( floatInVec scalar );
+
+    // Set vector float data in a quaternion
+    // 
+    explicit inline Quat( vec_float4 vf4 );
+
+    // Get vector float data from a quaternion
+    // 
+    inline vec_float4 get128( ) const;
+
+    // Assign one quaternion to another
+    // 
+    inline Quat & operator =( Quat quat );
+
+    // Set the x, y, and z elements of a quaternion
+    // NOTE: 
+    // This function does not change the w element.
+    // 
+    inline Quat & setXYZ( Vector3 vec );
+
+    // Get the x, y, and z elements of a quaternion
+    // 
+    inline const Vector3 getXYZ( ) const;
+
+    // Set the x element of a quaternion
+    // 
+    inline Quat & setX( float x );
+
+    // Set the y element of a quaternion
+    // 
+    inline Quat & setY( float y );
+
+    // Set the z element of a quaternion
+    // 
+    inline Quat & setZ( float z );
+
+    // Set the w element of a quaternion
+    // 
+    inline Quat & setW( float w );
+
+    // Set the x element of a quaternion (scalar data contained in vector data type)
+    // 
+    inline Quat & setX( floatInVec x );
+
+    // Set the y element of a quaternion (scalar data contained in vector data type)
+    // 
+    inline Quat & setY( floatInVec y );
+
+    // Set the z element of a quaternion (scalar data contained in vector data type)
+    // 
+    inline Quat & setZ( floatInVec z );
+
+    // Set the w element of a quaternion (scalar data contained in vector data type)
+    // 
+    inline Quat & setW( floatInVec w );
+
+    // Get the x element of a quaternion
+    // 
+    inline const floatInVec getX( ) const;
+
+    // Get the y element of a quaternion
+    // 
+    inline const floatInVec getY( ) const;
+
+    // Get the z element of a quaternion
+    // 
+    inline const floatInVec getZ( ) const;
+
+    // Get the w element of a quaternion
+    // 
+    inline const floatInVec getW( ) const;
+
+    // Set an x, y, z, or w element of a quaternion by index
+    // 
+    inline Quat & setElem( int idx, float value );
+
+    // Set an x, y, z, or w element of a quaternion by index (scalar data contained in vector data type)
+    // 
+    inline Quat & setElem( int idx, floatInVec value );
+
+    // Get an x, y, z, or w element of a quaternion by index
+    // 
+    inline const floatInVec getElem( int idx ) const;
+
+    // Subscripting operator to set or get an element
+    // 
+    inline VecIdx operator []( int idx );
+
+    // Subscripting operator to get an element
+    // 
+    inline const floatInVec operator []( int idx ) const;
+
+    // Add two quaternions
+    // 
+    inline const Quat operator +( Quat quat ) const;
+
+    // Subtract a quaternion from another quaternion
+    // 
+    inline const Quat operator -( Quat quat ) const;
+
+    // Multiply two quaternions
+    // 
+    inline const Quat operator *( Quat quat ) const;
+
+    // Multiply a quaternion by a scalar
+    // 
+    inline const Quat operator *( float scalar ) const;
+
+    // Divide a quaternion by a scalar
+    // 
+    inline const Quat operator /( float scalar ) const;
+
+    // Multiply a quaternion by a scalar (scalar data contained in vector data type)
+    // 
+    inline const Quat operator *( floatInVec scalar ) const;
+
+    // Divide a quaternion by a scalar (scalar data contained in vector data type)
+    // 
+    inline const Quat operator /( floatInVec scalar ) const;
+
+    // Perform compound assignment and addition with a quaternion
+    // 
+    inline Quat & operator +=( Quat quat );
+
+    // Perform compound assignment and subtraction by a quaternion
+    // 
+    inline Quat & operator -=( Quat quat );
+
+    // Perform compound assignment and multiplication by a quaternion
+    // 
+    inline Quat & operator *=( Quat quat );
+
+    // Perform compound assignment and multiplication by a scalar
+    // 
+    inline Quat & operator *=( float scalar );
+
+    // Perform compound assignment and division by a scalar
+    // 
+    inline Quat & operator /=( float scalar );
+
+    // Perform compound assignment and multiplication by a scalar (scalar data contained in vector data type)
+    // 
+    inline Quat & operator *=( floatInVec scalar );
+
+    // Perform compound assignment and division by a scalar (scalar data contained in vector data type)
+    // 
+    inline Quat & operator /=( floatInVec scalar );
+
+    // Negate all elements of a quaternion
+    // 
+    inline const Quat operator -( ) const;
+
+    // Construct an identity quaternion
+    // 
+    static inline const Quat identity( );
+
+    // Construct a quaternion to rotate between two unit-length 3-D vectors
+    // NOTE: 
+    // The result is unpredictable if unitVec0 and unitVec1 point in opposite directions.
+    // 
+    static inline const Quat rotation( Vector3 unitVec0, Vector3 unitVec1 );
+
+    // Construct a quaternion to rotate around a unit-length 3-D vector
+    // 
+    static inline const Quat rotation( float radians, Vector3 unitVec );
+
+    // Construct a quaternion to rotate around a unit-length 3-D vector (scalar data contained in vector data type)
+    // 
+    static inline const Quat rotation( floatInVec radians, Vector3 unitVec );
+
+    // Construct a quaternion to rotate around the x axis
+    // 
+    static inline const Quat rotationX( float radians );
+
+    // Construct a quaternion to rotate around the y axis
+    // 
+    static inline const Quat rotationY( float radians );
+
+    // Construct a quaternion to rotate around the z axis
+    // 
+    static inline const Quat rotationZ( float radians );
+
+    // Construct a quaternion to rotate around the x axis (scalar data contained in vector data type)
+    // 
+    static inline const Quat rotationX( floatInVec radians );
+
+    // Construct a quaternion to rotate around the y axis (scalar data contained in vector data type)
+    // 
+    static inline const Quat rotationY( floatInVec radians );
+
+    // Construct a quaternion to rotate around the z axis (scalar data contained in vector data type)
+    // 
+    static inline const Quat rotationZ( floatInVec radians );
+
+};
+
+// Multiply a quaternion by a scalar
+// Non-member overload: lets the scalar appear as the left-hand operand.
+inline const Quat operator *( float scalar, Quat quat );
+
+// Multiply a quaternion by a scalar (scalar data contained in vector data type)
+// Non-member overload: lets the scalar appear as the left-hand operand.
+inline const Quat operator *( floatInVec scalar, Quat quat );
+
+// Compute the conjugate of a quaternion
+// 
+inline const Quat conj( Quat quat );
+
+// Use a unit-length quaternion to rotate a 3-D vector
+// Both arguments are taken by value; the rotated vector is returned by value.
+inline const Vector3 rotate( Quat unitQuat, Vector3 vec );
+
+// Compute the dot product of two quaternions
+// The result is returned as a floatInVec rather than a plain float.
+inline const floatInVec dot( Quat quat0, Quat quat1 );
+
+// Compute the norm of a quaternion
+// NOTE(review): presumably the squared length (dot of quat with itself) - confirm against the definition.
+inline const floatInVec norm( Quat quat );
+
+// Compute the length of a quaternion
+// The result is returned as a floatInVec rather than a plain float.
+inline const floatInVec length( Quat quat );
+
+// Normalize a quaternion
+// NOTE: 
+// The result is unpredictable when all elements of quat are at or near zero.
+// 
+inline const Quat normalize( Quat quat );
+
+// Linear interpolation between two quaternions
+// NOTE: 
+// Does not clamp t between 0 and 1.
+// 
+inline const Quat lerp( float t, Quat quat0, Quat quat1 );
+
+// Linear interpolation between two quaternions (scalar data contained in vector data type)
+// NOTE: 
+// Does not clamp t between 0 and 1.
+// 
+inline const Quat lerp( floatInVec t, Quat quat0, Quat quat1 );
+
+// Spherical linear interpolation between two quaternions
+// NOTE: 
+// Interpolates along the shortest path between orientations.
+// Does not clamp t between 0 and 1.
+// 
+inline const Quat slerp( float t, Quat unitQuat0, Quat unitQuat1 );
+
+// Spherical linear interpolation between two quaternions (scalar data contained in vector data type)
+// NOTE: 
+// Interpolates along the shortest path between orientations.
+// Does not clamp t between 0 and 1.
+// 
+inline const Quat slerp( floatInVec t, Quat unitQuat0, Quat unitQuat1 );
+
+// Spherical quadrangle interpolation
+// NOTE(review): no clamping of t is documented for squad; presumably unclamped like lerp/slerp - confirm.
+inline const Quat squad( float t, Quat unitQuat0, Quat unitQuat1, Quat unitQuat2, Quat unitQuat3 );
+
+// Spherical quadrangle interpolation (scalar data contained in vector data type)
+// NOTE(review): no clamping of t is documented for squad; presumably unclamped like lerp/slerp - confirm.
+inline const Quat squad( floatInVec t, Quat unitQuat0, Quat unitQuat1, Quat unitQuat2, Quat unitQuat3 );
+
+// Conditionally select between two quaternions
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// However, the transfer of select1 to a VMX register may use more processing time than a branch.
+// Use the boolInVec version for better performance.
+// NOTE(review): presumably yields quat1 when select1 is true and quat0 otherwise - confirm.
+inline const Quat select( Quat quat0, Quat quat1, bool select1 );
+
+// Conditionally select between two quaternions (scalar data contained in vector data type)
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// NOTE(review): presumably yields quat1 when select1 is true and quat0 otherwise - confirm.
+inline const Quat select( Quat quat0, Quat quat1, boolInVec select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a quaternion
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( Quat quat );
+
+// Print a quaternion and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( Quat quat, const char * name );
+
+#endif
+
+// A 3x3 matrix in array-of-structures format
+// Stored as three private Vector3 columns: mCol0, mCol1, mCol2.
+class Matrix3
+{
+    Vector3 mCol0;
+    Vector3 mCol1;
+    Vector3 mCol2;
+
+public:
+    // Default constructor; does no initialization
+    // The body is empty; the columns hold whatever Vector3's default constructor leaves.
+    inline Matrix3( ) { }
+
+    // Copy a 3x3 matrix
+    // 
+    inline Matrix3( const Matrix3 & mat );
+
+    // Construct a 3x3 matrix containing the specified columns
+    // 
+    inline Matrix3( Vector3 col0, Vector3 col1, Vector3 col2 );
+
+    // Construct a 3x3 rotation matrix from a unit-length quaternion
+    // Declared explicit, so a Quat is never implicitly converted to a Matrix3.
+    explicit inline Matrix3( Quat unitQuat );
+
+    // Set all elements of a 3x3 matrix to the same scalar value
+    // Declared explicit, so a float is never implicitly converted to a Matrix3.
+    explicit inline Matrix3( float scalar );
+
+    // Set all elements of a 3x3 matrix to the same scalar value (scalar data contained in vector data type)
+    // Declared explicit, so a floatInVec is never implicitly converted to a Matrix3.
+    explicit inline Matrix3( floatInVec scalar );
+
+    // Assign one 3x3 matrix to another
+    // 
+    inline Matrix3 & operator =( const Matrix3 & mat );
+
+    // Set column 0 of a 3x3 matrix
+    // Returns a Matrix3 reference (presumably *this, to allow chaining - confirm against the definition).
+    inline Matrix3 & setCol0( Vector3 col0 );
+
+    // Set column 1 of a 3x3 matrix
+    // Returns a Matrix3 reference (presumably *this, to allow chaining - confirm against the definition).
+    inline Matrix3 & setCol1( Vector3 col1 );
+
+    // Set column 2 of a 3x3 matrix
+    // Returns a Matrix3 reference (presumably *this, to allow chaining - confirm against the definition).
+    inline Matrix3 & setCol2( Vector3 col2 );
+
+    // Get column 0 of a 3x3 matrix
+    // The column is returned by value.
+    inline const Vector3 getCol0( ) const;
+
+    // Get column 1 of a 3x3 matrix
+    // The column is returned by value.
+    inline const Vector3 getCol1( ) const;
+
+    // Get column 2 of a 3x3 matrix
+    // The column is returned by value.
+    inline const Vector3 getCol2( ) const;
+
+    // Set the column of a 3x3 matrix referred to by the specified index
+    // NOTE(review): the valid range of col is not stated here; presumably 0-2 - confirm.
+    inline Matrix3 & setCol( int col, Vector3 vec );
+
+    // Set the row of a 3x3 matrix referred to by the specified index
+    // NOTE(review): the valid range of row is not stated here; presumably 0-2 - confirm.
+    inline Matrix3 & setRow( int row, Vector3 vec );
+
+    // Get the column of a 3x3 matrix referred to by the specified index
+    // 
+    inline const Vector3 getCol( int col ) const;
+
+    // Get the row of a 3x3 matrix referred to by the specified index
+    // 
+    inline const Vector3 getRow( int row ) const;
+
+    // Subscripting operator to set or get a column
+    // Returns a Vector3 reference, so the column can be modified through the result.
+    inline Vector3 & operator []( int col );
+
+    // Subscripting operator to get a column
+    // Const overload: the column is returned by value.
+    inline const Vector3 operator []( int col ) const;
+
+    // Set the element of a 3x3 matrix referred to by column and row indices
+    // Note the argument order: column index first, then row index.
+    inline Matrix3 & setElem( int col, int row, float val );
+
+    // Set the element of a 3x3 matrix referred to by column and row indices (scalar data contained in vector data type)
+    // Note the argument order: column index first, then row index.
+    inline Matrix3 & setElem( int col, int row, floatInVec val );
+
+    // Get the element of a 3x3 matrix referred to by column and row indices
+    // Column index first, then row index; the element is returned as a floatInVec.
+    inline const floatInVec getElem( int col, int row ) const;
+
+    // Add two 3x3 matrices
+    // 
+    inline const Matrix3 operator +( const Matrix3 & mat ) const;
+
+    // Subtract a 3x3 matrix from another 3x3 matrix
+    // 
+    inline const Matrix3 operator -( const Matrix3 & mat ) const;
+
+    // Negate all elements of a 3x3 matrix
+    // 
+    inline const Matrix3 operator -( ) const;
+
+    // Multiply a 3x3 matrix by a scalar
+    // 
+    inline const Matrix3 operator *( float scalar ) const;
+
+    // Multiply a 3x3 matrix by a scalar (scalar data contained in vector data type)
+    // 
+    inline const Matrix3 operator *( floatInVec scalar ) const;
+
+    // Multiply a 3x3 matrix by a 3-D vector
+    // The matrix is the left-hand operand; the product is returned as a Vector3.
+    inline const Vector3 operator *( Vector3 vec ) const;
+
+    // Multiply two 3x3 matrices
+    // This matrix is the left-hand operand and mat the right-hand operand.
+    inline const Matrix3 operator *( const Matrix3 & mat ) const;
+
+    // Perform compound assignment and addition with a 3x3 matrix
+    // 
+    inline Matrix3 & operator +=( const Matrix3 & mat );
+
+    // Perform compound assignment and subtraction by a 3x3 matrix
+    // 
+    inline Matrix3 & operator -=( const Matrix3 & mat );
+
+    // Perform compound assignment and multiplication by a scalar
+    // 
+    inline Matrix3 & operator *=( float scalar );
+
+    // Perform compound assignment and multiplication by a scalar (scalar data contained in vector data type)
+    // 
+    inline Matrix3 & operator *=( floatInVec scalar );
+
+    // Perform compound assignment and multiplication by a 3x3 matrix
+    // NOTE(review): presumably assigns (*this) * mat, i.e. mat is applied on the right - confirm.
+    inline Matrix3 & operator *=( const Matrix3 & mat );
+
+    // Construct an identity 3x3 matrix
+    // 
+    static inline const Matrix3 identity( );
+
+    // Construct a 3x3 matrix to rotate around the x axis
+    // 
+    static inline const Matrix3 rotationX( float radians );
+
+    // Construct a 3x3 matrix to rotate around the y axis
+    // 
+    static inline const Matrix3 rotationY( float radians );
+
+    // Construct a 3x3 matrix to rotate around the z axis
+    // 
+    static inline const Matrix3 rotationZ( float radians );
+
+    // Construct a 3x3 matrix to rotate around the x axis (scalar data contained in vector data type)
+    // 
+    static inline const Matrix3 rotationX( floatInVec radians );
+
+    // Construct a 3x3 matrix to rotate around the y axis (scalar data contained in vector data type)
+    // 
+    static inline const Matrix3 rotationY( floatInVec radians );
+
+    // Construct a 3x3 matrix to rotate around the z axis (scalar data contained in vector data type)
+    // 
+    static inline const Matrix3 rotationZ( floatInVec radians );
+
+    // Construct a 3x3 matrix to rotate around the x, y, and z axes
+    // The angles are packed in one Vector3. NOTE(review): the name suggests the product Z * Y * X - confirm.
+    static inline const Matrix3 rotationZYX( Vector3 radiansXYZ );
+
+    // Construct a 3x3 matrix to rotate around a unit-length 3-D vector
+    // 
+    static inline const Matrix3 rotation( float radians, Vector3 unitVec );
+
+    // Construct a 3x3 matrix to rotate around a unit-length 3-D vector (scalar data contained in vector data type)
+    // 
+    static inline const Matrix3 rotation( floatInVec radians, Vector3 unitVec );
+
+    // Construct a 3x3 rotation matrix from a unit-length quaternion
+    // NOTE(review): presumably equivalent to the explicit Matrix3( Quat ) constructor - confirm.
+    static inline const Matrix3 rotation( Quat unitQuat );
+
+    // Construct a 3x3 matrix to perform scaling
+    // NOTE(review): scaleVec presumably supplies the per-axis factors placed on the diagonal - confirm.
+    static inline const Matrix3 scale( Vector3 scaleVec );
+
+};
+// Multiply a 3x3 matrix by a scalar
+// Non-member overload: lets the scalar appear as the left-hand operand.
+inline const Matrix3 operator *( float scalar, const Matrix3 & mat );
+
+// Multiply a 3x3 matrix by a scalar (scalar data contained in vector data type)
+// Non-member overload: lets the scalar appear as the left-hand operand.
+inline const Matrix3 operator *( floatInVec scalar, const Matrix3 & mat );
+
+// Append (post-multiply) a scale transformation to a 3x3 matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// Argument order: the matrix comes first, then the scale vector.
+inline const Matrix3 appendScale( const Matrix3 & mat, Vector3 scaleVec );
+
+// Prepend (pre-multiply) a scale transformation to a 3x3 matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// Argument order: the scale vector comes first, then the matrix.
+inline const Matrix3 prependScale( Vector3 scaleVec, const Matrix3 & mat );
+
+// Multiply two 3x3 matrices per element
+// 
+inline const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 );
+
+// Compute the absolute value of a 3x3 matrix per element
+// 
+inline const Matrix3 absPerElem( const Matrix3 & mat );
+
+// Transpose of a 3x3 matrix
+// The argument is taken by const reference; the transposed matrix is returned by value.
+inline const Matrix3 transpose( const Matrix3 & mat );
+
+// Compute the inverse of a 3x3 matrix
+// NOTE: 
+// Result is unpredictable when the determinant of mat is equal to or near 0.
+// 
+inline const Matrix3 inverse( const Matrix3 & mat );
+
+// Determinant of a 3x3 matrix
+// The result is returned as a floatInVec rather than a plain float.
+inline const floatInVec determinant( const Matrix3 & mat );
+
+// Conditionally select between two 3x3 matrices
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// However, the transfer of select1 to a VMX register may use more processing time than a branch.
+// Use the boolInVec version for better performance.
+// NOTE(review): presumably yields mat1 when select1 is true and mat0 otherwise - confirm.
+inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, bool select1 );
+
+// Conditionally select between two 3x3 matrices (scalar data contained in vector data type)
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// NOTE(review): presumably yields mat1 when select1 is true and mat0 otherwise - confirm.
+inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, boolInVec select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 3x3 matrix
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Matrix3 & mat );
+
+// Print a 3x3 matrix and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Matrix3 & mat, const char * name );
+
+#endif
+
+// A 4x4 matrix in array-of-structures format
+// Stored as four private Vector4 columns: mCol0, mCol1, mCol2, mCol3.
+class Matrix4
+{
+    Vector4 mCol0;
+    Vector4 mCol1;
+    Vector4 mCol2;
+    Vector4 mCol3;
+
+public:
+    // Default constructor; does no initialization
+    // The body is empty; the columns hold whatever Vector4's default constructor leaves.
+    inline Matrix4( ) { }
+
+    // Copy a 4x4 matrix
+    // 
+    inline Matrix4( const Matrix4 & mat );
+
+    // Construct a 4x4 matrix containing the specified columns
+    // 
+    inline Matrix4( Vector4 col0, Vector4 col1, Vector4 col2, Vector4 col3 );
+
+    // Construct a 4x4 matrix from a 3x4 transformation matrix
+    // Declared explicit, so a Transform3 is never implicitly converted to a Matrix4.
+    explicit inline Matrix4( const Transform3 & mat );
+
+    // Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector
+    // NOTE(review): the bottom row is not specified here; presumably (0, 0, 0, 1) - confirm.
+    inline Matrix4( const Matrix3 & mat, Vector3 translateVec );
+
+    // Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector
+    // NOTE(review): the bottom row is not specified here; presumably (0, 0, 0, 1) - confirm.
+    inline Matrix4( Quat unitQuat, Vector3 translateVec );
+
+    // Set all elements of a 4x4 matrix to the same scalar value
+    // Declared explicit, so a float is never implicitly converted to a Matrix4.
+    explicit inline Matrix4( float scalar );
+
+    // Set all elements of a 4x4 matrix to the same scalar value (scalar data contained in vector data type)
+    // Declared explicit, so a floatInVec is never implicitly converted to a Matrix4.
+    explicit inline Matrix4( floatInVec scalar );
+
+    // Assign one 4x4 matrix to another
+    // 
+    inline Matrix4 & operator =( const Matrix4 & mat );
+
+    // Set the upper-left 3x3 submatrix
+    // NOTE: 
+    // This function does not change the bottom row elements.
+    // 
+    inline Matrix4 & setUpper3x3( const Matrix3 & mat3 );
+
+    // Get the upper-left 3x3 submatrix of a 4x4 matrix
+    // The submatrix is returned by value as a Matrix3.
+    inline const Matrix3 getUpper3x3( ) const;
+
+    // Set translation component
+    // NOTE: 
+    // This function does not change the bottom row elements.
+    // 
+    inline Matrix4 & setTranslation( Vector3 translateVec );
+
+    // Get the translation component of a 4x4 matrix
+    // The translation is returned by value as a Vector3.
+    inline const Vector3 getTranslation( ) const;
+
+    // Set column 0 of a 4x4 matrix
+    // Returns a Matrix4 reference (presumably *this, to allow chaining - confirm against the definition).
+    inline Matrix4 & setCol0( Vector4 col0 );
+
+    // Set column 1 of a 4x4 matrix
+    // Returns a Matrix4 reference (presumably *this, to allow chaining - confirm against the definition).
+    inline Matrix4 & setCol1( Vector4 col1 );
+
+    // Set column 2 of a 4x4 matrix
+    // Returns a Matrix4 reference (presumably *this, to allow chaining - confirm against the definition).
+    inline Matrix4 & setCol2( Vector4 col2 );
+
+    // Set column 3 of a 4x4 matrix
+    // Returns a Matrix4 reference (presumably *this, to allow chaining - confirm against the definition).
+    inline Matrix4 & setCol3( Vector4 col3 );
+
+    // Get column 0 of a 4x4 matrix
+    // The column is returned by value.
+    inline const Vector4 getCol0( ) const;
+
+    // Get column 1 of a 4x4 matrix
+    // The column is returned by value.
+    inline const Vector4 getCol1( ) const;
+
+    // Get column 2 of a 4x4 matrix
+    // The column is returned by value.
+    inline const Vector4 getCol2( ) const;
+
+    // Get column 3 of a 4x4 matrix
+    // The column is returned by value.
+    inline const Vector4 getCol3( ) const;
+
+    // Set the column of a 4x4 matrix referred to by the specified index
+    // NOTE(review): the valid range of col is not stated here; presumably 0-3 - confirm.
+    inline Matrix4 & setCol( int col, Vector4 vec );
+
+    // Set the row of a 4x4 matrix referred to by the specified index
+    // NOTE(review): the valid range of row is not stated here; presumably 0-3 - confirm.
+    inline Matrix4 & setRow( int row, Vector4 vec );
+
+    // Get the column of a 4x4 matrix referred to by the specified index
+    // 
+    inline const Vector4 getCol( int col ) const;
+
+    // Get the row of a 4x4 matrix referred to by the specified index
+    // 
+    inline const Vector4 getRow( int row ) const;
+
+    // Subscripting operator to set or get a column
+    // Returns a Vector4 reference, so the column can be modified through the result.
+    inline Vector4 & operator []( int col );
+
+    // Subscripting operator to get a column
+    // Const overload: the column is returned by value.
+    inline const Vector4 operator []( int col ) const;
+
+    // Set the element of a 4x4 matrix referred to by column and row indices
+    // Note the argument order: column index first, then row index.
+    inline Matrix4 & setElem( int col, int row, float val );
+
+    // Set the element of a 4x4 matrix referred to by column and row indices (scalar data contained in vector data type)
+    // Note the argument order: column index first, then row index.
+    inline Matrix4 & setElem( int col, int row, floatInVec val );
+
+    // Get the element of a 4x4 matrix referred to by column and row indices
+    // Column index first, then row index; the element is returned as a floatInVec.
+    inline const floatInVec getElem( int col, int row ) const;
+
+    // Add two 4x4 matrices
+    // 
+    inline const Matrix4 operator +( const Matrix4 & mat ) const;
+
+    // Subtract a 4x4 matrix from another 4x4 matrix
+    // 
+    inline const Matrix4 operator -( const Matrix4 & mat ) const;
+
+    // Negate all elements of a 4x4 matrix
+    // 
+    inline const Matrix4 operator -( ) const;
+
+    // Multiply a 4x4 matrix by a scalar
+    // 
+    inline const Matrix4 operator *( float scalar ) const;
+
+    // Multiply a 4x4 matrix by a scalar (scalar data contained in vector data type)
+    // 
+    inline const Matrix4 operator *( floatInVec scalar ) const;
+
+    // Multiply a 4x4 matrix by a 4-D vector
+    // The matrix is the left-hand operand; the product is returned as a Vector4.
+    inline const Vector4 operator *( Vector4 vec ) const;
+
+    // Multiply a 4x4 matrix by a 3-D vector
+    // Returns a Vector4. NOTE(review): vec is presumably treated as having w = 0 - confirm.
+    inline const Vector4 operator *( Vector3 vec ) const;
+
+    // Multiply a 4x4 matrix by a 3-D point
+    // Returns a Vector4. NOTE(review): pnt is presumably treated as having w = 1 - confirm.
+    inline const Vector4 operator *( Point3 pnt ) const;
+
+    // Multiply two 4x4 matrices
+    // This matrix is the left-hand operand and mat the right-hand operand.
+    inline const Matrix4 operator *( const Matrix4 & mat ) const;
+
+    // Multiply a 4x4 matrix by a 3x4 transformation matrix
+    // This matrix is the left-hand operand and tfrm the right-hand operand; the result is a Matrix4.
+    inline const Matrix4 operator *( const Transform3 & tfrm ) const;
+
+    // Perform compound assignment and addition with a 4x4 matrix
+    // 
+    inline Matrix4 & operator +=( const Matrix4 & mat );
+
+    // Perform compound assignment and subtraction by a 4x4 matrix
+    // 
+    inline Matrix4 & operator -=( const Matrix4 & mat );
+
+    // Perform compound assignment and multiplication by a scalar
+    // 
+    inline Matrix4 & operator *=( float scalar );
+
+    // Perform compound assignment and multiplication by a scalar (scalar data contained in vector data type)
+    // 
+    inline Matrix4 & operator *=( floatInVec scalar );
+
+    // Perform compound assignment and multiplication by a 4x4 matrix
+    // NOTE(review): presumably assigns (*this) * mat, i.e. mat is applied on the right - confirm.
+    inline Matrix4 & operator *=( const Matrix4 & mat );
+
+    // Perform compound assignment and multiplication by a 3x4 transformation matrix
+    // NOTE(review): presumably assigns (*this) * tfrm, i.e. tfrm is applied on the right - confirm.
+    inline Matrix4 & operator *=( const Transform3 & tfrm );
+
+    // Construct an identity 4x4 matrix
+    // 
+    static inline const Matrix4 identity( );
+
+    // Construct a 4x4 matrix to rotate around the x axis
+    // 
+    static inline const Matrix4 rotationX( float radians );
+
+    // Construct a 4x4 matrix to rotate around the y axis
+    // 
+    static inline const Matrix4 rotationY( float radians );
+
+    // Construct a 4x4 matrix to rotate around the z axis
+    // 
+    static inline const Matrix4 rotationZ( float radians );
+
+    // Construct a 4x4 matrix to rotate around the x axis (scalar data contained in vector data type)
+    // 
+    static inline const Matrix4 rotationX( floatInVec radians );
+
+    // Construct a 4x4 matrix to rotate around the y axis (scalar data contained in vector data type)
+    // 
+    static inline const Matrix4 rotationY( floatInVec radians );
+
+    // Construct a 4x4 matrix to rotate around the z axis (scalar data contained in vector data type)
+    // 
+    static inline const Matrix4 rotationZ( floatInVec radians );
+
+    // Construct a 4x4 matrix to rotate around the x, y, and z axes
+    // The angles are packed in one Vector3. NOTE(review): the name suggests the product Z * Y * X - confirm.
+    static inline const Matrix4 rotationZYX( Vector3 radiansXYZ );
+
+    // Construct a 4x4 matrix to rotate around a unit-length 3-D vector
+    // 
+    static inline const Matrix4 rotation( float radians, Vector3 unitVec );
+
+    // Construct a 4x4 matrix to rotate around a unit-length 3-D vector (scalar data contained in vector data type)
+    // 
+    static inline const Matrix4 rotation( floatInVec radians, Vector3 unitVec );
+
+    // Construct a 4x4 rotation matrix from a unit-length quaternion
+    // 
+    static inline const Matrix4 rotation( Quat unitQuat );
+
+    // Construct a 4x4 matrix to perform scaling
+    // NOTE(review): scaleVec presumably supplies the per-axis factors placed on the diagonal - confirm.
+    static inline const Matrix4 scale( Vector3 scaleVec );
+
+    // Construct a 4x4 matrix to perform translation
+    // 
+    static inline const Matrix4 translation( Vector3 translateVec );
+
+    // Construct viewing matrix based on eye position, position looked at, and up direction
+    // The two positions are passed as Point3 values and the up direction as a Vector3.
+    static inline const Matrix4 lookAt( Point3 eyePos, Point3 lookAtPos, Vector3 upVec );
+
+    // Construct a perspective projection matrix
+    // NOTE(review): fovyRadians is presumably the vertical field of view and aspect width / height - confirm.
+    static inline const Matrix4 perspective( float fovyRadians, float aspect, float zNear, float zFar );
+
+    // Construct a perspective projection matrix based on frustum
+    // NOTE(review): the six floats presumably give the clip-plane extents of the view frustum - confirm.
+    static inline const Matrix4 frustum( float left, float right, float bottom, float top, float zNear, float zFar );
+
+    // Construct an orthographic projection matrix
+    // NOTE(review): the six floats presumably give the clip-plane extents of the view volume - confirm.
+    static inline const Matrix4 orthographic( float left, float right, float bottom, float top, float zNear, float zFar );
+
+};
+// Multiply a 4x4 matrix by a scalar
+// Non-member overload: lets the scalar appear as the left-hand operand.
+inline const Matrix4 operator *( float scalar, const Matrix4 & mat );
+
+// Multiply a 4x4 matrix by a scalar (scalar data contained in vector data type)
+// Non-member overload: lets the scalar appear as the left-hand operand.
+inline const Matrix4 operator *( floatInVec scalar, const Matrix4 & mat );
+
+// Append (post-multiply) a scale transformation to a 4x4 matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// Argument order: the matrix comes first, then the (3-D) scale vector.
+inline const Matrix4 appendScale( const Matrix4 & mat, Vector3 scaleVec );
+
+// Prepend (pre-multiply) a scale transformation to a 4x4 matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// Argument order: the (3-D) scale vector comes first, then the matrix.
+inline const Matrix4 prependScale( Vector3 scaleVec, const Matrix4 & mat );
+
+// Multiply two 4x4 matrices per element
+// 
+inline const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 );
+
+// Compute the absolute value of a 4x4 matrix per element
+// 
+inline const Matrix4 absPerElem( const Matrix4 & mat );
+
+// Transpose of a 4x4 matrix
+// The argument is taken by const reference; the transposed matrix is returned by value.
+inline const Matrix4 transpose( const Matrix4 & mat );
+
+// Compute the inverse of a 4x4 matrix
+// NOTE: 
+// Result is unpredictable when the determinant of mat is equal to or near 0.
+// 
+inline const Matrix4 inverse( const Matrix4 & mat );
+
+// Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix
+// NOTE: 
+// This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.  The result is unpredictable when the determinant of mat is equal to or near 0.
+// NOTE(review): "affine" presumably means the bottom row is (0, 0, 0, 1) - confirm.
+inline const Matrix4 affineInverse( const Matrix4 & mat );
+
+// Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix
+// NOTE: 
+// This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.
+// NOTE(review): behaviour for a matrix that violates these restrictions is not documented here - confirm.
+inline const Matrix4 orthoInverse( const Matrix4 & mat );
+
+// Determinant of a 4x4 matrix
+// The result is returned as a floatInVec rather than a plain float.
+inline const floatInVec determinant( const Matrix4 & mat );
+
+// Conditionally select between two 4x4 matrices
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// However, the transfer of select1 to a VMX register may use more processing time than a branch.
+// Use the boolInVec version for better performance.
+// NOTE(review): presumably yields mat1 when select1 is true and mat0 otherwise - confirm.
+inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, bool select1 );
+
+// Conditionally select between two 4x4 matrices (scalar data contained in vector data type)
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// NOTE(review): presumably yields mat1 when select1 is true and mat0 otherwise - confirm.
+inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, boolInVec select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 4x4 matrix
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Matrix4 & mat );
+
+// Print a 4x4 matrix and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Matrix4 & mat, const char * name );
+
+#endif
+
+// A 3x4 transformation matrix in array-of-structures format
+//
+class Transform3
+{
+    Vector3 mCol0;
+    Vector3 mCol1;
+    Vector3 mCol2;
+    Vector3 mCol3;
+
+public:
+    // Default constructor; does no initialization
+    // 
+    inline Transform3( ) { };
+
+    // Copy a 3x4 transformation matrix
+    // 
+    inline Transform3( const Transform3 & tfrm );
+
+    // Construct a 3x4 transformation matrix containing the specified columns
+    // 
+    inline Transform3( Vector3 col0, Vector3 col1, Vector3 col2, Vector3 col3 );
+
+    // Construct a 3x4 transformation matrix from a 3x3 matrix and a 3-D vector
+    // 
+    inline Transform3( const Matrix3 & tfrm, Vector3 translateVec );
+
+    // Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector
+    // 
+    inline Transform3( Quat unitQuat, Vector3 translateVec );
+
+    // Set all elements of a 3x4 transformation matrix to the same scalar value
+    // 
+    explicit inline Transform3( float scalar );
+
+    // Set all elements of a 3x4 transformation matrix to the same scalar value (scalar data contained in vector data type)
+    // 
+    explicit inline Transform3( floatInVec scalar );
+
+    // Assign one 3x4 transformation matrix to another
+    // 
+    inline Transform3 & operator =( const Transform3 & tfrm );
+
+    // Set the upper-left 3x3 submatrix
+    // 
+    inline Transform3 & setUpper3x3( const Matrix3 & mat3 );
+
+    // Get the upper-left 3x3 submatrix of a 3x4 transformation matrix
+    // 
+    inline const Matrix3 getUpper3x3( ) const;
+
+    // Set translation component
+    // 
+    inline Transform3 & setTranslation( Vector3 translateVec );
+
+    // Get the translation component of a 3x4 transformation matrix
+    // 
+    inline const Vector3 getTranslation( ) const;
+
+    // Set column 0 of a 3x4 transformation matrix
+    // 
+    inline Transform3 & setCol0( Vector3 col0 );
+
+    // Set column 1 of a 3x4 transformation matrix
+    // 
+    inline Transform3 & setCol1( Vector3 col1 );
+
+    // Set column 2 of a 3x4 transformation matrix
+    // 
+    inline Transform3 & setCol2( Vector3 col2 );
+
+    // Set column 3 of a 3x4 transformation matrix
+    // 
+    inline Transform3 & setCol3( Vector3 col3 );
+
+    // Get column 0 of a 3x4 transformation matrix
+    // 
+    inline const Vector3 getCol0( ) const;
+
+    // Get column 1 of a 3x4 transformation matrix
+    // 
+    inline const Vector3 getCol1( ) const;
+
+    // Get column 2 of a 3x4 transformation matrix
+    // 
+    inline const Vector3 getCol2( ) const;
+
+    // Get column 3 of a 3x4 transformation matrix
+    // 
+    inline const Vector3 getCol3( ) const;
+
+    // Set the column of a 3x4 transformation matrix referred to by the specified index
+    // 
+    inline Transform3 & setCol( int col, Vector3 vec );
+
+    // Set the row of a 3x4 transformation matrix referred to by the specified index
+    // 
+    inline Transform3 & setRow( int row, Vector4 vec );
+
+    // Get the column of a 3x4 transformation matrix referred to by the specified index
+    // 
+    inline const Vector3 getCol( int col ) const;
+
+    // Get the row of a 3x4 transformation matrix referred to by the specified index
+    // 
+    inline const Vector4 getRow( int row ) const;
+
+    // Subscripting operator to set or get a column
+    // 
+    inline Vector3 & operator []( int col );
+
+    // Subscripting operator to get a column
+    // 
+    inline const Vector3 operator []( int col ) const;
+
+    // Set the element of a 3x4 transformation matrix referred to by column and row indices
+    // 
+    inline Transform3 & setElem( int col, int row, float val );
+
+    // Set the element of a 3x4 transformation matrix referred to by column and row indices (scalar data contained in vector data type)
+    // 
+    inline Transform3 & setElem( int col, int row, floatInVec val );
+
+    // Get the element of a 3x4 transformation matrix referred to by column and row indices
+    // 
+    inline const floatInVec getElem( int col, int row ) const;
+
+    // Multiply a 3x4 transformation matrix by a 3-D vector
+    // 
+    inline const Vector3 operator *( Vector3 vec ) const;
+
+    // Multiply a 3x4 transformation matrix by a 3-D point
+    // 
+    inline const Point3 operator *( Point3 pnt ) const;
+
+    // Multiply two 3x4 transformation matrices
+    // 
+    inline const Transform3 operator *( const Transform3 & tfrm ) const;
+
+    // Perform compound assignment and multiplication by a 3x4 transformation matrix
+    // 
+    inline Transform3 & operator *=( const Transform3 & tfrm );
+
+    // Construct an identity 3x4 transformation matrix
+    // 
+    static inline const Transform3 identity( );
+
+    // Construct a 3x4 transformation matrix to rotate around the x axis (angle in radians)
+    // 
+    static inline const Transform3 rotationX( float radians );
+
+    // Construct a 3x4 transformation matrix to rotate around the y axis (angle in radians)
+    // 
+    static inline const Transform3 rotationY( float radians );
+
+    // Construct a 3x4 transformation matrix to rotate around the z axis (angle in radians)
+    // 
+    static inline const Transform3 rotationZ( float radians );
+
+    // Construct a 3x4 transformation matrix to rotate around the x axis (angle in radians; scalar data contained in vector data type)
+    // 
+    static inline const Transform3 rotationX( floatInVec radians );
+
+    // Construct a 3x4 transformation matrix to rotate around the y axis (angle in radians; scalar data contained in vector data type)
+    // 
+    static inline const Transform3 rotationY( floatInVec radians );
+
+    // Construct a 3x4 transformation matrix to rotate around the z axis (angle in radians; scalar data contained in vector data type)
+    // 
+    static inline const Transform3 rotationZ( floatInVec radians );
+
+    // Construct a 3x4 transformation matrix to rotate around the x, y, and z axes (per-axis angles in radians supplied in radiansXYZ)
+    // 
+    static inline const Transform3 rotationZYX( Vector3 radiansXYZ );
+
+    // Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector (angle in radians)
+    // 
+    static inline const Transform3 rotation( float radians, Vector3 unitVec );
+
+    // Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector (angle in radians; scalar data contained in vector data type)
+    // 
+    static inline const Transform3 rotation( floatInVec radians, Vector3 unitVec );
+
+    // Construct a 3x4 transformation matrix to rotate using a unit-length quaternion
+    // 
+    static inline const Transform3 rotation( Quat unitQuat );
+
+    // Construct a 3x4 transformation matrix to perform scaling
+    // 
+    static inline const Transform3 scale( Vector3 scaleVec );
+
+    // Construct a 3x4 transformation matrix to perform translation
+    // 
+    static inline const Transform3 translation( Vector3 translateVec );
+
+};
+// Append (post-multiply) a scale transformation to a 3x4 transformation matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// 
+inline const Transform3 appendScale( const Transform3 & tfrm, Vector3 scaleVec );
+
+// Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// 
+inline const Transform3 prependScale( Vector3 scaleVec, const Transform3 & tfrm );
+
+// Multiply two 3x4 transformation matrices per element
+// 
+inline const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 );
+
+// Compute the absolute value of a 3x4 transformation matrix per element
+// 
+inline const Transform3 absPerElem( const Transform3 & tfrm );
+
+// Compute the inverse of a 3x4 transformation matrix
+// NOTE: 
+// Result is unpredictable when the determinant of the upper-left 3x3 submatrix is equal to or near 0.
+// 
+inline const Transform3 inverse( const Transform3 & tfrm );
+
+// Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix
+// NOTE: 
+// This can be used to achieve better performance than a general inverse when the specified 3x4 transformation matrix meets the given restrictions.
+// 
+inline const Transform3 orthoInverse( const Transform3 & tfrm );
+
+// Conditionally select between two 3x4 transformation matrices
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// However, the transfer of select1 to a VMX register may use more processing time than a branch.
+// Use the boolInVec version for better performance.
+// 
+inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, bool select1 );
+
+// Conditionally select between two 3x4 transformation matrices (scalar data contained in vector data type)
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// 
+inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, boolInVec select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 3x4 transformation matrix
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Transform3 & tfrm );
+
+// Print a 3x4 transformation matrix and an associated string identifier (name)
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Transform3 & tfrm, const char * name );
+
+#endif
+
+} // namespace Aos
+} // namespace Vectormath
+
+#include "vec_aos.h"
+#include "quat_aos.h"
+#include "mat_aos.h"
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/ppu/cpp/vectormath_soa.h b/Extras/vectormathlibrary/include/vectormath/ppu/cpp/vectormath_soa.h
index f580153f0..4b99c3235 100644
--- a/Extras/vectormathlibrary/include/vectormath/ppu/cpp/vectormath_soa.h
+++ b/Extras/vectormathlibrary/include/vectormath/ppu/cpp/vectormath_soa.h
@@ -1,1919 +1,1919 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_SOA_CPP_H
-#define _VECTORMATH_SOA_CPP_H
-
-#include <math.h>
-#include <altivec.h>
-#include "vectormath_aos.h"
-
-#ifdef _VECTORMATH_DEBUG
-#include <stdio.h>
-#endif
-
-namespace Vectormath {
-
-namespace Soa {
-
-//-----------------------------------------------------------------------------
-// Forward Declarations
-//
-
-class Vector3;
-class Vector4;
-class Point3;
-class Quat;
-class Matrix3;
-class Matrix4;
-class Transform3;
-
-// A set of four 3-D vectors in structure-of-arrays format
-//
-class Vector3
-{
-    typedef vec_float4 vec_float4_t;
-    vec_float4 mX;
-    vec_float4 mY;
-    vec_float4 mZ;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Vector3( ) { };
-
-    // Copy a 3-D vector
-    // 
-    inline Vector3( const Vector3 & vec );
-
-    // Construct a 3-D vector from x, y, and z elements
-    // 
-    inline Vector3( vec_float4 x, vec_float4 y, vec_float4 z );
-
-    // Copy elements from a 3-D point into a 3-D vector
-    // 
-    explicit inline Vector3( const Point3 & pnt );
-
-    // Set all elements of a 3-D vector to the same scalar value
-    // 
-    explicit inline Vector3( vec_float4 scalar );
-
-    // Replicate an AoS 3-D vector
-    // 
-    inline Vector3( Aos::Vector3 vec );
-
-    // Insert four AoS 3-D vectors
-    // 
-    inline Vector3( Aos::Vector3 vec0, Aos::Vector3 vec1, Aos::Vector3 vec2, Aos::Vector3 vec3 );
-
-    // Extract four AoS 3-D vectors
-    // 
-    inline void get4Aos( Aos::Vector3 & result0, Aos::Vector3 & result1, Aos::Vector3 & result2, Aos::Vector3 & result3 ) const;
-
-    // Assign one 3-D vector to another
-    // 
-    inline Vector3 & operator =( const Vector3 & vec );
-
-    // Set the x element of a 3-D vector
-    // 
-    inline Vector3 & setX( vec_float4 x );
-
-    // Set the y element of a 3-D vector
-    // 
-    inline Vector3 & setY( vec_float4 y );
-
-    // Set the z element of a 3-D vector
-    // 
-    inline Vector3 & setZ( vec_float4 z );
-
-    // Get the x element of a 3-D vector
-    // 
-    inline vec_float4 getX( ) const;
-
-    // Get the y element of a 3-D vector
-    // 
-    inline vec_float4 getY( ) const;
-
-    // Get the z element of a 3-D vector
-    // 
-    inline vec_float4 getZ( ) const;
-
-    // Set an x, y, or z element of a 3-D vector by index
-    // 
-    inline Vector3 & setElem( int idx, vec_float4 value );
-
-    // Get an x, y, or z element of a 3-D vector by index
-    // 
-    inline vec_float4 getElem( int idx ) const;
-
-    // Subscripting operator to set or get an element
-    // 
-    inline vec_float4_t & operator []( int idx );
-
-    // Subscripting operator to get an element
-    // 
-    inline vec_float4 operator []( int idx ) const;
-
-    // Add two 3-D vectors
-    // 
-    inline const Vector3 operator +( const Vector3 & vec ) const;
-
-    // Subtract a 3-D vector from another 3-D vector
-    // 
-    inline const Vector3 operator -( const Vector3 & vec ) const;
-
-    // Add a 3-D vector to a 3-D point
-    // 
-    inline const Point3 operator +( const Point3 & pnt ) const;
-
-    // Multiply a 3-D vector by a scalar
-    // 
-    inline const Vector3 operator *( vec_float4 scalar ) const;
-
-    // Divide a 3-D vector by a scalar
-    // 
-    inline const Vector3 operator /( vec_float4 scalar ) const;
-
-    // Perform compound assignment and addition with a 3-D vector
-    // 
-    inline Vector3 & operator +=( const Vector3 & vec );
-
-    // Perform compound assignment and subtraction by a 3-D vector
-    // 
-    inline Vector3 & operator -=( const Vector3 & vec );
-
-    // Perform compound assignment and multiplication by a scalar
-    // 
-    inline Vector3 & operator *=( vec_float4 scalar );
-
-    // Perform compound assignment and division by a scalar
-    // 
-    inline Vector3 & operator /=( vec_float4 scalar );
-
-    // Negate all elements of a 3-D vector
-    // 
-    inline const Vector3 operator -( ) const;
-
-    // Construct x axis
-    // 
-    static inline const Vector3 xAxis( );
-
-    // Construct y axis
-    // 
-    static inline const Vector3 yAxis( );
-
-    // Construct z axis
-    // 
-    static inline const Vector3 zAxis( );
-
-};
-
-// Multiply a 3-D vector by a scalar
-// 
-inline const Vector3 operator *( vec_float4 scalar, const Vector3 & vec );
-
-// Multiply two 3-D vectors per element
-// 
-inline const Vector3 mulPerElem( const Vector3 & vec0, const Vector3 & vec1 );
-
-// Divide two 3-D vectors per element
-// NOTE: 
-// Floating-point behavior matches standard library function divf4.
-// 
-inline const Vector3 divPerElem( const Vector3 & vec0, const Vector3 & vec1 );
-
-// Compute the reciprocal of a 3-D vector per element
-// NOTE: 
-// Floating-point behavior matches standard library function recipf4.
-// 
-inline const Vector3 recipPerElem( const Vector3 & vec );
-
-// Compute the square root of a 3-D vector per element
-// NOTE: 
-// Floating-point behavior matches standard library function sqrtf4.
-// 
-inline const Vector3 sqrtPerElem( const Vector3 & vec );
-
-// Compute the reciprocal square root of a 3-D vector per element
-// NOTE: 
-// Floating-point behavior matches standard library function rsqrtf4.
-// 
-inline const Vector3 rsqrtPerElem( const Vector3 & vec );
-
-// Compute the absolute value of a 3-D vector per element
-// 
-inline const Vector3 absPerElem( const Vector3 & vec );
-
-// Copy sign from one 3-D vector to another, per element
-// 
-inline const Vector3 copySignPerElem( const Vector3 & vec0, const Vector3 & vec1 );
-
-// Maximum of two 3-D vectors per element
-// 
-inline const Vector3 maxPerElem( const Vector3 & vec0, const Vector3 & vec1 );
-
-// Minimum of two 3-D vectors per element
-// 
-inline const Vector3 minPerElem( const Vector3 & vec0, const Vector3 & vec1 );
-
-// Maximum element of a 3-D vector
-// 
-inline vec_float4 maxElem( const Vector3 & vec );
-
-// Minimum element of a 3-D vector
-// 
-inline vec_float4 minElem( const Vector3 & vec );
-
-// Compute the sum of all elements of a 3-D vector
-// 
-inline vec_float4 sum( const Vector3 & vec );
-
-// Compute the dot product of two 3-D vectors
-// 
-inline vec_float4 dot( const Vector3 & vec0, const Vector3 & vec1 );
-
-// Compute the square of the length of a 3-D vector
-// 
-inline vec_float4 lengthSqr( const Vector3 & vec );
-
-// Compute the length of a 3-D vector
-// 
-inline vec_float4 length( const Vector3 & vec );
-
-// Normalize a 3-D vector
-// NOTE: 
-// The result is unpredictable when all elements of vec are at or near zero.
-// 
-inline const Vector3 normalize( const Vector3 & vec );
-
-// Compute cross product of two 3-D vectors
-// 
-inline const Vector3 cross( const Vector3 & vec0, const Vector3 & vec1 );
-
-// Outer product of two 3-D vectors
-// 
-inline const Matrix3 outer( const Vector3 & vec0, const Vector3 & vec1 );
-
-// Pre-multiply a row vector by a 3x3 matrix
-// 
-inline const Vector3 rowMul( const Vector3 & vec, const Matrix3 & mat );
-
-// Cross-product matrix of a 3-D vector
-// 
-inline const Matrix3 crossMatrix( const Vector3 & vec );
-
-// Create cross-product matrix and multiply
-// NOTE: 
-// Faster than separately creating a cross-product matrix and multiplying.
-// 
-inline const Matrix3 crossMatrixMul( const Vector3 & vec, const Matrix3 & mat );
-
-// Linear interpolation between two 3-D vectors
-// NOTE: 
-// Does not clamp t between 0 and 1.
-// 
-inline const Vector3 lerp( vec_float4 t, const Vector3 & vec0, const Vector3 & vec1 );
-
-// Spherical linear interpolation between two 3-D vectors
-// NOTE: 
-// The result is unpredictable if the vectors point in opposite directions.
-// Does not clamp t between 0 and 1.
-// 
-inline const Vector3 slerp( vec_float4 t, const Vector3 & unitVec0, const Vector3 & unitVec1 );
-
-// Conditionally select between two 3-D vectors
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// 
-inline const Vector3 select( const Vector3 & vec0, const Vector3 & vec1, vec_uint4 select1 );
-
-// Load four three-float 3-D vectors, stored in three quadwords
-// 
-inline void loadXYZArray( Vector3 & vec, const vec_float4 * threeQuads );
-
-// Store four slots of an SoA 3-D vector in three quadwords
-// 
-inline void storeXYZArray( const Vector3 & vec, vec_float4 * threeQuads );
-
-// Store eight slots of two SoA 3-D vectors as half-floats
-// 
-inline void storeHalfFloats( const Vector3 & vec0, const Vector3 & vec1, vec_ushort8 * threeQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a 3-D vector
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Vector3 & vec );
-
-// Print a 3-D vector and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Vector3 & vec, const char * name );
-
-#endif
-
-// A set of four 4-D vectors in structure-of-arrays format
-//
-class Vector4
-{
-    typedef vec_float4 vec_float4_t;
-    vec_float4 mX;
-    vec_float4 mY;
-    vec_float4 mZ;
-    vec_float4 mW;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Vector4( ) { };
-
-    // Copy a 4-D vector
-    // 
-    inline Vector4( const Vector4 & vec );
-
-    // Construct a 4-D vector from x, y, z, and w elements
-    // 
-    inline Vector4( vec_float4 x, vec_float4 y, vec_float4 z, vec_float4 w );
-
-    // Construct a 4-D vector from a 3-D vector and a scalar
-    // 
-    inline Vector4( const Vector3 & xyz, vec_float4 w );
-
-    // Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0
-    // 
-    explicit inline Vector4( const Vector3 & vec );
-
-    // Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1
-    // 
-    explicit inline Vector4( const Point3 & pnt );
-
-    // Copy elements from a quaternion into a 4-D vector
-    // 
-    explicit inline Vector4( const Quat & quat );
-
-    // Set all elements of a 4-D vector to the same scalar value
-    // 
-    explicit inline Vector4( vec_float4 scalar );
-
-    // Replicate an AoS 4-D vector
-    // 
-    inline Vector4( Aos::Vector4 vec );
-
-    // Insert four AoS 4-D vectors
-    // 
-    inline Vector4( Aos::Vector4 vec0, Aos::Vector4 vec1, Aos::Vector4 vec2, Aos::Vector4 vec3 );
-
-    // Extract four AoS 4-D vectors
-    // 
-    inline void get4Aos( Aos::Vector4 & result0, Aos::Vector4 & result1, Aos::Vector4 & result2, Aos::Vector4 & result3 ) const;
-
-    // Assign one 4-D vector to another
-    // 
-    inline Vector4 & operator =( const Vector4 & vec );
-
-    // Set the x, y, and z elements of a 4-D vector
-    // NOTE: 
-    // This function does not change the w element.
-    // 
-    inline Vector4 & setXYZ( const Vector3 & vec );
-
-    // Get the x, y, and z elements of a 4-D vector
-    // 
-    inline const Vector3 getXYZ( ) const;
-
-    // Set the x element of a 4-D vector
-    // 
-    inline Vector4 & setX( vec_float4 x );
-
-    // Set the y element of a 4-D vector
-    // 
-    inline Vector4 & setY( vec_float4 y );
-
-    // Set the z element of a 4-D vector
-    // 
-    inline Vector4 & setZ( vec_float4 z );
-
-    // Set the w element of a 4-D vector
-    // 
-    inline Vector4 & setW( vec_float4 w );
-
-    // Get the x element of a 4-D vector
-    // 
-    inline vec_float4 getX( ) const;
-
-    // Get the y element of a 4-D vector
-    // 
-    inline vec_float4 getY( ) const;
-
-    // Get the z element of a 4-D vector
-    // 
-    inline vec_float4 getZ( ) const;
-
-    // Get the w element of a 4-D vector
-    // 
-    inline vec_float4 getW( ) const;
-
-    // Set an x, y, z, or w element of a 4-D vector by index
-    // 
-    inline Vector4 & setElem( int idx, vec_float4 value );
-
-    // Get an x, y, z, or w element of a 4-D vector by index
-    // 
-    inline vec_float4 getElem( int idx ) const;
-
-    // Subscripting operator to set or get an element
-    // 
-    inline vec_float4_t & operator []( int idx );
-
-    // Subscripting operator to get an element
-    // 
-    inline vec_float4 operator []( int idx ) const;
-
-    // Add two 4-D vectors
-    // 
-    inline const Vector4 operator +( const Vector4 & vec ) const;
-
-    // Subtract a 4-D vector from another 4-D vector
-    // 
-    inline const Vector4 operator -( const Vector4 & vec ) const;
-
-    // Multiply a 4-D vector by a scalar
-    // 
-    inline const Vector4 operator *( vec_float4 scalar ) const;
-
-    // Divide a 4-D vector by a scalar
-    // 
-    inline const Vector4 operator /( vec_float4 scalar ) const;
-
-    // Perform compound assignment and addition with a 4-D vector
-    // 
-    inline Vector4 & operator +=( const Vector4 & vec );
-
-    // Perform compound assignment and subtraction by a 4-D vector
-    // 
-    inline Vector4 & operator -=( const Vector4 & vec );
-
-    // Perform compound assignment and multiplication by a scalar
-    // 
-    inline Vector4 & operator *=( vec_float4 scalar );
-
-    // Perform compound assignment and division by a scalar
-    // 
-    inline Vector4 & operator /=( vec_float4 scalar );
-
-    // Negate all elements of a 4-D vector
-    // 
-    inline const Vector4 operator -( ) const;
-
-    // Construct x axis
-    // 
-    static inline const Vector4 xAxis( );
-
-    // Construct y axis
-    // 
-    static inline const Vector4 yAxis( );
-
-    // Construct z axis
-    // 
-    static inline const Vector4 zAxis( );
-
-    // Construct w axis
-    // 
-    static inline const Vector4 wAxis( );
-
-};
-
-// Multiply a 4-D vector by a scalar
-// 
-inline const Vector4 operator *( vec_float4 scalar, const Vector4 & vec );
-
-// Multiply two 4-D vectors per element
-// 
-inline const Vector4 mulPerElem( const Vector4 & vec0, const Vector4 & vec1 );
-
-// Divide two 4-D vectors per element
-// NOTE: 
-// Floating-point behavior matches standard library function divf4.
-// 
-inline const Vector4 divPerElem( const Vector4 & vec0, const Vector4 & vec1 );
-
-// Compute the reciprocal of a 4-D vector per element
-// NOTE: 
-// Floating-point behavior matches standard library function recipf4.
-// 
-inline const Vector4 recipPerElem( const Vector4 & vec );
-
-// Compute the square root of a 4-D vector per element
-// NOTE: 
-// Floating-point behavior matches standard library function sqrtf4.
-// 
-inline const Vector4 sqrtPerElem( const Vector4 & vec );
-
-// Compute the reciprocal square root of a 4-D vector per element
-// NOTE: 
-// Floating-point behavior matches standard library function rsqrtf4.
-// 
-inline const Vector4 rsqrtPerElem( const Vector4 & vec );
-
-// Compute the absolute value of a 4-D vector per element
-// 
-inline const Vector4 absPerElem( const Vector4 & vec );
-
-// Copy sign from one 4-D vector to another, per element
-// 
-inline const Vector4 copySignPerElem( const Vector4 & vec0, const Vector4 & vec1 );
-
-// Maximum of two 4-D vectors per element
-// 
-inline const Vector4 maxPerElem( const Vector4 & vec0, const Vector4 & vec1 );
-
-// Minimum of two 4-D vectors per element
-// 
-inline const Vector4 minPerElem( const Vector4 & vec0, const Vector4 & vec1 );
-
-// Maximum element of a 4-D vector
-// 
-inline vec_float4 maxElem( const Vector4 & vec );
-
-// Minimum element of a 4-D vector
-// 
-inline vec_float4 minElem( const Vector4 & vec );
-
-// Compute the sum of all elements of a 4-D vector
-// 
-inline vec_float4 sum( const Vector4 & vec );
-
-// Compute the dot product of two 4-D vectors
-// 
-inline vec_float4 dot( const Vector4 & vec0, const Vector4 & vec1 );
-
-// Compute the square of the length of a 4-D vector
-// 
-inline vec_float4 lengthSqr( const Vector4 & vec );
-
-// Compute the length of a 4-D vector
-// 
-inline vec_float4 length( const Vector4 & vec );
-
-// Normalize a 4-D vector
-// NOTE: 
-// The result is unpredictable when all elements of vec are at or near zero.
-// 
-inline const Vector4 normalize( const Vector4 & vec );
-
-// Outer product of two 4-D vectors
-// 
-inline const Matrix4 outer( const Vector4 & vec0, const Vector4 & vec1 );
-
-// Linear interpolation between two 4-D vectors
-// NOTE: 
-// Does not clamp t between 0 and 1.
-// 
-inline const Vector4 lerp( vec_float4 t, const Vector4 & vec0, const Vector4 & vec1 );
-
-// Spherical linear interpolation between two 4-D vectors
-// NOTE: 
-// The result is unpredictable if the vectors point in opposite directions.
-// Does not clamp t between 0 and 1.
-// 
-inline const Vector4 slerp( vec_float4 t, const Vector4 & unitVec0, const Vector4 & unitVec1 );
-
-// Conditionally select between two 4-D vectors
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// 
-inline const Vector4 select( const Vector4 & vec0, const Vector4 & vec1, vec_uint4 select1 );
-
-// Store four slots of an SoA 4-D vector as half-floats
-// 
-inline void storeHalfFloats( const Vector4 & vec, vec_ushort8 * twoQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a 4-D vector
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Vector4 & vec );
-
-// Print a 4-D vector and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Vector4 & vec, const char * name );
-
-#endif
-
-// A set of four 3-D points in structure-of-arrays format
-//
-class Point3
-{
-    typedef vec_float4 vec_float4_t;
-    vec_float4 mX;
-    vec_float4 mY;
-    vec_float4 mZ;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Point3( ) { };
-
-    // Copy a 3-D point
-    // 
-    inline Point3( const Point3 & pnt );
-
-    // Construct a 3-D point from x, y, and z elements
-    // 
-    inline Point3( vec_float4 x, vec_float4 y, vec_float4 z );
-
-    // Copy elements from a 3-D vector into a 3-D point
-    // 
-    explicit inline Point3( const Vector3 & vec );
-
-    // Set all elements of a 3-D point to the same scalar value
-    // 
-    explicit inline Point3( vec_float4 scalar );
-
-    // Replicate an AoS 3-D point
-    // 
-    inline Point3( Aos::Point3 pnt );
-
-    // Insert four AoS 3-D points
-    // 
-    inline Point3( Aos::Point3 pnt0, Aos::Point3 pnt1, Aos::Point3 pnt2, Aos::Point3 pnt3 );
-
-    // Extract four AoS 3-D points
-    // 
-    inline void get4Aos( Aos::Point3 & result0, Aos::Point3 & result1, Aos::Point3 & result2, Aos::Point3 & result3 ) const;
-
-    // Assign one 3-D point to another
-    // 
-    inline Point3 & operator =( const Point3 & pnt );
-
-    // Set the x element of a 3-D point
-    // 
-    inline Point3 & setX( vec_float4 x );
-
-    // Set the y element of a 3-D point
-    // 
-    inline Point3 & setY( vec_float4 y );
-
-    // Set the z element of a 3-D point
-    // 
-    inline Point3 & setZ( vec_float4 z );
-
-    // Get the x element of a 3-D point
-    // 
-    inline vec_float4 getX( ) const;
-
-    // Get the y element of a 3-D point
-    // 
-    inline vec_float4 getY( ) const;
-
-    // Get the z element of a 3-D point
-    // 
-    inline vec_float4 getZ( ) const;
-
-    // Set an x, y, or z element of a 3-D point by index
-    // 
-    inline Point3 & setElem( int idx, vec_float4 value );
-
-    // Get an x, y, or z element of a 3-D point by index
-    // 
-    inline vec_float4 getElem( int idx ) const;
-
-    // Subscripting operator to set or get an element
-    // 
-    inline vec_float4_t & operator []( int idx );
-
-    // Subscripting operator to get an element
-    // 
-    inline vec_float4 operator []( int idx ) const;
-
-    // Subtract a 3-D point from another 3-D point
-    // 
-    inline const Vector3 operator -( const Point3 & pnt ) const;
-
-    // Add a 3-D point to a 3-D vector
-    // 
-    inline const Point3 operator +( const Vector3 & vec ) const;
-
-    // Subtract a 3-D vector from a 3-D point
-    // 
-    inline const Point3 operator -( const Vector3 & vec ) const;
-
-    // Perform compound assignment and addition with a 3-D vector
-    // 
-    inline Point3 & operator +=( const Vector3 & vec );
-
-    // Perform compound assignment and subtraction by a 3-D vector
-    // 
-    inline Point3 & operator -=( const Vector3 & vec );
-
-};
-
-// Multiply two 3-D points per element
-// 
-inline const Point3 mulPerElem( const Point3 & pnt0, const Point3 & pnt1 );
-
-// Divide two 3-D points per element
-// NOTE: 
-// Floating-point behavior matches standard library function divf4.
-// 
-inline const Point3 divPerElem( const Point3 & pnt0, const Point3 & pnt1 );
-
-// Compute the reciprocal of a 3-D point per element
-// NOTE: 
-// Floating-point behavior matches standard library function recipf4.
-// 
-inline const Point3 recipPerElem( const Point3 & pnt );
-
-// Compute the square root of a 3-D point per element
-// NOTE: 
-// Floating-point behavior matches standard library function sqrtf4.
-// 
-inline const Point3 sqrtPerElem( const Point3 & pnt );
-
-// Compute the reciprocal square root of a 3-D point per element
-// NOTE: 
-// Floating-point behavior matches standard library function rsqrtf4.
-// 
-inline const Point3 rsqrtPerElem( const Point3 & pnt );
-
-// Compute the absolute value of a 3-D point per element
-// 
-inline const Point3 absPerElem( const Point3 & pnt );
-
-// Copy sign from one 3-D point to another, per element
-// 
-inline const Point3 copySignPerElem( const Point3 & pnt0, const Point3 & pnt1 );
-
-// Maximum of two 3-D points per element
-// 
-inline const Point3 maxPerElem( const Point3 & pnt0, const Point3 & pnt1 );
-
-// Minimum of two 3-D points per element
-// 
-inline const Point3 minPerElem( const Point3 & pnt0, const Point3 & pnt1 );
-
-// Maximum element of a 3-D point
-// 
-inline vec_float4 maxElem( const Point3 & pnt );
-
-// Minimum element of a 3-D point
-// 
-inline vec_float4 minElem( const Point3 & pnt );
-
-// Compute the sum of all elements of a 3-D point
-// 
-inline vec_float4 sum( const Point3 & pnt );
-
-// Apply uniform scale to a 3-D point
-// 
-inline const Point3 scale( const Point3 & pnt, vec_float4 scaleVal );
-
-// Apply non-uniform scale to a 3-D point
-// 
-inline const Point3 scale( const Point3 & pnt, const Vector3 & scaleVec );
-
-// Scalar projection of a 3-D point on a unit-length 3-D vector
-// 
-inline vec_float4 projection( const Point3 & pnt, const Vector3 & unitVec );
-
-// Compute the square of the distance of a 3-D point from the coordinate-system origin
-// 
-inline vec_float4 distSqrFromOrigin( const Point3 & pnt );
-
-// Compute the distance of a 3-D point from the coordinate-system origin
-// 
-inline vec_float4 distFromOrigin( const Point3 & pnt );
-
-// Compute the square of the distance between two 3-D points
-// 
-inline vec_float4 distSqr( const Point3 & pnt0, const Point3 & pnt1 );
-
-// Compute the distance between two 3-D points
-// 
-inline vec_float4 dist( const Point3 & pnt0, const Point3 & pnt1 );
-
-// Linear interpolation between two 3-D points
-// NOTE: 
-// Does not clamp t between 0 and 1.
-// 
-inline const Point3 lerp( vec_float4 t, const Point3 & pnt0, const Point3 & pnt1 );
-
-// Conditionally select between two 3-D points
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// 
-inline const Point3 select( const Point3 & pnt0, const Point3 & pnt1, vec_uint4 select1 );
-
-// Load four three-float 3-D points, stored in three quadwords
-// 
-inline void loadXYZArray( Point3 & pnt, const vec_float4 * threeQuads );
-
-// Store four slots of an SoA 3-D point in three quadwords
-// 
-inline void storeXYZArray( const Point3 & pnt, vec_float4 * threeQuads );
-
-// Store eight slots of two SoA 3-D points as half-floats
-// 
-inline void storeHalfFloats( const Point3 & pnt0, const Point3 & pnt1, vec_ushort8 * threeQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a 3-D point
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Point3 & pnt );
-
-// Print a 3-D point and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Point3 & pnt, const char * name );
-
-#endif
-
-// A set of four quaternions in structure-of-arrays format
-// Stored as four vec_float4 members: mX, mY, mZ, and mW.
-class Quat
-{
-    typedef vec_float4 vec_float4_t;
-    vec_float4 mX;
-    vec_float4 mY;
-    vec_float4 mZ;
-    vec_float4 mW;
-
-public:
-    // Default constructor; does no initialization
-    // The body is empty, so no member is assigned.
-    inline Quat( ) { };
-
-    // Copy a quaternion
-    // 
-    inline Quat( const Quat & quat );
-
-    // Construct a quaternion from x, y, z, and w elements
-    // 
-    inline Quat( vec_float4 x, vec_float4 y, vec_float4 z, vec_float4 w );
-
-    // Construct a quaternion from a 3-D vector and a scalar
-    // 
-    inline Quat( const Vector3 & xyz, vec_float4 w );
-
-    // Copy elements from a 4-D vector into a quaternion
-    // Explicit, so a Vector4 is never implicitly converted to a Quat.
-    explicit inline Quat( const Vector4 & vec );
-
-    // Convert a rotation matrix to a unit-length quaternion
-    // Explicit, so a Matrix3 is never implicitly converted to a Quat.
-    explicit inline Quat( const Matrix3 & rotMat );
-
-    // Set all elements of a quaternion to the same scalar value
-    // Explicit, so a vec_float4 is never implicitly converted to a Quat.
-    explicit inline Quat( vec_float4 scalar );
-
-    // Replicate an AoS quaternion
-    // Not declared explicit, so it also serves as an implicit conversion from Aos::Quat.
-    inline Quat( Aos::Quat quat );
-
-    // Insert four AoS quaternions
-    // 
-    inline Quat( Aos::Quat quat0, Aos::Quat quat1, Aos::Quat quat2, Aos::Quat quat3 );
-
-    // Extract four AoS quaternions
-    // The results are written to the four reference arguments.
-    inline void get4Aos( Aos::Quat & result0, Aos::Quat & result1, Aos::Quat & result2, Aos::Quat & result3 ) const;
-
-    // Assign one quaternion to another
-    // 
-    inline Quat & operator =( const Quat & quat );
-
-    // Set the x, y, and z elements of a quaternion
-    // NOTE: 
-    // This function does not change the w element.
-    // 
-    inline Quat & setXYZ( const Vector3 & vec );
-
-    // Get the x, y, and z elements of a quaternion
-    // 
-    inline const Vector3 getXYZ( ) const;
-
-    // Set the x element of a quaternion
-    // 
-    inline Quat & setX( vec_float4 x );
-
-    // Set the y element of a quaternion
-    // 
-    inline Quat & setY( vec_float4 y );
-
-    // Set the z element of a quaternion
-    // 
-    inline Quat & setZ( vec_float4 z );
-
-    // Set the w element of a quaternion
-    // 
-    inline Quat & setW( vec_float4 w );
-
-    // Get the x element of a quaternion
-    // 
-    inline vec_float4 getX( ) const;
-
-    // Get the y element of a quaternion
-    // 
-    inline vec_float4 getY( ) const;
-
-    // Get the z element of a quaternion
-    // 
-    inline vec_float4 getZ( ) const;
-
-    // Get the w element of a quaternion
-    // 
-    inline vec_float4 getW( ) const;
-
-    // Set an x, y, z, or w element of a quaternion by index
-    // 
-    inline Quat & setElem( int idx, vec_float4 value );
-
-    // Get an x, y, z, or w element of a quaternion by index
-    // 
-    inline vec_float4 getElem( int idx ) const;
-
-    // Subscripting operator to set or get an element
-    // Returns a vec_float4_t reference, so the element can be assigned through it.
-    inline vec_float4_t & operator []( int idx );
-
-    // Subscripting operator to get an element
-    // Const overload; the element is returned by value.
-    inline vec_float4 operator []( int idx ) const;
-
-    // Add two quaternions
-    // 
-    inline const Quat operator +( const Quat & quat ) const;
-
-    // Subtract a quaternion from another quaternion
-    // 
-    inline const Quat operator -( const Quat & quat ) const;
-
-    // Multiply two quaternions
-    // 
-    inline const Quat operator *( const Quat & quat ) const;
-
-    // Multiply a quaternion by a scalar
-    // 
-    inline const Quat operator *( vec_float4 scalar ) const;
-
-    // Divide a quaternion by a scalar
-    // 
-    inline const Quat operator /( vec_float4 scalar ) const;
-
-    // Perform compound assignment and addition with a quaternion
-    // 
-    inline Quat & operator +=( const Quat & quat );
-
-    // Perform compound assignment and subtraction by a quaternion
-    // 
-    inline Quat & operator -=( const Quat & quat );
-
-    // Perform compound assignment and multiplication by a quaternion
-    // 
-    inline Quat & operator *=( const Quat & quat );
-
-    // Perform compound assignment and multiplication by a scalar
-    // 
-    inline Quat & operator *=( vec_float4 scalar );
-
-    // Perform compound assignment and division by a scalar
-    // 
-    inline Quat & operator /=( vec_float4 scalar );
-
-    // Negate all elements of a quaternion
-    // 
-    inline const Quat operator -( ) const;
-
-    // Construct an identity quaternion
-    // 
-    static inline const Quat identity( );
-
-    // Construct a quaternion to rotate between two unit-length 3-D vectors
-    // NOTE: 
-    // The result is unpredictable if unitVec0 and unitVec1 point in opposite directions.
-    // 
-    static inline const Quat rotation( const Vector3 & unitVec0, const Vector3 & unitVec1 );
-
-    // Construct a quaternion to rotate around a unit-length 3-D vector
-    // 
-    static inline const Quat rotation( vec_float4 radians, const Vector3 & unitVec );
-
-    // Construct a quaternion to rotate around the x axis
-    // 
-    static inline const Quat rotationX( vec_float4 radians );
-
-    // Construct a quaternion to rotate around the y axis
-    // 
-    static inline const Quat rotationY( vec_float4 radians );
-
-    // Construct a quaternion to rotate around the z axis
-    // 
-    static inline const Quat rotationZ( vec_float4 radians );
-
-};
-
-// Multiply a quaternion by a scalar
-// Non-member overload taking the scalar as the left-hand operand.
-inline const Quat operator *( vec_float4 scalar, const Quat & quat );
-
-// Compute the conjugate of a quaternion
-// 
-inline const Quat conj( const Quat & quat );
-
-// Use a unit-length quaternion to rotate a 3-D vector
-// 
-inline const Vector3 rotate( const Quat & unitQuat, const Vector3 & vec );
-
-// Compute the dot product of two quaternions
-// 
-inline vec_float4 dot( const Quat & quat0, const Quat & quat1 );
-
-// Compute the norm of a quaternion
-// NOTE(review): presumably the squared length, given the separate length() below - confirm.
-inline vec_float4 norm( const Quat & quat );
-
-// Compute the length of a quaternion
-// 
-inline vec_float4 length( const Quat & quat );
-
-// Normalize a quaternion
-// NOTE: 
-// The result is unpredictable when all elements of quat are at or near zero.
-// 
-inline const Quat normalize( const Quat & quat );
-
-// Linear interpolation between two quaternions
-// NOTE: 
-// Does not clamp t between 0 and 1.
-// 
-inline const Quat lerp( vec_float4 t, const Quat & quat0, const Quat & quat1 );
-
-// Spherical linear interpolation between two quaternions
-// NOTE: 
-// Interpolates along the shortest path between orientations.
-// Does not clamp t between 0 and 1.
-// 
-inline const Quat slerp( vec_float4 t, const Quat & unitQuat0, const Quat & unitQuat1 );
-
-// Spherical quadrangle interpolation
-// 
-inline const Quat squad( vec_float4 t, const Quat & unitQuat0, const Quat & unitQuat1, const Quat & unitQuat2, const Quat & unitQuat3 );
-
-// Conditionally select between two quaternions
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// The choice between quat0 and quat1 is controlled by the select1 argument.
-inline const Quat select( const Quat & quat0, const Quat & quat1, vec_uint4 select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a quaternion
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Quat & quat );
-
-// Print a quaternion and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Quat & quat, const char * name );
-
-#endif
-
-// A set of four 3x3 matrices in structure-of-arrays format
-// Stored as three Vector3 columns: mCol0, mCol1, and mCol2.
-class Matrix3
-{
-    Vector3 mCol0;
-    Vector3 mCol1;
-    Vector3 mCol2;
-
-public:
-    // Default constructor; does no initialization
-    // The body is empty, so no column is assigned here.
-    inline Matrix3( ) { };
-
-    // Copy a 3x3 matrix
-    // 
-    inline Matrix3( const Matrix3 & mat );
-
-    // Construct a 3x3 matrix containing the specified columns
-    // 
-    inline Matrix3( const Vector3 & col0, const Vector3 & col1, const Vector3 & col2 );
-
-    // Construct a 3x3 rotation matrix from a unit-length quaternion
-    // Explicit, so a Quat is never implicitly converted to a Matrix3.
-    explicit inline Matrix3( const Quat & unitQuat );
-
-    // Set all elements of a 3x3 matrix to the same scalar value
-    // Explicit, so a vec_float4 is never implicitly converted to a Matrix3.
-    explicit inline Matrix3( vec_float4 scalar );
-
-    // Replicate an AoS 3x3 matrix
-    // Not declared explicit, so it also serves as an implicit conversion from Aos::Matrix3.
-    inline Matrix3( const Aos::Matrix3 & mat );
-
-    // Insert four AoS 3x3 matrices
-    // 
-    inline Matrix3( const Aos::Matrix3 & mat0, const Aos::Matrix3 & mat1, const Aos::Matrix3 & mat2, const Aos::Matrix3 & mat3 );
-
-    // Extract four AoS 3x3 matrices
-    // The results are written to the four reference arguments.
-    inline void get4Aos( Aos::Matrix3 & result0, Aos::Matrix3 & result1, Aos::Matrix3 & result2, Aos::Matrix3 & result3 ) const;
-
-    // Assign one 3x3 matrix to another
-    // 
-    inline Matrix3 & operator =( const Matrix3 & mat );
-
-    // Set column 0 of a 3x3 matrix
-    // 
-    inline Matrix3 & setCol0( const Vector3 & col0 );
-
-    // Set column 1 of a 3x3 matrix
-    // 
-    inline Matrix3 & setCol1( const Vector3 & col1 );
-
-    // Set column 2 of a 3x3 matrix
-    // 
-    inline Matrix3 & setCol2( const Vector3 & col2 );
-
-    // Get column 0 of a 3x3 matrix
-    // 
-    inline const Vector3 getCol0( ) const;
-
-    // Get column 1 of a 3x3 matrix
-    // 
-    inline const Vector3 getCol1( ) const;
-
-    // Get column 2 of a 3x3 matrix
-    // 
-    inline const Vector3 getCol2( ) const;
-
-    // Set the column of a 3x3 matrix referred to by the specified index
-    // 
-    inline Matrix3 & setCol( int col, const Vector3 & vec );
-
-    // Set the row of a 3x3 matrix referred to by the specified index
-    // 
-    inline Matrix3 & setRow( int row, const Vector3 & vec );
-
-    // Get the column of a 3x3 matrix referred to by the specified index
-    // 
-    inline const Vector3 getCol( int col ) const;
-
-    // Get the row of a 3x3 matrix referred to by the specified index
-    // 
-    inline const Vector3 getRow( int row ) const;
-
-    // Subscripting operator to set or get a column
-    // Returns a Vector3 reference, so the column can be assigned through it.
-    inline Vector3 & operator []( int col );
-
-    // Subscripting operator to get a column
-    // Const overload; the column is returned by value.
-    inline const Vector3 operator []( int col ) const;
-
-    // Set the element of a 3x3 matrix referred to by column and row indices
-    // Argument order is column index first, then row index.
-    inline Matrix3 & setElem( int col, int row, vec_float4 val );
-
-    // Get the element of a 3x3 matrix referred to by column and row indices
-    // Argument order is column index first, then row index.
-    inline vec_float4 getElem( int col, int row ) const;
-
-    // Add two 3x3 matrices
-    // 
-    inline const Matrix3 operator +( const Matrix3 & mat ) const;
-
-    // Subtract a 3x3 matrix from another 3x3 matrix
-    // 
-    inline const Matrix3 operator -( const Matrix3 & mat ) const;
-
-    // Negate all elements of a 3x3 matrix
-    // 
-    inline const Matrix3 operator -( ) const;
-
-    // Multiply a 3x3 matrix by a scalar
-    // 
-    inline const Matrix3 operator *( vec_float4 scalar ) const;
-
-    // Multiply a 3x3 matrix by a 3-D vector
-    // 
-    inline const Vector3 operator *( const Vector3 & vec ) const;
-
-    // Multiply two 3x3 matrices
-    // 
-    inline const Matrix3 operator *( const Matrix3 & mat ) const;
-
-    // Perform compound assignment and addition with a 3x3 matrix
-    // 
-    inline Matrix3 & operator +=( const Matrix3 & mat );
-
-    // Perform compound assignment and subtraction by a 3x3 matrix
-    // 
-    inline Matrix3 & operator -=( const Matrix3 & mat );
-
-    // Perform compound assignment and multiplication by a scalar
-    // 
-    inline Matrix3 & operator *=( vec_float4 scalar );
-
-    // Perform compound assignment and multiplication by a 3x3 matrix
-    // 
-    inline Matrix3 & operator *=( const Matrix3 & mat );
-
-    // Construct an identity 3x3 matrix
-    // 
-    static inline const Matrix3 identity( );
-
-    // Construct a 3x3 matrix to rotate around the x axis
-    // 
-    static inline const Matrix3 rotationX( vec_float4 radians );
-
-    // Construct a 3x3 matrix to rotate around the y axis
-    // 
-    static inline const Matrix3 rotationY( vec_float4 radians );
-
-    // Construct a 3x3 matrix to rotate around the z axis
-    // 
-    static inline const Matrix3 rotationZ( vec_float4 radians );
-
-    // Construct a 3x3 matrix to rotate around the x, y, and z axes
-    // The three angles are passed together in the Vector3 radiansXYZ.
-    static inline const Matrix3 rotationZYX( const Vector3 & radiansXYZ );
-
-    // Construct a 3x3 matrix to rotate around a unit-length 3-D vector
-    // 
-    static inline const Matrix3 rotation( vec_float4 radians, const Vector3 & unitVec );
-
-    // Construct a rotation matrix from a unit-length quaternion
-    // 
-    static inline const Matrix3 rotation( const Quat & unitQuat );
-
-    // Construct a 3x3 matrix to perform scaling
-    // 
-    static inline const Matrix3 scale( const Vector3 & scaleVec );
-
-};
-// Multiply a 3x3 matrix by a scalar
-// Non-member overload taking the scalar as the left-hand operand.
-inline const Matrix3 operator *( vec_float4 scalar, const Matrix3 & mat );
-
-// Append (post-multiply) a scale transformation to a 3x3 matrix
-// NOTE: 
-// Faster than creating and multiplying a scale transformation matrix.
-// 
-inline const Matrix3 appendScale( const Matrix3 & mat, const Vector3 & scaleVec );
-
-// Prepend (pre-multiply) a scale transformation to a 3x3 matrix
-// NOTE: 
-// Faster than creating and multiplying a scale transformation matrix.
-// 
-inline const Matrix3 prependScale( const Vector3 & scaleVec, const Matrix3 & mat );
-
-// Multiply two 3x3 matrices per element
-// 
-inline const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 );
-
-// Compute the absolute value of a 3x3 matrix per element
-// 
-inline const Matrix3 absPerElem( const Matrix3 & mat );
-
-// Transpose of a 3x3 matrix
-// 
-inline const Matrix3 transpose( const Matrix3 & mat );
-
-// Compute the inverse of a 3x3 matrix
-// NOTE: 
-// Result is unpredictable when the determinant of mat is equal to or near 0.
-// 
-inline const Matrix3 inverse( const Matrix3 & mat );
-
-// Determinant of a 3x3 matrix
-// 
-inline vec_float4 determinant( const Matrix3 & mat );
-
-// Conditionally select between two 3x3 matrices
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// The choice between mat0 and mat1 is controlled by the select1 argument.
-inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, vec_uint4 select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a 3x3 matrix
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Matrix3 & mat );
-
-// Print a 3x3 matrix and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Matrix3 & mat, const char * name );
-
-#endif
-
-// A set of four 4x4 matrices in structure-of-arrays format
-// Stored as four Vector4 columns: mCol0, mCol1, mCol2, and mCol3.
-class Matrix4
-{
-    Vector4 mCol0;
-    Vector4 mCol1;
-    Vector4 mCol2;
-    Vector4 mCol3;
-
-public:
-    // Default constructor; does no initialization
-    // The body is empty, so no column is assigned here.
-    inline Matrix4( ) { };
-
-    // Copy a 4x4 matrix
-    // 
-    inline Matrix4( const Matrix4 & mat );
-
-    // Construct a 4x4 matrix containing the specified columns
-    // 
-    inline Matrix4( const Vector4 & col0, const Vector4 & col1, const Vector4 & col2, const Vector4 & col3 );
-
-    // Construct a 4x4 matrix from a 3x4 transformation matrix
-    // Explicit, so a Transform3 is never implicitly converted to a Matrix4.
-    explicit inline Matrix4( const Transform3 & mat );
-
-    // Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector
-    // 
-    inline Matrix4( const Matrix3 & mat, const Vector3 & translateVec );
-
-    // Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector
-    // 
-    inline Matrix4( const Quat & unitQuat, const Vector3 & translateVec );
-
-    // Set all elements of a 4x4 matrix to the same scalar value
-    // Explicit, so a vec_float4 is never implicitly converted to a Matrix4.
-    explicit inline Matrix4( vec_float4 scalar );
-
-    // Replicate an AoS 4x4 matrix
-    // Not declared explicit, so it also serves as an implicit conversion from Aos::Matrix4.
-    inline Matrix4( const Aos::Matrix4 & mat );
-
-    // Insert four AoS 4x4 matrices
-    // 
-    inline Matrix4( const Aos::Matrix4 & mat0, const Aos::Matrix4 & mat1, const Aos::Matrix4 & mat2, const Aos::Matrix4 & mat3 );
-
-    // Extract four AoS 4x4 matrices
-    // The results are written to the four reference arguments.
-    inline void get4Aos( Aos::Matrix4 & result0, Aos::Matrix4 & result1, Aos::Matrix4 & result2, Aos::Matrix4 & result3 ) const;
-
-    // Assign one 4x4 matrix to another
-    // 
-    inline Matrix4 & operator =( const Matrix4 & mat );
-
-    // Set the upper-left 3x3 submatrix
-    // NOTE: 
-    // This function does not change the bottom row elements.
-    // 
-    inline Matrix4 & setUpper3x3( const Matrix3 & mat3 );
-
-    // Get the upper-left 3x3 submatrix of a 4x4 matrix
-    // 
-    inline const Matrix3 getUpper3x3( ) const;
-
-    // Set translation component
-    // NOTE: 
-    // This function does not change the bottom row elements.
-    // 
-    inline Matrix4 & setTranslation( const Vector3 & translateVec );
-
-    // Get the translation component of a 4x4 matrix
-    // 
-    inline const Vector3 getTranslation( ) const;
-
-    // Set column 0 of a 4x4 matrix
-    // 
-    inline Matrix4 & setCol0( const Vector4 & col0 );
-
-    // Set column 1 of a 4x4 matrix
-    // 
-    inline Matrix4 & setCol1( const Vector4 & col1 );
-
-    // Set column 2 of a 4x4 matrix
-    // 
-    inline Matrix4 & setCol2( const Vector4 & col2 );
-
-    // Set column 3 of a 4x4 matrix
-    // 
-    inline Matrix4 & setCol3( const Vector4 & col3 );
-
-    // Get column 0 of a 4x4 matrix
-    // 
-    inline const Vector4 getCol0( ) const;
-
-    // Get column 1 of a 4x4 matrix
-    // 
-    inline const Vector4 getCol1( ) const;
-
-    // Get column 2 of a 4x4 matrix
-    // 
-    inline const Vector4 getCol2( ) const;
-
-    // Get column 3 of a 4x4 matrix
-    // 
-    inline const Vector4 getCol3( ) const;
-
-    // Set the column of a 4x4 matrix referred to by the specified index
-    // 
-    inline Matrix4 & setCol( int col, const Vector4 & vec );
-
-    // Set the row of a 4x4 matrix referred to by the specified index
-    // 
-    inline Matrix4 & setRow( int row, const Vector4 & vec );
-
-    // Get the column of a 4x4 matrix referred to by the specified index
-    // 
-    inline const Vector4 getCol( int col ) const;
-
-    // Get the row of a 4x4 matrix referred to by the specified index
-    // 
-    inline const Vector4 getRow( int row ) const;
-
-    // Subscripting operator to set or get a column
-    // Returns a Vector4 reference, so the column can be assigned through it.
-    inline Vector4 & operator []( int col );
-
-    // Subscripting operator to get a column
-    // Const overload; the column is returned by value.
-    inline const Vector4 operator []( int col ) const;
-
-    // Set the element of a 4x4 matrix referred to by column and row indices
-    // Argument order is column index first, then row index.
-    inline Matrix4 & setElem( int col, int row, vec_float4 val );
-
-    // Get the element of a 4x4 matrix referred to by column and row indices
-    // Argument order is column index first, then row index.
-    inline vec_float4 getElem( int col, int row ) const;
-
-    // Add two 4x4 matrices
-    // 
-    inline const Matrix4 operator +( const Matrix4 & mat ) const;
-
-    // Subtract a 4x4 matrix from another 4x4 matrix
-    // 
-    inline const Matrix4 operator -( const Matrix4 & mat ) const;
-
-    // Negate all elements of a 4x4 matrix
-    // 
-    inline const Matrix4 operator -( ) const;
-
-    // Multiply a 4x4 matrix by a scalar
-    // 
-    inline const Matrix4 operator *( vec_float4 scalar ) const;
-
-    // Multiply a 4x4 matrix by a 4-D vector
-    // 
-    inline const Vector4 operator *( const Vector4 & vec ) const;
-
-    // Multiply a 4x4 matrix by a 3-D vector
-    // The result is returned as a Vector4.
-    inline const Vector4 operator *( const Vector3 & vec ) const;
-
-    // Multiply a 4x4 matrix by a 3-D point
-    // The result is returned as a Vector4.
-    inline const Vector4 operator *( const Point3 & pnt ) const;
-
-    // Multiply two 4x4 matrices
-    // 
-    inline const Matrix4 operator *( const Matrix4 & mat ) const;
-
-    // Multiply a 4x4 matrix by a 3x4 transformation matrix
-    // 
-    inline const Matrix4 operator *( const Transform3 & tfrm ) const;
-
-    // Perform compound assignment and addition with a 4x4 matrix
-    // 
-    inline Matrix4 & operator +=( const Matrix4 & mat );
-
-    // Perform compound assignment and subtraction by a 4x4 matrix
-    // 
-    inline Matrix4 & operator -=( const Matrix4 & mat );
-
-    // Perform compound assignment and multiplication by a scalar
-    // 
-    inline Matrix4 & operator *=( vec_float4 scalar );
-
-    // Perform compound assignment and multiplication by a 4x4 matrix
-    // 
-    inline Matrix4 & operator *=( const Matrix4 & mat );
-
-    // Perform compound assignment and multiplication by a 3x4 transformation matrix
-    // 
-    inline Matrix4 & operator *=( const Transform3 & tfrm );
-
-    // Construct an identity 4x4 matrix
-    // 
-    static inline const Matrix4 identity( );
-
-    // Construct a 4x4 matrix to rotate around the x axis
-    // 
-    static inline const Matrix4 rotationX( vec_float4 radians );
-
-    // Construct a 4x4 matrix to rotate around the y axis
-    // 
-    static inline const Matrix4 rotationY( vec_float4 radians );
-
-    // Construct a 4x4 matrix to rotate around the z axis
-    // 
-    static inline const Matrix4 rotationZ( vec_float4 radians );
-
-    // Construct a 4x4 matrix to rotate around the x, y, and z axes
-    // The three angles are passed together in the Vector3 radiansXYZ.
-    static inline const Matrix4 rotationZYX( const Vector3 & radiansXYZ );
-
-    // Construct a 4x4 matrix to rotate around a unit-length 3-D vector
-    // 
-    static inline const Matrix4 rotation( vec_float4 radians, const Vector3 & unitVec );
-
-    // Construct a rotation matrix from a unit-length quaternion
-    // 
-    static inline const Matrix4 rotation( const Quat & unitQuat );
-
-    // Construct a 4x4 matrix to perform scaling
-    // 
-    static inline const Matrix4 scale( const Vector3 & scaleVec );
-
-    // Construct a 4x4 matrix to perform translation
-    // 
-    static inline const Matrix4 translation( const Vector3 & translateVec );
-
-    // Construct viewing matrix based on eye position, position looked at, and up direction
-    // 
-    static inline const Matrix4 lookAt( const Point3 & eyePos, const Point3 & lookAtPos, const Vector3 & upVec );
-
-    // Construct a perspective projection matrix
-    // 
-    static inline const Matrix4 perspective( vec_float4 fovyRadians, vec_float4 aspect, vec_float4 zNear, vec_float4 zFar );
-
-    // Construct a perspective projection matrix based on frustum
-    // 
-    static inline const Matrix4 frustum( vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar );
-
-    // Construct an orthographic projection matrix
-    // 
-    static inline const Matrix4 orthographic( vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar );
-
-};
-// Multiply a 4x4 matrix by a scalar
-// Non-member overload taking the scalar as the left-hand operand.
-inline const Matrix4 operator *( vec_float4 scalar, const Matrix4 & mat );
-
-// Append (post-multiply) a scale transformation to a 4x4 matrix
-// NOTE: 
-// Faster than creating and multiplying a scale transformation matrix.
-// 
-inline const Matrix4 appendScale( const Matrix4 & mat, const Vector3 & scaleVec );
-
-// Prepend (pre-multiply) a scale transformation to a 4x4 matrix
-// NOTE: 
-// Faster than creating and multiplying a scale transformation matrix.
-// 
-inline const Matrix4 prependScale( const Vector3 & scaleVec, const Matrix4 & mat );
-
-// Multiply two 4x4 matrices per element
-// 
-inline const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 );
-
-// Compute the absolute value of a 4x4 matrix per element
-// 
-inline const Matrix4 absPerElem( const Matrix4 & mat );
-
-// Transpose of a 4x4 matrix
-// 
-inline const Matrix4 transpose( const Matrix4 & mat );
-
-// Compute the inverse of a 4x4 matrix
-// NOTE: 
-// Result is unpredictable when the determinant of mat is equal to or near 0.
-// 
-inline const Matrix4 inverse( const Matrix4 & mat );
-
-// Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix
-// NOTE: 
-// This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.  The result is unpredictable when the determinant of mat is equal to or near 0.
-// 
-inline const Matrix4 affineInverse( const Matrix4 & mat );
-
-// Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix
-// NOTE: 
-// This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.
-// 
-inline const Matrix4 orthoInverse( const Matrix4 & mat );
-
-// Determinant of a 4x4 matrix
-// 
-inline vec_float4 determinant( const Matrix4 & mat );
-
-// Conditionally select between two 4x4 matrices
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// The choice between mat0 and mat1 is controlled by the select1 argument.
-inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, vec_uint4 select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a 4x4 matrix
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Matrix4 & mat );
-
-// Print a 4x4 matrix and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Matrix4 & mat, const char * name );
-
-#endif
-
-// A set of four 3x4 transformation matrices in structure-of-arrays format
-// Stored as four Vector3 columns: mCol0, mCol1, mCol2, and mCol3.
-class Transform3
-{
-    Vector3 mCol0;
-    Vector3 mCol1;
-    Vector3 mCol2;
-    Vector3 mCol3;
-
-public:
-    // Default constructor; does no initialization
-    // The body is empty, so no column is assigned here.
-    inline Transform3( ) { };
-
-    // Copy a 3x4 transformation matrix
-    // 
-    inline Transform3( const Transform3 & tfrm );
-
-    // Construct a 3x4 transformation matrix containing the specified columns
-    // 
-    inline Transform3( const Vector3 & col0, const Vector3 & col1, const Vector3 & col2, const Vector3 & col3 );
-
-    // Construct a 3x4 transformation matrix from a 3x3 matrix and a 3-D vector
-    // NOTE: the Matrix3 argument is named tfrm in this declaration.
-    inline Transform3( const Matrix3 & tfrm, const Vector3 & translateVec );
-
-    // Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector
-    // 
-    inline Transform3( const Quat & unitQuat, const Vector3 & translateVec );
-
-    // Set all elements of a 3x4 transformation matrix to the same scalar value
-    // Explicit, so a vec_float4 is never implicitly converted to a Transform3.
-    explicit inline Transform3( vec_float4 scalar );
-
-    // Replicate an AoS 3x4 transformation matrix
-    // Not declared explicit, so it also serves as an implicit conversion from Aos::Transform3.
-    inline Transform3( const Aos::Transform3 & tfrm );
-
-    // Insert four AoS 3x4 transformation matrices
-    // 
-    inline Transform3( const Aos::Transform3 & tfrm0, const Aos::Transform3 & tfrm1, const Aos::Transform3 & tfrm2, const Aos::Transform3 & tfrm3 );
-
-    // Extract four AoS 3x4 transformation matrices
-    // The results are written to the four reference arguments.
-    inline void get4Aos( Aos::Transform3 & result0, Aos::Transform3 & result1, Aos::Transform3 & result2, Aos::Transform3 & result3 ) const;
-
-    // Assign one 3x4 transformation matrix to another
-    // 
-    inline Transform3 & operator =( const Transform3 & tfrm );
-
-    // Set the upper-left 3x3 submatrix
-    // 
-    inline Transform3 & setUpper3x3( const Matrix3 & mat3 );
-
-    // Get the upper-left 3x3 submatrix of a 3x4 transformation matrix
-    // 
-    inline const Matrix3 getUpper3x3( ) const;
-
-    // Set translation component
-    // 
-    inline Transform3 & setTranslation( const Vector3 & translateVec );
-
-    // Get the translation component of a 3x4 transformation matrix
-    // 
-    inline const Vector3 getTranslation( ) const;
-
-    // Set column 0 of a 3x4 transformation matrix
-    // 
-    inline Transform3 & setCol0( const Vector3 & col0 );
-
-    // Set column 1 of a 3x4 transformation matrix
-    // 
-    inline Transform3 & setCol1( const Vector3 & col1 );
-
-    // Set column 2 of a 3x4 transformation matrix
-    // 
-    inline Transform3 & setCol2( const Vector3 & col2 );
-
-    // Set column 3 of a 3x4 transformation matrix
-    // 
-    inline Transform3 & setCol3( const Vector3 & col3 );
-
-    // Get column 0 of a 3x4 transformation matrix
-    // 
-    inline const Vector3 getCol0( ) const;
-
-    // Get column 1 of a 3x4 transformation matrix
-    // 
-    inline const Vector3 getCol1( ) const;
-
-    // Get column 2 of a 3x4 transformation matrix
-    // 
-    inline const Vector3 getCol2( ) const;
-
-    // Get column 3 of a 3x4 transformation matrix
-    // 
-    inline const Vector3 getCol3( ) const;
-
-    // Set the column of a 3x4 transformation matrix referred to by the specified index
-    // 
-    inline Transform3 & setCol( int col, const Vector3 & vec );
-
-    // Set the row of a 3x4 transformation matrix referred to by the specified index
-    // A row is passed as a Vector4, whereas columns are Vector3.
-    inline Transform3 & setRow( int row, const Vector4 & vec );
-
-    // Get the column of a 3x4 transformation matrix referred to by the specified index
-    // 
-    inline const Vector3 getCol( int col ) const;
-
-    // Get the row of a 3x4 transformation matrix referred to by the specified index
-    // A row is returned as a Vector4, whereas columns are Vector3.
-    inline const Vector4 getRow( int row ) const;
-
-    // Subscripting operator to set or get a column
-    // Returns a Vector3 reference, so the column can be assigned through it.
-    inline Vector3 & operator []( int col );
-
-    // Subscripting operator to get a column
-    // Const overload; the column is returned by value.
-    inline const Vector3 operator []( int col ) const;
-
-    // Set the element of a 3x4 transformation matrix referred to by column and row indices
-    // Argument order is column index first, then row index.
-    inline Transform3 & setElem( int col, int row, vec_float4 val );
-
-    // Get the element of a 3x4 transformation matrix referred to by column and row indices
-    // Argument order is column index first, then row index.
-    inline vec_float4 getElem( int col, int row ) const;
-
-    // Multiply a 3x4 transformation matrix by a 3-D vector
-    // The result is returned as a Vector3.
-    inline const Vector3 operator *( const Vector3 & vec ) const;
-
-    // Multiply a 3x4 transformation matrix by a 3-D point
-    // The result is returned as a Point3.
-    inline const Point3 operator *( const Point3 & pnt ) const;
-
-    // Multiply two 3x4 transformation matrices
-    // 
-    inline const Transform3 operator *( const Transform3 & tfrm ) const;
-
-    // Perform compound assignment and multiplication by a 3x4 transformation matrix
-    // 
-    inline Transform3 & operator *=( const Transform3 & tfrm );
-
-    // Construct an identity 3x4 transformation matrix
-    // 
-    static inline const Transform3 identity( );
-
-    // Construct a 3x4 transformation matrix to rotate around the x axis
-    // 
-    static inline const Transform3 rotationX( vec_float4 radians );
-
-    // Construct a 3x4 transformation matrix to rotate around the y axis
-    // 
-    static inline const Transform3 rotationY( vec_float4 radians );
-
-    // Construct a 3x4 transformation matrix to rotate around the z axis
-    // 
-    static inline const Transform3 rotationZ( vec_float4 radians );
-
-    // Construct a 3x4 transformation matrix to rotate around the x, y, and z axes
-    // The three angles are passed together in the Vector3 radiansXYZ.
-    static inline const Transform3 rotationZYX( const Vector3 & radiansXYZ );
-
-    // Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector
-    // 
-    static inline const Transform3 rotation( vec_float4 radians, const Vector3 & unitVec );
-
-    // Construct a rotation matrix from a unit-length quaternion
-    // 
-    static inline const Transform3 rotation( const Quat & unitQuat );
-
-    // Construct a 3x4 transformation matrix to perform scaling
-    // 
-    static inline const Transform3 scale( const Vector3 & scaleVec );
-
-    // Construct a 3x4 transformation matrix to perform translation
-    // 
-    static inline const Transform3 translation( const Vector3 & translateVec );
-
-};
-// Append (post-multiply) a scale transformation to a 3x4 transformation matrix
-// NOTE: 
-// Faster than creating and multiplying a scale transformation matrix.
-// 
-inline const Transform3 appendScale( const Transform3 & tfrm, const Vector3 & scaleVec );
-
-// Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix
-// NOTE: 
-// Faster than creating and multiplying a scale transformation matrix.
-// 
-inline const Transform3 prependScale( const Vector3 & scaleVec, const Transform3 & tfrm );
-
-// Multiply two 3x4 transformation matrices per element
-// 
-inline const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 );
-
-// Compute the absolute value of a 3x4 transformation matrix per element
-// 
-inline const Transform3 absPerElem( const Transform3 & tfrm );
-
-// Inverse of a 3x4 transformation matrix
-// NOTE: 
-// Result is unpredictable when the determinant of the left 3x3 submatrix is equal to or near 0.
-// 
-inline const Transform3 inverse( const Transform3 & tfrm );
-
-// Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix
-// NOTE: 
-// This can be used to achieve better performance than a general inverse when the specified 3x4 transformation matrix meets the given restrictions.
-// 
-inline const Transform3 orthoInverse( const Transform3 & tfrm );
-
-// Conditionally select between two 3x4 transformation matrices
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// The choice between tfrm0 and tfrm1 is controlled by the select1 argument.
-inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, vec_uint4 select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a 3x4 transformation matrix
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Transform3 & tfrm );
-
-// Print a 3x4 transformation matrix and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Transform3 & tfrm, const char * name );
-
-#endif
-
-} // namespace Soa
-} // namespace Vectormath
-
-#include "vec_soa.h"
-#include "quat_soa.h"
-#include "mat_soa.h"
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_SOA_CPP_PPU_H
+#define _VECTORMATH_SOA_CPP_PPU_H
+
+#include <math.h>
+#include <altivec.h>
+#include "vectormath_aos.h"
+
+#ifdef _VECTORMATH_DEBUG
+#include <stdio.h>
+#endif
+
+namespace Vectormath {
+
+namespace Soa {
+
+//-----------------------------------------------------------------------------
+// Forward Declarations
+//
+
+class Vector3;
+class Vector4;
+class Point3;
+class Quat;
+class Matrix3;
+class Matrix4;
+class Transform3;
+
+// A set of four 3-D vectors in structure-of-arrays format
+// (mX, mY, and mZ are each one vec_float4 holding that component for all four vectors)
+class Vector3
+{
+    typedef vec_float4 vec_float4_t; // alias used as the reference type returned by the non-const operator []
+    vec_float4 mX;
+    vec_float4 mY;
+    vec_float4 mZ;
+
+public:
+    // Default constructor; does no initialization
+    // (the empty body leaves mX, mY, and mZ with indeterminate contents)
+    inline Vector3( ) { };
+
+    // Copy a 3-D vector
+    // 
+    inline Vector3( const Vector3 & vec );
+
+    // Construct a 3-D vector from x, y, and z elements
+    // (each argument is a full vec_float4, passed by value)
+    inline Vector3( vec_float4 x, vec_float4 y, vec_float4 z );
+
+    // Copy elements from a 3-D point into a 3-D vector
+    // Declared explicit: a Point3 is never implicitly converted to a Vector3.
+    explicit inline Vector3( const Point3 & pnt );
+
+    // Set all elements of a 3-D vector to the same scalar value
+    // Declared explicit: a vec_float4 is never implicitly converted to a Vector3.
+    explicit inline Vector3( vec_float4 scalar );
+
+    // Replicate an AoS 3-D vector
+    // Not declared explicit, so an Aos::Vector3 converts implicitly to a Soa::Vector3.
+    inline Vector3( Aos::Vector3 vec );
+
+    // Insert four AoS 3-D vectors
+    // 
+    inline Vector3( Aos::Vector3 vec0, Aos::Vector3 vec1, Aos::Vector3 vec2, Aos::Vector3 vec3 );
+
+    // Extract four AoS 3-D vectors
+    // The results are written through the four reference parameters; this object is not modified.
+    inline void get4Aos( Aos::Vector3 & result0, Aos::Vector3 & result1, Aos::Vector3 & result2, Aos::Vector3 & result3 ) const;
+
+    // Assign one 3-D vector to another
+    // 
+    inline Vector3 & operator =( const Vector3 & vec );
+
+    // Set the x element of a 3-D vector
+    // 
+    inline Vector3 & setX( vec_float4 x );
+
+    // Set the y element of a 3-D vector
+    // 
+    inline Vector3 & setY( vec_float4 y );
+
+    // Set the z element of a 3-D vector
+    // 
+    inline Vector3 & setZ( vec_float4 z );
+
+    // Get the x element of a 3-D vector
+    // 
+    inline vec_float4 getX( ) const;
+
+    // Get the y element of a 3-D vector
+    // 
+    inline vec_float4 getY( ) const;
+
+    // Get the z element of a 3-D vector
+    // 
+    inline vec_float4 getZ( ) const;
+
+    // Set an x, y, or z element of a 3-D vector by index
+    // NOTE(review): presumably idx 0, 1, 2 maps to x, y, z; no range check is visible here -- confirm.
+    inline Vector3 & setElem( int idx, vec_float4 value );
+
+    // Get an x, y, or z element of a 3-D vector by index
+    // NOTE(review): presumably idx 0, 1, 2 maps to x, y, z; no range check is visible here -- confirm.
+    inline vec_float4 getElem( int idx ) const;
+
+    // Subscripting operator to set or get an element
+    // Returns vec_float4_t &, so the selected element can be assigned through the result.
+    inline vec_float4_t & operator []( int idx );
+
+    // Subscripting operator to get an element
+    // Const overload; returns the element by value.
+    inline vec_float4 operator []( int idx ) const;
+
+    // Add two 3-D vectors
+    // 
+    inline const Vector3 operator +( const Vector3 & vec ) const;
+
+    // Subtract a 3-D vector from another 3-D vector
+    // 
+    inline const Vector3 operator -( const Vector3 & vec ) const;
+
+    // Add a 3-D vector to a 3-D point
+    // The result type is Point3, not Vector3.
+    inline const Point3 operator +( const Point3 & pnt ) const;
+
+    // Multiply a 3-D vector by a scalar
+    // (the scalar is passed as a vec_float4)
+    inline const Vector3 operator *( vec_float4 scalar ) const;
+
+    // Divide a 3-D vector by a scalar
+    // (the scalar is passed as a vec_float4)
+    inline const Vector3 operator /( vec_float4 scalar ) const;
+
+    // Perform compound assignment and addition with a 3-D vector
+    // 
+    inline Vector3 & operator +=( const Vector3 & vec );
+
+    // Perform compound assignment and subtraction by a 3-D vector
+    // 
+    inline Vector3 & operator -=( const Vector3 & vec );
+
+    // Perform compound assignment and multiplication by a scalar
+    // 
+    inline Vector3 & operator *=( vec_float4 scalar );
+
+    // Perform compound assignment and division by a scalar
+    // 
+    inline Vector3 & operator /=( vec_float4 scalar );
+
+    // Negate all elements of a 3-D vector
+    // (unary minus; returns a new Vector3 and leaves this object unchanged)
+    inline const Vector3 operator -( ) const;
+
+    // Construct x axis
+    // (static factory; returns a Vector3 by value)
+    static inline const Vector3 xAxis( );
+
+    // Construct y axis
+    // (static factory; returns a Vector3 by value)
+    static inline const Vector3 yAxis( );
+
+    // Construct z axis
+    // (static factory; returns a Vector3 by value)
+    static inline const Vector3 zAxis( );
+
+};
+
+// Multiply a 3-D vector by a scalar
+// 
+inline const Vector3 operator *( vec_float4 scalar, const Vector3 & vec );
+
+// Multiply two 3-D vectors per element
+// 
+inline const Vector3 mulPerElem( const Vector3 & vec0, const Vector3 & vec1 );
+
+// Divide two 3-D vectors per element
+// NOTE: 
+// Floating-point behavior matches standard library function divf4.
+// 
+inline const Vector3 divPerElem( const Vector3 & vec0, const Vector3 & vec1 );
+
+// Compute the reciprocal of a 3-D vector per element
+// NOTE: 
+// Floating-point behavior matches standard library function recipf4.
+// 
+inline const Vector3 recipPerElem( const Vector3 & vec );
+
+// Compute the square root of a 3-D vector per element
+// NOTE: 
+// Floating-point behavior matches standard library function sqrtf4.
+// 
+inline const Vector3 sqrtPerElem( const Vector3 & vec );
+
+// Compute the reciprocal square root of a 3-D vector per element
+// NOTE: 
+// Floating-point behavior matches standard library function rsqrtf4.
+// 
+inline const Vector3 rsqrtPerElem( const Vector3 & vec );
+
+// Compute the absolute value of a 3-D vector per element
+// 
+inline const Vector3 absPerElem( const Vector3 & vec );
+
+// Copy sign from one 3-D vector to another, per element
+// 
+inline const Vector3 copySignPerElem( const Vector3 & vec0, const Vector3 & vec1 );
+
+// Maximum of two 3-D vectors per element
+// 
+inline const Vector3 maxPerElem( const Vector3 & vec0, const Vector3 & vec1 );
+
+// Minimum of two 3-D vectors per element
+// 
+inline const Vector3 minPerElem( const Vector3 & vec0, const Vector3 & vec1 );
+
+// Maximum element of a 3-D vector
+// 
+inline vec_float4 maxElem( const Vector3 & vec );
+
+// Minimum element of a 3-D vector
+// 
+inline vec_float4 minElem( const Vector3 & vec );
+
+// Compute the sum of all elements of a 3-D vector
+// 
+inline vec_float4 sum( const Vector3 & vec );
+
+// Compute the dot product of two 3-D vectors
+// 
+inline vec_float4 dot( const Vector3 & vec0, const Vector3 & vec1 );
+
+// Compute the square of the length of a 3-D vector
+// 
+inline vec_float4 lengthSqr( const Vector3 & vec );
+
+// Compute the length of a 3-D vector
+// 
+inline vec_float4 length( const Vector3 & vec );
+
+// Normalize a 3-D vector
+// NOTE: 
+// The result is unpredictable when all elements of vec are at or near zero.
+// 
+inline const Vector3 normalize( const Vector3 & vec );
+
+// Compute cross product of two 3-D vectors
+// 
+inline const Vector3 cross( const Vector3 & vec0, const Vector3 & vec1 );
+
+// Outer product of two 3-D vectors
+// 
+inline const Matrix3 outer( const Vector3 & vec0, const Vector3 & vec1 );
+
+// Pre-multiply a row vector by a 3x3 matrix
+// 
+inline const Vector3 rowMul( const Vector3 & vec, const Matrix3 & mat );
+
+// Cross-product matrix of a 3-D vector
+// 
+inline const Matrix3 crossMatrix( const Vector3 & vec );
+
+// Create cross-product matrix and multiply
+// NOTE: 
+// Faster than separately creating a cross-product matrix and multiplying.
+// 
+inline const Matrix3 crossMatrixMul( const Vector3 & vec, const Matrix3 & mat );
+
+// Linear interpolation between two 3-D vectors
+// NOTE: 
+// Does not clamp t between 0 and 1.
+// 
+inline const Vector3 lerp( vec_float4 t, const Vector3 & vec0, const Vector3 & vec1 );
+
+// Spherical linear interpolation between two 3-D vectors
+// NOTE: 
+// The result is unpredictable if the vectors point in opposite directions.
+// Does not clamp t between 0 and 1.
+// 
+inline const Vector3 slerp( vec_float4 t, const Vector3 & unitVec0, const Vector3 & unitVec1 );
+
+// Conditionally select between two 3-D vectors
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// NOTE(review): select1 is a vec_uint4 mask; presumably a set mask picks vec1 (per the name) -- confirm in the definition.
+inline const Vector3 select( const Vector3 & vec0, const Vector3 & vec1, vec_uint4 select1 );
+
+// Load four three-float 3-D vectors, stored in three quadwords
+// 
+inline void loadXYZArray( Vector3 & vec, const vec_float4 * threeQuads );
+
+// Store four slots of an SoA 3-D vector in three quadwords
+// 
+inline void storeXYZArray( const Vector3 & vec, vec_float4 * threeQuads );
+
+// Store eight slots of two SoA 3-D vectors as half-floats
+// 
+inline void storeHalfFloats( const Vector3 & vec0, const Vector3 & vec1, vec_ushort8 * threeQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 3-D vector
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Vector3 & vec );
+
+// Print a 3-D vector and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Vector3 & vec, const char * name );
+
+#endif
+
+// A set of four 4-D vectors in structure-of-arrays format
+// (mX, mY, mZ, and mW are each one vec_float4 holding that component for all four vectors)
+class Vector4
+{
+    typedef vec_float4 vec_float4_t; // alias used as the reference type returned by the non-const operator []
+    vec_float4 mX;
+    vec_float4 mY;
+    vec_float4 mZ;
+    vec_float4 mW;
+
+public:
+    // Default constructor; does no initialization
+    // (the empty body leaves mX, mY, mZ, and mW with indeterminate contents)
+    inline Vector4( ) { };
+
+    // Copy a 4-D vector
+    // 
+    inline Vector4( const Vector4 & vec );
+
+    // Construct a 4-D vector from x, y, z, and w elements
+    // (each argument is a full vec_float4, passed by value)
+    inline Vector4( vec_float4 x, vec_float4 y, vec_float4 z, vec_float4 w );
+
+    // Construct a 4-D vector from a 3-D vector and a scalar
+    // (the scalar w is passed as a vec_float4)
+    inline Vector4( const Vector3 & xyz, vec_float4 w );
+
+    // Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0
+    // Declared explicit: a Vector3 is never implicitly converted to a Vector4.
+    explicit inline Vector4( const Vector3 & vec );
+
+    // Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1
+    // Declared explicit: a Point3 is never implicitly converted to a Vector4.
+    explicit inline Vector4( const Point3 & pnt );
+
+    // Copy elements from a quaternion into a 4-D vector
+    // Declared explicit: a Quat is never implicitly converted to a Vector4.
+    explicit inline Vector4( const Quat & quat );
+
+    // Set all elements of a 4-D vector to the same scalar value
+    // Declared explicit: a vec_float4 is never implicitly converted to a Vector4.
+    explicit inline Vector4( vec_float4 scalar );
+
+    // Replicate an AoS 4-D vector
+    // Not declared explicit, so an Aos::Vector4 converts implicitly to a Soa::Vector4.
+    inline Vector4( Aos::Vector4 vec );
+
+    // Insert four AoS 4-D vectors
+    // 
+    inline Vector4( Aos::Vector4 vec0, Aos::Vector4 vec1, Aos::Vector4 vec2, Aos::Vector4 vec3 );
+
+    // Extract four AoS 4-D vectors
+    // The results are written through the four reference parameters; this object is not modified.
+    inline void get4Aos( Aos::Vector4 & result0, Aos::Vector4 & result1, Aos::Vector4 & result2, Aos::Vector4 & result3 ) const;
+
+    // Assign one 4-D vector to another
+    // 
+    inline Vector4 & operator =( const Vector4 & vec );
+
+    // Set the x, y, and z elements of a 4-D vector
+    // NOTE: 
+    // This function does not change the w element.
+    // 
+    inline Vector4 & setXYZ( const Vector3 & vec );
+
+    // Get the x, y, and z elements of a 4-D vector
+    // (returned by value as a Vector3)
+    inline const Vector3 getXYZ( ) const;
+
+    // Set the x element of a 4-D vector
+    // 
+    inline Vector4 & setX( vec_float4 x );
+
+    // Set the y element of a 4-D vector
+    // 
+    inline Vector4 & setY( vec_float4 y );
+
+    // Set the z element of a 4-D vector
+    // 
+    inline Vector4 & setZ( vec_float4 z );
+
+    // Set the w element of a 4-D vector
+    // 
+    inline Vector4 & setW( vec_float4 w );
+
+    // Get the x element of a 4-D vector
+    // 
+    inline vec_float4 getX( ) const;
+
+    // Get the y element of a 4-D vector
+    // 
+    inline vec_float4 getY( ) const;
+
+    // Get the z element of a 4-D vector
+    // 
+    inline vec_float4 getZ( ) const;
+
+    // Get the w element of a 4-D vector
+    // 
+    inline vec_float4 getW( ) const;
+
+    // Set an x, y, z, or w element of a 4-D vector by index
+    // NOTE(review): presumably idx 0, 1, 2, 3 maps to x, y, z, w; no range check is visible here -- confirm.
+    inline Vector4 & setElem( int idx, vec_float4 value );
+
+    // Get an x, y, z, or w element of a 4-D vector by index
+    // NOTE(review): presumably idx 0, 1, 2, 3 maps to x, y, z, w; no range check is visible here -- confirm.
+    inline vec_float4 getElem( int idx ) const;
+
+    // Subscripting operator to set or get an element
+    // Returns vec_float4_t &, so the selected element can be assigned through the result.
+    inline vec_float4_t & operator []( int idx );
+
+    // Subscripting operator to get an element
+    // Const overload; returns the element by value.
+    inline vec_float4 operator []( int idx ) const;
+
+    // Add two 4-D vectors
+    // 
+    inline const Vector4 operator +( const Vector4 & vec ) const;
+
+    // Subtract a 4-D vector from another 4-D vector
+    // 
+    inline const Vector4 operator -( const Vector4 & vec ) const;
+
+    // Multiply a 4-D vector by a scalar
+    // (the scalar is passed as a vec_float4)
+    inline const Vector4 operator *( vec_float4 scalar ) const;
+
+    // Divide a 4-D vector by a scalar
+    // (the scalar is passed as a vec_float4)
+    inline const Vector4 operator /( vec_float4 scalar ) const;
+
+    // Perform compound assignment and addition with a 4-D vector
+    // 
+    inline Vector4 & operator +=( const Vector4 & vec );
+
+    // Perform compound assignment and subtraction by a 4-D vector
+    // 
+    inline Vector4 & operator -=( const Vector4 & vec );
+
+    // Perform compound assignment and multiplication by a scalar
+    // 
+    inline Vector4 & operator *=( vec_float4 scalar );
+
+    // Perform compound assignment and division by a scalar
+    // 
+    inline Vector4 & operator /=( vec_float4 scalar );
+
+    // Negate all elements of a 4-D vector
+    // (unary minus; returns a new Vector4 and leaves this object unchanged)
+    inline const Vector4 operator -( ) const;
+
+    // Construct x axis
+    // (static factory; returns a Vector4 by value)
+    static inline const Vector4 xAxis( );
+
+    // Construct y axis
+    // (static factory; returns a Vector4 by value)
+    static inline const Vector4 yAxis( );
+
+    // Construct z axis
+    // (static factory; returns a Vector4 by value)
+    static inline const Vector4 zAxis( );
+
+    // Construct w axis
+    // (static factory; returns a Vector4 by value)
+    static inline const Vector4 wAxis( );
+
+};
+
+// Multiply a 4-D vector by a scalar
+// 
+inline const Vector4 operator *( vec_float4 scalar, const Vector4 & vec );
+
+// Multiply two 4-D vectors per element
+// 
+inline const Vector4 mulPerElem( const Vector4 & vec0, const Vector4 & vec1 );
+
+// Divide two 4-D vectors per element
+// NOTE: 
+// Floating-point behavior matches standard library function divf4.
+// 
+inline const Vector4 divPerElem( const Vector4 & vec0, const Vector4 & vec1 );
+
+// Compute the reciprocal of a 4-D vector per element
+// NOTE: 
+// Floating-point behavior matches standard library function recipf4.
+// 
+inline const Vector4 recipPerElem( const Vector4 & vec );
+
+// Compute the square root of a 4-D vector per element
+// NOTE: 
+// Floating-point behavior matches standard library function sqrtf4.
+// 
+inline const Vector4 sqrtPerElem( const Vector4 & vec );
+
+// Compute the reciprocal square root of a 4-D vector per element
+// NOTE: 
+// Floating-point behavior matches standard library function rsqrtf4.
+// 
+inline const Vector4 rsqrtPerElem( const Vector4 & vec );
+
+// Compute the absolute value of a 4-D vector per element
+// 
+inline const Vector4 absPerElem( const Vector4 & vec );
+
+// Copy sign from one 4-D vector to another, per element
+// 
+inline const Vector4 copySignPerElem( const Vector4 & vec0, const Vector4 & vec1 );
+
+// Maximum of two 4-D vectors per element
+// 
+inline const Vector4 maxPerElem( const Vector4 & vec0, const Vector4 & vec1 );
+
+// Minimum of two 4-D vectors per element
+// 
+inline const Vector4 minPerElem( const Vector4 & vec0, const Vector4 & vec1 );
+
+// Maximum element of a 4-D vector
+// 
+inline vec_float4 maxElem( const Vector4 & vec );
+
+// Minimum element of a 4-D vector
+// 
+inline vec_float4 minElem( const Vector4 & vec );
+
+// Compute the sum of all elements of a 4-D vector
+// 
+inline vec_float4 sum( const Vector4 & vec );
+
+// Compute the dot product of two 4-D vectors
+// 
+inline vec_float4 dot( const Vector4 & vec0, const Vector4 & vec1 );
+
+// Compute the square of the length of a 4-D vector
+// 
+inline vec_float4 lengthSqr( const Vector4 & vec );
+
+// Compute the length of a 4-D vector
+// 
+inline vec_float4 length( const Vector4 & vec );
+
+// Normalize a 4-D vector
+// NOTE: 
+// The result is unpredictable when all elements of vec are at or near zero.
+// 
+inline const Vector4 normalize( const Vector4 & vec );
+
+// Outer product of two 4-D vectors
+// 
+inline const Matrix4 outer( const Vector4 & vec0, const Vector4 & vec1 );
+
+// Linear interpolation between two 4-D vectors
+// NOTE: 
+// Does not clamp t between 0 and 1.
+// 
+inline const Vector4 lerp( vec_float4 t, const Vector4 & vec0, const Vector4 & vec1 );
+
+// Spherical linear interpolation between two 4-D vectors
+// NOTE: 
+// The result is unpredictable if the vectors point in opposite directions.
+// Does not clamp t between 0 and 1.
+// 
+inline const Vector4 slerp( vec_float4 t, const Vector4 & unitVec0, const Vector4 & unitVec1 );
+
+// Conditionally select between two 4-D vectors
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// NOTE(review): select1 is a vec_uint4 mask; presumably a set mask picks vec1 (per the name) -- confirm in the definition.
+inline const Vector4 select( const Vector4 & vec0, const Vector4 & vec1, vec_uint4 select1 );
+
+// Store four slots of an SoA 4-D vector as half-floats
+// 
+inline void storeHalfFloats( const Vector4 & vec, vec_ushort8 * twoQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 4-D vector
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Vector4 & vec );
+
+// Print a 4-D vector and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Vector4 & vec, const char * name );
+
+#endif
+
+// A set of four 3-D points in structure-of-arrays format
+// (mX, mY, and mZ are each one vec_float4 holding that coordinate for all four points)
+class Point3
+{
+    typedef vec_float4 vec_float4_t; // alias used as the reference type returned by the non-const operator []
+    vec_float4 mX;
+    vec_float4 mY;
+    vec_float4 mZ;
+
+public:
+    // Default constructor; does no initialization
+    // (the empty body leaves mX, mY, and mZ with indeterminate contents)
+    inline Point3( ) { };
+
+    // Copy a 3-D point
+    // 
+    inline Point3( const Point3 & pnt );
+
+    // Construct a 3-D point from x, y, and z elements
+    // (each argument is a full vec_float4, passed by value)
+    inline Point3( vec_float4 x, vec_float4 y, vec_float4 z );
+
+    // Copy elements from a 3-D vector into a 3-D point
+    // Declared explicit: a Vector3 is never implicitly converted to a Point3.
+    explicit inline Point3( const Vector3 & vec );
+
+    // Set all elements of a 3-D point to the same scalar value
+    // Declared explicit: a vec_float4 is never implicitly converted to a Point3.
+    explicit inline Point3( vec_float4 scalar );
+
+    // Replicate an AoS 3-D point
+    // Not declared explicit, so an Aos::Point3 converts implicitly to a Soa::Point3.
+    inline Point3( Aos::Point3 pnt );
+
+    // Insert four AoS 3-D points
+    // 
+    inline Point3( Aos::Point3 pnt0, Aos::Point3 pnt1, Aos::Point3 pnt2, Aos::Point3 pnt3 );
+
+    // Extract four AoS 3-D points
+    // The results are written through the four reference parameters; this object is not modified.
+    inline void get4Aos( Aos::Point3 & result0, Aos::Point3 & result1, Aos::Point3 & result2, Aos::Point3 & result3 ) const;
+
+    // Assign one 3-D point to another
+    // 
+    inline Point3 & operator =( const Point3 & pnt );
+
+    // Set the x element of a 3-D point
+    // 
+    inline Point3 & setX( vec_float4 x );
+
+    // Set the y element of a 3-D point
+    // 
+    inline Point3 & setY( vec_float4 y );
+
+    // Set the z element of a 3-D point
+    // 
+    inline Point3 & setZ( vec_float4 z );
+
+    // Get the x element of a 3-D point
+    // 
+    inline vec_float4 getX( ) const;
+
+    // Get the y element of a 3-D point
+    // 
+    inline vec_float4 getY( ) const;
+
+    // Get the z element of a 3-D point
+    // 
+    inline vec_float4 getZ( ) const;
+
+    // Set an x, y, or z element of a 3-D point by index
+    // NOTE(review): presumably idx 0, 1, 2 maps to x, y, z; no range check is visible here -- confirm.
+    inline Point3 & setElem( int idx, vec_float4 value );
+
+    // Get an x, y, or z element of a 3-D point by index
+    // NOTE(review): presumably idx 0, 1, 2 maps to x, y, z; no range check is visible here -- confirm.
+    inline vec_float4 getElem( int idx ) const;
+
+    // Subscripting operator to set or get an element
+    // Returns vec_float4_t &, so the selected element can be assigned through the result.
+    inline vec_float4_t & operator []( int idx );
+
+    // Subscripting operator to get an element
+    // Const overload; returns the element by value.
+    inline vec_float4 operator []( int idx ) const;
+
+    // Subtract a 3-D point from another 3-D point
+    // The result type is Vector3, not Point3.
+    inline const Vector3 operator -( const Point3 & pnt ) const;
+
+    // Add a 3-D point to a 3-D vector
+    // The result type is Point3.
+    inline const Point3 operator +( const Vector3 & vec ) const;
+
+    // Subtract a 3-D vector from a 3-D point
+    // The result type is Point3.
+    inline const Point3 operator -( const Vector3 & vec ) const;
+
+    // Perform compound assignment and addition with a 3-D vector
+    // 
+    inline Point3 & operator +=( const Vector3 & vec );
+
+    // Perform compound assignment and subtraction by a 3-D vector
+    // 
+    inline Point3 & operator -=( const Vector3 & vec );
+
+};
+
+// Multiply two 3-D points per element
+// 
+inline const Point3 mulPerElem( const Point3 & pnt0, const Point3 & pnt1 );
+
+// Divide two 3-D points per element
+// NOTE: 
+// Floating-point behavior matches standard library function divf4.
+// 
+inline const Point3 divPerElem( const Point3 & pnt0, const Point3 & pnt1 );
+
+// Compute the reciprocal of a 3-D point per element
+// NOTE: 
+// Floating-point behavior matches standard library function recipf4.
+// 
+inline const Point3 recipPerElem( const Point3 & pnt );
+
+// Compute the square root of a 3-D point per element
+// NOTE: 
+// Floating-point behavior matches standard library function sqrtf4.
+// 
+inline const Point3 sqrtPerElem( const Point3 & pnt );
+
+// Compute the reciprocal square root of a 3-D point per element
+// NOTE: 
+// Floating-point behavior matches standard library function rsqrtf4.
+// 
+inline const Point3 rsqrtPerElem( const Point3 & pnt );
+
+// Compute the absolute value of a 3-D point per element
+// 
+inline const Point3 absPerElem( const Point3 & pnt );
+
+// Copy sign from one 3-D point to another, per element
+// 
+inline const Point3 copySignPerElem( const Point3 & pnt0, const Point3 & pnt1 );
+
+// Maximum of two 3-D points per element
+// 
+inline const Point3 maxPerElem( const Point3 & pnt0, const Point3 & pnt1 );
+
+// Minimum of two 3-D points per element
+// 
+inline const Point3 minPerElem( const Point3 & pnt0, const Point3 & pnt1 );
+
+// Maximum element of a 3-D point
+// 
+inline vec_float4 maxElem( const Point3 & pnt );
+
+// Minimum element of a 3-D point
+// 
+inline vec_float4 minElem( const Point3 & pnt );
+
+// Compute the sum of all elements of a 3-D point
+// 
+inline vec_float4 sum( const Point3 & pnt );
+
+// Apply uniform scale to a 3-D point
+// 
+inline const Point3 scale( const Point3 & pnt, vec_float4 scaleVal );
+
+// Apply non-uniform scale to a 3-D point
+// 
+inline const Point3 scale( const Point3 & pnt, const Vector3 & scaleVec );
+
+// Scalar projection of a 3-D point on a unit-length 3-D vector
+// 
+inline vec_float4 projection( const Point3 & pnt, const Vector3 & unitVec );
+
+// Compute the square of the distance of a 3-D point from the coordinate-system origin
+// 
+inline vec_float4 distSqrFromOrigin( const Point3 & pnt );
+
+// Compute the distance of a 3-D point from the coordinate-system origin
+// 
+inline vec_float4 distFromOrigin( const Point3 & pnt );
+
+// Compute the square of the distance between two 3-D points
+// 
+inline vec_float4 distSqr( const Point3 & pnt0, const Point3 & pnt1 );
+
+// Compute the distance between two 3-D points
+// 
+inline vec_float4 dist( const Point3 & pnt0, const Point3 & pnt1 );
+
+// Linear interpolation between two 3-D points
+// NOTE: 
+// Does not clamp t between 0 and 1.
+// 
+inline const Point3 lerp( vec_float4 t, const Point3 & pnt0, const Point3 & pnt1 );
+
+// Conditionally select between two 3-D points
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// NOTE(review): select1 is a vec_uint4 mask; presumably a set mask picks pnt1 (per the name) -- confirm in the definition.
+inline const Point3 select( const Point3 & pnt0, const Point3 & pnt1, vec_uint4 select1 );
+
+// Load four three-float 3-D points, stored in three quadwords
+// 
+inline void loadXYZArray( Point3 & pnt, const vec_float4 * threeQuads );
+
+// Store four slots of an SoA 3-D point in three quadwords
+// 
+inline void storeXYZArray( const Point3 & pnt, vec_float4 * threeQuads );
+
+// Store eight slots of two SoA 3-D points as half-floats
+// 
+inline void storeHalfFloats( const Point3 & pnt0, const Point3 & pnt1, vec_ushort8 * threeQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 3-D point
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Point3 & pnt );
+
+// Print a 3-D point and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Point3 & pnt, const char * name );
+
+#endif
+
+// A set of four quaternions in structure-of-arrays format
+// (mX, mY, mZ, and mW are each one vec_float4 holding that component for all four quaternions)
+class Quat
+{
+    typedef vec_float4 vec_float4_t; // alias used as the reference type returned by the non-const operator []
+    vec_float4 mX;
+    vec_float4 mY;
+    vec_float4 mZ;
+    vec_float4 mW;
+
+public:
+    // Default constructor; does no initialization
+    // (the empty body leaves mX, mY, mZ, and mW with indeterminate contents)
+    inline Quat( ) { };
+
+    // Copy a quaternion
+    // 
+    inline Quat( const Quat & quat );
+
+    // Construct a quaternion from x, y, z, and w elements
+    // (each argument is a full vec_float4, passed by value)
+    inline Quat( vec_float4 x, vec_float4 y, vec_float4 z, vec_float4 w );
+
+    // Construct a quaternion from a 3-D vector and a scalar
+    // (the scalar w is passed as a vec_float4)
+    inline Quat( const Vector3 & xyz, vec_float4 w );
+
+    // Copy elements from a 4-D vector into a quaternion
+    // Declared explicit: a Vector4 is never implicitly converted to a Quat.
+    explicit inline Quat( const Vector4 & vec );
+
+    // Convert a rotation matrix to a unit-length quaternion
+    // Declared explicit: a Matrix3 is never implicitly converted to a Quat.
+    explicit inline Quat( const Matrix3 & rotMat );
+
+    // Set all elements of a quaternion to the same scalar value
+    // Declared explicit: a vec_float4 is never implicitly converted to a Quat.
+    explicit inline Quat( vec_float4 scalar );
+
+    // Replicate an AoS quaternion
+    // Not declared explicit, so an Aos::Quat converts implicitly to a Soa::Quat.
+    inline Quat( Aos::Quat quat );
+
+    // Insert four AoS quaternions
+    // 
+    inline Quat( Aos::Quat quat0, Aos::Quat quat1, Aos::Quat quat2, Aos::Quat quat3 );
+
+    // Extract four AoS quaternions
+    // The results are written through the four reference parameters; this object is not modified.
+    inline void get4Aos( Aos::Quat & result0, Aos::Quat & result1, Aos::Quat & result2, Aos::Quat & result3 ) const;
+
+    // Assign one quaternion to another
+    // 
+    inline Quat & operator =( const Quat & quat );
+
+    // Set the x, y, and z elements of a quaternion
+    // NOTE: 
+    // This function does not change the w element.
+    // 
+    inline Quat & setXYZ( const Vector3 & vec );
+
+    // Get the x, y, and z elements of a quaternion
+    // (returned by value as a Vector3)
+    inline const Vector3 getXYZ( ) const;
+
+    // Set the x element of a quaternion
+    // 
+    inline Quat & setX( vec_float4 x );
+
+    // Set the y element of a quaternion
+    // 
+    inline Quat & setY( vec_float4 y );
+
+    // Set the z element of a quaternion
+    // 
+    inline Quat & setZ( vec_float4 z );
+
+    // Set the w element of a quaternion
+    // 
+    inline Quat & setW( vec_float4 w );
+
+    // Get the x element of a quaternion
+    // 
+    inline vec_float4 getX( ) const;
+
+    // Get the y element of a quaternion
+    // 
+    inline vec_float4 getY( ) const;
+
+    // Get the z element of a quaternion
+    // 
+    inline vec_float4 getZ( ) const;
+
+    // Get the w element of a quaternion
+    // 
+    inline vec_float4 getW( ) const;
+
+    // Set an x, y, z, or w element of a quaternion by index
+    // NOTE(review): presumably idx 0, 1, 2, 3 maps to x, y, z, w; no range check is visible here -- confirm.
+    inline Quat & setElem( int idx, vec_float4 value );
+
+    // Get an x, y, z, or w element of a quaternion by index
+    // NOTE(review): presumably idx 0, 1, 2, 3 maps to x, y, z, w; no range check is visible here -- confirm.
+    inline vec_float4 getElem( int idx ) const;
+
+    // Subscripting operator to set or get an element
+    // Returns vec_float4_t &, so the selected element can be assigned through the result.
+    inline vec_float4_t & operator []( int idx );
+
+    // Subscripting operator to get an element
+    // Const overload; returns the element by value.
+    inline vec_float4 operator []( int idx ) const;
+
+    // Add two quaternions
+    // 
+    inline const Quat operator +( const Quat & quat ) const;
+
+    // Subtract a quaternion from another quaternion
+    // 
+    inline const Quat operator -( const Quat & quat ) const;
+
+    // Multiply two quaternions
+    // (overload taking a Quat; the vec_float4 overload below scales by a scalar instead)
+    inline const Quat operator *( const Quat & quat ) const;
+
+    // Multiply a quaternion by a scalar
+    // (the scalar is passed as a vec_float4)
+    inline const Quat operator *( vec_float4 scalar ) const;
+
+    // Divide a quaternion by a scalar
+    // (the scalar is passed as a vec_float4)
+    inline const Quat operator /( vec_float4 scalar ) const;
+
+    // Perform compound assignment and addition with a quaternion
+    // 
+    inline Quat & operator +=( const Quat & quat );
+
+    // Perform compound assignment and subtraction by a quaternion
+    // 
+    inline Quat & operator -=( const Quat & quat );
+
+    // Perform compound assignment and multiplication by a quaternion
+    // 
+    inline Quat & operator *=( const Quat & quat );
+
+    // Perform compound assignment and multiplication by a scalar
+    // 
+    inline Quat & operator *=( vec_float4 scalar );
+
+    // Perform compound assignment and division by a scalar
+    // 
+    inline Quat & operator /=( vec_float4 scalar );
+
+    // Negate all elements of a quaternion
+    // (unary minus; returns a new Quat and leaves this object unchanged)
+    inline const Quat operator -( ) const;
+
+    // Construct an identity quaternion
+    // (static factory; returns a Quat by value)
+    static inline const Quat identity( );
+
+    // Construct a quaternion to rotate between two unit-length 3-D vectors
+    // NOTE: 
+    // The result is unpredictable if unitVec0 and unitVec1 point in opposite directions.
+    // 
+    static inline const Quat rotation( const Vector3 & unitVec0, const Vector3 & unitVec1 );
+
+    // Construct a quaternion to rotate around a unit-length 3-D vector
+    // (overload of rotation() distinguished by its vec_float4 first argument)
+    static inline const Quat rotation( vec_float4 radians, const Vector3 & unitVec );
+
+    // Construct a quaternion to rotate around the x axis
+    // (the angle is passed as a vec_float4)
+    static inline const Quat rotationX( vec_float4 radians );
+
+    // Construct a quaternion to rotate around the y axis
+    // (the angle is passed as a vec_float4)
+    static inline const Quat rotationY( vec_float4 radians );
+
+    // Construct a quaternion to rotate around the z axis
+    // (the angle is passed as a vec_float4)
+    static inline const Quat rotationZ( vec_float4 radians );
+
+};
+
+// Multiply a quaternion by a scalar, with the scalar as the left-hand operand
+// (non-member counterpart of Quat::operator *( vec_float4 ), so that scalar * quat compiles)
+inline const Quat operator *( vec_float4 scalar, const Quat & quat );
+
+// Compute the conjugate of a quaternion
+//
+inline const Quat conj( const Quat & quat );
+
+// Use a unit-length quaternion to rotate a 3-D vector
+// NOTE(review): unitQuat is expected to be normalized already; no check is visible in this declaration.
+inline const Vector3 rotate( const Quat & unitQuat, const Vector3 & vec );
+
+// Compute the dot product of two quaternions
+//
+inline vec_float4 dot( const Quat & quat0, const Quat & quat1 );
+
+// Compute the norm of a quaternion
+// NOTE(review): declared separately from length(); presumably the squared magnitude -- confirm in quat_soa.h.
+inline vec_float4 norm( const Quat & quat );
+
+// Compute the length of a quaternion
+//
+inline vec_float4 length( const Quat & quat );
+
+// Normalize a quaternion
+// NOTE:
+// The result is unpredictable when all elements of quat are at or near zero.
+//
+inline const Quat normalize( const Quat & quat );
+
+// Linear interpolation between two quaternions
+// NOTE:
+// Does not clamp t between 0 and 1.
+// NOTE(review): presumably t = 0 yields quat0 and t = 1 yields quat1 -- confirm.
+inline const Quat lerp( vec_float4 t, const Quat & quat0, const Quat & quat1 );
+
+// Spherical linear interpolation between two quaternions
+// NOTE:
+// Interpolates along the shortest path between orientations.
+// Does not clamp t between 0 and 1.
+// Both quaternion arguments are expected to be unit-length (see the parameter names).
+inline const Quat slerp( vec_float4 t, const Quat & unitQuat0, const Quat & unitQuat1 );
+
+// Spherical quadrangle interpolation
+// Takes four unit-length quaternions; the role of each one is not documented in this header.
+inline const Quat squad( vec_float4 t, const Quat & unitQuat0, const Quat & unitQuat1, const Quat & unitQuat2, const Quat & unitQuat3 );
+
+// Conditionally select between two quaternions
+// NOTE:
+// This function uses a conditional select instruction to avoid a branch.
+// NOTE(review): which operand a set select1 mask picks is not stated here -- presumably quat1, confirm.
+inline const Quat select( const Quat & quat0, const Quat & quat1, vec_uint4 select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a quaternion
+// NOTE:
+// Only declared when _VECTORMATH_DEBUG is defined (guarded by the enclosing #ifdef).
+//
+inline void print( const Quat & quat );
+
+// Print a quaternion and an associated string identifier
+// NOTE:
+// Only declared when _VECTORMATH_DEBUG is defined (guarded by the enclosing #ifdef).
+//
+inline void print( const Quat & quat, const char * name );
+
+#endif
+
+// A set of four 3x3 matrices in structure-of-arrays format
+// Stored column-wise: three Vector3 members, one per column.
+class Matrix3
+{
+    Vector3 mCol0;
+    Vector3 mCol1;
+    Vector3 mCol2;
+
+public:
+    // Default constructor; does no initialization
+    // (empty inline body; the column members are left to their own default construction)
+    inline Matrix3( ) { };
+
+    // Copy a 3x3 matrix
+    //
+    inline Matrix3( const Matrix3 & mat );
+
+    // Construct a 3x3 matrix containing the specified columns
+    //
+    inline Matrix3( const Vector3 & col0, const Vector3 & col1, const Vector3 & col2 );
+
+    // Construct a 3x3 rotation matrix from a unit-length quaternion
+    // (explicit: a Quat is never converted to a Matrix3 implicitly)
+    explicit inline Matrix3( const Quat & unitQuat );
+
+    // Set all elements of a 3x3 matrix to the same scalar value
+    // (explicit: a bare vec_float4 is never converted to a Matrix3 implicitly)
+    explicit inline Matrix3( vec_float4 scalar );
+
+    // Replicate an AoS 3x3 matrix
+    // (not explicit, so an Aos::Matrix3 converts to this type implicitly)
+    inline Matrix3( const Aos::Matrix3 & mat );
+
+    // Insert four AoS 3x3 matrices
+    // NOTE(review): presumably mat0..mat3 fill slots 0..3 in that order -- confirm.
+    inline Matrix3( const Aos::Matrix3 & mat0, const Aos::Matrix3 & mat1, const Aos::Matrix3 & mat2, const Aos::Matrix3 & mat3 );
+
+    // Extract four AoS 3x3 matrices
+    // (written through the four reference parameters; nothing is returned)
+    inline void get4Aos( Aos::Matrix3 & result0, Aos::Matrix3 & result1, Aos::Matrix3 & result2, Aos::Matrix3 & result3 ) const;
+
+    // Assign one 3x3 matrix to another
+    //
+    inline Matrix3 & operator =( const Matrix3 & mat );
+
+    // Set column 0 of a 3x3 matrix
+    // (like every set* member of this class, returns Matrix3 &; presumably *this -- confirm)
+    inline Matrix3 & setCol0( const Vector3 & col0 );
+
+    // Set column 1 of a 3x3 matrix
+    //
+    inline Matrix3 & setCol1( const Vector3 & col1 );
+
+    // Set column 2 of a 3x3 matrix
+    //
+    inline Matrix3 & setCol2( const Vector3 & col2 );
+
+    // Get column 0 of a 3x3 matrix
+    //
+    inline const Vector3 getCol0( ) const;
+
+    // Get column 1 of a 3x3 matrix
+    //
+    inline const Vector3 getCol1( ) const;
+
+    // Get column 2 of a 3x3 matrix
+    //
+    inline const Vector3 getCol2( ) const;
+
+    // Set the column of a 3x3 matrix referred to by the specified index
+    // NOTE(review): no range handling is visible in this declaration; col is presumably 0..2.
+    inline Matrix3 & setCol( int col, const Vector3 & vec );
+
+    // Set the row of a 3x3 matrix referred to by the specified index
+    // NOTE(review): no range handling is visible in this declaration; row is presumably 0..2.
+    inline Matrix3 & setRow( int row, const Vector3 & vec );
+
+    // Get the column of a 3x3 matrix referred to by the specified index
+    //
+    inline const Vector3 getCol( int col ) const;
+
+    // Get the row of a 3x3 matrix referred to by the specified index
+    //
+    inline const Vector3 getRow( int row ) const;
+
+    // Subscripting operator to set or get a column
+    // (returns a Vector3 reference, so the column can be assigned through the result)
+    inline Vector3 & operator []( int col );
+
+    // Subscripting operator to get a column
+    // (const overload; returns the column by value)
+    inline const Vector3 operator []( int col ) const;
+
+    // Set the element of a 3x3 matrix referred to by column and row indices
+    // (the column index comes first in the argument list)
+    inline Matrix3 & setElem( int col, int row, vec_float4 val );
+
+    // Get the element of a 3x3 matrix referred to by column and row indices
+    // (the column index comes first in the argument list)
+    inline vec_float4 getElem( int col, int row ) const;
+
+    // Add two 3x3 matrices
+    //
+    inline const Matrix3 operator +( const Matrix3 & mat ) const;
+
+    // Subtract a 3x3 matrix from another 3x3 matrix
+    //
+    inline const Matrix3 operator -( const Matrix3 & mat ) const;
+
+    // Negate all elements of a 3x3 matrix
+    //
+    inline const Matrix3 operator -( ) const;
+
+    // Multiply a 3x3 matrix by a scalar
+    //
+    inline const Matrix3 operator *( vec_float4 scalar ) const;
+
+    // Multiply a 3x3 matrix by a 3-D vector
+    //
+    inline const Vector3 operator *( const Vector3 & vec ) const;
+
+    // Multiply two 3x3 matrices
+    //
+    inline const Matrix3 operator *( const Matrix3 & mat ) const;
+
+    // Perform compound assignment and addition with a 3x3 matrix
+    //
+    inline Matrix3 & operator +=( const Matrix3 & mat );
+
+    // Perform compound assignment and subtraction by a 3x3 matrix
+    //
+    inline Matrix3 & operator -=( const Matrix3 & mat );
+
+    // Perform compound assignment and multiplication by a scalar
+    //
+    inline Matrix3 & operator *=( vec_float4 scalar );
+
+    // Perform compound assignment and multiplication by a 3x3 matrix
+    //
+    inline Matrix3 & operator *=( const Matrix3 & mat );
+
+    // Construct an identity 3x3 matrix
+    //
+    static inline const Matrix3 identity( );
+
+    // Construct a 3x3 matrix to rotate around the x axis
+    //
+    static inline const Matrix3 rotationX( vec_float4 radians );
+
+    // Construct a 3x3 matrix to rotate around the y axis
+    //
+    static inline const Matrix3 rotationY( vec_float4 radians );
+
+    // Construct a 3x3 matrix to rotate around the z axis
+    //
+    static inline const Matrix3 rotationZ( vec_float4 radians );
+
+    // Construct a 3x3 matrix to rotate around the x, y, and z axes
+    // NOTE(review): the name suggests the composition Z * Y * X, with the angles taken from radiansXYZ -- confirm.
+    static inline const Matrix3 rotationZYX( const Vector3 & radiansXYZ );
+
+    // Construct a 3x3 matrix to rotate around a unit-length 3-D vector
+    //
+    static inline const Matrix3 rotation( vec_float4 radians, const Vector3 & unitVec );
+
+    // Construct a 3x3 rotation matrix from a unit-length quaternion
+    //
+    static inline const Matrix3 rotation( const Quat & unitQuat );
+
+    // Construct a 3x3 matrix to perform scaling
+    //
+    static inline const Matrix3 scale( const Vector3 & scaleVec );
+
+};
+// Multiply a 3x3 matrix by a scalar, with the scalar as the left-hand operand
+// (non-member counterpart of Matrix3::operator *( vec_float4 ), so that scalar * mat compiles)
+inline const Matrix3 operator *( vec_float4 scalar, const Matrix3 & mat );
+
+// Append (post-multiply) a scale transformation to a 3x3 matrix
+// NOTE:
+// Faster than creating and multiplying a scale transformation matrix.
+//
+inline const Matrix3 appendScale( const Matrix3 & mat, const Vector3 & scaleVec );
+
+// Prepend (pre-multiply) a scale transformation to a 3x3 matrix
+// NOTE:
+// Faster than creating and multiplying a scale transformation matrix.
+// The argument order mirrors the product: scaleVec first, then mat.
+inline const Matrix3 prependScale( const Vector3 & scaleVec, const Matrix3 & mat );
+
+// Multiply two 3x3 matrices per element
+//
+inline const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 );
+
+// Compute the absolute value of a 3x3 matrix per element
+//
+inline const Matrix3 absPerElem( const Matrix3 & mat );
+
+// Compute the transpose of a 3x3 matrix
+//
+inline const Matrix3 transpose( const Matrix3 & mat );
+
+// Compute the inverse of a 3x3 matrix
+// NOTE:
+// Result is unpredictable when the determinant of mat is equal to or near 0.
+//
+inline const Matrix3 inverse( const Matrix3 & mat );
+
+// Compute the determinant of a 3x3 matrix
+//
+inline vec_float4 determinant( const Matrix3 & mat );
+
+// Conditionally select between two 3x3 matrices
+// NOTE:
+// This function uses a conditional select instruction to avoid a branch.
+// NOTE(review): which operand a set select1 mask picks is not stated here -- presumably mat1, confirm.
+inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, vec_uint4 select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 3x3 matrix
+// NOTE:
+// Only declared when _VECTORMATH_DEBUG is defined (guarded by the enclosing #ifdef).
+//
+inline void print( const Matrix3 & mat );
+
+// Print a 3x3 matrix and an associated string identifier
+// NOTE:
+// Only declared when _VECTORMATH_DEBUG is defined (guarded by the enclosing #ifdef).
+//
+inline void print( const Matrix3 & mat, const char * name );
+
+#endif
+
+// A set of four 4x4 matrices in structure-of-arrays format
+// Stored column-wise: four Vector4 members, one per column.
+class Matrix4
+{
+    Vector4 mCol0;
+    Vector4 mCol1;
+    Vector4 mCol2;
+    Vector4 mCol3;
+
+public:
+    // Default constructor; does no initialization
+    // (empty inline body; the column members are left to their own default construction)
+    inline Matrix4( ) { };
+
+    // Copy a 4x4 matrix
+    //
+    inline Matrix4( const Matrix4 & mat );
+
+    // Construct a 4x4 matrix containing the specified columns
+    //
+    inline Matrix4( const Vector4 & col0, const Vector4 & col1, const Vector4 & col2, const Vector4 & col3 );
+
+    // Construct a 4x4 matrix from a 3x4 transformation matrix
+    // (explicit: a Transform3 is never converted to a Matrix4 implicitly)
+    explicit inline Matrix4( const Transform3 & mat );
+
+    // Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector
+    // NOTE(review): the bottom row is not specified by the arguments; presumably (0, 0, 0, 1) -- confirm.
+    inline Matrix4( const Matrix3 & mat, const Vector3 & translateVec );
+
+    // Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector
+    // NOTE(review): the bottom row is not specified by the arguments; presumably (0, 0, 0, 1) -- confirm.
+    inline Matrix4( const Quat & unitQuat, const Vector3 & translateVec );
+
+    // Set all elements of a 4x4 matrix to the same scalar value
+    // (explicit: a bare vec_float4 is never converted to a Matrix4 implicitly)
+    explicit inline Matrix4( vec_float4 scalar );
+
+    // Replicate an AoS 4x4 matrix
+    // (not explicit, so an Aos::Matrix4 converts to this type implicitly)
+    inline Matrix4( const Aos::Matrix4 & mat );
+
+    // Insert four AoS 4x4 matrices
+    // NOTE(review): presumably mat0..mat3 fill slots 0..3 in that order -- confirm.
+    inline Matrix4( const Aos::Matrix4 & mat0, const Aos::Matrix4 & mat1, const Aos::Matrix4 & mat2, const Aos::Matrix4 & mat3 );
+
+    // Extract four AoS 4x4 matrices
+    // (written through the four reference parameters; nothing is returned)
+    inline void get4Aos( Aos::Matrix4 & result0, Aos::Matrix4 & result1, Aos::Matrix4 & result2, Aos::Matrix4 & result3 ) const;
+
+    // Assign one 4x4 matrix to another
+    //
+    inline Matrix4 & operator =( const Matrix4 & mat );
+
+    // Set the upper-left 3x3 submatrix
+    // NOTE:
+    // This function does not change the bottom row elements.
+    //
+    inline Matrix4 & setUpper3x3( const Matrix3 & mat3 );
+
+    // Get the upper-left 3x3 submatrix of a 4x4 matrix
+    //
+    inline const Matrix3 getUpper3x3( ) const;
+
+    // Set translation component
+    // NOTE:
+    // This function does not change the bottom row elements.
+    //
+    inline Matrix4 & setTranslation( const Vector3 & translateVec );
+
+    // Get the translation component of a 4x4 matrix
+    //
+    inline const Vector3 getTranslation( ) const;
+
+    // Set column 0 of a 4x4 matrix
+    // (like every set* member of this class, returns Matrix4 &; presumably *this -- confirm)
+    inline Matrix4 & setCol0( const Vector4 & col0 );
+
+    // Set column 1 of a 4x4 matrix
+    //
+    inline Matrix4 & setCol1( const Vector4 & col1 );
+
+    // Set column 2 of a 4x4 matrix
+    //
+    inline Matrix4 & setCol2( const Vector4 & col2 );
+
+    // Set column 3 of a 4x4 matrix
+    //
+    inline Matrix4 & setCol3( const Vector4 & col3 );
+
+    // Get column 0 of a 4x4 matrix
+    //
+    inline const Vector4 getCol0( ) const;
+
+    // Get column 1 of a 4x4 matrix
+    //
+    inline const Vector4 getCol1( ) const;
+
+    // Get column 2 of a 4x4 matrix
+    //
+    inline const Vector4 getCol2( ) const;
+
+    // Get column 3 of a 4x4 matrix
+    //
+    inline const Vector4 getCol3( ) const;
+
+    // Set the column of a 4x4 matrix referred to by the specified index
+    // NOTE(review): no range handling is visible in this declaration; col is presumably 0..3.
+    inline Matrix4 & setCol( int col, const Vector4 & vec );
+
+    // Set the row of a 4x4 matrix referred to by the specified index
+    // NOTE(review): no range handling is visible in this declaration; row is presumably 0..3.
+    inline Matrix4 & setRow( int row, const Vector4 & vec );
+
+    // Get the column of a 4x4 matrix referred to by the specified index
+    //
+    inline const Vector4 getCol( int col ) const;
+
+    // Get the row of a 4x4 matrix referred to by the specified index
+    //
+    inline const Vector4 getRow( int row ) const;
+
+    // Subscripting operator to set or get a column
+    // (returns a Vector4 reference, so the column can be assigned through the result)
+    inline Vector4 & operator []( int col );
+
+    // Subscripting operator to get a column
+    // (const overload; returns the column by value)
+    inline const Vector4 operator []( int col ) const;
+
+    // Set the element of a 4x4 matrix referred to by column and row indices
+    // (the column index comes first in the argument list)
+    inline Matrix4 & setElem( int col, int row, vec_float4 val );
+
+    // Get the element of a 4x4 matrix referred to by column and row indices
+    // (the column index comes first in the argument list)
+    inline vec_float4 getElem( int col, int row ) const;
+
+    // Add two 4x4 matrices
+    //
+    inline const Matrix4 operator +( const Matrix4 & mat ) const;
+
+    // Subtract a 4x4 matrix from another 4x4 matrix
+    //
+    inline const Matrix4 operator -( const Matrix4 & mat ) const;
+
+    // Negate all elements of a 4x4 matrix
+    //
+    inline const Matrix4 operator -( ) const;
+
+    // Multiply a 4x4 matrix by a scalar
+    //
+    inline const Matrix4 operator *( vec_float4 scalar ) const;
+
+    // Multiply a 4x4 matrix by a 4-D vector
+    //
+    inline const Vector4 operator *( const Vector4 & vec ) const;
+
+    // Multiply a 4x4 matrix by a 3-D vector
+    // Returns a Vector4. NOTE(review): the w used for vec is not stated here -- presumably 0, confirm.
+    inline const Vector4 operator *( const Vector3 & vec ) const;
+
+    // Multiply a 4x4 matrix by a 3-D point
+    // Returns a Vector4. NOTE(review): the w used for pnt is not stated here -- presumably 1, confirm.
+    inline const Vector4 operator *( const Point3 & pnt ) const;
+
+    // Multiply two 4x4 matrices
+    //
+    inline const Matrix4 operator *( const Matrix4 & mat ) const;
+
+    // Multiply a 4x4 matrix by a 3x4 transformation matrix
+    //
+    inline const Matrix4 operator *( const Transform3 & tfrm ) const;
+
+    // Perform compound assignment and addition with a 4x4 matrix
+    //
+    inline Matrix4 & operator +=( const Matrix4 & mat );
+
+    // Perform compound assignment and subtraction by a 4x4 matrix
+    //
+    inline Matrix4 & operator -=( const Matrix4 & mat );
+
+    // Perform compound assignment and multiplication by a scalar
+    //
+    inline Matrix4 & operator *=( vec_float4 scalar );
+
+    // Perform compound assignment and multiplication by a 4x4 matrix
+    //
+    inline Matrix4 & operator *=( const Matrix4 & mat );
+
+    // Perform compound assignment and multiplication by a 3x4 transformation matrix
+    //
+    inline Matrix4 & operator *=( const Transform3 & tfrm );
+
+    // Construct an identity 4x4 matrix
+    //
+    static inline const Matrix4 identity( );
+
+    // Construct a 4x4 matrix to rotate around the x axis
+    //
+    static inline const Matrix4 rotationX( vec_float4 radians );
+
+    // Construct a 4x4 matrix to rotate around the y axis
+    //
+    static inline const Matrix4 rotationY( vec_float4 radians );
+
+    // Construct a 4x4 matrix to rotate around the z axis
+    //
+    static inline const Matrix4 rotationZ( vec_float4 radians );
+
+    // Construct a 4x4 matrix to rotate around the x, y, and z axes
+    // NOTE(review): the name suggests the composition Z * Y * X, with the angles taken from radiansXYZ -- confirm.
+    static inline const Matrix4 rotationZYX( const Vector3 & radiansXYZ );
+
+    // Construct a 4x4 matrix to rotate around a unit-length 3-D vector
+    //
+    static inline const Matrix4 rotation( vec_float4 radians, const Vector3 & unitVec );
+
+    // Construct a 4x4 rotation matrix from a unit-length quaternion
+    //
+    static inline const Matrix4 rotation( const Quat & unitQuat );
+
+    // Construct a 4x4 matrix to perform scaling
+    //
+    static inline const Matrix4 scale( const Vector3 & scaleVec );
+
+    // Construct a 4x4 matrix to perform translation
+    //
+    static inline const Matrix4 translation( const Vector3 & translateVec );
+
+    // Construct a viewing matrix based on eye position, position looked at, and up direction
+    //
+    static inline const Matrix4 lookAt( const Point3 & eyePos, const Point3 & lookAtPos, const Vector3 & upVec );
+
+    // Construct a perspective projection matrix
+    // (the field of view is given along y, in radians, per the parameter name)
+    static inline const Matrix4 perspective( vec_float4 fovyRadians, vec_float4 aspect, vec_float4 zNear, vec_float4 zFar );
+
+    // Construct a perspective projection matrix based on frustum bounds
+    //
+    static inline const Matrix4 frustum( vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar );
+
+    // Construct an orthographic projection matrix
+    //
+    static inline const Matrix4 orthographic( vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar );
+
+};
+// Multiply a 4x4 matrix by a scalar, with the scalar as the left-hand operand
+// (non-member counterpart of Matrix4::operator *( vec_float4 ), so that scalar * mat compiles)
+inline const Matrix4 operator *( vec_float4 scalar, const Matrix4 & mat );
+
+// Append (post-multiply) a scale transformation to a 4x4 matrix
+// NOTE:
+// Faster than creating and multiplying a scale transformation matrix.
+//
+inline const Matrix4 appendScale( const Matrix4 & mat, const Vector3 & scaleVec );
+
+// Prepend (pre-multiply) a scale transformation to a 4x4 matrix
+// NOTE:
+// Faster than creating and multiplying a scale transformation matrix.
+// The argument order mirrors the product: scaleVec first, then mat.
+inline const Matrix4 prependScale( const Vector3 & scaleVec, const Matrix4 & mat );
+
+// Multiply two 4x4 matrices per element
+//
+inline const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 );
+
+// Compute the absolute value of a 4x4 matrix per element
+//
+inline const Matrix4 absPerElem( const Matrix4 & mat );
+
+// Compute the transpose of a 4x4 matrix
+//
+inline const Matrix4 transpose( const Matrix4 & mat );
+
+// Compute the inverse of a 4x4 matrix
+// NOTE:
+// Result is unpredictable when the determinant of mat is equal to or near 0.
+//
+inline const Matrix4 inverse( const Matrix4 & mat );
+
+// Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix
+// NOTE:
+// This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions. The result is unpredictable when the determinant of mat is equal to or near 0.
+//
+inline const Matrix4 affineInverse( const Matrix4 & mat );
+
+// Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix
+// NOTE:
+// This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.
+//
+inline const Matrix4 orthoInverse( const Matrix4 & mat );
+
+// Compute the determinant of a 4x4 matrix
+//
+inline vec_float4 determinant( const Matrix4 & mat );
+
+// Conditionally select between two 4x4 matrices
+// NOTE:
+// This function uses a conditional select instruction to avoid a branch.
+// NOTE(review): which operand a set select1 mask picks is not stated here -- presumably mat1, confirm.
+inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, vec_uint4 select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 4x4 matrix
+// NOTE:
+// Only declared when _VECTORMATH_DEBUG is defined (guarded by the enclosing #ifdef).
+//
+inline void print( const Matrix4 & mat );
+
+// Print a 4x4 matrix and an associated string identifier
+// NOTE:
+// Only declared when _VECTORMATH_DEBUG is defined (guarded by the enclosing #ifdef).
+//
+inline void print( const Matrix4 & mat, const char * name );
+
+#endif
+
+// A set of four 3x4 transformation matrices in structure-of-arrays format
+// Stored column-wise: four Vector3 members, one per column (three rows by four columns).
+class Transform3
+{
+    Vector3 mCol0;
+    Vector3 mCol1;
+    Vector3 mCol2;
+    Vector3 mCol3;
+
+public:
+    // Default constructor; does no initialization
+    // (empty inline body; the column members are left to their own default construction)
+    inline Transform3( ) { };
+
+    // Copy a 3x4 transformation matrix
+    //
+    inline Transform3( const Transform3 & tfrm );
+
+    // Construct a 3x4 transformation matrix containing the specified columns
+    //
+    inline Transform3( const Vector3 & col0, const Vector3 & col1, const Vector3 & col2, const Vector3 & col3 );
+
+    // Construct a 3x4 transformation matrix from a 3x3 matrix and a 3-D vector
+    // (despite its name, the first parameter tfrm is a Matrix3)
+    inline Transform3( const Matrix3 & tfrm, const Vector3 & translateVec );
+
+    // Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector
+    //
+    inline Transform3( const Quat & unitQuat, const Vector3 & translateVec );
+
+    // Set all elements of a 3x4 transformation matrix to the same scalar value
+    // (explicit: a bare vec_float4 is never converted to a Transform3 implicitly)
+    explicit inline Transform3( vec_float4 scalar );
+
+    // Replicate an AoS 3x4 transformation matrix
+    // (not explicit, so an Aos::Transform3 converts to this type implicitly)
+    inline Transform3( const Aos::Transform3 & tfrm );
+
+    // Insert four AoS 3x4 transformation matrices
+    // NOTE(review): presumably tfrm0..tfrm3 fill slots 0..3 in that order -- confirm.
+    inline Transform3( const Aos::Transform3 & tfrm0, const Aos::Transform3 & tfrm1, const Aos::Transform3 & tfrm2, const Aos::Transform3 & tfrm3 );
+
+    // Extract four AoS 3x4 transformation matrices
+    // (written through the four reference parameters; nothing is returned)
+    inline void get4Aos( Aos::Transform3 & result0, Aos::Transform3 & result1, Aos::Transform3 & result2, Aos::Transform3 & result3 ) const;
+
+    // Assign one 3x4 transformation matrix to another
+    //
+    inline Transform3 & operator =( const Transform3 & tfrm );
+
+    // Set the upper-left 3x3 submatrix
+    //
+    inline Transform3 & setUpper3x3( const Matrix3 & mat3 );
+
+    // Get the upper-left 3x3 submatrix of a 3x4 transformation matrix
+    //
+    inline const Matrix3 getUpper3x3( ) const;
+
+    // Set translation component
+    //
+    inline Transform3 & setTranslation( const Vector3 & translateVec );
+
+    // Get the translation component of a 3x4 transformation matrix
+    //
+    inline const Vector3 getTranslation( ) const;
+
+    // Set column 0 of a 3x4 transformation matrix
+    // (like every set* member of this class, returns Transform3 &; presumably *this -- confirm)
+    inline Transform3 & setCol0( const Vector3 & col0 );
+
+    // Set column 1 of a 3x4 transformation matrix
+    //
+    inline Transform3 & setCol1( const Vector3 & col1 );
+
+    // Set column 2 of a 3x4 transformation matrix
+    //
+    inline Transform3 & setCol2( const Vector3 & col2 );
+
+    // Set column 3 of a 3x4 transformation matrix
+    //
+    inline Transform3 & setCol3( const Vector3 & col3 );
+
+    // Get column 0 of a 3x4 transformation matrix
+    //
+    inline const Vector3 getCol0( ) const;
+
+    // Get column 1 of a 3x4 transformation matrix
+    //
+    inline const Vector3 getCol1( ) const;
+
+    // Get column 2 of a 3x4 transformation matrix
+    //
+    inline const Vector3 getCol2( ) const;
+
+    // Get column 3 of a 3x4 transformation matrix
+    //
+    inline const Vector3 getCol3( ) const;
+
+    // Set the column of a 3x4 transformation matrix referred to by the specified index
+    // NOTE(review): no range handling is visible in this declaration; col is presumably 0..3.
+    inline Transform3 & setCol( int col, const Vector3 & vec );
+
+    // Set the row of a 3x4 transformation matrix referred to by the specified index
+    // (a row spans all four columns, hence the Vector4 argument)
+    inline Transform3 & setRow( int row, const Vector4 & vec );
+
+    // Get the column of a 3x4 transformation matrix referred to by the specified index
+    //
+    inline const Vector3 getCol( int col ) const;
+
+    // Get the row of a 3x4 transformation matrix referred to by the specified index
+    // (a row spans all four columns, hence the Vector4 result)
+    inline const Vector4 getRow( int row ) const;
+
+    // Subscripting operator to set or get a column
+    // (returns a Vector3 reference, so the column can be assigned through the result)
+    inline Vector3 & operator []( int col );
+
+    // Subscripting operator to get a column
+    // (const overload; returns the column by value)
+    inline const Vector3 operator []( int col ) const;
+
+    // Set the element of a 3x4 transformation matrix referred to by column and row indices
+    // (the column index comes first in the argument list)
+    inline Transform3 & setElem( int col, int row, vec_float4 val );
+
+    // Get the element of a 3x4 transformation matrix referred to by column and row indices
+    // (the column index comes first in the argument list)
+    inline vec_float4 getElem( int col, int row ) const;
+
+    // Multiply a 3x4 transformation matrix by a 3-D vector
+    // NOTE(review): presumably the translation column is not applied to a Vector3 -- confirm.
+    inline const Vector3 operator *( const Vector3 & vec ) const;
+
+    // Multiply a 3x4 transformation matrix by a 3-D point
+    // NOTE(review): presumably the translation column is applied to a Point3 -- confirm.
+    inline const Point3 operator *( const Point3 & pnt ) const;
+
+    // Multiply two 3x4 transformation matrices
+    //
+    inline const Transform3 operator *( const Transform3 & tfrm ) const;
+
+    // Perform compound assignment and multiplication by a 3x4 transformation matrix
+    //
+    inline Transform3 & operator *=( const Transform3 & tfrm );
+
+    // Construct an identity 3x4 transformation matrix
+    //
+    static inline const Transform3 identity( );
+
+    // Construct a 3x4 transformation matrix to rotate around the x axis
+    //
+    static inline const Transform3 rotationX( vec_float4 radians );
+
+    // Construct a 3x4 transformation matrix to rotate around the y axis
+    //
+    static inline const Transform3 rotationY( vec_float4 radians );
+
+    // Construct a 3x4 transformation matrix to rotate around the z axis
+    //
+    static inline const Transform3 rotationZ( vec_float4 radians );
+
+    // Construct a 3x4 transformation matrix to rotate around the x, y, and z axes
+    // NOTE(review): the name suggests the composition Z * Y * X, with the angles taken from radiansXYZ -- confirm.
+    static inline const Transform3 rotationZYX( const Vector3 & radiansXYZ );
+
+    // Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector
+    //
+    static inline const Transform3 rotation( vec_float4 radians, const Vector3 & unitVec );
+
+    // Construct a 3x4 rotation transformation matrix from a unit-length quaternion
+    //
+    static inline const Transform3 rotation( const Quat & unitQuat );
+
+    // Construct a 3x4 transformation matrix to perform scaling
+    //
+    static inline const Transform3 scale( const Vector3 & scaleVec );
+
+    // Construct a 3x4 transformation matrix to perform translation
+    //
+    static inline const Transform3 translation( const Vector3 & translateVec );
+
+};
+// Append (post-multiply) a scale transformation to a 3x4 transformation matrix
+// NOTE:
+// Faster than creating and multiplying a scale transformation matrix.
+//
+inline const Transform3 appendScale( const Transform3 & tfrm, const Vector3 & scaleVec );
+
+// Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix
+// NOTE:
+// Faster than creating and multiplying a scale transformation matrix.
+// The argument order mirrors the product: scaleVec first, then tfrm.
+inline const Transform3 prependScale( const Vector3 & scaleVec, const Transform3 & tfrm );
+
+// Multiply two 3x4 transformation matrices per element
+//
+inline const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 );
+
+// Compute the absolute value of a 3x4 transformation matrix per element
+//
+inline const Transform3 absPerElem( const Transform3 & tfrm );
+
+// Compute the inverse of a 3x4 transformation matrix
+// NOTE:
+// Result is unpredictable when the determinant of the upper-left 3x3 submatrix is equal to or near 0.
+//
+inline const Transform3 inverse( const Transform3 & tfrm );
+
+// Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix
+// NOTE:
+// This can be used to achieve better performance than a general inverse when the specified 3x4 transformation matrix meets the given restrictions.
+//
+inline const Transform3 orthoInverse( const Transform3 & tfrm );
+
+// Conditionally select between two 3x4 transformation matrices
+// NOTE:
+// This function uses a conditional select instruction to avoid a branch.
+// NOTE(review): which operand a set select1 mask picks is not stated here -- presumably tfrm1, confirm.
+inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, vec_uint4 select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 3x4 transformation matrix
+// NOTE:
+// Only declared when _VECTORMATH_DEBUG is defined (guarded by the enclosing #ifdef).
+//
+inline void print( const Transform3 & tfrm );
+
+// Print a 3x4 transformation matrix and an associated string identifier
+// NOTE:
+// Only declared when _VECTORMATH_DEBUG is defined (guarded by the enclosing #ifdef).
+//
+inline void print( const Transform3 & tfrm, const char * name );
+
+#endif
+
+} // namespace Soa
+} // namespace Vectormath
+
+#include "vec_soa.h"
+#include "quat_soa.h"
+#include "mat_soa.h"
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/scalar/c/mat_aos.h b/Extras/vectormathlibrary/include/vectormath/scalar/c/mat_aos.h
index 4e0d6ee5c..01380b52e 100644
--- a/Extras/vectormathlibrary/include/vectormath/scalar/c/mat_aos.h
+++ b/Extras/vectormathlibrary/include/vectormath/scalar/c/mat_aos.h
@@ -1,1452 +1,1452 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_MAT_AOS_C_H
-#define _VECTORMATH_MAT_AOS_C_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/*-----------------------------------------------------------------------------
- * Constants
- */
-#define _VECTORMATH_PI_OVER_2 1.570796327f
-
-/*-----------------------------------------------------------------------------
- * Definitions
- */
-static inline void vmathM3Copy( VmathMatrix3 *result, const VmathMatrix3 *mat )
-{
-    vmathV3Copy( &result->col0, &mat->col0 );
-    vmathV3Copy( &result->col1, &mat->col1 );
-    vmathV3Copy( &result->col2, &mat->col2 );
-}
-
-static inline void vmathM3MakeFromScalar( VmathMatrix3 *result, float scalar )
-{
-    vmathV3MakeFromScalar( &result->col0, scalar );
-    vmathV3MakeFromScalar( &result->col1, scalar );
-    vmathV3MakeFromScalar( &result->col2, scalar );
-}
-
-static inline void vmathM3MakeFromQ( VmathMatrix3 *result, const VmathQuat *unitQuat )
-{
-    float qx, qy, qz, qw, qx2, qy2, qz2, qxqx2, qyqy2, qzqz2, qxqy2, qyqz2, qzqw2, qxqz2, qyqw2, qxqw2;
-    qx = unitQuat->x;
-    qy = unitQuat->y;
-    qz = unitQuat->z;
-    qw = unitQuat->w;
-    qx2 = ( qx + qx );
-    qy2 = ( qy + qy );
-    qz2 = ( qz + qz );
-    qxqx2 = ( qx * qx2 );
-    qxqy2 = ( qx * qy2 );
-    qxqz2 = ( qx * qz2 );
-    qxqw2 = ( qw * qx2 );
-    qyqy2 = ( qy * qy2 );
-    qyqz2 = ( qy * qz2 );
-    qyqw2 = ( qw * qy2 );
-    qzqz2 = ( qz * qz2 );
-    qzqw2 = ( qw * qz2 );
-    vmathV3MakeFromElems( &result->col0, ( ( 1.0f - qyqy2 ) - qzqz2 ), ( qxqy2 + qzqw2 ), ( qxqz2 - qyqw2 ) );
-    vmathV3MakeFromElems( &result->col1, ( qxqy2 - qzqw2 ), ( ( 1.0f - qxqx2 ) - qzqz2 ), ( qyqz2 + qxqw2 ) );
-    vmathV3MakeFromElems( &result->col2, ( qxqz2 + qyqw2 ), ( qyqz2 - qxqw2 ), ( ( 1.0f - qxqx2 ) - qyqy2 ) );
-}
-
-static inline void vmathM3MakeFromCols( VmathMatrix3 *result, const VmathVector3 *_col0, const VmathVector3 *_col1, const VmathVector3 *_col2 )
-{
-    vmathV3Copy( &result->col0, _col0 );
-    vmathV3Copy( &result->col1, _col1 );
-    vmathV3Copy( &result->col2, _col2 );
-}
-
-static inline void vmathM3SetCol0( VmathMatrix3 *result, const VmathVector3 *_col0 )
-{
-    vmathV3Copy( &result->col0, _col0 );
-}
-
-static inline void vmathM3SetCol1( VmathMatrix3 *result, const VmathVector3 *_col1 )
-{
-    vmathV3Copy( &result->col1, _col1 );
-}
-
-static inline void vmathM3SetCol2( VmathMatrix3 *result, const VmathVector3 *_col2 )
-{
-    vmathV3Copy( &result->col2, _col2 );
-}
-
-static inline void vmathM3SetCol( VmathMatrix3 *result, int col, const VmathVector3 *vec )
-{
-    vmathV3Copy( (&result->col0 + col), vec );
-}
-
-static inline void vmathM3SetRow( VmathMatrix3 *result, int row, const VmathVector3 *vec )
-{
-    vmathV3SetElem( &result->col0, row, vmathV3GetElem( vec, 0 ) );
-    vmathV3SetElem( &result->col1, row, vmathV3GetElem( vec, 1 ) );
-    vmathV3SetElem( &result->col2, row, vmathV3GetElem( vec, 2 ) );
-}
-
-static inline void vmathM3SetElem( VmathMatrix3 *result, int col, int row, float val )
-{
-    VmathVector3 tmpV3_0;
-    vmathM3GetCol( &tmpV3_0, result, col );
-    vmathV3SetElem( &tmpV3_0, row, val );
-    vmathM3SetCol( result, col, &tmpV3_0 );
-}
-
-static inline float vmathM3GetElem( const VmathMatrix3 *mat, int col, int row )
-{
-    VmathVector3 tmpV3_0;
-    vmathM3GetCol( &tmpV3_0, mat, col );
-    return vmathV3GetElem( &tmpV3_0, row );
-}
-
-static inline void vmathM3GetCol0( VmathVector3 *result, const VmathMatrix3 *mat )
-{
-    vmathV3Copy( result, &mat->col0 );
-}
-
-static inline void vmathM3GetCol1( VmathVector3 *result, const VmathMatrix3 *mat )
-{
-    vmathV3Copy( result, &mat->col1 );
-}
-
-static inline void vmathM3GetCol2( VmathVector3 *result, const VmathMatrix3 *mat )
-{
-    vmathV3Copy( result, &mat->col2 );
-}
-
-static inline void vmathM3GetCol( VmathVector3 *result, const VmathMatrix3 *mat, int col )
-{
-    vmathV3Copy( result, (&mat->col0 + col) );
-}
-
-static inline void vmathM3GetRow( VmathVector3 *result, const VmathMatrix3 *mat, int row )
-{
-    vmathV3MakeFromElems( result, vmathV3GetElem( &mat->col0, row ), vmathV3GetElem( &mat->col1, row ), vmathV3GetElem( &mat->col2, row ) );
-}
-
-static inline void vmathM3Transpose( VmathMatrix3 *result, const VmathMatrix3 *mat )
-{
-    VmathMatrix3 tmpResult;
-    vmathV3MakeFromElems( &tmpResult.col0, mat->col0.x, mat->col1.x, mat->col2.x );
-    vmathV3MakeFromElems( &tmpResult.col1, mat->col0.y, mat->col1.y, mat->col2.y );
-    vmathV3MakeFromElems( &tmpResult.col2, mat->col0.z, mat->col1.z, mat->col2.z );
-    vmathM3Copy( result, &tmpResult );
-}
-
-static inline void vmathM3Inverse( VmathMatrix3 *result, const VmathMatrix3 *mat )
-{
-    VmathVector3 tmp0, tmp1, tmp2;
-    float detinv;
-    vmathV3Cross( &tmp0, &mat->col1, &mat->col2 );
-    vmathV3Cross( &tmp1, &mat->col2, &mat->col0 );
-    vmathV3Cross( &tmp2, &mat->col0, &mat->col1 );
-    detinv = ( 1.0f / vmathV3Dot( &mat->col2, &tmp2 ) );
-    vmathV3MakeFromElems( &result->col0, ( tmp0.x * detinv ), ( tmp1.x * detinv ), ( tmp2.x * detinv ) );
-    vmathV3MakeFromElems( &result->col1, ( tmp0.y * detinv ), ( tmp1.y * detinv ), ( tmp2.y * detinv ) );
-    vmathV3MakeFromElems( &result->col2, ( tmp0.z * detinv ), ( tmp1.z * detinv ), ( tmp2.z * detinv ) );
-}
-
-static inline float vmathM3Determinant( const VmathMatrix3 *mat )
-{
-    VmathVector3 tmpV3_0;
-    vmathV3Cross( &tmpV3_0, &mat->col0, &mat->col1 );
-    return vmathV3Dot( &mat->col2, &tmpV3_0 );
-}
-
-static inline void vmathM3Add( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 )
-{
-    vmathV3Add( &result->col0, &mat0->col0, &mat1->col0 );
-    vmathV3Add( &result->col1, &mat0->col1, &mat1->col1 );
-    vmathV3Add( &result->col2, &mat0->col2, &mat1->col2 );
-}
-
-static inline void vmathM3Sub( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 )
-{
-    vmathV3Sub( &result->col0, &mat0->col0, &mat1->col0 );
-    vmathV3Sub( &result->col1, &mat0->col1, &mat1->col1 );
-    vmathV3Sub( &result->col2, &mat0->col2, &mat1->col2 );
-}
-
-static inline void vmathM3Neg( VmathMatrix3 *result, const VmathMatrix3 *mat )
-{
-    vmathV3Neg( &result->col0, &mat->col0 );
-    vmathV3Neg( &result->col1, &mat->col1 );
-    vmathV3Neg( &result->col2, &mat->col2 );
-}
-
-static inline void vmathM3AbsPerElem( VmathMatrix3 *result, const VmathMatrix3 *mat )
-{
-    vmathV3AbsPerElem( &result->col0, &mat->col0 );
-    vmathV3AbsPerElem( &result->col1, &mat->col1 );
-    vmathV3AbsPerElem( &result->col2, &mat->col2 );
-}
-
-static inline void vmathM3ScalarMul( VmathMatrix3 *result, const VmathMatrix3 *mat, float scalar )
-{
-    vmathV3ScalarMul( &result->col0, &mat->col0, scalar );
-    vmathV3ScalarMul( &result->col1, &mat->col1, scalar );
-    vmathV3ScalarMul( &result->col2, &mat->col2, scalar );
-}
-
-static inline void vmathM3MulV3( VmathVector3 *result, const VmathMatrix3 *mat, const VmathVector3 *vec )
-{
-    float tmpX, tmpY, tmpZ;
-    tmpX = ( ( ( mat->col0.x * vec->x ) + ( mat->col1.x * vec->y ) ) + ( mat->col2.x * vec->z ) );
-    tmpY = ( ( ( mat->col0.y * vec->x ) + ( mat->col1.y * vec->y ) ) + ( mat->col2.y * vec->z ) );
-    tmpZ = ( ( ( mat->col0.z * vec->x ) + ( mat->col1.z * vec->y ) ) + ( mat->col2.z * vec->z ) );
-    vmathV3MakeFromElems( result, tmpX, tmpY, tmpZ );
-}
-
-static inline void vmathM3Mul( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 )
-{
-    VmathMatrix3 tmpResult;
-    vmathM3MulV3( &tmpResult.col0, mat0, &mat1->col0 );
-    vmathM3MulV3( &tmpResult.col1, mat0, &mat1->col1 );
-    vmathM3MulV3( &tmpResult.col2, mat0, &mat1->col2 );
-    vmathM3Copy( result, &tmpResult );
-}
-
-static inline void vmathM3MulPerElem( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 )
-{
-    vmathV3MulPerElem( &result->col0, &mat0->col0, &mat1->col0 );
-    vmathV3MulPerElem( &result->col1, &mat0->col1, &mat1->col1 );
-    vmathV3MulPerElem( &result->col2, &mat0->col2, &mat1->col2 );
-}
-
-static inline void vmathM3MakeIdentity( VmathMatrix3 *result )
-{
-    vmathV3MakeXAxis( &result->col0 );
-    vmathV3MakeYAxis( &result->col1 );
-    vmathV3MakeZAxis( &result->col2 );
-}
-
-static inline void vmathM3MakeRotationX( VmathMatrix3 *result, float radians )
-{
-    float s, c;
-    s = sinf( radians );
-    c = cosf( radians );
-    vmathV3MakeXAxis( &result->col0 );
-    vmathV3MakeFromElems( &result->col1, 0.0f, c, s );
-    vmathV3MakeFromElems( &result->col2, 0.0f, -s, c );
-}
-
-static inline void vmathM3MakeRotationY( VmathMatrix3 *result, float radians )
-{
-    float s, c;
-    s = sinf( radians );
-    c = cosf( radians );
-    vmathV3MakeFromElems( &result->col0, c, 0.0f, -s );
-    vmathV3MakeYAxis( &result->col1 );
-    vmathV3MakeFromElems( &result->col2, s, 0.0f, c );
-}
-
-static inline void vmathM3MakeRotationZ( VmathMatrix3 *result, float radians )
-{
-    float s, c;
-    s = sinf( radians );
-    c = cosf( radians );
-    vmathV3MakeFromElems( &result->col0, c, s, 0.0f );
-    vmathV3MakeFromElems( &result->col1, -s, c, 0.0f );
-    vmathV3MakeZAxis( &result->col2 );
-}
-
-static inline void vmathM3MakeRotationZYX( VmathMatrix3 *result, const VmathVector3 *radiansXYZ )
-{
-    float sX, cX, sY, cY, sZ, cZ, tmp0, tmp1;
-    sX = sinf( radiansXYZ->x );
-    cX = cosf( radiansXYZ->x );
-    sY = sinf( radiansXYZ->y );
-    cY = cosf( radiansXYZ->y );
-    sZ = sinf( radiansXYZ->z );
-    cZ = cosf( radiansXYZ->z );
-    tmp0 = ( cZ * sY );
-    tmp1 = ( sZ * sY );
-    vmathV3MakeFromElems( &result->col0, ( cZ * cY ), ( sZ * cY ), -sY );
-    vmathV3MakeFromElems( &result->col1, ( ( tmp0 * sX ) - ( sZ * cX ) ), ( ( tmp1 * sX ) + ( cZ * cX ) ), ( cY * sX ) );
-    vmathV3MakeFromElems( &result->col2, ( ( tmp0 * cX ) + ( sZ * sX ) ), ( ( tmp1 * cX ) - ( cZ * sX ) ), ( cY * cX ) );
-}
-
-static inline void vmathM3MakeRotationAxis( VmathMatrix3 *result, float radians, const VmathVector3 *unitVec )
-{
-    float x, y, z, s, c, oneMinusC, xy, yz, zx;
-    s = sinf( radians );
-    c = cosf( radians );
-    x = unitVec->x;
-    y = unitVec->y;
-    z = unitVec->z;
-    xy = ( x * y );
-    yz = ( y * z );
-    zx = ( z * x );
-    oneMinusC = ( 1.0f - c );
-    vmathV3MakeFromElems( &result->col0, ( ( ( x * x ) * oneMinusC ) + c ), ( ( xy * oneMinusC ) + ( z * s ) ), ( ( zx * oneMinusC ) - ( y * s ) ) );
-    vmathV3MakeFromElems( &result->col1, ( ( xy * oneMinusC ) - ( z * s ) ), ( ( ( y * y ) * oneMinusC ) + c ), ( ( yz * oneMinusC ) + ( x * s ) ) );
-    vmathV3MakeFromElems( &result->col2, ( ( zx * oneMinusC ) + ( y * s ) ), ( ( yz * oneMinusC ) - ( x * s ) ), ( ( ( z * z ) * oneMinusC ) + c ) );
-}
-
-static inline void vmathM3MakeRotationQ( VmathMatrix3 *result, const VmathQuat *unitQuat )
-{
-    vmathM3MakeFromQ( result, unitQuat );
-}
-
-static inline void vmathM3MakeScale( VmathMatrix3 *result, const VmathVector3 *scaleVec )
-{
-    vmathV3MakeFromElems( &result->col0, scaleVec->x, 0.0f, 0.0f );
-    vmathV3MakeFromElems( &result->col1, 0.0f, scaleVec->y, 0.0f );
-    vmathV3MakeFromElems( &result->col2, 0.0f, 0.0f, scaleVec->z );
-}
-
-static inline void vmathM3AppendScale( VmathMatrix3 *result, const VmathMatrix3 *mat, const VmathVector3 *scaleVec )
-{
-    vmathV3ScalarMul( &result->col0, &mat->col0, vmathV3GetX( scaleVec ) );
-    vmathV3ScalarMul( &result->col1, &mat->col1, vmathV3GetY( scaleVec ) );
-    vmathV3ScalarMul( &result->col2, &mat->col2, vmathV3GetZ( scaleVec ) );
-}
-
-static inline void vmathM3PrependScale( VmathMatrix3 *result, const VmathVector3 *scaleVec, const VmathMatrix3 *mat )
-{
-    vmathV3MulPerElem( &result->col0, &mat->col0, scaleVec );
-    vmathV3MulPerElem( &result->col1, &mat->col1, scaleVec );
-    vmathV3MulPerElem( &result->col2, &mat->col2, scaleVec );
-}
-
-static inline void vmathM3Select( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1, unsigned int select1 )
-{
-    vmathV3Select( &result->col0, &mat0->col0, &mat1->col0, select1 );
-    vmathV3Select( &result->col1, &mat0->col1, &mat1->col1, select1 );
-    vmathV3Select( &result->col2, &mat0->col2, &mat1->col2, select1 );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathM3Print( const VmathMatrix3 *mat )
-{
-    VmathVector3 tmpV3_0, tmpV3_1, tmpV3_2;
-    vmathM3GetRow( &tmpV3_0, mat, 0 );
-    vmathV3Print( &tmpV3_0 );
-    vmathM3GetRow( &tmpV3_1, mat, 1 );
-    vmathV3Print( &tmpV3_1 );
-    vmathM3GetRow( &tmpV3_2, mat, 2 );
-    vmathV3Print( &tmpV3_2 );
-}
-
-static inline void vmathM3Prints( const VmathMatrix3 *mat, const char *name )
-{
-    printf("%s:\n", name);
-    vmathM3Print( mat );
-}
-
-#endif
-
-static inline void vmathM4Copy( VmathMatrix4 *result, const VmathMatrix4 *mat )
-{
-    vmathV4Copy( &result->col0, &mat->col0 );
-    vmathV4Copy( &result->col1, &mat->col1 );
-    vmathV4Copy( &result->col2, &mat->col2 );
-    vmathV4Copy( &result->col3, &mat->col3 );
-}
-
-static inline void vmathM4MakeFromScalar( VmathMatrix4 *result, float scalar )
-{
-    vmathV4MakeFromScalar( &result->col0, scalar );
-    vmathV4MakeFromScalar( &result->col1, scalar );
-    vmathV4MakeFromScalar( &result->col2, scalar );
-    vmathV4MakeFromScalar( &result->col3, scalar );
-}
-
-static inline void vmathM4MakeFromT3( VmathMatrix4 *result, const VmathTransform3 *mat )
-{
-    vmathV4MakeFromV3Scalar( &result->col0, &mat->col0, 0.0f );
-    vmathV4MakeFromV3Scalar( &result->col1, &mat->col1, 0.0f );
-    vmathV4MakeFromV3Scalar( &result->col2, &mat->col2, 0.0f );
-    vmathV4MakeFromV3Scalar( &result->col3, &mat->col3, 1.0f );
-}
-
-static inline void vmathM4MakeFromCols( VmathMatrix4 *result, const VmathVector4 *_col0, const VmathVector4 *_col1, const VmathVector4 *_col2, const VmathVector4 *_col3 )
-{
-    vmathV4Copy( &result->col0, _col0 );
-    vmathV4Copy( &result->col1, _col1 );
-    vmathV4Copy( &result->col2, _col2 );
-    vmathV4Copy( &result->col3, _col3 );
-}
-
-static inline void vmathM4MakeFromM3V3( VmathMatrix4 *result, const VmathMatrix3 *mat, const VmathVector3 *translateVec )
-{
-    vmathV4MakeFromV3Scalar( &result->col0, &mat->col0, 0.0f );
-    vmathV4MakeFromV3Scalar( &result->col1, &mat->col1, 0.0f );
-    vmathV4MakeFromV3Scalar( &result->col2, &mat->col2, 0.0f );
-    vmathV4MakeFromV3Scalar( &result->col3, translateVec, 1.0f );
-}
-
-static inline void vmathM4MakeFromQV3( VmathMatrix4 *result, const VmathQuat *unitQuat, const VmathVector3 *translateVec )
-{
-    VmathMatrix3 mat;
-    vmathM3MakeFromQ( &mat, unitQuat );
-    vmathV4MakeFromV3Scalar( &result->col0, &mat.col0, 0.0f );
-    vmathV4MakeFromV3Scalar( &result->col1, &mat.col1, 0.0f );
-    vmathV4MakeFromV3Scalar( &result->col2, &mat.col2, 0.0f );
-    vmathV4MakeFromV3Scalar( &result->col3, translateVec, 1.0f );
-}
-
-static inline void vmathM4SetCol0( VmathMatrix4 *result, const VmathVector4 *_col0 )
-{
-    vmathV4Copy( &result->col0, _col0 );
-}
-
-static inline void vmathM4SetCol1( VmathMatrix4 *result, const VmathVector4 *_col1 )
-{
-    vmathV4Copy( &result->col1, _col1 );
-}
-
-static inline void vmathM4SetCol2( VmathMatrix4 *result, const VmathVector4 *_col2 )
-{
-    vmathV4Copy( &result->col2, _col2 );
-}
-
-static inline void vmathM4SetCol3( VmathMatrix4 *result, const VmathVector4 *_col3 )
-{
-    vmathV4Copy( &result->col3, _col3 );
-}
-
-static inline void vmathM4SetCol( VmathMatrix4 *result, int col, const VmathVector4 *vec )
-{
-    vmathV4Copy( (&result->col0 + col), vec );
-}
-
-static inline void vmathM4SetRow( VmathMatrix4 *result, int row, const VmathVector4 *vec )
-{
-    vmathV4SetElem( &result->col0, row, vmathV4GetElem( vec, 0 ) );
-    vmathV4SetElem( &result->col1, row, vmathV4GetElem( vec, 1 ) );
-    vmathV4SetElem( &result->col2, row, vmathV4GetElem( vec, 2 ) );
-    vmathV4SetElem( &result->col3, row, vmathV4GetElem( vec, 3 ) );
-}
-
-static inline void vmathM4SetElem( VmathMatrix4 *result, int col, int row, float val )
-{
-    VmathVector4 tmpV3_0;
-    vmathM4GetCol( &tmpV3_0, result, col );
-    vmathV4SetElem( &tmpV3_0, row, val );
-    vmathM4SetCol( result, col, &tmpV3_0 );
-}
-
-static inline float vmathM4GetElem( const VmathMatrix4 *mat, int col, int row )
-{
-    VmathVector4 tmpV4_0;
-    vmathM4GetCol( &tmpV4_0, mat, col );
-    return vmathV4GetElem( &tmpV4_0, row );
-}
-
-static inline void vmathM4GetCol0( VmathVector4 *result, const VmathMatrix4 *mat )
-{
-    vmathV4Copy( result, &mat->col0 );
-}
-
-static inline void vmathM4GetCol1( VmathVector4 *result, const VmathMatrix4 *mat )
-{
-    vmathV4Copy( result, &mat->col1 );
-}
-
-static inline void vmathM4GetCol2( VmathVector4 *result, const VmathMatrix4 *mat )
-{
-    vmathV4Copy( result, &mat->col2 );
-}
-
-static inline void vmathM4GetCol3( VmathVector4 *result, const VmathMatrix4 *mat )
-{
-    vmathV4Copy( result, &mat->col3 );
-}
-
-static inline void vmathM4GetCol( VmathVector4 *result, const VmathMatrix4 *mat, int col )
-{
-    vmathV4Copy( result, (&mat->col0 + col) );
-}
-
-static inline void vmathM4GetRow( VmathVector4 *result, const VmathMatrix4 *mat, int row )
-{
-    vmathV4MakeFromElems( result, vmathV4GetElem( &mat->col0, row ), vmathV4GetElem( &mat->col1, row ), vmathV4GetElem( &mat->col2, row ), vmathV4GetElem( &mat->col3, row ) );
-}
-
-static inline void vmathM4Transpose( VmathMatrix4 *result, const VmathMatrix4 *mat )
-{
-    VmathMatrix4 tmpResult;
-    vmathV4MakeFromElems( &tmpResult.col0, mat->col0.x, mat->col1.x, mat->col2.x, mat->col3.x );
-    vmathV4MakeFromElems( &tmpResult.col1, mat->col0.y, mat->col1.y, mat->col2.y, mat->col3.y );
-    vmathV4MakeFromElems( &tmpResult.col2, mat->col0.z, mat->col1.z, mat->col2.z, mat->col3.z );
-    vmathV4MakeFromElems( &tmpResult.col3, mat->col0.w, mat->col1.w, mat->col2.w, mat->col3.w );
-    vmathM4Copy( result, &tmpResult );
-}
-
-static inline void vmathM4Inverse( VmathMatrix4 *result, const VmathMatrix4 *mat )
-{
-    VmathVector4 res0, res1, res2, res3;
-    float mA, mB, mC, mD, mE, mF, mG, mH, mI, mJ, mK, mL, mM, mN, mO, mP, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, detInv;
-    mA = mat->col0.x;
-    mB = mat->col0.y;
-    mC = mat->col0.z;
-    mD = mat->col0.w;
-    mE = mat->col1.x;
-    mF = mat->col1.y;
-    mG = mat->col1.z;
-    mH = mat->col1.w;
-    mI = mat->col2.x;
-    mJ = mat->col2.y;
-    mK = mat->col2.z;
-    mL = mat->col2.w;
-    mM = mat->col3.x;
-    mN = mat->col3.y;
-    mO = mat->col3.z;
-    mP = mat->col3.w;
-    tmp0 = ( ( mK * mD ) - ( mC * mL ) );
-    tmp1 = ( ( mO * mH ) - ( mG * mP ) );
-    tmp2 = ( ( mB * mK ) - ( mJ * mC ) );
-    tmp3 = ( ( mF * mO ) - ( mN * mG ) );
-    tmp4 = ( ( mJ * mD ) - ( mB * mL ) );
-    tmp5 = ( ( mN * mH ) - ( mF * mP ) );
-    vmathV4SetX( &res0, ( ( ( mJ * tmp1 ) - ( mL * tmp3 ) ) - ( mK * tmp5 ) ) );
-    vmathV4SetY( &res0, ( ( ( mN * tmp0 ) - ( mP * tmp2 ) ) - ( mO * tmp4 ) ) );
-    vmathV4SetZ( &res0, ( ( ( mD * tmp3 ) + ( mC * tmp5 ) ) - ( mB * tmp1 ) ) );
-    vmathV4SetW( &res0, ( ( ( mH * tmp2 ) + ( mG * tmp4 ) ) - ( mF * tmp0 ) ) );
-    detInv = ( 1.0f / ( ( ( ( mA * res0.x ) + ( mE * res0.y ) ) + ( mI * res0.z ) ) + ( mM * res0.w ) ) );
-    vmathV4SetX( &res1, ( mI * tmp1 ) );
-    vmathV4SetY( &res1, ( mM * tmp0 ) );
-    vmathV4SetZ( &res1, ( mA * tmp1 ) );
-    vmathV4SetW( &res1, ( mE * tmp0 ) );
-    vmathV4SetX( &res3, ( mI * tmp3 ) );
-    vmathV4SetY( &res3, ( mM * tmp2 ) );
-    vmathV4SetZ( &res3, ( mA * tmp3 ) );
-    vmathV4SetW( &res3, ( mE * tmp2 ) );
-    vmathV4SetX( &res2, ( mI * tmp5 ) );
-    vmathV4SetY( &res2, ( mM * tmp4 ) );
-    vmathV4SetZ( &res2, ( mA * tmp5 ) );
-    vmathV4SetW( &res2, ( mE * tmp4 ) );
-    tmp0 = ( ( mI * mB ) - ( mA * mJ ) );
-    tmp1 = ( ( mM * mF ) - ( mE * mN ) );
-    tmp2 = ( ( mI * mD ) - ( mA * mL ) );
-    tmp3 = ( ( mM * mH ) - ( mE * mP ) );
-    tmp4 = ( ( mI * mC ) - ( mA * mK ) );
-    tmp5 = ( ( mM * mG ) - ( mE * mO ) );
-    vmathV4SetX( &res2, ( ( ( mL * tmp1 ) - ( mJ * tmp3 ) ) + res2.x ) );
-    vmathV4SetY( &res2, ( ( ( mP * tmp0 ) - ( mN * tmp2 ) ) + res2.y ) );
-    vmathV4SetZ( &res2, ( ( ( mB * tmp3 ) - ( mD * tmp1 ) ) - res2.z ) );
-    vmathV4SetW( &res2, ( ( ( mF * tmp2 ) - ( mH * tmp0 ) ) - res2.w ) );
-    vmathV4SetX( &res3, ( ( ( mJ * tmp5 ) - ( mK * tmp1 ) ) + res3.x ) );
-    vmathV4SetY( &res3, ( ( ( mN * tmp4 ) - ( mO * tmp0 ) ) + res3.y ) );
-    vmathV4SetZ( &res3, ( ( ( mC * tmp1 ) - ( mB * tmp5 ) ) - res3.z ) );
-    vmathV4SetW( &res3, ( ( ( mG * tmp0 ) - ( mF * tmp4 ) ) - res3.w ) );
-    vmathV4SetX( &res1, ( ( ( mK * tmp3 ) - ( mL * tmp5 ) ) - res1.x ) );
-    vmathV4SetY( &res1, ( ( ( mO * tmp2 ) - ( mP * tmp4 ) ) - res1.y ) );
-    vmathV4SetZ( &res1, ( ( ( mD * tmp5 ) - ( mC * tmp3 ) ) + res1.z ) );
-    vmathV4SetW( &res1, ( ( ( mH * tmp4 ) - ( mG * tmp2 ) ) + res1.w ) );
-    vmathV4ScalarMul( &result->col0, &res0, detInv );
-    vmathV4ScalarMul( &result->col1, &res1, detInv );
-    vmathV4ScalarMul( &result->col2, &res2, detInv );
-    vmathV4ScalarMul( &result->col3, &res3, detInv );
-}
-
-static inline void vmathM4AffineInverse( VmathMatrix4 *result, const VmathMatrix4 *mat )
-{
-    VmathTransform3 affineMat, tmpT3_0;
-    VmathVector3 tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3;
-    vmathV4GetXYZ( &tmpV3_0, &mat->col0 );
-    vmathT3SetCol0( &affineMat, &tmpV3_0 );
-    vmathV4GetXYZ( &tmpV3_1, &mat->col1 );
-    vmathT3SetCol1( &affineMat, &tmpV3_1 );
-    vmathV4GetXYZ( &tmpV3_2, &mat->col2 );
-    vmathT3SetCol2( &affineMat, &tmpV3_2 );
-    vmathV4GetXYZ( &tmpV3_3, &mat->col3 );
-    vmathT3SetCol3( &affineMat, &tmpV3_3 );
-    vmathT3Inverse( &tmpT3_0, &affineMat );
-    vmathM4MakeFromT3( result, &tmpT3_0 );
-}
-
-static inline void vmathM4OrthoInverse( VmathMatrix4 *result, const VmathMatrix4 *mat )
-{
-    VmathTransform3 affineMat, tmpT3_0;
-    VmathVector3 tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3;
-    vmathV4GetXYZ( &tmpV3_0, &mat->col0 );
-    vmathT3SetCol0( &affineMat, &tmpV3_0 );
-    vmathV4GetXYZ( &tmpV3_1, &mat->col1 );
-    vmathT3SetCol1( &affineMat, &tmpV3_1 );
-    vmathV4GetXYZ( &tmpV3_2, &mat->col2 );
-    vmathT3SetCol2( &affineMat, &tmpV3_2 );
-    vmathV4GetXYZ( &tmpV3_3, &mat->col3 );
-    vmathT3SetCol3( &affineMat, &tmpV3_3 );
-    vmathT3OrthoInverse( &tmpT3_0, &affineMat );
-    vmathM4MakeFromT3( result, &tmpT3_0 );
-}
-
-static inline float vmathM4Determinant( const VmathMatrix4 *mat )
-{
-    float dx, dy, dz, dw, mA, mB, mC, mD, mE, mF, mG, mH, mI, mJ, mK, mL, mM, mN, mO, mP, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
-    mA = mat->col0.x;
-    mB = mat->col0.y;
-    mC = mat->col0.z;
-    mD = mat->col0.w;
-    mE = mat->col1.x;
-    mF = mat->col1.y;
-    mG = mat->col1.z;
-    mH = mat->col1.w;
-    mI = mat->col2.x;
-    mJ = mat->col2.y;
-    mK = mat->col2.z;
-    mL = mat->col2.w;
-    mM = mat->col3.x;
-    mN = mat->col3.y;
-    mO = mat->col3.z;
-    mP = mat->col3.w;
-    tmp0 = ( ( mK * mD ) - ( mC * mL ) );
-    tmp1 = ( ( mO * mH ) - ( mG * mP ) );
-    tmp2 = ( ( mB * mK ) - ( mJ * mC ) );
-    tmp3 = ( ( mF * mO ) - ( mN * mG ) );
-    tmp4 = ( ( mJ * mD ) - ( mB * mL ) );
-    tmp5 = ( ( mN * mH ) - ( mF * mP ) );
-    dx = ( ( ( mJ * tmp1 ) - ( mL * tmp3 ) ) - ( mK * tmp5 ) );
-    dy = ( ( ( mN * tmp0 ) - ( mP * tmp2 ) ) - ( mO * tmp4 ) );
-    dz = ( ( ( mD * tmp3 ) + ( mC * tmp5 ) ) - ( mB * tmp1 ) );
-    dw = ( ( ( mH * tmp2 ) + ( mG * tmp4 ) ) - ( mF * tmp0 ) );
-    return ( ( ( ( mA * dx ) + ( mE * dy ) ) + ( mI * dz ) ) + ( mM * dw ) );
-}
-
-static inline void vmathM4Add( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 )
-{
-    vmathV4Add( &result->col0, &mat0->col0, &mat1->col0 );
-    vmathV4Add( &result->col1, &mat0->col1, &mat1->col1 );
-    vmathV4Add( &result->col2, &mat0->col2, &mat1->col2 );
-    vmathV4Add( &result->col3, &mat0->col3, &mat1->col3 );
-}
-
-static inline void vmathM4Sub( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 )
-{
-    vmathV4Sub( &result->col0, &mat0->col0, &mat1->col0 );
-    vmathV4Sub( &result->col1, &mat0->col1, &mat1->col1 );
-    vmathV4Sub( &result->col2, &mat0->col2, &mat1->col2 );
-    vmathV4Sub( &result->col3, &mat0->col3, &mat1->col3 );
-}
-
-static inline void vmathM4Neg( VmathMatrix4 *result, const VmathMatrix4 *mat )
-{
-    vmathV4Neg( &result->col0, &mat->col0 );
-    vmathV4Neg( &result->col1, &mat->col1 );
-    vmathV4Neg( &result->col2, &mat->col2 );
-    vmathV4Neg( &result->col3, &mat->col3 );
-}
-
-static inline void vmathM4AbsPerElem( VmathMatrix4 *result, const VmathMatrix4 *mat )
-{
-    vmathV4AbsPerElem( &result->col0, &mat->col0 );
-    vmathV4AbsPerElem( &result->col1, &mat->col1 );
-    vmathV4AbsPerElem( &result->col2, &mat->col2 );
-    vmathV4AbsPerElem( &result->col3, &mat->col3 );
-}
-
-static inline void vmathM4ScalarMul( VmathMatrix4 *result, const VmathMatrix4 *mat, float scalar )
-{
-    vmathV4ScalarMul( &result->col0, &mat->col0, scalar );
-    vmathV4ScalarMul( &result->col1, &mat->col1, scalar );
-    vmathV4ScalarMul( &result->col2, &mat->col2, scalar );
-    vmathV4ScalarMul( &result->col3, &mat->col3, scalar );
-}
-
-static inline void vmathM4MulV4( VmathVector4 *result, const VmathMatrix4 *mat, const VmathVector4 *vec )
-{
-    float tmpX, tmpY, tmpZ, tmpW;
-    tmpX = ( ( ( ( mat->col0.x * vec->x ) + ( mat->col1.x * vec->y ) ) + ( mat->col2.x * vec->z ) ) + ( mat->col3.x * vec->w ) );
-    tmpY = ( ( ( ( mat->col0.y * vec->x ) + ( mat->col1.y * vec->y ) ) + ( mat->col2.y * vec->z ) ) + ( mat->col3.y * vec->w ) );
-    tmpZ = ( ( ( ( mat->col0.z * vec->x ) + ( mat->col1.z * vec->y ) ) + ( mat->col2.z * vec->z ) ) + ( mat->col3.z * vec->w ) );
-    tmpW = ( ( ( ( mat->col0.w * vec->x ) + ( mat->col1.w * vec->y ) ) + ( mat->col2.w * vec->z ) ) + ( mat->col3.w * vec->w ) );
-    vmathV4MakeFromElems( result, tmpX, tmpY, tmpZ, tmpW );
-}
-
-static inline void vmathM4MulV3( VmathVector4 *result, const VmathMatrix4 *mat, const VmathVector3 *vec )
-{
-    result->x = ( ( ( mat->col0.x * vec->x ) + ( mat->col1.x * vec->y ) ) + ( mat->col2.x * vec->z ) );
-    result->y = ( ( ( mat->col0.y * vec->x ) + ( mat->col1.y * vec->y ) ) + ( mat->col2.y * vec->z ) );
-    result->z = ( ( ( mat->col0.z * vec->x ) + ( mat->col1.z * vec->y ) ) + ( mat->col2.z * vec->z ) );
-    result->w = ( ( ( mat->col0.w * vec->x ) + ( mat->col1.w * vec->y ) ) + ( mat->col2.w * vec->z ) );
-}
-
-static inline void vmathM4MulP3( VmathVector4 *result, const VmathMatrix4 *mat, const VmathPoint3 *pnt )
-{
-    result->x = ( ( ( ( mat->col0.x * pnt->x ) + ( mat->col1.x * pnt->y ) ) + ( mat->col2.x * pnt->z ) ) + mat->col3.x );
-    result->y = ( ( ( ( mat->col0.y * pnt->x ) + ( mat->col1.y * pnt->y ) ) + ( mat->col2.y * pnt->z ) ) + mat->col3.y );
-    result->z = ( ( ( ( mat->col0.z * pnt->x ) + ( mat->col1.z * pnt->y ) ) + ( mat->col2.z * pnt->z ) ) + mat->col3.z );
-    result->w = ( ( ( ( mat->col0.w * pnt->x ) + ( mat->col1.w * pnt->y ) ) + ( mat->col2.w * pnt->z ) ) + mat->col3.w );
-}
-
-static inline void vmathM4Mul( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 )
-{
-    VmathMatrix4 tmpResult;
-    vmathM4MulV4( &tmpResult.col0, mat0, &mat1->col0 );
-    vmathM4MulV4( &tmpResult.col1, mat0, &mat1->col1 );
-    vmathM4MulV4( &tmpResult.col2, mat0, &mat1->col2 );
-    vmathM4MulV4( &tmpResult.col3, mat0, &mat1->col3 );
-    vmathM4Copy( result, &tmpResult );
-}
-
-static inline void vmathM4MulT3( VmathMatrix4 *result, const VmathMatrix4 *mat, const VmathTransform3 *tfrm1 )
-{
-    VmathMatrix4 tmpResult;
-    VmathPoint3 tmpP3_0;
-    vmathM4MulV3( &tmpResult.col0, mat, &tfrm1->col0 );
-    vmathM4MulV3( &tmpResult.col1, mat, &tfrm1->col1 );
-    vmathM4MulV3( &tmpResult.col2, mat, &tfrm1->col2 );
-    vmathP3MakeFromV3( &tmpP3_0, &tfrm1->col3 );
-    vmathM4MulP3( &tmpResult.col3, mat, &tmpP3_0 );
-    vmathM4Copy( result, &tmpResult );
-}
-
-static inline void vmathM4MulPerElem( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 )
-{
-    vmathV4MulPerElem( &result->col0, &mat0->col0, &mat1->col0 );
-    vmathV4MulPerElem( &result->col1, &mat0->col1, &mat1->col1 );
-    vmathV4MulPerElem( &result->col2, &mat0->col2, &mat1->col2 );
-    vmathV4MulPerElem( &result->col3, &mat0->col3, &mat1->col3 );
-}
-
-static inline void vmathM4MakeIdentity( VmathMatrix4 *result )
-{
-    vmathV4MakeXAxis( &result->col0 );
-    vmathV4MakeYAxis( &result->col1 );
-    vmathV4MakeZAxis( &result->col2 );
-    vmathV4MakeWAxis( &result->col3 );
-}
-
-static inline void vmathM4SetUpper3x3( VmathMatrix4 *result, const VmathMatrix3 *mat3 )
-{
-    vmathV4SetXYZ( &result->col0, &mat3->col0 );
-    vmathV4SetXYZ( &result->col1, &mat3->col1 );
-    vmathV4SetXYZ( &result->col2, &mat3->col2 );
-}
-
-static inline void vmathM4GetUpper3x3( VmathMatrix3 *result, const VmathMatrix4 *mat )
-{
-    vmathV4GetXYZ( &result->col0, &mat->col0 );
-    vmathV4GetXYZ( &result->col1, &mat->col1 );
-    vmathV4GetXYZ( &result->col2, &mat->col2 );
-}
-
-static inline void vmathM4SetTranslation( VmathMatrix4 *result, const VmathVector3 *translateVec )
-{
-    vmathV4SetXYZ( &result->col3, translateVec );
-}
-
-static inline void vmathM4GetTranslation( VmathVector3 *result, const VmathMatrix4 *mat )
-{
-    vmathV4GetXYZ( result, &mat->col3 );
-}
-
-static inline void vmathM4MakeRotationX( VmathMatrix4 *result, float radians )
-{
-    float s, c;
-    s = sinf( radians );
-    c = cosf( radians );
-    vmathV4MakeXAxis( &result->col0 );
-    vmathV4MakeFromElems( &result->col1, 0.0f, c, s, 0.0f );
-    vmathV4MakeFromElems( &result->col2, 0.0f, -s, c, 0.0f );
-    vmathV4MakeWAxis( &result->col3 );
-}
-
-static inline void vmathM4MakeRotationY( VmathMatrix4 *result, float radians )
-{
-    float s, c;
-    s = sinf( radians );
-    c = cosf( radians );
-    vmathV4MakeFromElems( &result->col0, c, 0.0f, -s, 0.0f );
-    vmathV4MakeYAxis( &result->col1 );
-    vmathV4MakeFromElems( &result->col2, s, 0.0f, c, 0.0f );
-    vmathV4MakeWAxis( &result->col3 );
-}
-
-static inline void vmathM4MakeRotationZ( VmathMatrix4 *result, float radians )
-{
-    float s, c;
-    s = sinf( radians );
-    c = cosf( radians );
-    vmathV4MakeFromElems( &result->col0, c, s, 0.0f, 0.0f );
-    vmathV4MakeFromElems( &result->col1, -s, c, 0.0f, 0.0f );
-    vmathV4MakeZAxis( &result->col2 );
-    vmathV4MakeWAxis( &result->col3 );
-}
-
-static inline void vmathM4MakeRotationZYX( VmathMatrix4 *result, const VmathVector3 *radiansXYZ )
-{
-    float sX, cX, sY, cY, sZ, cZ, tmp0, tmp1;
-    sX = sinf( radiansXYZ->x );
-    cX = cosf( radiansXYZ->x );
-    sY = sinf( radiansXYZ->y );
-    cY = cosf( radiansXYZ->y );
-    sZ = sinf( radiansXYZ->z );
-    cZ = cosf( radiansXYZ->z );
-    tmp0 = ( cZ * sY );
-    tmp1 = ( sZ * sY );
-    vmathV4MakeFromElems( &result->col0, ( cZ * cY ), ( sZ * cY ), -sY, 0.0f );
-    vmathV4MakeFromElems( &result->col1, ( ( tmp0 * sX ) - ( sZ * cX ) ), ( ( tmp1 * sX ) + ( cZ * cX ) ), ( cY * sX ), 0.0f );
-    vmathV4MakeFromElems( &result->col2, ( ( tmp0 * cX ) + ( sZ * sX ) ), ( ( tmp1 * cX ) - ( cZ * sX ) ), ( cY * cX ), 0.0f );
-    vmathV4MakeWAxis( &result->col3 );
-}
-
-static inline void vmathM4MakeRotationAxis( VmathMatrix4 *result, float radians, const VmathVector3 *unitVec )
-{
-    float x, y, z, s, c, oneMinusC, xy, yz, zx;
-    s = sinf( radians );
-    c = cosf( radians );
-    x = unitVec->x;
-    y = unitVec->y;
-    z = unitVec->z;
-    xy = ( x * y );
-    yz = ( y * z );
-    zx = ( z * x );
-    oneMinusC = ( 1.0f - c );
-    vmathV4MakeFromElems( &result->col0, ( ( ( x * x ) * oneMinusC ) + c ), ( ( xy * oneMinusC ) + ( z * s ) ), ( ( zx * oneMinusC ) - ( y * s ) ), 0.0f );
-    vmathV4MakeFromElems( &result->col1, ( ( xy * oneMinusC ) - ( z * s ) ), ( ( ( y * y ) * oneMinusC ) + c ), ( ( yz * oneMinusC ) + ( x * s ) ), 0.0f );
-    vmathV4MakeFromElems( &result->col2, ( ( zx * oneMinusC ) + ( y * s ) ), ( ( yz * oneMinusC ) - ( x * s ) ), ( ( ( z * z ) * oneMinusC ) + c ), 0.0f );
-    vmathV4MakeWAxis( &result->col3 );
-}
-
-static inline void vmathM4MakeRotationQ( VmathMatrix4 *result, const VmathQuat *unitQuat )
-{
-    VmathTransform3 tmpT3_0;
-    vmathT3MakeRotationQ( &tmpT3_0, unitQuat );
-    vmathM4MakeFromT3( result, &tmpT3_0 );
-}
-
-static inline void vmathM4MakeScale( VmathMatrix4 *result, const VmathVector3 *scaleVec )
-{
-    vmathV4MakeFromElems( &result->col0, scaleVec->x, 0.0f, 0.0f, 0.0f );
-    vmathV4MakeFromElems( &result->col1, 0.0f, scaleVec->y, 0.0f, 0.0f );
-    vmathV4MakeFromElems( &result->col2, 0.0f, 0.0f, scaleVec->z, 0.0f );
-    vmathV4MakeWAxis( &result->col3 );
-}
-
-static inline void vmathM4AppendScale( VmathMatrix4 *result, const VmathMatrix4 *mat, const VmathVector3 *scaleVec )
-{
-    vmathV4ScalarMul( &result->col0, &mat->col0, vmathV3GetX( scaleVec ) );
-    vmathV4ScalarMul( &result->col1, &mat->col1, vmathV3GetY( scaleVec ) );
-    vmathV4ScalarMul( &result->col2, &mat->col2, vmathV3GetZ( scaleVec ) );
-    vmathV4Copy( &result->col3, &mat->col3 );
-}
-
-static inline void vmathM4PrependScale( VmathMatrix4 *result, const VmathVector3 *scaleVec, const VmathMatrix4 *mat )
-{
-    VmathVector4 scale4;
-    vmathV4MakeFromV3Scalar( &scale4, scaleVec, 1.0f );
-    vmathV4MulPerElem( &result->col0, &mat->col0, &scale4 );
-    vmathV4MulPerElem( &result->col1, &mat->col1, &scale4 );
-    vmathV4MulPerElem( &result->col2, &mat->col2, &scale4 );
-    vmathV4MulPerElem( &result->col3, &mat->col3, &scale4 );
-}
-
-static inline void vmathM4MakeTranslation( VmathMatrix4 *result, const VmathVector3 *translateVec )
-{
-    vmathV4MakeXAxis( &result->col0 );
-    vmathV4MakeYAxis( &result->col1 );
-    vmathV4MakeZAxis( &result->col2 );
-    vmathV4MakeFromV3Scalar( &result->col3, translateVec, 1.0f );
-}
-
-static inline void vmathM4MakeLookAt( VmathMatrix4 *result, const VmathPoint3 *eyePos, const VmathPoint3 *lookAtPos, const VmathVector3 *upVec )
-{
-    VmathMatrix4 m4EyeFrame;
-    VmathVector3 v3X, v3Y, v3Z, tmpV3_0, tmpV3_1;
-    VmathVector4 tmpV4_0, tmpV4_1, tmpV4_2, tmpV4_3;
-    vmathV3Normalize( &v3Y, upVec );
-    vmathP3Sub( &tmpV3_0, eyePos, lookAtPos );
-    vmathV3Normalize( &v3Z, &tmpV3_0 );
-    vmathV3Cross( &tmpV3_1, &v3Y, &v3Z );
-    vmathV3Normalize( &v3X, &tmpV3_1 );
-    vmathV3Cross( &v3Y, &v3Z, &v3X );
-    vmathV4MakeFromV3( &tmpV4_0, &v3X );
-    vmathV4MakeFromV3( &tmpV4_1, &v3Y );
-    vmathV4MakeFromV3( &tmpV4_2, &v3Z );
-    vmathV4MakeFromP3( &tmpV4_3, eyePos );
-    vmathM4MakeFromCols( &m4EyeFrame, &tmpV4_0, &tmpV4_1, &tmpV4_2, &tmpV4_3 );
-    vmathM4OrthoInverse( result, &m4EyeFrame );
-}
-
-static inline void vmathM4MakePerspective( VmathMatrix4 *result, float fovyRadians, float aspect, float zNear, float zFar )
-{
-    float f, rangeInv;
-    f = tanf( ( (float)( _VECTORMATH_PI_OVER_2 ) - ( 0.5f * fovyRadians ) ) );
-    rangeInv = ( 1.0f / ( zNear - zFar ) );
-    vmathV4MakeFromElems( &result->col0, ( f / aspect ), 0.0f, 0.0f, 0.0f );
-    vmathV4MakeFromElems( &result->col1, 0.0f, f, 0.0f, 0.0f );
-    vmathV4MakeFromElems( &result->col2, 0.0f, 0.0f, ( ( zNear + zFar ) * rangeInv ), -1.0f );
-    vmathV4MakeFromElems( &result->col3, 0.0f, 0.0f, ( ( ( zNear * zFar ) * rangeInv ) * 2.0f ), 0.0f );
-}
-
-static inline void vmathM4MakeFrustum( VmathMatrix4 *result, float left, float right, float bottom, float top, float zNear, float zFar )
-{
-    float sum_rl, sum_tb, sum_nf, inv_rl, inv_tb, inv_nf, n2;
-    sum_rl = ( right + left );
-    sum_tb = ( top + bottom );
-    sum_nf = ( zNear + zFar );
-    inv_rl = ( 1.0f / ( right - left ) );
-    inv_tb = ( 1.0f / ( top - bottom ) );
-    inv_nf = ( 1.0f / ( zNear - zFar ) );
-    n2 = ( zNear + zNear );
-    vmathV4MakeFromElems( &result->col0, ( n2 * inv_rl ), 0.0f, 0.0f, 0.0f );
-    vmathV4MakeFromElems( &result->col1, 0.0f, ( n2 * inv_tb ), 0.0f, 0.0f );
-    vmathV4MakeFromElems( &result->col2, ( sum_rl * inv_rl ), ( sum_tb * inv_tb ), ( sum_nf * inv_nf ), -1.0f );
-    vmathV4MakeFromElems( &result->col3, 0.0f, 0.0f, ( ( n2 * inv_nf ) * zFar ), 0.0f );
-}
-
-static inline void vmathM4MakeOrthographic( VmathMatrix4 *result, float left, float right, float bottom, float top, float zNear, float zFar )
-{
-    float sum_rl, sum_tb, sum_nf, inv_rl, inv_tb, inv_nf;
-    sum_rl = ( right + left );
-    sum_tb = ( top + bottom );
-    sum_nf = ( zNear + zFar );
-    inv_rl = ( 1.0f / ( right - left ) );
-    inv_tb = ( 1.0f / ( top - bottom ) );
-    inv_nf = ( 1.0f / ( zNear - zFar ) );
-    vmathV4MakeFromElems( &result->col0, ( inv_rl + inv_rl ), 0.0f, 0.0f, 0.0f );
-    vmathV4MakeFromElems( &result->col1, 0.0f, ( inv_tb + inv_tb ), 0.0f, 0.0f );
-    vmathV4MakeFromElems( &result->col2, 0.0f, 0.0f, ( inv_nf + inv_nf ), 0.0f );
-    vmathV4MakeFromElems( &result->col3, ( -sum_rl * inv_rl ), ( -sum_tb * inv_tb ), ( sum_nf * inv_nf ), 1.0f );
-}
-
-static inline void vmathM4Select( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1, unsigned int select1 )
-{
-    vmathV4Select( &result->col0, &mat0->col0, &mat1->col0, select1 );
-    vmathV4Select( &result->col1, &mat0->col1, &mat1->col1, select1 );
-    vmathV4Select( &result->col2, &mat0->col2, &mat1->col2, select1 );
-    vmathV4Select( &result->col3, &mat0->col3, &mat1->col3, select1 );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathM4Print( const VmathMatrix4 *mat )
-{
-    VmathVector4 tmpV4_0, tmpV4_1, tmpV4_2, tmpV4_3;
-    vmathM4GetRow( &tmpV4_0, mat, 0 );
-    vmathV4Print( &tmpV4_0 );
-    vmathM4GetRow( &tmpV4_1, mat, 1 );
-    vmathV4Print( &tmpV4_1 );
-    vmathM4GetRow( &tmpV4_2, mat, 2 );
-    vmathV4Print( &tmpV4_2 );
-    vmathM4GetRow( &tmpV4_3, mat, 3 );
-    vmathV4Print( &tmpV4_3 );
-}
-
-static inline void vmathM4Prints( const VmathMatrix4 *mat, const char *name )
-{
-    printf("%s:\n", name);
-    vmathM4Print( mat );
-}
-
-#endif
-
-static inline void vmathT3Copy( VmathTransform3 *result, const VmathTransform3 *tfrm )
-{
-    vmathV3Copy( &result->col0, &tfrm->col0 );
-    vmathV3Copy( &result->col1, &tfrm->col1 );
-    vmathV3Copy( &result->col2, &tfrm->col2 );
-    vmathV3Copy( &result->col3, &tfrm->col3 );
-}
-
-static inline void vmathT3MakeFromScalar( VmathTransform3 *result, float scalar )
-{
-    vmathV3MakeFromScalar( &result->col0, scalar );
-    vmathV3MakeFromScalar( &result->col1, scalar );
-    vmathV3MakeFromScalar( &result->col2, scalar );
-    vmathV3MakeFromScalar( &result->col3, scalar );
-}
-
-static inline void vmathT3MakeFromCols( VmathTransform3 *result, const VmathVector3 *_col0, const VmathVector3 *_col1, const VmathVector3 *_col2, const VmathVector3 *_col3 )
-{
-    vmathV3Copy( &result->col0, _col0 );
-    vmathV3Copy( &result->col1, _col1 );
-    vmathV3Copy( &result->col2, _col2 );
-    vmathV3Copy( &result->col3, _col3 );
-}
-
-static inline void vmathT3MakeFromM3V3( VmathTransform3 *result, const VmathMatrix3 *tfrm, const VmathVector3 *translateVec )
-{
-    vmathT3SetUpper3x3( result, tfrm );
-    vmathT3SetTranslation( result, translateVec );
-}
-
-static inline void vmathT3MakeFromQV3( VmathTransform3 *result, const VmathQuat *unitQuat, const VmathVector3 *translateVec )
-{
-    VmathMatrix3 tmpM3_0;
-    vmathM3MakeFromQ( &tmpM3_0, unitQuat );
-    vmathT3SetUpper3x3( result, &tmpM3_0 );
-    vmathT3SetTranslation( result, translateVec );
-}
-
-static inline void vmathT3SetCol0( VmathTransform3 *result, const VmathVector3 *_col0 )
-{
-    vmathV3Copy( &result->col0, _col0 );
-}
-
-static inline void vmathT3SetCol1( VmathTransform3 *result, const VmathVector3 *_col1 )
-{
-    vmathV3Copy( &result->col1, _col1 );
-}
-
-static inline void vmathT3SetCol2( VmathTransform3 *result, const VmathVector3 *_col2 )
-{
-    vmathV3Copy( &result->col2, _col2 );
-}
-
-static inline void vmathT3SetCol3( VmathTransform3 *result, const VmathVector3 *_col3 )
-{
-    vmathV3Copy( &result->col3, _col3 );
-}
-
-static inline void vmathT3SetCol( VmathTransform3 *result, int col, const VmathVector3 *vec )
-{
-    vmathV3Copy( (&result->col0 + col), vec );
-}
-
-static inline void vmathT3SetRow( VmathTransform3 *result, int row, const VmathVector4 *vec )
-{
-    vmathV3SetElem( &result->col0, row, vmathV4GetElem( vec, 0 ) );
-    vmathV3SetElem( &result->col1, row, vmathV4GetElem( vec, 1 ) );
-    vmathV3SetElem( &result->col2, row, vmathV4GetElem( vec, 2 ) );
-    vmathV3SetElem( &result->col3, row, vmathV4GetElem( vec, 3 ) );
-}
-
-static inline void vmathT3SetElem( VmathTransform3 *result, int col, int row, float val )
-{
-    VmathVector3 tmpV3_0;
-    vmathT3GetCol( &tmpV3_0, result, col );
-    vmathV3SetElem( &tmpV3_0, row, val );
-    vmathT3SetCol( result, col, &tmpV3_0 );
-}
-
-static inline float vmathT3GetElem( const VmathTransform3 *tfrm, int col, int row )
-{
-    VmathVector3 tmpV3_0;
-    vmathT3GetCol( &tmpV3_0, tfrm, col );
-    return vmathV3GetElem( &tmpV3_0, row );
-}
-
-static inline void vmathT3GetCol0( VmathVector3 *result, const VmathTransform3 *tfrm )
-{
-    vmathV3Copy( result, &tfrm->col0 );
-}
-
-static inline void vmathT3GetCol1( VmathVector3 *result, const VmathTransform3 *tfrm )
-{
-    vmathV3Copy( result, &tfrm->col1 );
-}
-
-static inline void vmathT3GetCol2( VmathVector3 *result, const VmathTransform3 *tfrm )
-{
-    vmathV3Copy( result, &tfrm->col2 );
-}
-
-static inline void vmathT3GetCol3( VmathVector3 *result, const VmathTransform3 *tfrm )
-{
-    vmathV3Copy( result, &tfrm->col3 );
-}
-
-static inline void vmathT3GetCol( VmathVector3 *result, const VmathTransform3 *tfrm, int col )
-{
-    vmathV3Copy( result, (&tfrm->col0 + col) );
-}
-
-static inline void vmathT3GetRow( VmathVector4 *result, const VmathTransform3 *tfrm, int row )
-{
-    vmathV4MakeFromElems( result, vmathV3GetElem( &tfrm->col0, row ), vmathV3GetElem( &tfrm->col1, row ), vmathV3GetElem( &tfrm->col2, row ), vmathV3GetElem( &tfrm->col3, row ) );
-}
-
-static inline void vmathT3Inverse( VmathTransform3 *result, const VmathTransform3 *tfrm )
-{
-    VmathVector3 tmp0, tmp1, tmp2, inv0, inv1, inv2, tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3, tmpV3_4, tmpV3_5;
-    float detinv;
-    vmathV3Cross( &tmp0, &tfrm->col1, &tfrm->col2 );
-    vmathV3Cross( &tmp1, &tfrm->col2, &tfrm->col0 );
-    vmathV3Cross( &tmp2, &tfrm->col0, &tfrm->col1 );
-    detinv = ( 1.0f / vmathV3Dot( &tfrm->col2, &tmp2 ) );
-    vmathV3MakeFromElems( &inv0, ( tmp0.x * detinv ), ( tmp1.x * detinv ), ( tmp2.x * detinv ) );
-    vmathV3MakeFromElems( &inv1, ( tmp0.y * detinv ), ( tmp1.y * detinv ), ( tmp2.y * detinv ) );
-    vmathV3MakeFromElems( &inv2, ( tmp0.z * detinv ), ( tmp1.z * detinv ), ( tmp2.z * detinv ) );
-    vmathV3Copy( &result->col0, &inv0 );
-    vmathV3Copy( &result->col1, &inv1 );
-    vmathV3Copy( &result->col2, &inv2 );
-    vmathV3ScalarMul( &tmpV3_0, &inv0, tfrm->col3.x );
-    vmathV3ScalarMul( &tmpV3_1, &inv1, tfrm->col3.y );
-    vmathV3ScalarMul( &tmpV3_2, &inv2, tfrm->col3.z );
-    vmathV3Add( &tmpV3_3, &tmpV3_1, &tmpV3_2 );
-    vmathV3Add( &tmpV3_4, &tmpV3_0, &tmpV3_3 );
-    vmathV3Neg( &tmpV3_5, &tmpV3_4 );
-    vmathV3Copy( &result->col3, &tmpV3_5 );
-}
-
-static inline void vmathT3OrthoInverse( VmathTransform3 *result, const VmathTransform3 *tfrm )
-{
-    VmathVector3 inv0, inv1, inv2, tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3, tmpV3_4, tmpV3_5;
-    vmathV3MakeFromElems( &inv0, tfrm->col0.x, tfrm->col1.x, tfrm->col2.x );
-    vmathV3MakeFromElems( &inv1, tfrm->col0.y, tfrm->col1.y, tfrm->col2.y );
-    vmathV3MakeFromElems( &inv2, tfrm->col0.z, tfrm->col1.z, tfrm->col2.z );
-    vmathV3Copy( &result->col0, &inv0 );
-    vmathV3Copy( &result->col1, &inv1 );
-    vmathV3Copy( &result->col2, &inv2 );
-    vmathV3ScalarMul( &tmpV3_0, &inv0, tfrm->col3.x );
-    vmathV3ScalarMul( &tmpV3_1, &inv1, tfrm->col3.y );
-    vmathV3ScalarMul( &tmpV3_2, &inv2, tfrm->col3.z );
-    vmathV3Add( &tmpV3_3, &tmpV3_1, &tmpV3_2 );
-    vmathV3Add( &tmpV3_4, &tmpV3_0, &tmpV3_3 );
-    vmathV3Neg( &tmpV3_5, &tmpV3_4 );
-    vmathV3Copy( &result->col3, &tmpV3_5 );
-}
-
-static inline void vmathT3AbsPerElem( VmathTransform3 *result, const VmathTransform3 *tfrm )
-{
-    vmathV3AbsPerElem( &result->col0, &tfrm->col0 );
-    vmathV3AbsPerElem( &result->col1, &tfrm->col1 );
-    vmathV3AbsPerElem( &result->col2, &tfrm->col2 );
-    vmathV3AbsPerElem( &result->col3, &tfrm->col3 );
-}
-
-static inline void vmathT3MulV3( VmathVector3 *result, const VmathTransform3 *tfrm, const VmathVector3 *vec )
-{
-    float tmpX, tmpY, tmpZ;
-    tmpX = ( ( ( tfrm->col0.x * vec->x ) + ( tfrm->col1.x * vec->y ) ) + ( tfrm->col2.x * vec->z ) );
-    tmpY = ( ( ( tfrm->col0.y * vec->x ) + ( tfrm->col1.y * vec->y ) ) + ( tfrm->col2.y * vec->z ) );
-    tmpZ = ( ( ( tfrm->col0.z * vec->x ) + ( tfrm->col1.z * vec->y ) ) + ( tfrm->col2.z * vec->z ) );
-    vmathV3MakeFromElems( result, tmpX, tmpY, tmpZ );
-}
-
-static inline void vmathT3MulP3( VmathPoint3 *result, const VmathTransform3 *tfrm, const VmathPoint3 *pnt )
-{
-    float tmpX, tmpY, tmpZ;
-    tmpX = ( ( ( ( tfrm->col0.x * pnt->x ) + ( tfrm->col1.x * pnt->y ) ) + ( tfrm->col2.x * pnt->z ) ) + tfrm->col3.x );
-    tmpY = ( ( ( ( tfrm->col0.y * pnt->x ) + ( tfrm->col1.y * pnt->y ) ) + ( tfrm->col2.y * pnt->z ) ) + tfrm->col3.y );
-    tmpZ = ( ( ( ( tfrm->col0.z * pnt->x ) + ( tfrm->col1.z * pnt->y ) ) + ( tfrm->col2.z * pnt->z ) ) + tfrm->col3.z );
-    vmathP3MakeFromElems( result, tmpX, tmpY, tmpZ );
-}
-
-static inline void vmathT3Mul( VmathTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1 )
-{
-    VmathTransform3 tmpResult;
-    VmathPoint3 tmpP3_0, tmpP3_1;
-    vmathT3MulV3( &tmpResult.col0, tfrm0, &tfrm1->col0 );
-    vmathT3MulV3( &tmpResult.col1, tfrm0, &tfrm1->col1 );
-    vmathT3MulV3( &tmpResult.col2, tfrm0, &tfrm1->col2 );
-    vmathP3MakeFromV3( &tmpP3_0, &tfrm1->col3 );
-    vmathT3MulP3( &tmpP3_1, tfrm0, &tmpP3_0 );
-    vmathV3MakeFromP3( &tmpResult.col3, &tmpP3_1 );
-    vmathT3Copy( result, &tmpResult );
-}
-
-static inline void vmathT3MulPerElem( VmathTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1 )
-{
-    vmathV3MulPerElem( &result->col0, &tfrm0->col0, &tfrm1->col0 );
-    vmathV3MulPerElem( &result->col1, &tfrm0->col1, &tfrm1->col1 );
-    vmathV3MulPerElem( &result->col2, &tfrm0->col2, &tfrm1->col2 );
-    vmathV3MulPerElem( &result->col3, &tfrm0->col3, &tfrm1->col3 );
-}
-
-static inline void vmathT3MakeIdentity( VmathTransform3 *result )
-{
-    vmathV3MakeXAxis( &result->col0 );
-    vmathV3MakeYAxis( &result->col1 );
-    vmathV3MakeZAxis( &result->col2 );
-    vmathV3MakeFromScalar( &result->col3, 0.0f );
-}
-
-static inline void vmathT3SetUpper3x3( VmathTransform3 *result, const VmathMatrix3 *tfrm )
-{
-    vmathV3Copy( &result->col0, &tfrm->col0 );
-    vmathV3Copy( &result->col1, &tfrm->col1 );
-    vmathV3Copy( &result->col2, &tfrm->col2 );
-}
-
-static inline void vmathT3GetUpper3x3( VmathMatrix3 *result, const VmathTransform3 *tfrm )
-{
-    vmathM3MakeFromCols( result, &tfrm->col0, &tfrm->col1, &tfrm->col2 );
-}
-
-static inline void vmathT3SetTranslation( VmathTransform3 *result, const VmathVector3 *translateVec )
-{
-    vmathV3Copy( &result->col3, translateVec );
-}
-
-static inline void vmathT3GetTranslation( VmathVector3 *result, const VmathTransform3 *tfrm )
-{
-    vmathV3Copy( result, &tfrm->col3 );
-}
-
-static inline void vmathT3MakeRotationX( VmathTransform3 *result, float radians )
-{
-    float s, c;
-    s = sinf( radians );
-    c = cosf( radians );
-    vmathV3MakeXAxis( &result->col0 );
-    vmathV3MakeFromElems( &result->col1, 0.0f, c, s );
-    vmathV3MakeFromElems( &result->col2, 0.0f, -s, c );
-    vmathV3MakeFromScalar( &result->col3, 0.0f );
-}
-
-static inline void vmathT3MakeRotationY( VmathTransform3 *result, float radians )
-{
-    float s, c;
-    s = sinf( radians );
-    c = cosf( radians );
-    vmathV3MakeFromElems( &result->col0, c, 0.0f, -s );
-    vmathV3MakeYAxis( &result->col1 );
-    vmathV3MakeFromElems( &result->col2, s, 0.0f, c );
-    vmathV3MakeFromScalar( &result->col3, 0.0f );
-}
-
-static inline void vmathT3MakeRotationZ( VmathTransform3 *result, float radians )
-{
-    float s, c;
-    s = sinf( radians );
-    c = cosf( radians );
-    vmathV3MakeFromElems( &result->col0, c, s, 0.0f );
-    vmathV3MakeFromElems( &result->col1, -s, c, 0.0f );
-    vmathV3MakeZAxis( &result->col2 );
-    vmathV3MakeFromScalar( &result->col3, 0.0f );
-}
-
-static inline void vmathT3MakeRotationZYX( VmathTransform3 *result, const VmathVector3 *radiansXYZ )
-{
-    float sX, cX, sY, cY, sZ, cZ, tmp0, tmp1;
-    sX = sinf( radiansXYZ->x );
-    cX = cosf( radiansXYZ->x );
-    sY = sinf( radiansXYZ->y );
-    cY = cosf( radiansXYZ->y );
-    sZ = sinf( radiansXYZ->z );
-    cZ = cosf( radiansXYZ->z );
-    tmp0 = ( cZ * sY );
-    tmp1 = ( sZ * sY );
-    vmathV3MakeFromElems( &result->col0, ( cZ * cY ), ( sZ * cY ), -sY );
-    vmathV3MakeFromElems( &result->col1, ( ( tmp0 * sX ) - ( sZ * cX ) ), ( ( tmp1 * sX ) + ( cZ * cX ) ), ( cY * sX ) );
-    vmathV3MakeFromElems( &result->col2, ( ( tmp0 * cX ) + ( sZ * sX ) ), ( ( tmp1 * cX ) - ( cZ * sX ) ), ( cY * cX ) );
-    vmathV3MakeFromScalar( &result->col3, 0.0f );
-}
-
-static inline void vmathT3MakeRotationAxis( VmathTransform3 *result, float radians, const VmathVector3 *unitVec )
-{
-    VmathMatrix3 tmpM3_0;
-    VmathVector3 tmpV3_0;
-    vmathM3MakeRotationAxis( &tmpM3_0, radians, unitVec );
-    vmathV3MakeFromScalar( &tmpV3_0, 0.0f );
-    vmathT3MakeFromM3V3( result, &tmpM3_0, &tmpV3_0 );
-}
-
-static inline void vmathT3MakeRotationQ( VmathTransform3 *result, const VmathQuat *unitQuat )
-{
-    VmathMatrix3 tmpM3_0;
-    VmathVector3 tmpV3_0;
-    vmathM3MakeFromQ( &tmpM3_0, unitQuat );
-    vmathV3MakeFromScalar( &tmpV3_0, 0.0f );
-    vmathT3MakeFromM3V3( result, &tmpM3_0, &tmpV3_0 );
-}
-
-static inline void vmathT3MakeScale( VmathTransform3 *result, const VmathVector3 *scaleVec )
-{
-    vmathV3MakeFromElems( &result->col0, scaleVec->x, 0.0f, 0.0f );
-    vmathV3MakeFromElems( &result->col1, 0.0f, scaleVec->y, 0.0f );
-    vmathV3MakeFromElems( &result->col2, 0.0f, 0.0f, scaleVec->z );
-    vmathV3MakeFromScalar( &result->col3, 0.0f );
-}
-
-static inline void vmathT3AppendScale( VmathTransform3 *result, const VmathTransform3 *tfrm, const VmathVector3 *scaleVec )
-{
-    vmathV3ScalarMul( &result->col0, &tfrm->col0, vmathV3GetX( scaleVec ) );
-    vmathV3ScalarMul( &result->col1, &tfrm->col1, vmathV3GetY( scaleVec ) );
-    vmathV3ScalarMul( &result->col2, &tfrm->col2, vmathV3GetZ( scaleVec ) );
-    vmathV3Copy( &result->col3, &tfrm->col3 );
-}
-
-static inline void vmathT3PrependScale( VmathTransform3 *result, const VmathVector3 *scaleVec, const VmathTransform3 *tfrm )
-{
-    vmathV3MulPerElem( &result->col0, &tfrm->col0, scaleVec );
-    vmathV3MulPerElem( &result->col1, &tfrm->col1, scaleVec );
-    vmathV3MulPerElem( &result->col2, &tfrm->col2, scaleVec );
-    vmathV3MulPerElem( &result->col3, &tfrm->col3, scaleVec );
-}
-
-static inline void vmathT3MakeTranslation( VmathTransform3 *result, const VmathVector3 *translateVec )
-{
-    vmathV3MakeXAxis( &result->col0 );
-    vmathV3MakeYAxis( &result->col1 );
-    vmathV3MakeZAxis( &result->col2 );
-    vmathV3Copy( &result->col3, translateVec );
-}
-
-static inline void vmathT3Select( VmathTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1, unsigned int select1 )
-{
-    vmathV3Select( &result->col0, &tfrm0->col0, &tfrm1->col0, select1 );
-    vmathV3Select( &result->col1, &tfrm0->col1, &tfrm1->col1, select1 );
-    vmathV3Select( &result->col2, &tfrm0->col2, &tfrm1->col2, select1 );
-    vmathV3Select( &result->col3, &tfrm0->col3, &tfrm1->col3, select1 );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathT3Print( const VmathTransform3 *tfrm )
-{
-    VmathVector4 tmpV4_0, tmpV4_1, tmpV4_2;
-    vmathT3GetRow( &tmpV4_0, tfrm, 0 );
-    vmathV4Print( &tmpV4_0 );
-    vmathT3GetRow( &tmpV4_1, tfrm, 1 );
-    vmathV4Print( &tmpV4_1 );
-    vmathT3GetRow( &tmpV4_2, tfrm, 2 );
-    vmathV4Print( &tmpV4_2 );
-}
-
-static inline void vmathT3Prints( const VmathTransform3 *tfrm, const char *name )
-{
-    printf("%s:\n", name);
-    vmathT3Print( tfrm );
-}
-
-#endif
-
-static inline void vmathQMakeFromM3( VmathQuat *result, const VmathMatrix3 *tfrm )
-{
-    float trace, radicand, scale, xx, yx, zx, xy, yy, zy, xz, yz, zz, tmpx, tmpy, tmpz, tmpw, qx, qy, qz, qw;
-    int negTrace, ZgtX, ZgtY, YgtX;
-    int largestXorY, largestYorZ, largestZorX;
-
-    xx = tfrm->col0.x;
-    yx = tfrm->col0.y;
-    zx = tfrm->col0.z;
-    xy = tfrm->col1.x;
-    yy = tfrm->col1.y;
-    zy = tfrm->col1.z;
-    xz = tfrm->col2.x;
-    yz = tfrm->col2.y;
-    zz = tfrm->col2.z;
-
-    trace = ( ( xx + yy ) + zz );
-
-    negTrace = ( trace < 0.0f );
-    ZgtX = zz > xx;
-    ZgtY = zz > yy;
-    YgtX = yy > xx;
-    largestXorY = ( !ZgtX || !ZgtY ) && negTrace;
-    largestYorZ = ( YgtX || ZgtX ) && negTrace;
-    largestZorX = ( ZgtY || !YgtX ) && negTrace;
-    
-    if ( largestXorY )
-    {
-        zz = -zz;
-        xy = -xy;
-    }
-    if ( largestYorZ )
-    {
-        xx = -xx;
-        yz = -yz;
-    }
-    if ( largestZorX )
-    {
-        yy = -yy;
-        zx = -zx;
-    }
-
-    radicand = ( ( ( xx + yy ) + zz ) + 1.0f );
-    scale = ( 0.5f * ( 1.0f / sqrtf( radicand ) ) );
-
-    tmpx = ( ( zy - yz ) * scale );
-    tmpy = ( ( xz - zx ) * scale );
-    tmpz = ( ( yx - xy ) * scale );
-    tmpw = ( radicand * scale );
-    qx = tmpx;
-    qy = tmpy;
-    qz = tmpz;
-    qw = tmpw;
-
-    if ( largestXorY )
-    {
-        qx = tmpw;
-        qy = tmpz;
-        qz = tmpy;
-        qw = tmpx;
-    }
-    if ( largestYorZ )
-    {
-        tmpx = qx;
-        tmpz = qz;
-        qx = qy;
-        qy = tmpx;
-        qz = qw;
-        qw = tmpz;
-    }
-
-    result->x = qx;
-    result->y = qy;
-    result->z = qz;
-    result->w = qw;
-}
-
-static inline void vmathV3Outer( VmathMatrix3 *result, const VmathVector3 *tfrm0, const VmathVector3 *tfrm1 )
-{
-    vmathV3ScalarMul( &result->col0, tfrm0, vmathV3GetX( tfrm1 ) );
-    vmathV3ScalarMul( &result->col1, tfrm0, vmathV3GetY( tfrm1 ) );
-    vmathV3ScalarMul( &result->col2, tfrm0, vmathV3GetZ( tfrm1 ) );
-}
-
-static inline void vmathV4Outer( VmathMatrix4 *result, const VmathVector4 *tfrm0, const VmathVector4 *tfrm1 )
-{
-    vmathV4ScalarMul( &result->col0, tfrm0, vmathV4GetX( tfrm1 ) );
-    vmathV4ScalarMul( &result->col1, tfrm0, vmathV4GetY( tfrm1 ) );
-    vmathV4ScalarMul( &result->col2, tfrm0, vmathV4GetZ( tfrm1 ) );
-    vmathV4ScalarMul( &result->col3, tfrm0, vmathV4GetW( tfrm1 ) );
-}
-
-static inline void vmathV3RowMul( VmathVector3 *result, const VmathVector3 *vec, const VmathMatrix3 *mat )
-{
-    float tmpX, tmpY, tmpZ;
-    tmpX = ( ( ( vec->x * mat->col0.x ) + ( vec->y * mat->col0.y ) ) + ( vec->z * mat->col0.z ) );
-    tmpY = ( ( ( vec->x * mat->col1.x ) + ( vec->y * mat->col1.y ) ) + ( vec->z * mat->col1.z ) );
-    tmpZ = ( ( ( vec->x * mat->col2.x ) + ( vec->y * mat->col2.y ) ) + ( vec->z * mat->col2.z ) );
-    vmathV3MakeFromElems( result, tmpX, tmpY, tmpZ );
-}
-
-static inline void vmathV3CrossMatrix( VmathMatrix3 *result, const VmathVector3 *vec )
-{
-    vmathV3MakeFromElems( &result->col0, 0.0f, vec->z, -vec->y );
-    vmathV3MakeFromElems( &result->col1, -vec->z, 0.0f, vec->x );
-    vmathV3MakeFromElems( &result->col2, vec->y, -vec->x, 0.0f );
-}
-
-static inline void vmathV3CrossMatrixMul( VmathMatrix3 *result, const VmathVector3 *vec, const VmathMatrix3 *mat )
-{
-    VmathVector3 tmpV3_0, tmpV3_1, tmpV3_2;
-    vmathV3Cross( &tmpV3_0, vec, &mat->col0 );
-    vmathV3Cross( &tmpV3_1, vec, &mat->col1 );
-    vmathV3Cross( &tmpV3_2, vec, &mat->col2 );
-    vmathM3MakeFromCols( result, &tmpV3_0, &tmpV3_1, &tmpV3_2 );
-}
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_MAT_AOS_C_H
+#define _VECTORMATH_MAT_AOS_C_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*-----------------------------------------------------------------------------
+ * Constants
+ */
+#define _VECTORMATH_PI_OVER_2 1.570796327f
+
+/*-----------------------------------------------------------------------------
+ * Definitions
+ */
+static inline void vmathM3Copy( VmathMatrix3 *result, const VmathMatrix3 *mat )
+{
+    vmathV3Copy( &result->col0, &mat->col0 );
+    vmathV3Copy( &result->col1, &mat->col1 );
+    vmathV3Copy( &result->col2, &mat->col2 );
+}
+
+static inline void vmathM3MakeFromScalar( VmathMatrix3 *result, float scalar )
+{
+    vmathV3MakeFromScalar( &result->col0, scalar );
+    vmathV3MakeFromScalar( &result->col1, scalar );
+    vmathV3MakeFromScalar( &result->col2, scalar );
+}
+
+static inline void vmathM3MakeFromQ( VmathMatrix3 *result, const VmathQuat *unitQuat )
+{
+    float qx, qy, qz, qw, qx2, qy2, qz2, qxqx2, qyqy2, qzqz2, qxqy2, qyqz2, qzqw2, qxqz2, qyqw2, qxqw2;
+    qx = unitQuat->x;
+    qy = unitQuat->y;
+    qz = unitQuat->z;
+    qw = unitQuat->w;
+    qx2 = ( qx + qx );
+    qy2 = ( qy + qy );
+    qz2 = ( qz + qz );
+    qxqx2 = ( qx * qx2 );
+    qxqy2 = ( qx * qy2 );
+    qxqz2 = ( qx * qz2 );
+    qxqw2 = ( qw * qx2 );
+    qyqy2 = ( qy * qy2 );
+    qyqz2 = ( qy * qz2 );
+    qyqw2 = ( qw * qy2 );
+    qzqz2 = ( qz * qz2 );
+    qzqw2 = ( qw * qz2 );
+    vmathV3MakeFromElems( &result->col0, ( ( 1.0f - qyqy2 ) - qzqz2 ), ( qxqy2 + qzqw2 ), ( qxqz2 - qyqw2 ) );
+    vmathV3MakeFromElems( &result->col1, ( qxqy2 - qzqw2 ), ( ( 1.0f - qxqx2 ) - qzqz2 ), ( qyqz2 + qxqw2 ) );
+    vmathV3MakeFromElems( &result->col2, ( qxqz2 + qyqw2 ), ( qyqz2 - qxqw2 ), ( ( 1.0f - qxqx2 ) - qyqy2 ) );
+}
+
+static inline void vmathM3MakeFromCols( VmathMatrix3 *result, const VmathVector3 *_col0, const VmathVector3 *_col1, const VmathVector3 *_col2 )
+{
+    vmathV3Copy( &result->col0, _col0 );
+    vmathV3Copy( &result->col1, _col1 );
+    vmathV3Copy( &result->col2, _col2 );
+}
+
+static inline void vmathM3SetCol0( VmathMatrix3 *result, const VmathVector3 *_col0 )
+{
+    vmathV3Copy( &result->col0, _col0 );
+}
+
+static inline void vmathM3SetCol1( VmathMatrix3 *result, const VmathVector3 *_col1 )
+{
+    vmathV3Copy( &result->col1, _col1 );
+}
+
+static inline void vmathM3SetCol2( VmathMatrix3 *result, const VmathVector3 *_col2 )
+{
+    vmathV3Copy( &result->col2, _col2 );
+}
+
+static inline void vmathM3SetCol( VmathMatrix3 *result, int col, const VmathVector3 *vec )
+{
+    vmathV3Copy( (&result->col0 + col), vec );
+}
+
+static inline void vmathM3SetRow( VmathMatrix3 *result, int row, const VmathVector3 *vec )
+{
+    vmathV3SetElem( &result->col0, row, vmathV3GetElem( vec, 0 ) );
+    vmathV3SetElem( &result->col1, row, vmathV3GetElem( vec, 1 ) );
+    vmathV3SetElem( &result->col2, row, vmathV3GetElem( vec, 2 ) );
+}
+
+static inline void vmathM3SetElem( VmathMatrix3 *result, int col, int row, float val )
+{
+    VmathVector3 tmpV3_0;
+    vmathM3GetCol( &tmpV3_0, result, col );
+    vmathV3SetElem( &tmpV3_0, row, val );
+    vmathM3SetCol( result, col, &tmpV3_0 );
+}
+
+static inline float vmathM3GetElem( const VmathMatrix3 *mat, int col, int row )
+{
+    VmathVector3 tmpV3_0;
+    vmathM3GetCol( &tmpV3_0, mat, col );
+    return vmathV3GetElem( &tmpV3_0, row );
+}
+
+static inline void vmathM3GetCol0( VmathVector3 *result, const VmathMatrix3 *mat )
+{
+    vmathV3Copy( result, &mat->col0 );
+}
+
+static inline void vmathM3GetCol1( VmathVector3 *result, const VmathMatrix3 *mat )
+{
+    vmathV3Copy( result, &mat->col1 );
+}
+
+static inline void vmathM3GetCol2( VmathVector3 *result, const VmathMatrix3 *mat )
+{
+    vmathV3Copy( result, &mat->col2 );
+}
+
+static inline void vmathM3GetCol( VmathVector3 *result, const VmathMatrix3 *mat, int col )
+{
+    vmathV3Copy( result, (&mat->col0 + col) );
+}
+
+static inline void vmathM3GetRow( VmathVector3 *result, const VmathMatrix3 *mat, int row )
+{
+    vmathV3MakeFromElems( result, vmathV3GetElem( &mat->col0, row ), vmathV3GetElem( &mat->col1, row ), vmathV3GetElem( &mat->col2, row ) );
+}
+
+/*
+ * Transpose a 3x3 matrix. The transposed columns are assembled in a local
+ * matrix and copied out at the end, so `result` may be the same as `mat`.
+ */
+static inline void vmathM3Transpose( VmathMatrix3 *result, const VmathMatrix3 *mat )
+{
+    VmathMatrix3 transposed;
+    const VmathVector3 *c0 = &mat->col0;
+    const VmathVector3 *c1 = &mat->col1;
+    const VmathVector3 *c2 = &mat->col2;
+    vmathV3MakeFromElems( &transposed.col0, c0->x, c1->x, c2->x );
+    vmathV3MakeFromElems( &transposed.col1, c0->y, c1->y, c2->y );
+    vmathV3MakeFromElems( &transposed.col2, c0->z, c1->z, c2->z );
+    vmathM3Copy( result, &transposed );
+}
+
+/*
+ * Invert a 3x3 matrix from the pairwise cross products of its columns,
+ * scaled by the reciprocal of dot( col2, cross( col0, col1 ) ).
+ * The divisor is not checked for zero. Every read of `mat` happens before
+ * the first write to `result`.
+ */
+static inline void vmathM3Inverse( VmathMatrix3 *result, const VmathMatrix3 *mat )
+{
+    VmathVector3 cross12, cross20, cross01;
+    float invDet;
+    vmathV3Cross( &cross12, &mat->col1, &mat->col2 );
+    vmathV3Cross( &cross20, &mat->col2, &mat->col0 );
+    vmathV3Cross( &cross01, &mat->col0, &mat->col1 );
+    invDet = ( 1.0f / vmathV3Dot( &mat->col2, &cross01 ) );
+    vmathV3MakeFromElems( &result->col0, ( cross12.x * invDet ), ( cross20.x * invDet ), ( cross01.x * invDet ) );
+    vmathV3MakeFromElems( &result->col1, ( cross12.y * invDet ), ( cross20.y * invDet ), ( cross01.y * invDet ) );
+    vmathV3MakeFromElems( &result->col2, ( cross12.z * invDet ), ( cross20.z * invDet ), ( cross01.z * invDet ) );
+}
+
+/* Return dot( col2, cross( col0, col1 ) ) for the given 3x3 matrix. */
+static inline float vmathM3Determinant( const VmathMatrix3 *mat )
+{
+    VmathVector3 cross01;
+    vmathV3Cross( &cross01, &mat->col0, &mat->col1 );
+    return vmathV3Dot( &mat->col2, &cross01 );
+}
+
+/* Apply vmathV3Add to each pair of corresponding columns of mat0 and mat1. */
+static inline void vmathM3Add( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 )
+{
+    const VmathMatrix3 *lhs = mat0;
+    const VmathMatrix3 *rhs = mat1;
+    vmathV3Add( &result->col0, &lhs->col0, &rhs->col0 );
+    vmathV3Add( &result->col1, &lhs->col1, &rhs->col1 );
+    vmathV3Add( &result->col2, &lhs->col2, &rhs->col2 );
+}
+
+/* Apply vmathV3Sub to each pair of corresponding columns of mat0 and mat1. */
+static inline void vmathM3Sub( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 )
+{
+    const VmathMatrix3 *lhs = mat0;
+    const VmathMatrix3 *rhs = mat1;
+    vmathV3Sub( &result->col0, &lhs->col0, &rhs->col0 );
+    vmathV3Sub( &result->col1, &lhs->col1, &rhs->col1 );
+    vmathV3Sub( &result->col2, &lhs->col2, &rhs->col2 );
+}
+
+/* Apply vmathV3Neg to each column of `mat`, storing into `result`. */
+static inline void vmathM3Neg( VmathMatrix3 *result, const VmathMatrix3 *mat )
+{
+    const VmathMatrix3 *src = mat;
+    vmathV3Neg( &result->col0, &src->col0 );
+    vmathV3Neg( &result->col1, &src->col1 );
+    vmathV3Neg( &result->col2, &src->col2 );
+}
+
+/* Apply vmathV3AbsPerElem to each column of `mat`, storing into `result`. */
+static inline void vmathM3AbsPerElem( VmathMatrix3 *result, const VmathMatrix3 *mat )
+{
+    const VmathMatrix3 *src = mat;
+    vmathV3AbsPerElem( &result->col0, &src->col0 );
+    vmathV3AbsPerElem( &result->col1, &src->col1 );
+    vmathV3AbsPerElem( &result->col2, &src->col2 );
+}
+
+/* Apply vmathV3ScalarMul with the same `scalar` to each column of `mat`. */
+static inline void vmathM3ScalarMul( VmathMatrix3 *result, const VmathMatrix3 *mat, float scalar )
+{
+    const float factor = scalar;
+    vmathV3ScalarMul( &result->col0, &mat->col0, factor );
+    vmathV3ScalarMul( &result->col1, &mat->col1, factor );
+    vmathV3ScalarMul( &result->col2, &mat->col2, factor );
+}
+
+/*
+ * Matrix * vector: each output component is the sum, over the three
+ * columns, of that column's component times the matching element of `vec`.
+ * All inputs are read before `result` is written.
+ */
+static inline void vmathM3MulV3( VmathVector3 *result, const VmathMatrix3 *mat, const VmathVector3 *vec )
+{
+    const VmathVector3 *c0 = &mat->col0;
+    const VmathVector3 *c1 = &mat->col1;
+    const VmathVector3 *c2 = &mat->col2;
+    const float vx = vec->x;
+    const float vy = vec->y;
+    const float vz = vec->z;
+    const float outX = ( ( ( c0->x * vx ) + ( c1->x * vy ) ) + ( c2->x * vz ) );
+    const float outY = ( ( ( c0->y * vx ) + ( c1->y * vy ) ) + ( c2->y * vz ) );
+    const float outZ = ( ( ( c0->z * vx ) + ( c1->z * vy ) ) + ( c2->z * vz ) );
+    vmathV3MakeFromElems( result, outX, outY, outZ );
+}
+
+/*
+ * Matrix * matrix: each column of mat1 is transformed by mat0 with
+ * vmathM3MulV3. The product is built in a local and copied out last, so
+ * `result` may be the same as either input.
+ */
+static inline void vmathM3Mul( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 )
+{
+    VmathMatrix3 product;
+    vmathM3MulV3( &product.col0, mat0, &mat1->col0 );
+    vmathM3MulV3( &product.col1, mat0, &mat1->col1 );
+    vmathM3MulV3( &product.col2, mat0, &mat1->col2 );
+    vmathM3Copy( result, &product );
+}
+
+/* Apply vmathV3MulPerElem to each pair of corresponding columns. */
+static inline void vmathM3MulPerElem( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 )
+{
+    const VmathMatrix3 *lhs = mat0;
+    const VmathMatrix3 *rhs = mat1;
+    vmathV3MulPerElem( &result->col0, &lhs->col0, &rhs->col0 );
+    vmathV3MulPerElem( &result->col1, &lhs->col1, &rhs->col1 );
+    vmathV3MulPerElem( &result->col2, &lhs->col2, &rhs->col2 );
+}
+
+/* Fill the three columns with vmathV3MakeXAxis / YAxis / ZAxis in order. */
+static inline void vmathM3MakeIdentity( VmathMatrix3 *result )
+{
+    VmathVector3 *first = &result->col0;
+    VmathVector3 *second = &result->col1;
+    VmathVector3 *third = &result->col2;
+    vmathV3MakeXAxis( first );
+    vmathV3MakeYAxis( second );
+    vmathV3MakeZAxis( third );
+}
+
+/*
+ * Rotation about X by `radians`: col0 comes from vmathV3MakeXAxis,
+ * col1 = ( 0, cos, sin ), col2 = ( 0, -sin, cos ).
+ */
+static inline void vmathM3MakeRotationX( VmathMatrix3 *result, float radians )
+{
+    const float sinA = sinf( radians );
+    const float cosA = cosf( radians );
+    vmathV3MakeXAxis( &result->col0 );
+    vmathV3MakeFromElems( &result->col1, 0.0f, cosA, sinA );
+    vmathV3MakeFromElems( &result->col2, 0.0f, -sinA, cosA );
+}
+
+/*
+ * Rotation about Y by `radians`: col0 = ( cos, 0, -sin ), col1 comes from
+ * vmathV3MakeYAxis, col2 = ( sin, 0, cos ).
+ */
+static inline void vmathM3MakeRotationY( VmathMatrix3 *result, float radians )
+{
+    const float sinA = sinf( radians );
+    const float cosA = cosf( radians );
+    vmathV3MakeFromElems( &result->col0, cosA, 0.0f, -sinA );
+    vmathV3MakeYAxis( &result->col1 );
+    vmathV3MakeFromElems( &result->col2, sinA, 0.0f, cosA );
+}
+
+/*
+ * Rotation about Z by `radians`: col0 = ( cos, sin, 0 ),
+ * col1 = ( -sin, cos, 0 ), col2 comes from vmathV3MakeZAxis.
+ */
+static inline void vmathM3MakeRotationZ( VmathMatrix3 *result, float radians )
+{
+    const float sinA = sinf( radians );
+    const float cosA = cosf( radians );
+    vmathV3MakeFromElems( &result->col0, cosA, sinA, 0.0f );
+    vmathV3MakeFromElems( &result->col1, -sinA, cosA, 0.0f );
+    vmathV3MakeZAxis( &result->col2 );
+}
+
+/*
+ * Build a rotation matrix from the three angles in `radiansXYZ`
+ * (x, y, z fields, in radians). The two products cosZ*sinY and sinZ*sinY
+ * are shared by col1 and col2.
+ */
+static inline void vmathM3MakeRotationZYX( VmathMatrix3 *result, const VmathVector3 *radiansXYZ )
+{
+    const float sinX = sinf( radiansXYZ->x );
+    const float cosX = cosf( radiansXYZ->x );
+    const float sinY = sinf( radiansXYZ->y );
+    const float cosY = cosf( radiansXYZ->y );
+    const float sinZ = sinf( radiansXYZ->z );
+    const float cosZ = cosf( radiansXYZ->z );
+    const float cosZsinY = ( cosZ * sinY );
+    const float sinZsinY = ( sinZ * sinY );
+    vmathV3MakeFromElems( &result->col0, ( cosZ * cosY ), ( sinZ * cosY ), -sinY );
+    vmathV3MakeFromElems( &result->col1, ( ( cosZsinY * sinX ) - ( sinZ * cosX ) ), ( ( sinZsinY * sinX ) + ( cosZ * cosX ) ), ( cosY * sinX ) );
+    vmathV3MakeFromElems( &result->col2, ( ( cosZsinY * cosX ) + ( sinZ * sinX ) ), ( ( sinZsinY * cosX ) - ( cosZ * sinX ) ), ( cosY * cosX ) );
+}
+
+/*
+ * Build a rotation of `radians` about `unitVec`. The vector is used as
+ * given (the parameter name suggests a unit vector; no normalization is
+ * performed here).
+ */
+static inline void vmathM3MakeRotationAxis( VmathMatrix3 *result, float radians, const VmathVector3 *unitVec )
+{
+    const float sinA = sinf( radians );
+    const float cosA = cosf( radians );
+    const float ax = unitVec->x;
+    const float ay = unitVec->y;
+    const float az = unitVec->z;
+    const float axay = ( ax * ay );
+    const float ayaz = ( ay * az );
+    const float azax = ( az * ax );
+    const float versine = ( 1.0f - cosA );
+    vmathV3MakeFromElems( &result->col0, ( ( ( ax * ax ) * versine ) + cosA ), ( ( axay * versine ) + ( az * sinA ) ), ( ( azax * versine ) - ( ay * sinA ) ) );
+    vmathV3MakeFromElems( &result->col1, ( ( axay * versine ) - ( az * sinA ) ), ( ( ( ay * ay ) * versine ) + cosA ), ( ( ayaz * versine ) + ( ax * sinA ) ) );
+    vmathV3MakeFromElems( &result->col2, ( ( azax * versine ) + ( ay * sinA ) ), ( ( ayaz * versine ) - ( ax * sinA ) ), ( ( ( az * az ) * versine ) + cosA ) );
+}
+
+/* Thin wrapper: forwards both arguments unchanged to vmathM3MakeFromQ. */
+static inline void vmathM3MakeRotationQ( VmathMatrix3 *result, const VmathQuat *unitQuat )
+{
+    const VmathQuat *rotation = unitQuat;
+    vmathM3MakeFromQ( result, rotation );
+}
+
+/*
+ * Diagonal matrix: column i holds scaleVec's i-th field on the diagonal
+ * and 0.0f elsewhere. Each field is read just before its column is written.
+ */
+static inline void vmathM3MakeScale( VmathMatrix3 *result, const VmathVector3 *scaleVec )
+{
+    const float zero = 0.0f;
+    vmathV3MakeFromElems( &result->col0, scaleVec->x, zero, zero );
+    vmathV3MakeFromElems( &result->col1, zero, scaleVec->y, zero );
+    vmathV3MakeFromElems( &result->col2, zero, zero, scaleVec->z );
+}
+
+/*
+ * Scale each column of `mat` by one factor: col0 by scaleVec's X, col1 by
+ * its Y, col2 by its Z, each via vmathV3ScalarMul.
+ */
+static inline void vmathM3AppendScale( VmathMatrix3 *result, const VmathMatrix3 *mat, const VmathVector3 *scaleVec )
+{
+    float factor;
+    factor = vmathV3GetX( scaleVec );
+    vmathV3ScalarMul( &result->col0, &mat->col0, factor );
+    factor = vmathV3GetY( scaleVec );
+    vmathV3ScalarMul( &result->col1, &mat->col1, factor );
+    factor = vmathV3GetZ( scaleVec );
+    vmathV3ScalarMul( &result->col2, &mat->col2, factor );
+}
+
+/* Combine every column of `mat` with `scaleVec` using vmathV3MulPerElem. */
+static inline void vmathM3PrependScale( VmathMatrix3 *result, const VmathVector3 *scaleVec, const VmathMatrix3 *mat )
+{
+    const VmathVector3 *factors = scaleVec;
+    vmathV3MulPerElem( &result->col0, &mat->col0, factors );
+    vmathV3MulPerElem( &result->col1, &mat->col1, factors );
+    vmathV3MulPerElem( &result->col2, &mat->col2, factors );
+}
+
+/*
+ * Column-wise select: vmathV3Select is called for each column pair with the
+ * same `select1` flag.
+ */
+static inline void vmathM3Select( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1, unsigned int select1 )
+{
+    const unsigned int pick = select1;
+    vmathV3Select( &result->col0, &mat0->col0, &mat1->col0, pick );
+    vmathV3Select( &result->col1, &mat0->col1, &mat1->col1, pick );
+    vmathV3Select( &result->col2, &mat0->col2, &mat1->col2, pick );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+/* Debug helper: print rows 0, 1, 2 of `mat` in order with vmathV3Print. */
+static inline void vmathM3Print( const VmathMatrix3 *mat )
+{
+    VmathVector3 rowVec;
+    int row;
+    for ( row = 0; row < 3; row++ ) {
+        vmathM3GetRow( &rowVec, mat, row );
+        vmathV3Print( &rowVec );
+    }
+}
+
+/* Debug helper: print "<name>:" on its own line, then the matrix rows. */
+static inline void vmathM3Prints( const VmathMatrix3 *mat, const char *name )
+{
+    const char *label = name;
+    printf("%s:\n", label);
+    vmathM3Print( mat );
+}
+
+#endif
+
+/* Copy all four columns of `mat` into `result` with vmathV4Copy. */
+static inline void vmathM4Copy( VmathMatrix4 *result, const VmathMatrix4 *mat )
+{
+    const VmathMatrix4 *src = mat;
+    vmathV4Copy( &result->col0, &src->col0 );
+    vmathV4Copy( &result->col1, &src->col1 );
+    vmathV4Copy( &result->col2, &src->col2 );
+    vmathV4Copy( &result->col3, &src->col3 );
+}
+
+/* Initialize every column from the same `scalar` via vmathV4MakeFromScalar. */
+static inline void vmathM4MakeFromScalar( VmathMatrix4 *result, float scalar )
+{
+    const float fill = scalar;
+    vmathV4MakeFromScalar( &result->col0, fill );
+    vmathV4MakeFromScalar( &result->col1, fill );
+    vmathV4MakeFromScalar( &result->col2, fill );
+    vmathV4MakeFromScalar( &result->col3, fill );
+}
+
+/*
+ * Promote a VmathTransform3 to a 4x4 matrix: columns 0..2 are paired with
+ * the scalar 0.0f and column 3 with 1.0f in vmathV4MakeFromV3Scalar.
+ */
+static inline void vmathM4MakeFromT3( VmathMatrix4 *result, const VmathTransform3 *mat )
+{
+    const VmathTransform3 *tfrm = mat;
+    vmathV4MakeFromV3Scalar( &result->col0, &tfrm->col0, 0.0f );
+    vmathV4MakeFromV3Scalar( &result->col1, &tfrm->col1, 0.0f );
+    vmathV4MakeFromV3Scalar( &result->col2, &tfrm->col2, 0.0f );
+    vmathV4MakeFromV3Scalar( &result->col3, &tfrm->col3, 1.0f );
+}
+
+/* Build a 4x4 matrix by copying the four supplied vectors into col0..col3. */
+static inline void vmathM4MakeFromCols( VmathMatrix4 *result, const VmathVector4 *_col0, const VmathVector4 *_col1, const VmathVector4 *_col2, const VmathVector4 *_col3 )
+{
+    VmathMatrix4 *dst = result;
+    vmathV4Copy( &dst->col0, _col0 );
+    vmathV4Copy( &dst->col1, _col1 );
+    vmathV4Copy( &dst->col2, _col2 );
+    vmathV4Copy( &dst->col3, _col3 );
+}
+
+/*
+ * Build a 4x4 matrix from a 3x3 matrix and a translation vector: the three
+ * matrix columns are paired with 0.0f and `translateVec` with 1.0f in
+ * vmathV4MakeFromV3Scalar.
+ */
+static inline void vmathM4MakeFromM3V3( VmathMatrix4 *result, const VmathMatrix3 *mat, const VmathVector3 *translateVec )
+{
+    const VmathMatrix3 *upper = mat;
+    vmathV4MakeFromV3Scalar( &result->col0, &upper->col0, 0.0f );
+    vmathV4MakeFromV3Scalar( &result->col1, &upper->col1, 0.0f );
+    vmathV4MakeFromV3Scalar( &result->col2, &upper->col2, 0.0f );
+    vmathV4MakeFromV3Scalar( &result->col3, translateVec, 1.0f );
+}
+
+/*
+ * Build a 4x4 matrix from a quaternion and a translation vector. The
+ * quaternion is first converted to a local 3x3 matrix with vmathM3MakeFromQ;
+ * its columns are paired with 0.0f and `translateVec` with 1.0f.
+ */
+static inline void vmathM4MakeFromQV3( VmathMatrix4 *result, const VmathQuat *unitQuat, const VmathVector3 *translateVec )
+{
+    VmathMatrix3 rotation;
+    vmathM3MakeFromQ( &rotation, unitQuat );
+    vmathV4MakeFromV3Scalar( &result->col0, &rotation.col0, 0.0f );
+    vmathV4MakeFromV3Scalar( &result->col1, &rotation.col1, 0.0f );
+    vmathV4MakeFromV3Scalar( &result->col2, &rotation.col2, 0.0f );
+    vmathV4MakeFromV3Scalar( &result->col3, translateVec, 1.0f );
+}
+
+/* Overwrite column 0 of `result` with a copy of `_col0`. */
+static inline void vmathM4SetCol0( VmathMatrix4 *result, const VmathVector4 *_col0 )
+{
+    VmathVector4 *target = &result->col0;
+    vmathV4Copy( target, _col0 );
+}
+
+/* Overwrite column 1 of `result` with a copy of `_col1`. */
+static inline void vmathM4SetCol1( VmathMatrix4 *result, const VmathVector4 *_col1 )
+{
+    VmathVector4 *target = &result->col1;
+    vmathV4Copy( target, _col1 );
+}
+
+/* Overwrite column 2 of `result` with a copy of `_col2`. */
+static inline void vmathM4SetCol2( VmathMatrix4 *result, const VmathVector4 *_col2 )
+{
+    VmathVector4 *target = &result->col2;
+    vmathV4Copy( target, _col2 );
+}
+
+/* Overwrite column 3 of `result` with a copy of `_col3`. */
+static inline void vmathM4SetCol3( VmathMatrix4 *result, const VmathVector4 *_col3 )
+{
+    VmathVector4 *target = &result->col3;
+    vmathV4Copy( target, _col3 );
+}
+
+/*
+ * Overwrite the column selected by `col` with a copy of `vec`. The column
+ * is addressed by offsetting from &result->col0; `col` is not range-checked.
+ */
+static inline void vmathM4SetCol( VmathMatrix4 *result, int col, const VmathVector4 *vec )
+{
+    VmathVector4 *columns = &result->col0;
+    vmathV4Copy( &columns[col], vec );
+}
+
+/*
+ * Replace row `row` of a 4x4 matrix: element i of `vec` is stored at index
+ * `row` of column i. Each element is fetched right before it is stored.
+ */
+static inline void vmathM4SetRow( VmathMatrix4 *result, int row, const VmathVector4 *vec )
+{
+    float elem;
+    elem = vmathV4GetElem( vec, 0 );
+    vmathV4SetElem( &result->col0, row, elem );
+    elem = vmathV4GetElem( vec, 1 );
+    vmathV4SetElem( &result->col1, row, elem );
+    elem = vmathV4GetElem( vec, 2 );
+    vmathV4SetElem( &result->col2, row, elem );
+    elem = vmathV4GetElem( vec, 3 );
+    vmathV4SetElem( &result->col3, row, elem );
+}
+
+/*
+ * Store `val` at (col, row): copy the addressed column into a local, patch
+ * the single element, then copy the whole column back.
+ */
+static inline void vmathM4SetElem( VmathMatrix4 *result, int col, int row, float val )
+{
+    VmathVector4 column;
+    vmathV4Copy( &column, ( &result->col0 + col ) );
+    vmathV4SetElem( &column, row, val );
+    vmathV4Copy( ( &result->col0 + col ), &column );
+}
+
+/*
+ * Return the element at (col, row): the column is copied into a local and
+ * the requested index is read from that copy.
+ */
+static inline float vmathM4GetElem( const VmathMatrix4 *mat, int col, int row )
+{
+    VmathVector4 column;
+    vmathV4Copy( &column, ( &mat->col0 + col ) );
+    return vmathV4GetElem( &column, row );
+}
+
+/* Copy column 0 of `mat` into `result` via vmathV4Copy. */
+static inline void vmathM4GetCol0( VmathVector4 *result, const VmathMatrix4 *mat )
+{
+    const VmathVector4 *source = &mat->col0;
+    vmathV4Copy( result, source );
+}
+
+/* Copy column 1 of `mat` into `result` via vmathV4Copy. */
+static inline void vmathM4GetCol1( VmathVector4 *result, const VmathMatrix4 *mat )
+{
+    const VmathVector4 *source = &mat->col1;
+    vmathV4Copy( result, source );
+}
+
+/* Copy column 2 of `mat` into `result` via vmathV4Copy. */
+static inline void vmathM4GetCol2( VmathVector4 *result, const VmathMatrix4 *mat )
+{
+    const VmathVector4 *source = &mat->col2;
+    vmathV4Copy( result, source );
+}
+
+/* Copy column 3 of `mat` into `result` via vmathV4Copy. */
+static inline void vmathM4GetCol3( VmathVector4 *result, const VmathMatrix4 *mat )
+{
+    const VmathVector4 *source = &mat->col3;
+    vmathV4Copy( result, source );
+}
+
+/*
+ * Copy the column selected by `col` into `result`. The column is addressed
+ * by offsetting from &mat->col0; `col` is not range-checked.
+ */
+static inline void vmathM4GetCol( VmathVector4 *result, const VmathMatrix4 *mat, int col )
+{
+    const VmathVector4 *columns = &mat->col0;
+    vmathV4Copy( result, &columns[col] );
+}
+
+/*
+ * Gather row `row` of `mat`: element `row` of each column becomes the
+ * x, y, z and w argument of vmathV4MakeFromElems, in column order.
+ */
+static inline void vmathM4GetRow( VmathVector4 *result, const VmathMatrix4 *mat, int row )
+{
+    const float fromCol0 = vmathV4GetElem( &mat->col0, row );
+    const float fromCol1 = vmathV4GetElem( &mat->col1, row );
+    const float fromCol2 = vmathV4GetElem( &mat->col2, row );
+    const float fromCol3 = vmathV4GetElem( &mat->col3, row );
+    vmathV4MakeFromElems( result, fromCol0, fromCol1, fromCol2, fromCol3 );
+}
+
+/*
+ * Transpose a 4x4 matrix. The transposed columns are assembled in a local
+ * matrix and copied out at the end, so `result` may be the same as `mat`.
+ */
+static inline void vmathM4Transpose( VmathMatrix4 *result, const VmathMatrix4 *mat )
+{
+    VmathMatrix4 transposed;
+    const VmathVector4 *c0 = &mat->col0;
+    const VmathVector4 *c1 = &mat->col1;
+    const VmathVector4 *c2 = &mat->col2;
+    const VmathVector4 *c3 = &mat->col3;
+    vmathV4MakeFromElems( &transposed.col0, c0->x, c1->x, c2->x, c3->x );
+    vmathV4MakeFromElems( &transposed.col1, c0->y, c1->y, c2->y, c3->y );
+    vmathV4MakeFromElems( &transposed.col2, c0->z, c1->z, c2->z, c3->z );
+    vmathV4MakeFromElems( &transposed.col3, c0->w, c1->w, c2->w, c3->w );
+    vmathM4Copy( result, &transposed );
+}
+
+static inline void vmathM4Inverse( VmathMatrix4 *result, const VmathMatrix4 *mat )
+{   /* Invert a general 4x4 matrix. Every element of `mat` is first copied
+     * into the scalars mA..mP (column by column), the four output columns
+     * are accumulated in the locals res0..res3, and `result` is written only
+     * in the final four statements. The divisor behind detInv is not checked
+     * for zero. */
+    VmathVector4 res0, res1, res2, res3;
+    float mA, mB, mC, mD, mE, mF, mG, mH, mI, mJ, mK, mL, mM, mN, mO, mP, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, detInv;
+    mA = mat->col0.x; /* mA..mD: column 0 (x, y, z, w) */
+    mB = mat->col0.y;
+    mC = mat->col0.z;
+    mD = mat->col0.w;
+    mE = mat->col1.x; /* mE..mH: column 1 */
+    mF = mat->col1.y;
+    mG = mat->col1.z;
+    mH = mat->col1.w;
+    mI = mat->col2.x; /* mI..mL: column 2 */
+    mJ = mat->col2.y;
+    mK = mat->col2.z;
+    mL = mat->col2.w;
+    mM = mat->col3.x; /* mM..mP: column 3 */
+    mN = mat->col3.y;
+    mO = mat->col3.z;
+    mP = mat->col3.w;
+    tmp0 = ( ( mK * mD ) - ( mC * mL ) ); /* first set of six pairwise differences */
+    tmp1 = ( ( mO * mH ) - ( mG * mP ) );
+    tmp2 = ( ( mB * mK ) - ( mJ * mC ) );
+    tmp3 = ( ( mF * mO ) - ( mN * mG ) );
+    tmp4 = ( ( mJ * mD ) - ( mB * mL ) );
+    tmp5 = ( ( mN * mH ) - ( mF * mP ) );
+    vmathV4SetX( &res0, ( ( ( mJ * tmp1 ) - ( mL * tmp3 ) ) - ( mK * tmp5 ) ) ); /* res0 is complete after these four stores */
+    vmathV4SetY( &res0, ( ( ( mN * tmp0 ) - ( mP * tmp2 ) ) - ( mO * tmp4 ) ) );
+    vmathV4SetZ( &res0, ( ( ( mD * tmp3 ) + ( mC * tmp5 ) ) - ( mB * tmp1 ) ) );
+    vmathV4SetW( &res0, ( ( ( mH * tmp2 ) + ( mG * tmp4 ) ) - ( mF * tmp0 ) ) );
+    detInv = ( 1.0f / ( ( ( ( mA * res0.x ) + ( mE * res0.y ) ) + ( mI * res0.z ) ) + ( mM * res0.w ) ) ); /* reciprocal of row 0 of mat dotted with res0 */
+    vmathV4SetX( &res1, ( mI * tmp1 ) ); /* partial terms for res1..res3; completed further down */
+    vmathV4SetY( &res1, ( mM * tmp0 ) );
+    vmathV4SetZ( &res1, ( mA * tmp1 ) );
+    vmathV4SetW( &res1, ( mE * tmp0 ) );
+    vmathV4SetX( &res3, ( mI * tmp3 ) );
+    vmathV4SetY( &res3, ( mM * tmp2 ) );
+    vmathV4SetZ( &res3, ( mA * tmp3 ) );
+    vmathV4SetW( &res3, ( mE * tmp2 ) );
+    vmathV4SetX( &res2, ( mI * tmp5 ) );
+    vmathV4SetY( &res2, ( mM * tmp4 ) );
+    vmathV4SetZ( &res2, ( mA * tmp5 ) );
+    vmathV4SetW( &res2, ( mE * tmp4 ) );
+    tmp0 = ( ( mI * mB ) - ( mA * mJ ) ); /* tmp0..tmp5 are reused for a second set of differences */
+    tmp1 = ( ( mM * mF ) - ( mE * mN ) );
+    tmp2 = ( ( mI * mD ) - ( mA * mL ) );
+    tmp3 = ( ( mM * mH ) - ( mE * mP ) );
+    tmp4 = ( ( mI * mC ) - ( mA * mK ) );
+    tmp5 = ( ( mM * mG ) - ( mE * mO ) );
+    vmathV4SetX( &res2, ( ( ( mL * tmp1 ) - ( mJ * tmp3 ) ) + res2.x ) ); /* combine second-set terms with the stored partial values */
+    vmathV4SetY( &res2, ( ( ( mP * tmp0 ) - ( mN * tmp2 ) ) + res2.y ) );
+    vmathV4SetZ( &res2, ( ( ( mB * tmp3 ) - ( mD * tmp1 ) ) - res2.z ) );
+    vmathV4SetW( &res2, ( ( ( mF * tmp2 ) - ( mH * tmp0 ) ) - res2.w ) );
+    vmathV4SetX( &res3, ( ( ( mJ * tmp5 ) - ( mK * tmp1 ) ) + res3.x ) );
+    vmathV4SetY( &res3, ( ( ( mN * tmp4 ) - ( mO * tmp0 ) ) + res3.y ) );
+    vmathV4SetZ( &res3, ( ( ( mC * tmp1 ) - ( mB * tmp5 ) ) - res3.z ) );
+    vmathV4SetW( &res3, ( ( ( mG * tmp0 ) - ( mF * tmp4 ) ) - res3.w ) );
+    vmathV4SetX( &res1, ( ( ( mK * tmp3 ) - ( mL * tmp5 ) ) - res1.x ) );
+    vmathV4SetY( &res1, ( ( ( mO * tmp2 ) - ( mP * tmp4 ) ) - res1.y ) );
+    vmathV4SetZ( &res1, ( ( ( mD * tmp5 ) - ( mC * tmp3 ) ) + res1.z ) );
+    vmathV4SetW( &res1, ( ( ( mH * tmp4 ) - ( mG * tmp2 ) ) + res1.w ) );
+    vmathV4ScalarMul( &result->col0, &res0, detInv ); /* scale each accumulated column by detInv and store */
+    vmathV4ScalarMul( &result->col1, &res1, detInv );
+    vmathV4ScalarMul( &result->col2, &res2, detInv );
+    vmathV4ScalarMul( &result->col3, &res3, detInv );
+}
+
+/*
+ * Inverse computed through a VmathTransform3: the XYZ part of each of the
+ * four columns is loaded into a local transform, vmathT3Inverse is applied,
+ * and the result is expanded back to 4x4 with vmathM4MakeFromT3. The w
+ * components of `mat` are not read.
+ */
+static inline void vmathM4AffineInverse( VmathMatrix4 *result, const VmathMatrix4 *mat )
+{
+    VmathTransform3 source, inverted;
+    VmathVector3 xyz0, xyz1, xyz2, xyz3;
+    vmathV4GetXYZ( &xyz0, &mat->col0 );
+    vmathV4GetXYZ( &xyz1, &mat->col1 );
+    vmathV4GetXYZ( &xyz2, &mat->col2 );
+    vmathV4GetXYZ( &xyz3, &mat->col3 );
+    vmathT3SetCol0( &source, &xyz0 );
+    vmathT3SetCol1( &source, &xyz1 );
+    vmathT3SetCol2( &source, &xyz2 );
+    vmathT3SetCol3( &source, &xyz3 );
+    vmathT3Inverse( &inverted, &source );
+    vmathM4MakeFromT3( result, &inverted );
+}
+
+/*
+ * Inverse computed through a VmathTransform3: the XYZ part of each of the
+ * four columns is loaded into a local transform, vmathT3OrthoInverse is
+ * applied, and the result is expanded back to 4x4 with vmathM4MakeFromT3.
+ * The w components of `mat` are not read.
+ */
+static inline void vmathM4OrthoInverse( VmathMatrix4 *result, const VmathMatrix4 *mat )
+{
+    VmathTransform3 source, inverted;
+    VmathVector3 xyz0, xyz1, xyz2, xyz3;
+    vmathV4GetXYZ( &xyz0, &mat->col0 );
+    vmathV4GetXYZ( &xyz1, &mat->col1 );
+    vmathV4GetXYZ( &xyz2, &mat->col2 );
+    vmathV4GetXYZ( &xyz3, &mat->col3 );
+    vmathT3SetCol0( &source, &xyz0 );
+    vmathT3SetCol1( &source, &xyz1 );
+    vmathT3SetCol2( &source, &xyz2 );
+    vmathT3SetCol3( &source, &xyz3 );
+    vmathT3OrthoInverse( &inverted, &source );
+    vmathM4MakeFromT3( result, &inverted );
+}
+
+static inline float vmathM4Determinant( const VmathMatrix4 *mat )
+{   /* Return the determinant of a 4x4 matrix. The elements are copied into
+     * mA..mP (column by column), six pairwise differences tmp0..tmp5 are
+     * formed, and dx..dw are combined with row 0 of the matrix
+     * (mA, mE, mI, mM) in the return expression. */
+    float dx, dy, dz, dw, mA, mB, mC, mD, mE, mF, mG, mH, mI, mJ, mK, mL, mM, mN, mO, mP, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
+    mA = mat->col0.x; /* mA..mD: column 0 (x, y, z, w) */
+    mB = mat->col0.y;
+    mC = mat->col0.z;
+    mD = mat->col0.w;
+    mE = mat->col1.x; /* mE..mH: column 1 */
+    mF = mat->col1.y;
+    mG = mat->col1.z;
+    mH = mat->col1.w;
+    mI = mat->col2.x; /* mI..mL: column 2 */
+    mJ = mat->col2.y;
+    mK = mat->col2.z;
+    mL = mat->col2.w;
+    mM = mat->col3.x; /* mM..mP: column 3 */
+    mN = mat->col3.y;
+    mO = mat->col3.z;
+    mP = mat->col3.w;
+    tmp0 = ( ( mK * mD ) - ( mC * mL ) ); /* pairwise differences shared by dx..dw */
+    tmp1 = ( ( mO * mH ) - ( mG * mP ) );
+    tmp2 = ( ( mB * mK ) - ( mJ * mC ) );
+    tmp3 = ( ( mF * mO ) - ( mN * mG ) );
+    tmp4 = ( ( mJ * mD ) - ( mB * mL ) );
+    tmp5 = ( ( mN * mH ) - ( mF * mP ) );
+    dx = ( ( ( mJ * tmp1 ) - ( mL * tmp3 ) ) - ( mK * tmp5 ) ); /* mA, mE, mI, mM are not used in dx..dw */
+    dy = ( ( ( mN * tmp0 ) - ( mP * tmp2 ) ) - ( mO * tmp4 ) );
+    dz = ( ( ( mD * tmp3 ) + ( mC * tmp5 ) ) - ( mB * tmp1 ) );
+    dw = ( ( ( mH * tmp2 ) + ( mG * tmp4 ) ) - ( mF * tmp0 ) );
+    return ( ( ( ( mA * dx ) + ( mE * dy ) ) + ( mI * dz ) ) + ( mM * dw ) );
+}
+
+/* Apply vmathV4Add to each pair of corresponding columns of mat0 and mat1. */
+static inline void vmathM4Add( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 )
+{
+    const VmathMatrix4 *lhs = mat0;
+    const VmathMatrix4 *rhs = mat1;
+    vmathV4Add( &result->col0, &lhs->col0, &rhs->col0 );
+    vmathV4Add( &result->col1, &lhs->col1, &rhs->col1 );
+    vmathV4Add( &result->col2, &lhs->col2, &rhs->col2 );
+    vmathV4Add( &result->col3, &lhs->col3, &rhs->col3 );
+}
+
+/* Apply vmathV4Sub to each pair of corresponding columns of mat0 and mat1. */
+static inline void vmathM4Sub( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 )
+{
+    const VmathMatrix4 *lhs = mat0;
+    const VmathMatrix4 *rhs = mat1;
+    vmathV4Sub( &result->col0, &lhs->col0, &rhs->col0 );
+    vmathV4Sub( &result->col1, &lhs->col1, &rhs->col1 );
+    vmathV4Sub( &result->col2, &lhs->col2, &rhs->col2 );
+    vmathV4Sub( &result->col3, &lhs->col3, &rhs->col3 );
+}
+
+/* Apply vmathV4Neg to each column of `mat`, storing into `result`. */
+static inline void vmathM4Neg( VmathMatrix4 *result, const VmathMatrix4 *mat )
+{
+    const VmathMatrix4 *src = mat;
+    vmathV4Neg( &result->col0, &src->col0 );
+    vmathV4Neg( &result->col1, &src->col1 );
+    vmathV4Neg( &result->col2, &src->col2 );
+    vmathV4Neg( &result->col3, &src->col3 );
+}
+
+/* Apply vmathV4AbsPerElem to each column of `mat`, storing into `result`. */
+static inline void vmathM4AbsPerElem( VmathMatrix4 *result, const VmathMatrix4 *mat )
+{
+    const VmathMatrix4 *src = mat;
+    vmathV4AbsPerElem( &result->col0, &src->col0 );
+    vmathV4AbsPerElem( &result->col1, &src->col1 );
+    vmathV4AbsPerElem( &result->col2, &src->col2 );
+    vmathV4AbsPerElem( &result->col3, &src->col3 );
+}
+
+/* Apply vmathV4ScalarMul with the same `scalar` to each column of `mat`. */
+static inline void vmathM4ScalarMul( VmathMatrix4 *result, const VmathMatrix4 *mat, float scalar )
+{
+    const float factor = scalar;
+    vmathV4ScalarMul( &result->col0, &mat->col0, factor );
+    vmathV4ScalarMul( &result->col1, &mat->col1, factor );
+    vmathV4ScalarMul( &result->col2, &mat->col2, factor );
+    vmathV4ScalarMul( &result->col3, &mat->col3, factor );
+}
+
+/*
+ * Matrix * 4-vector: each output component is the sum, over the four
+ * columns, of that column's component times the matching element of `vec`.
+ * All inputs are read before `result` is written.
+ */
+static inline void vmathM4MulV4( VmathVector4 *result, const VmathMatrix4 *mat, const VmathVector4 *vec )
+{
+    const VmathVector4 *c0 = &mat->col0;
+    const VmathVector4 *c1 = &mat->col1;
+    const VmathVector4 *c2 = &mat->col2;
+    const VmathVector4 *c3 = &mat->col3;
+    const float vx = vec->x;
+    const float vy = vec->y;
+    const float vz = vec->z;
+    const float vw = vec->w;
+    const float outX = ( ( ( ( c0->x * vx ) + ( c1->x * vy ) ) + ( c2->x * vz ) ) + ( c3->x * vw ) );
+    const float outY = ( ( ( ( c0->y * vx ) + ( c1->y * vy ) ) + ( c2->y * vz ) ) + ( c3->y * vw ) );
+    const float outZ = ( ( ( ( c0->z * vx ) + ( c1->z * vy ) ) + ( c2->z * vz ) ) + ( c3->z * vw ) );
+    const float outW = ( ( ( ( c0->w * vx ) + ( c1->w * vy ) ) + ( c2->w * vz ) ) + ( c3->w * vw ) );
+    vmathV4MakeFromElems( result, outX, outY, outZ, outW );
+}
+
+/*
+ * Matrix * 3-vector: only columns 0..2 contribute; column 3 of `mat` is
+ * not read. Components are stored to `result` one at a time, in x, y, z, w
+ * order.
+ */
+static inline void vmathM4MulV3( VmathVector4 *result, const VmathMatrix4 *mat, const VmathVector3 *vec )
+{
+    const VmathVector4 *c0 = &mat->col0;
+    const VmathVector4 *c1 = &mat->col1;
+    const VmathVector4 *c2 = &mat->col2;
+    result->x = ( ( ( c0->x * vec->x ) + ( c1->x * vec->y ) ) + ( c2->x * vec->z ) );
+    result->y = ( ( ( c0->y * vec->x ) + ( c1->y * vec->y ) ) + ( c2->y * vec->z ) );
+    result->z = ( ( ( c0->z * vec->x ) + ( c1->z * vec->y ) ) + ( c2->z * vec->z ) );
+    result->w = ( ( ( c0->w * vec->x ) + ( c1->w * vec->y ) ) + ( c2->w * vec->z ) );
+}
+
+/*
+ * Matrix * 3D point: same sums as the 3-vector product, plus the matching
+ * component of column 3 added unscaled. Components are stored to `result`
+ * one at a time, in x, y, z, w order.
+ */
+static inline void vmathM4MulP3( VmathVector4 *result, const VmathMatrix4 *mat, const VmathPoint3 *pnt )
+{
+    const VmathVector4 *c0 = &mat->col0;
+    const VmathVector4 *c1 = &mat->col1;
+    const VmathVector4 *c2 = &mat->col2;
+    const VmathVector4 *c3 = &mat->col3;
+    result->x = ( ( ( ( c0->x * pnt->x ) + ( c1->x * pnt->y ) ) + ( c2->x * pnt->z ) ) + c3->x );
+    result->y = ( ( ( ( c0->y * pnt->x ) + ( c1->y * pnt->y ) ) + ( c2->y * pnt->z ) ) + c3->y );
+    result->z = ( ( ( ( c0->z * pnt->x ) + ( c1->z * pnt->y ) ) + ( c2->z * pnt->z ) ) + c3->z );
+    result->w = ( ( ( ( c0->w * pnt->x ) + ( c1->w * pnt->y ) ) + ( c2->w * pnt->z ) ) + c3->w );
+}
+
+/*
+ * Matrix * matrix: each column of mat1 is transformed by mat0 with
+ * vmathM4MulV4. The product is built in a local and copied out last, so
+ * `result` may be the same as either input.
+ */
+static inline void vmathM4Mul( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 )
+{
+    VmathMatrix4 product;
+    vmathM4MulV4( &product.col0, mat0, &mat1->col0 );
+    vmathM4MulV4( &product.col1, mat0, &mat1->col1 );
+    vmathM4MulV4( &product.col2, mat0, &mat1->col2 );
+    vmathM4MulV4( &product.col3, mat0, &mat1->col3 );
+    vmathM4Copy( result, &product );
+}
+
+/*
+ * 4x4 matrix * VmathTransform3: columns 0..2 of the transform go through
+ * vmathM4MulV3, column 3 is converted to a point and goes through
+ * vmathM4MulP3. The product is built in a local and copied out last.
+ */
+static inline void vmathM4MulT3( VmathMatrix4 *result, const VmathMatrix4 *mat, const VmathTransform3 *tfrm1 )
+{
+    VmathMatrix4 product;
+    VmathPoint3 lastColPoint;
+    vmathP3MakeFromV3( &lastColPoint, &tfrm1->col3 );
+    vmathM4MulV3( &product.col0, mat, &tfrm1->col0 );
+    vmathM4MulV3( &product.col1, mat, &tfrm1->col1 );
+    vmathM4MulV3( &product.col2, mat, &tfrm1->col2 );
+    vmathM4MulP3( &product.col3, mat, &lastColPoint );
+    vmathM4Copy( result, &product );
+}
+
+/* Apply vmathV4MulPerElem to each pair of corresponding columns. */
+static inline void vmathM4MulPerElem( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 )
+{
+    const VmathMatrix4 *lhs = mat0;
+    const VmathMatrix4 *rhs = mat1;
+    vmathV4MulPerElem( &result->col0, &lhs->col0, &rhs->col0 );
+    vmathV4MulPerElem( &result->col1, &lhs->col1, &rhs->col1 );
+    vmathV4MulPerElem( &result->col2, &lhs->col2, &rhs->col2 );
+    vmathV4MulPerElem( &result->col3, &lhs->col3, &rhs->col3 );
+}
+
+/* Fill the four columns with vmathV4MakeXAxis / YAxis / ZAxis / WAxis. */
+static inline void vmathM4MakeIdentity( VmathMatrix4 *result )
+{
+    VmathMatrix4 *dst = result;
+    vmathV4MakeXAxis( &dst->col0 );
+    vmathV4MakeYAxis( &dst->col1 );
+    vmathV4MakeZAxis( &dst->col2 );
+    vmathV4MakeWAxis( &dst->col3 );
+}
+
+/*
+ * Write the columns of `mat3` into columns 0..2 of `result` using
+ * vmathV4SetXYZ. Column 3 of `result` is not touched.
+ */
+static inline void vmathM4SetUpper3x3( VmathMatrix4 *result, const VmathMatrix3 *mat3 )
+{
+    const VmathMatrix3 *upper = mat3;
+    vmathV4SetXYZ( &result->col0, &upper->col0 );
+    vmathV4SetXYZ( &result->col1, &upper->col1 );
+    vmathV4SetXYZ( &result->col2, &upper->col2 );
+}
+
+/*
+ * Extract a 3x3 matrix from columns 0..2 of `mat` using vmathV4GetXYZ.
+ * Column 3 of `mat` is not read.
+ */
+static inline void vmathM4GetUpper3x3( VmathMatrix3 *result, const VmathMatrix4 *mat )
+{
+    const VmathMatrix4 *src = mat;
+    vmathV4GetXYZ( &result->col0, &src->col0 );
+    vmathV4GetXYZ( &result->col1, &src->col1 );
+    vmathV4GetXYZ( &result->col2, &src->col2 );
+}
+
+/* Write `translateVec` into column 3 of `result` using vmathV4SetXYZ. */
+static inline void vmathM4SetTranslation( VmathMatrix4 *result, const VmathVector3 *translateVec )
+{
+    VmathVector4 *lastCol = &result->col3;
+    vmathV4SetXYZ( lastCol, translateVec );
+}
+
+/* Read column 3 of `mat` into `result` using vmathV4GetXYZ. */
+static inline void vmathM4GetTranslation( VmathVector3 *result, const VmathMatrix4 *mat )
+{
+    const VmathVector4 *lastCol = &mat->col3;
+    vmathV4GetXYZ( result, lastCol );
+}
+
+/*
+ * Rotation about X by `radians`: col0 from vmathV4MakeXAxis,
+ * col1 = ( 0, cos, sin, 0 ), col2 = ( 0, -sin, cos, 0 ),
+ * col3 from vmathV4MakeWAxis.
+ */
+static inline void vmathM4MakeRotationX( VmathMatrix4 *result, float radians )
+{
+    const float sinA = sinf( radians );
+    const float cosA = cosf( radians );
+    vmathV4MakeXAxis( &result->col0 );
+    vmathV4MakeFromElems( &result->col1, 0.0f, cosA, sinA, 0.0f );
+    vmathV4MakeFromElems( &result->col2, 0.0f, -sinA, cosA, 0.0f );
+    vmathV4MakeWAxis( &result->col3 );
+}
+
+/*
+ * Rotation about Y by `radians`: col0 = ( cos, 0, -sin, 0 ), col1 from
+ * vmathV4MakeYAxis, col2 = ( sin, 0, cos, 0 ), col3 from vmathV4MakeWAxis.
+ */
+static inline void vmathM4MakeRotationY( VmathMatrix4 *result, float radians )
+{
+    const float sinA = sinf( radians );
+    const float cosA = cosf( radians );
+    vmathV4MakeFromElems( &result->col0, cosA, 0.0f, -sinA, 0.0f );
+    vmathV4MakeYAxis( &result->col1 );
+    vmathV4MakeFromElems( &result->col2, sinA, 0.0f, cosA, 0.0f );
+    vmathV4MakeWAxis( &result->col3 );
+}
+
+/*
+ * Rotation about Z by `radians`: col0 = ( cos, sin, 0, 0 ),
+ * col1 = ( -sin, cos, 0, 0 ), col2 from vmathV4MakeZAxis,
+ * col3 from vmathV4MakeWAxis.
+ */
+static inline void vmathM4MakeRotationZ( VmathMatrix4 *result, float radians )
+{
+    const float sinA = sinf( radians );
+    const float cosA = cosf( radians );
+    vmathV4MakeFromElems( &result->col0, cosA, sinA, 0.0f, 0.0f );
+    vmathV4MakeFromElems( &result->col1, -sinA, cosA, 0.0f, 0.0f );
+    vmathV4MakeZAxis( &result->col2 );
+    vmathV4MakeWAxis( &result->col3 );
+}
+
+/*
+ * Build a 4x4 rotation from the three angles in `radiansXYZ` (x, y, z
+ * fields, in radians). Columns 0..2 get a w of 0.0f; column 3 comes from
+ * vmathV4MakeWAxis.
+ */
+static inline void vmathM4MakeRotationZYX( VmathMatrix4 *result, const VmathVector3 *radiansXYZ )
+{
+    const float sinX = sinf( radiansXYZ->x );
+    const float cosX = cosf( radiansXYZ->x );
+    const float sinY = sinf( radiansXYZ->y );
+    const float cosY = cosf( radiansXYZ->y );
+    const float sinZ = sinf( radiansXYZ->z );
+    const float cosZ = cosf( radiansXYZ->z );
+    const float cosZsinY = ( cosZ * sinY );
+    const float sinZsinY = ( sinZ * sinY );
+    vmathV4MakeFromElems( &result->col0, ( cosZ * cosY ), ( sinZ * cosY ), -sinY, 0.0f );
+    vmathV4MakeFromElems( &result->col1, ( ( cosZsinY * sinX ) - ( sinZ * cosX ) ), ( ( sinZsinY * sinX ) + ( cosZ * cosX ) ), ( cosY * sinX ), 0.0f );
+    vmathV4MakeFromElems( &result->col2, ( ( cosZsinY * cosX ) + ( sinZ * sinX ) ), ( ( sinZsinY * cosX ) - ( cosZ * sinX ) ), ( cosY * cosX ), 0.0f );
+    vmathV4MakeWAxis( &result->col3 );
+}
+
+/*
+ * Build a 4x4 rotation of `radians` about `unitVec`. The vector is used as
+ * given (no normalization is performed here). Columns 0..2 get a w of 0.0f;
+ * column 3 comes from vmathV4MakeWAxis.
+ */
+static inline void vmathM4MakeRotationAxis( VmathMatrix4 *result, float radians, const VmathVector3 *unitVec )
+{
+    const float sinA = sinf( radians );
+    const float cosA = cosf( radians );
+    const float ax = unitVec->x;
+    const float ay = unitVec->y;
+    const float az = unitVec->z;
+    const float axay = ( ax * ay );
+    const float ayaz = ( ay * az );
+    const float azax = ( az * ax );
+    const float versine = ( 1.0f - cosA );
+    vmathV4MakeFromElems( &result->col0, ( ( ( ax * ax ) * versine ) + cosA ), ( ( axay * versine ) + ( az * sinA ) ), ( ( azax * versine ) - ( ay * sinA ) ), 0.0f );
+    vmathV4MakeFromElems( &result->col1, ( ( axay * versine ) - ( az * sinA ) ), ( ( ( ay * ay ) * versine ) + cosA ), ( ( ayaz * versine ) + ( ax * sinA ) ), 0.0f );
+    vmathV4MakeFromElems( &result->col2, ( ( azax * versine ) + ( ay * sinA ) ), ( ( ayaz * versine ) - ( ax * sinA ) ), ( ( ( az * az ) * versine ) + cosA ), 0.0f );
+    vmathV4MakeWAxis( &result->col3 );
+}
+
+/*
+ * Build a 4x4 matrix from a quaternion by way of a local VmathTransform3:
+ * vmathT3MakeRotationQ fills the transform, vmathM4MakeFromT3 expands it.
+ */
+static inline void vmathM4MakeRotationQ( VmathMatrix4 *result, const VmathQuat *unitQuat )
+{
+    VmathTransform3 rotation;
+    vmathT3MakeRotationQ( &rotation, unitQuat );
+    vmathM4MakeFromT3( result, &rotation );
+}
+
+/*
+ * 4x4 scale matrix: column i (0..2) holds scaleVec's i-th field on the
+ * diagonal and 0.0f elsewhere; column 3 comes from vmathV4MakeWAxis.
+ * Each field is read just before its column is written.
+ */
+static inline void vmathM4MakeScale( VmathMatrix4 *result, const VmathVector3 *scaleVec )
+{
+    const float zero = 0.0f;
+    vmathV4MakeFromElems( &result->col0, scaleVec->x, zero, zero, zero );
+    vmathV4MakeFromElems( &result->col1, zero, scaleVec->y, zero, zero );
+    vmathV4MakeFromElems( &result->col2, zero, zero, scaleVec->z, zero );
+    vmathV4MakeWAxis( &result->col3 );
+}
+
+/*
+ * Scale columns 0..2 of `mat` by scaleVec's X, Y and Z respectively via
+ * vmathV4ScalarMul; column 3 is copied unchanged.
+ */
+static inline void vmathM4AppendScale( VmathMatrix4 *result, const VmathMatrix4 *mat, const VmathVector3 *scaleVec )
+{
+    float factor;
+    factor = vmathV3GetX( scaleVec );
+    vmathV4ScalarMul( &result->col0, &mat->col0, factor );
+    factor = vmathV3GetY( scaleVec );
+    vmathV4ScalarMul( &result->col1, &mat->col1, factor );
+    factor = vmathV3GetZ( scaleVec );
+    vmathV4ScalarMul( &result->col2, &mat->col2, factor );
+    vmathV4Copy( &result->col3, &mat->col3 );
+}
+
+/*
+ * Extend `scaleVec` to a 4-vector with the scalar 1.0f, then combine it with
+ * every column of `mat` using vmathV4MulPerElem.
+ */
+static inline void vmathM4PrependScale( VmathMatrix4 *result, const VmathVector3 *scaleVec, const VmathMatrix4 *mat )
+{
+    VmathVector4 factors;
+    vmathV4MakeFromV3Scalar( &factors, scaleVec, 1.0f );
+    vmathV4MulPerElem( &result->col0, &mat->col0, &factors );
+    vmathV4MulPerElem( &result->col1, &mat->col1, &factors );
+    vmathV4MulPerElem( &result->col2, &mat->col2, &factors );
+    vmathV4MulPerElem( &result->col3, &mat->col3, &factors );
+}
+
+/*
+ * Translation matrix: columns 0..2 from vmathV4MakeXAxis / YAxis / ZAxis,
+ * column 3 from `translateVec` paired with the scalar 1.0f.
+ */
+static inline void vmathM4MakeTranslation( VmathMatrix4 *result, const VmathVector3 *translateVec )
+{
+    VmathMatrix4 *dst = result;
+    vmathV4MakeXAxis( &dst->col0 );
+    vmathV4MakeYAxis( &dst->col1 );
+    vmathV4MakeZAxis( &dst->col2 );
+    vmathV4MakeFromV3Scalar( &dst->col3, translateVec, 1.0f );
+}
+
+/*
+ * Build a look-at matrix. An eye frame is assembled from three axes and the
+ * eye position, then passed through vmathM4OrthoInverse:
+ *   - Z axis: normalized vmathP3Sub( eyePos, lookAtPos )
+ *   - X axis: normalized cross( normalized upVec, Z )
+ *   - Y axis: cross( Z, X ), replacing the normalized upVec
+ */
+static inline void vmathM4MakeLookAt( VmathMatrix4 *result, const VmathPoint3 *eyePos, const VmathPoint3 *lookAtPos, const VmathVector3 *upVec )
+{
+    VmathMatrix4 eyeFrame;
+    VmathVector3 axisX, axisY, axisZ, posDiff, upCrossZ;
+    VmathVector4 colX, colY, colZ, colEye;
+    vmathV3Normalize( &axisY, upVec );
+    vmathP3Sub( &posDiff, eyePos, lookAtPos );
+    vmathV3Normalize( &axisZ, &posDiff );
+    vmathV3Cross( &upCrossZ, &axisY, &axisZ );
+    vmathV3Normalize( &axisX, &upCrossZ );
+    vmathV3Cross( &axisY, &axisZ, &axisX );
+    vmathV4MakeFromV3( &colX, &axisX );
+    vmathV4MakeFromV3( &colY, &axisY );
+    vmathV4MakeFromV3( &colZ, &axisZ );
+    vmathV4MakeFromP3( &colEye, eyePos );
+    vmathM4MakeFromCols( &eyeFrame, &colX, &colY, &colZ, &colEye );
+    vmathM4OrthoInverse( result, &eyeFrame );
+}
+
+/*
+ * Perspective projection from a vertical field of view (radians), aspect
+ * ratio and near/far distances. The scale term is
+ * tanf( _VECTORMATH_PI_OVER_2 - 0.5 * fovyRadians ); depth terms use
+ * 1 / ( zNear - zFar ). Inputs are not validated (aspect == 0 or
+ * zNear == zFar divide by zero).
+ */
+static inline void vmathM4MakePerspective( VmathMatrix4 *result, float fovyRadians, float aspect, float zNear, float zFar )
+{
+    const float focal = tanf( ( (float)( _VECTORMATH_PI_OVER_2 ) - ( 0.5f * fovyRadians ) ) );
+    const float invDepth = ( 1.0f / ( zNear - zFar ) );
+    vmathV4MakeFromElems( &result->col0, ( focal / aspect ), 0.0f, 0.0f, 0.0f );
+    vmathV4MakeFromElems( &result->col1, 0.0f, focal, 0.0f, 0.0f );
+    vmathV4MakeFromElems( &result->col2, 0.0f, 0.0f, ( ( zNear + zFar ) * invDepth ), -1.0f );
+    vmathV4MakeFromElems( &result->col3, 0.0f, 0.0f, ( ( ( zNear * zFar ) * invDepth ) * 2.0f ), 0.0f );
+}
+
+/*
+ * Perspective projection from frustum bounds. Uses the sums
+ * right+left, top+bottom, zNear+zFar and the reciprocals of right-left,
+ * top-bottom, zNear-zFar. Inputs are not validated (equal bounds divide by
+ * zero).
+ */
+static inline void vmathM4MakeFrustum( VmathMatrix4 *result, float left, float right, float bottom, float top, float zNear, float zFar )
+{
+    const float sumRL = ( right + left );
+    const float sumTB = ( top + bottom );
+    const float sumNF = ( zNear + zFar );
+    const float invRL = ( 1.0f / ( right - left ) );
+    const float invTB = ( 1.0f / ( top - bottom ) );
+    const float invNF = ( 1.0f / ( zNear - zFar ) );
+    const float twoNear = ( zNear + zNear );
+    vmathV4MakeFromElems( &result->col0, ( twoNear * invRL ), 0.0f, 0.0f, 0.0f );
+    vmathV4MakeFromElems( &result->col1, 0.0f, ( twoNear * invTB ), 0.0f, 0.0f );
+    vmathV4MakeFromElems( &result->col2, ( sumRL * invRL ), ( sumTB * invTB ), ( sumNF * invNF ), -1.0f );
+    vmathV4MakeFromElems( &result->col3, 0.0f, 0.0f, ( ( twoNear * invNF ) * zFar ), 0.0f );
+}
+
+static inline void vmathM4MakeOrthographic( VmathMatrix4 *result, float left, float right, float bottom, float top, float zNear, float zFar )
+{   /* Fill the four columns of *result from the six bounds; no check for right == left, top == bottom or zNear == zFar. */
+    /* Columns 0-2 hold twice the reciprocal differences on the diagonal; column 3 holds the sum/difference ratios and 1. */
+    const float sumX = ( right + left );
+    const float sumY = ( top + bottom );
+    const float sumZ = ( zNear + zFar );
+    const float invWidth = ( 1.0f / ( right - left ) );
+    const float invHeight = ( 1.0f / ( top - bottom ) );
+    const float invDepth = ( 1.0f / ( zNear - zFar ) );
+    vmathV4MakeFromElems( &result->col0, ( invWidth + invWidth ), 0.0f, 0.0f, 0.0f );
+    vmathV4MakeFromElems( &result->col1, 0.0f, ( invHeight + invHeight ), 0.0f, 0.0f );
+    vmathV4MakeFromElems( &result->col2, 0.0f, 0.0f, ( invDepth + invDepth ), 0.0f );
+    vmathV4MakeFromElems( &result->col3, ( -sumX * invWidth ), ( -sumY * invHeight ), ( sumZ * invDepth ), 1.0f );
+}
+
+static inline void vmathM4Select( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1, unsigned int select1 )
+{   /* Column-wise select: each result column is vmathV4Select of the matching mat0/mat1 columns, all with the same select1. */
+    vmathV4Select( &result->col0, &mat0->col0, &mat1->col0, select1 );
+    vmathV4Select( &result->col1, &mat0->col1, &mat1->col1, select1 );
+    vmathV4Select( &result->col2, &mat0->col2, &mat1->col2, select1 );
+    vmathV4Select( &result->col3, &mat0->col3, &mat1->col3, select1 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathM4Print( const VmathMatrix4 *mat )
+{   /* Debug helper: extract rows 0-3 of *mat in order and hand each one to vmathV4Print. */
+    VmathVector4 row0, row1, row2, row3;
+    vmathM4GetRow( &row0, mat, 0 );
+    vmathV4Print( &row0 );
+    vmathM4GetRow( &row1, mat, 1 );
+    vmathV4Print( &row1 );
+    vmathM4GetRow( &row2, mat, 2 );
+    vmathV4Print( &row2 );
+    vmathM4GetRow( &row3, mat, 3 );
+    vmathV4Print( &row3 );
+}
+
+static inline void vmathM4Prints( const VmathMatrix4 *mat, const char *name )
+{   /* Debug helper: print name followed by ':' and a newline, then the matrix via vmathM4Print. */
+    printf("%s:\n", name);
+    vmathM4Print( mat );
+}
+
+#endif
+
+static inline void vmathT3Copy( VmathTransform3 *result, const VmathTransform3 *tfrm )
+{   /* Copy all four columns of *tfrm into *result. */
+    vmathV3Copy( &result->col0, &tfrm->col0 );
+    vmathV3Copy( &result->col1, &tfrm->col1 );
+    vmathV3Copy( &result->col2, &tfrm->col2 );
+    vmathV3Copy( &result->col3, &tfrm->col3 );
+}
+
+static inline void vmathT3MakeFromScalar( VmathTransform3 *result, float scalar )
+{   /* Initialise every column of *result, including column 3, from the same scalar via vmathV3MakeFromScalar. */
+    vmathV3MakeFromScalar( &result->col0, scalar );
+    vmathV3MakeFromScalar( &result->col1, scalar );
+    vmathV3MakeFromScalar( &result->col2, scalar );
+    vmathV3MakeFromScalar( &result->col3, scalar );
+}
+
+static inline void vmathT3MakeFromCols( VmathTransform3 *result, const VmathVector3 *_col0, const VmathVector3 *_col1, const VmathVector3 *_col2, const VmathVector3 *_col3 )
+{   /* Copy the four given vectors into col0..col3, in that order (the order matters if an argument points into *result). */
+    vmathV3Copy( &result->col0, _col0 );
+    vmathV3Copy( &result->col1, _col1 );
+    vmathV3Copy( &result->col2, _col2 );
+    vmathV3Copy( &result->col3, _col3 );
+}
+
+static inline void vmathT3MakeFromM3V3( VmathTransform3 *result, const VmathMatrix3 *tfrm, const VmathVector3 *translateVec )
+{   /* Columns 0-2 are taken from the 3x3 matrix *tfrm, column 3 from *translateVec. */
+    vmathT3SetUpper3x3( result, tfrm );
+    vmathT3SetTranslation( result, translateVec );
+}
+
+static inline void vmathT3MakeFromQV3( VmathTransform3 *result, const VmathQuat *unitQuat, const VmathVector3 *translateVec )
+{   /* Convert *unitQuat to a 3x3 matrix with vmathM3MakeFromQ, use it for columns 0-2, and take column 3 from *translateVec. */
+    VmathMatrix3 rotation;
+    vmathM3MakeFromQ( &rotation, unitQuat );
+    vmathT3SetUpper3x3( result, &rotation );
+    vmathT3SetTranslation( result, translateVec );
+}
+
+static inline void vmathT3SetCol0( VmathTransform3 *result, const VmathVector3 *_col0 )
+{   /* Overwrite column 0 of *result with *_col0; the other columns are not touched. */
+    vmathV3Copy( &result->col0, _col0 );
+}
+
+static inline void vmathT3SetCol1( VmathTransform3 *result, const VmathVector3 *_col1 )
+{   /* Overwrite column 1 of *result with *_col1; the other columns are not touched. */
+    vmathV3Copy( &result->col1, _col1 );
+}
+
+static inline void vmathT3SetCol2( VmathTransform3 *result, const VmathVector3 *_col2 )
+{   /* Overwrite column 2 of *result with *_col2; the other columns are not touched. */
+    vmathV3Copy( &result->col2, _col2 );
+}
+
+static inline void vmathT3SetCol3( VmathTransform3 *result, const VmathVector3 *_col3 )
+{   /* Overwrite column 3 of *result with *_col3; the other columns are not touched. */
+    vmathV3Copy( &result->col3, _col3 );
+}
+
+static inline void vmathT3SetCol( VmathTransform3 *result, int col, const VmathVector3 *vec )
+{   /* Copy *vec into the column selected by col; col is not range-checked. */
+    vmathV3Copy( (&result->col0 + col), vec );   /* NOTE(review): steps from col0 by pointer arithmetic, so this assumes col0..col3 are laid out back to back - confirm against the VmathTransform3 definition */
+}
+
+static inline void vmathT3SetRow( VmathTransform3 *result, int row, const VmathVector4 *vec )
+{   /* Write elements 0..3 of *vec into element `row` of col0..col3 respectively (one element per column). */
+    vmathV3SetElem( &result->col0, row, vmathV4GetElem( vec, 0 ) );
+    vmathV3SetElem( &result->col1, row, vmathV4GetElem( vec, 1 ) );
+    vmathV3SetElem( &result->col2, row, vmathV4GetElem( vec, 2 ) );
+    vmathV3SetElem( &result->col3, row, vmathV4GetElem( vec, 3 ) );
+}
+
+static inline void vmathT3SetElem( VmathTransform3 *result, int col, int row, float val )
+{   /* Read-modify-write of a single element: fetch column col, set its element row to val, store the column back. */
+    VmathVector3 column;
+    vmathT3GetCol( &column, result, col );
+    vmathV3SetElem( &column, row, val );
+    vmathT3SetCol( result, col, &column );
+}
+
+static inline float vmathT3GetElem( const VmathTransform3 *tfrm, int col, int row )
+{   /* Return element `row` of column `col`, going through a local copy of that column. */
+    VmathVector3 column;
+    vmathT3GetCol( &column, tfrm, col );
+    return vmathV3GetElem( &column, row );
+}
+
+static inline void vmathT3GetCol0( VmathVector3 *result, const VmathTransform3 *tfrm )
+{   /* Copy column 0 of *tfrm into *result. */
+    vmathV3Copy( result, &tfrm->col0 );
+}
+
+static inline void vmathT3GetCol1( VmathVector3 *result, const VmathTransform3 *tfrm )
+{   /* Copy column 1 of *tfrm into *result. */
+    vmathV3Copy( result, &tfrm->col1 );
+}
+
+static inline void vmathT3GetCol2( VmathVector3 *result, const VmathTransform3 *tfrm )
+{   /* Copy column 2 of *tfrm into *result. */
+    vmathV3Copy( result, &tfrm->col2 );
+}
+
+static inline void vmathT3GetCol3( VmathVector3 *result, const VmathTransform3 *tfrm )
+{   /* Copy column 3 of *tfrm into *result. */
+    vmathV3Copy( result, &tfrm->col3 );
+}
+
+static inline void vmathT3GetCol( VmathVector3 *result, const VmathTransform3 *tfrm, int col )
+{   /* Copy the column selected by col into *result; col is not range-checked. */
+    vmathV3Copy( result, (&tfrm->col0 + col) );   /* NOTE(review): steps from col0 by pointer arithmetic, so this assumes col0..col3 are laid out back to back - confirm against the VmathTransform3 definition */
+}
+
+static inline void vmathT3GetRow( VmathVector4 *result, const VmathTransform3 *tfrm, int row )
+{   /* Gather element `row` of col0, col1, col2 and col3 into the four elements of *result. */
+    vmathV4MakeFromElems( result, vmathV3GetElem( &tfrm->col0, row ), vmathV3GetElem( &tfrm->col1, row ), vmathV3GetElem( &tfrm->col2, row ), vmathV3GetElem( &tfrm->col3, row ) );
+}
+
+static inline void vmathT3Inverse( VmathTransform3 *result, const VmathTransform3 *tfrm )
+{   /* General inverse: columns 0-2 come from cross products scaled by the reciprocal determinant; column 3 is -( inverted 3x3 * col3 ). A zero determinant is not checked. */
+    VmathVector3 cross12, cross20, cross01, invCol0, invCol1, invCol2, partX, partY, partZ, sumYZ, sumXYZ, negSum;
+    float invDet;
+    vmathV3Cross( &cross12, &tfrm->col1, &tfrm->col2 );
+    vmathV3Cross( &cross20, &tfrm->col2, &tfrm->col0 );
+    vmathV3Cross( &cross01, &tfrm->col0, &tfrm->col1 );
+    invDet = ( 1.0f / vmathV3Dot( &tfrm->col2, &cross01 ) );   /* determinant = col2 . ( col0 x col1 ) */
+    vmathV3MakeFromElems( &invCol0, ( cross12.x * invDet ), ( cross20.x * invDet ), ( cross01.x * invDet ) );
+    vmathV3MakeFromElems( &invCol1, ( cross12.y * invDet ), ( cross20.y * invDet ), ( cross01.y * invDet ) );
+    vmathV3MakeFromElems( &invCol2, ( cross12.z * invDet ), ( cross20.z * invDet ), ( cross01.z * invDet ) );
+    vmathV3Copy( &result->col0, &invCol0 );
+    vmathV3Copy( &result->col1, &invCol1 );
+    vmathV3Copy( &result->col2, &invCol2 );
+    vmathV3ScalarMul( &partX, &invCol0, tfrm->col3.x );
+    vmathV3ScalarMul( &partY, &invCol1, tfrm->col3.y );
+    vmathV3ScalarMul( &partZ, &invCol2, tfrm->col3.z );
+    vmathV3Add( &sumYZ, &partY, &partZ );
+    vmathV3Add( &sumXYZ, &partX, &sumYZ );
+    vmathV3Neg( &negSum, &sumXYZ );
+    vmathV3Copy( &result->col3, &negSum );
+}
+
+static inline void vmathT3OrthoInverse( VmathTransform3 *result, const VmathTransform3 *tfrm )
+{   /* Inverse that uses the transpose of columns 0-2 in place of a true 3x3 inverse (presumably meant for an orthonormal upper 3x3 - not verified here); column 3 is -( transpose * col3 ). */
+    VmathVector3 invCol0, invCol1, invCol2, partX, partY, partZ, sumYZ, sumXYZ, negSum;
+    vmathV3MakeFromElems( &invCol0, tfrm->col0.x, tfrm->col1.x, tfrm->col2.x );
+    vmathV3MakeFromElems( &invCol1, tfrm->col0.y, tfrm->col1.y, tfrm->col2.y );
+    vmathV3MakeFromElems( &invCol2, tfrm->col0.z, tfrm->col1.z, tfrm->col2.z );
+    vmathV3Copy( &result->col0, &invCol0 );
+    vmathV3Copy( &result->col1, &invCol1 );
+    vmathV3Copy( &result->col2, &invCol2 );
+    vmathV3ScalarMul( &partX, &invCol0, tfrm->col3.x );
+    vmathV3ScalarMul( &partY, &invCol1, tfrm->col3.y );
+    vmathV3ScalarMul( &partZ, &invCol2, tfrm->col3.z );
+    vmathV3Add( &sumYZ, &partY, &partZ );
+    vmathV3Add( &sumXYZ, &partX, &sumYZ );
+    vmathV3Neg( &negSum, &sumXYZ );
+    vmathV3Copy( &result->col3, &negSum );
+}
+
+static inline void vmathT3AbsPerElem( VmathTransform3 *result, const VmathTransform3 *tfrm )
+{   /* Apply vmathV3AbsPerElem to each of the four columns of *tfrm, writing the matching column of *result. */
+    vmathV3AbsPerElem( &result->col0, &tfrm->col0 );
+    vmathV3AbsPerElem( &result->col1, &tfrm->col1 );
+    vmathV3AbsPerElem( &result->col2, &tfrm->col2 );
+    vmathV3AbsPerElem( &result->col3, &tfrm->col3 );
+}
+
+static inline void vmathT3MulV3( VmathVector3 *result, const VmathTransform3 *tfrm, const VmathVector3 *vec )
+{   /* Multiply *vec by columns 0-2 of *tfrm; column 3 does not contribute. */
+    /* All three components are computed before *result is written. */
+    const float outX = ( ( ( tfrm->col0.x * vec->x ) + ( tfrm->col1.x * vec->y ) ) + ( tfrm->col2.x * vec->z ) );
+    const float outY = ( ( ( tfrm->col0.y * vec->x ) + ( tfrm->col1.y * vec->y ) ) + ( tfrm->col2.y * vec->z ) );
+    const float outZ = ( ( ( tfrm->col0.z * vec->x ) + ( tfrm->col1.z * vec->y ) ) + ( tfrm->col2.z * vec->z ) );
+    vmathV3MakeFromElems( result, outX, outY, outZ );
+}
+
+static inline void vmathT3MulP3( VmathPoint3 *result, const VmathTransform3 *tfrm, const VmathPoint3 *pnt )
+{   /* Multiply *pnt by columns 0-2 of *tfrm and then add column 3. */
+    /* All three components are computed before *result is written. */
+    const float outX = ( ( ( ( tfrm->col0.x * pnt->x ) + ( tfrm->col1.x * pnt->y ) ) + ( tfrm->col2.x * pnt->z ) ) + tfrm->col3.x );
+    const float outY = ( ( ( ( tfrm->col0.y * pnt->x ) + ( tfrm->col1.y * pnt->y ) ) + ( tfrm->col2.y * pnt->z ) ) + tfrm->col3.y );
+    const float outZ = ( ( ( ( tfrm->col0.z * pnt->x ) + ( tfrm->col1.z * pnt->y ) ) + ( tfrm->col2.z * pnt->z ) ) + tfrm->col3.z );
+    vmathP3MakeFromElems( result, outX, outY, outZ );
+}
+
+static inline void vmathT3Mul( VmathTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1 )
+{   /* Compose tfrm0 with tfrm1: columns 0-2 of tfrm1 go through vmathT3MulV3 and column 3 through vmathT3MulP3, all using tfrm0. */
+    VmathTransform3 composed;   /* assembled locally and copied out only at the end */
+    VmathPoint3 srcOrigin, dstOrigin;
+    vmathT3MulV3( &composed.col0, tfrm0, &tfrm1->col0 );
+    vmathT3MulV3( &composed.col1, tfrm0, &tfrm1->col1 );
+    vmathT3MulV3( &composed.col2, tfrm0, &tfrm1->col2 );
+    vmathP3MakeFromV3( &srcOrigin, &tfrm1->col3 );
+    vmathT3MulP3( &dstOrigin, tfrm0, &srcOrigin );
+    vmathV3MakeFromP3( &composed.col3, &dstOrigin );
+    vmathT3Copy( result, &composed );
+}
+
+static inline void vmathT3MulPerElem( VmathTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1 )
+{   /* Per-element product: each result column is vmathV3MulPerElem of the matching columns of tfrm0 and tfrm1 (column 3 included). */
+    vmathV3MulPerElem( &result->col0, &tfrm0->col0, &tfrm1->col0 );
+    vmathV3MulPerElem( &result->col1, &tfrm0->col1, &tfrm1->col1 );
+    vmathV3MulPerElem( &result->col2, &tfrm0->col2, &tfrm1->col2 );
+    vmathV3MulPerElem( &result->col3, &tfrm0->col3, &tfrm1->col3 );
+}
+
+static inline void vmathT3MakeIdentity( VmathTransform3 *result )
+{   /* Columns 0-2 become the X, Y and Z axis vectors; column 3 is set from the scalar 0. */
+    vmathV3MakeXAxis( &result->col0 );
+    vmathV3MakeYAxis( &result->col1 );
+    vmathV3MakeZAxis( &result->col2 );
+    vmathV3MakeFromScalar( &result->col3, 0.0f );
+}
+
+static inline void vmathT3SetUpper3x3( VmathTransform3 *result, const VmathMatrix3 *tfrm )
+{   /* Copy the three columns of the 3x3 matrix *tfrm into columns 0-2 of *result; column 3 is left as it was. */
+    vmathV3Copy( &result->col0, &tfrm->col0 );
+    vmathV3Copy( &result->col1, &tfrm->col1 );
+    vmathV3Copy( &result->col2, &tfrm->col2 );
+}
+
+static inline void vmathT3GetUpper3x3( VmathMatrix3 *result, const VmathTransform3 *tfrm )
+{   /* Build the 3x3 matrix *result from columns 0-2 of *tfrm; column 3 is ignored. */
+    vmathM3MakeFromCols( result, &tfrm->col0, &tfrm->col1, &tfrm->col2 );
+}
+
+static inline void vmathT3SetTranslation( VmathTransform3 *result, const VmathVector3 *translateVec )
+{   /* Overwrite column 3 of *result with *translateVec; columns 0-2 are left as they were. */
+    vmathV3Copy( &result->col3, translateVec );
+}
+
+static inline void vmathT3GetTranslation( VmathVector3 *result, const VmathTransform3 *tfrm )
+{   /* Copy column 3 of *tfrm into *result. */
+    vmathV3Copy( result, &tfrm->col3 );
+}
+
+static inline void vmathT3MakeRotationX( VmathTransform3 *result, float radians )
+{   /* Rotation about X: col0 = X axis, col1 = ( 0, cos, sin ), col2 = ( 0, -sin, cos ), col3 = 0. */
+    /* Sine and cosine of the angle are evaluated once. */
+    const float sinA = sinf( radians );
+    const float cosA = cosf( radians );
+    vmathV3MakeXAxis( &result->col0 );
+    vmathV3MakeFromElems( &result->col1, 0.0f, cosA, sinA );
+    vmathV3MakeFromElems( &result->col2, 0.0f, -sinA, cosA );
+    vmathV3MakeFromScalar( &result->col3, 0.0f );
+}
+
+static inline void vmathT3MakeRotationY( VmathTransform3 *result, float radians )
+{   /* Rotation about Y: col0 = ( cos, 0, -sin ), col1 = Y axis, col2 = ( sin, 0, cos ), col3 = 0. */
+    /* Sine and cosine of the angle are evaluated once. */
+    const float sinA = sinf( radians );
+    const float cosA = cosf( radians );
+    vmathV3MakeFromElems( &result->col0, cosA, 0.0f, -sinA );
+    vmathV3MakeYAxis( &result->col1 );
+    vmathV3MakeFromElems( &result->col2, sinA, 0.0f, cosA );
+    vmathV3MakeFromScalar( &result->col3, 0.0f );
+}
+
+static inline void vmathT3MakeRotationZ( VmathTransform3 *result, float radians )
+{   /* Rotation about Z: col0 = ( cos, sin, 0 ), col1 = ( -sin, cos, 0 ), col2 = Z axis, col3 = 0. */
+    /* Sine and cosine of the angle are evaluated once. */
+    const float sinA = sinf( radians );
+    const float cosA = cosf( radians );
+    vmathV3MakeFromElems( &result->col0, cosA, sinA, 0.0f );
+    vmathV3MakeFromElems( &result->col1, -sinA, cosA, 0.0f );
+    vmathV3MakeZAxis( &result->col2 );
+    vmathV3MakeFromScalar( &result->col3, 0.0f );
+}
+
+static inline void vmathT3MakeRotationZYX( VmathTransform3 *result, const VmathVector3 *radiansXYZ )
+{   /* Rotation built from the three angles held in the x, y and z members of *radiansXYZ; column 3 is set to 0. */
+    /* Each sine/cosine is evaluated once; the two products shared by columns 1 and 2 are precomputed. */
+    const float sinX = sinf( radiansXYZ->x );
+    const float cosX = cosf( radiansXYZ->x );
+    const float sinY = sinf( radiansXYZ->y );
+    const float cosY = cosf( radiansXYZ->y );
+    const float sinZ = sinf( radiansXYZ->z );
+    const float cosZ = cosf( radiansXYZ->z );
+    const float cosZsinY = ( cosZ * sinY );
+    const float sinZsinY = ( sinZ * sinY );
+    vmathV3MakeFromElems( &result->col0, ( cosZ * cosY ), ( sinZ * cosY ), -sinY );
+    vmathV3MakeFromElems( &result->col1, ( ( cosZsinY * sinX ) - ( sinZ * cosX ) ), ( ( sinZsinY * sinX ) + ( cosZ * cosX ) ), ( cosY * sinX ) );
+    vmathV3MakeFromElems( &result->col2, ( ( cosZsinY * cosX ) + ( sinZ * sinX ) ), ( ( sinZsinY * cosX ) - ( cosZ * sinX ) ), ( cosY * cosX ) );
+    vmathV3MakeFromScalar( &result->col3, 0.0f );
+}
+
+static inline void vmathT3MakeRotationAxis( VmathTransform3 *result, float radians, const VmathVector3 *unitVec )
+{   /* Columns 0-2 come from vmathM3MakeRotationAxis( radians, unitVec ); column 3 is set from the scalar 0. */
+    VmathMatrix3 rotation;
+    VmathVector3 zeroVec;
+    vmathM3MakeRotationAxis( &rotation, radians, unitVec );
+    vmathV3MakeFromScalar( &zeroVec, 0.0f );
+    vmathT3MakeFromM3V3( result, &rotation, &zeroVec );
+}
+
+static inline void vmathT3MakeRotationQ( VmathTransform3 *result, const VmathQuat *unitQuat )
+{   /* Columns 0-2 come from vmathM3MakeFromQ( unitQuat ); column 3 is set from the scalar 0. */
+    VmathMatrix3 rotation;
+    VmathVector3 zeroVec;
+    vmathM3MakeFromQ( &rotation, unitQuat );
+    vmathV3MakeFromScalar( &zeroVec, 0.0f );
+    vmathT3MakeFromM3V3( result, &rotation, &zeroVec );
+}
+
+static inline void vmathT3MakeScale( VmathTransform3 *result, const VmathVector3 *scaleVec )
+{   /* Diagonal transform: scaleVec x, y, z on the diagonal of columns 0-2, zeros elsewhere, column 3 set from the scalar 0. */
+    vmathV3MakeFromElems( &result->col0, scaleVec->x, 0.0f, 0.0f );
+    vmathV3MakeFromElems( &result->col1, 0.0f, scaleVec->y, 0.0f );
+    vmathV3MakeFromElems( &result->col2, 0.0f, 0.0f, scaleVec->z );
+    vmathV3MakeFromScalar( &result->col3, 0.0f );
+}
+
+static inline void vmathT3AppendScale( VmathTransform3 *result, const VmathTransform3 *tfrm, const VmathVector3 *scaleVec )
+{   /* Scale column 0 by the X, column 1 by the Y and column 2 by the Z of *scaleVec; column 3 is copied unchanged. */
+    vmathV3ScalarMul( &result->col0, &tfrm->col0, vmathV3GetX( scaleVec ) );
+    vmathV3ScalarMul( &result->col1, &tfrm->col1, vmathV3GetY( scaleVec ) );
+    vmathV3ScalarMul( &result->col2, &tfrm->col2, vmathV3GetZ( scaleVec ) );
+    vmathV3Copy( &result->col3, &tfrm->col3 );
+}
+
+static inline void vmathT3PrependScale( VmathTransform3 *result, const VmathVector3 *scaleVec, const VmathTransform3 *tfrm )
+{   /* Multiply every column of *tfrm, column 3 included, per element by *scaleVec. */
+    vmathV3MulPerElem( &result->col0, &tfrm->col0, scaleVec );
+    vmathV3MulPerElem( &result->col1, &tfrm->col1, scaleVec );
+    vmathV3MulPerElem( &result->col2, &tfrm->col2, scaleVec );
+    vmathV3MulPerElem( &result->col3, &tfrm->col3, scaleVec );
+}
+
+static inline void vmathT3MakeTranslation( VmathTransform3 *result, const VmathVector3 *translateVec )
+{   /* Columns 0-2 become the X, Y and Z axis vectors; column 3 is a copy of *translateVec. */
+    vmathV3MakeXAxis( &result->col0 );
+    vmathV3MakeYAxis( &result->col1 );
+    vmathV3MakeZAxis( &result->col2 );
+    vmathV3Copy( &result->col3, translateVec );
+}
+
+static inline void vmathT3Select( VmathTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1, unsigned int select1 )
+{   /* Column-wise select: each result column is vmathV3Select of the matching tfrm0/tfrm1 columns, all with the same select1. */
+    vmathV3Select( &result->col0, &tfrm0->col0, &tfrm1->col0, select1 );
+    vmathV3Select( &result->col1, &tfrm0->col1, &tfrm1->col1, select1 );
+    vmathV3Select( &result->col2, &tfrm0->col2, &tfrm1->col2, select1 );
+    vmathV3Select( &result->col3, &tfrm0->col3, &tfrm1->col3, select1 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathT3Print( const VmathTransform3 *tfrm )
+{   /* Debug helper: extract rows 0-2 of *tfrm as 4-element vectors, in order, and hand each one to vmathV4Print. */
+    VmathVector4 row0, row1, row2;
+    vmathT3GetRow( &row0, tfrm, 0 );
+    vmathV4Print( &row0 );
+    vmathT3GetRow( &row1, tfrm, 1 );
+    vmathV4Print( &row1 );
+    vmathT3GetRow( &row2, tfrm, 2 );
+    vmathV4Print( &row2 );
+}
+
+static inline void vmathT3Prints( const VmathTransform3 *tfrm, const char *name )
+{   /* Debug helper: print name followed by ':' and a newline, then the transform via vmathT3Print. */
+    printf("%s:\n", name);
+    vmathT3Print( tfrm );
+}
+
+#endif
+
+static inline void vmathQMakeFromM3( VmathQuat *result, const VmathMatrix3 *tfrm )
+{   /* Build the quaternion *result from the 3x3 matrix *tfrm using a single square root and no early exits. */
+    float trace, radicand, scale, xx, yx, zx, xy, yy, zy, xz, yz, zz, tmpx, tmpy, tmpz, tmpw, qx, qy, qz, qw;
+    int negTrace, ZgtX, ZgtY, YgtX;
+    int largestXorY, largestYorZ, largestZorX;
+    /* Local naming: first letter = vector member, second letter = column (x,y,z = col0,col1,col2); e.g. yx is col0.y and xz is col2.x. */
+    xx = tfrm->col0.x;
+    yx = tfrm->col0.y;
+    zx = tfrm->col0.z;
+    xy = tfrm->col1.x;
+    yy = tfrm->col1.y;
+    zy = tfrm->col1.z;
+    xz = tfrm->col2.x;
+    yz = tfrm->col2.y;
+    zz = tfrm->col2.z;
+    /* Sum of the diagonal elements. */
+    trace = ( ( xx + yy ) + zz );
+    /* The three "largest" flags are all 0 unless the trace is negative; otherwise they are derived from pairwise comparisons of the diagonal. */
+    negTrace = ( trace < 0.0f );
+    ZgtX = zz > xx;
+    ZgtY = zz > yy;
+    YgtX = yy > xx;
+    largestXorY = ( !ZgtX || !ZgtY ) && negTrace;
+    largestYorZ = ( YgtX || ZgtX ) && negTrace;
+    largestZorX = ( ZgtY || !YgtX ) && negTrace;
+    /* Negate selected terms per flag so the single formula below serves every case; each flag flips one diagonal and one off-diagonal term. */
+    if ( largestXorY )
+    {
+        zz = -zz;
+        xy = -xy;
+    }
+    if ( largestYorZ )
+    {
+        xx = -xx;
+        yz = -yz;
+    }
+    if ( largestZorX )
+    {
+        yy = -yy;
+        zx = -zx;
+    }
+    /* NOTE(review): radicand is passed to sqrtf and divided by without a guard - presumably always positive for a rotation matrix; confirm. */
+    radicand = ( ( ( xx + yy ) + zz ) + 1.0f );
+    scale = ( 0.5f * ( 1.0f / sqrtf( radicand ) ) );
+    /* Candidate components before any reordering. */
+    tmpx = ( ( zy - yz ) * scale );
+    tmpy = ( ( xz - zx ) * scale );
+    tmpz = ( ( yx - xy ) * scale );
+    tmpw = ( radicand * scale );
+    qx = tmpx;
+    qy = tmpy;
+    qz = tmpz;
+    qw = tmpw;
+    /* Reorder components per flag: first a full reversal (x,y,z,w <- w,z,y,x), then pairwise swaps x<->y and z<->w. largestZorX has no reordering step of its own. */
+    if ( largestXorY )
+    {
+        qx = tmpw;
+        qy = tmpz;
+        qz = tmpy;
+        qw = tmpx;
+    }
+    if ( largestYorZ )
+    {
+        tmpx = qx;   /* tmpx/tmpz are reused here as swap scratch */
+        tmpz = qz;
+        qx = qy;
+        qy = tmpx;
+        qz = qw;
+        qw = tmpz;
+    }
+    /* Store the final components. */
+    result->x = qx;
+    result->y = qy;
+    result->z = qz;
+    result->w = qw;
+}
+
+static inline void vmathV3Outer( VmathMatrix3 *result, const VmathVector3 *tfrm0, const VmathVector3 *tfrm1 )
+{   /* Outer product: column i of *result is *tfrm0 scaled by element i (X, Y, Z) of *tfrm1. */
+    vmathV3ScalarMul( &result->col0, tfrm0, vmathV3GetX( tfrm1 ) );
+    vmathV3ScalarMul( &result->col1, tfrm0, vmathV3GetY( tfrm1 ) );
+    vmathV3ScalarMul( &result->col2, tfrm0, vmathV3GetZ( tfrm1 ) );
+}
+
+static inline void vmathV4Outer( VmathMatrix4 *result, const VmathVector4 *tfrm0, const VmathVector4 *tfrm1 )
+{   /* Outer product: column i of *result is *tfrm0 scaled by element i (X, Y, Z, W) of *tfrm1. */
+    vmathV4ScalarMul( &result->col0, tfrm0, vmathV4GetX( tfrm1 ) );
+    vmathV4ScalarMul( &result->col1, tfrm0, vmathV4GetY( tfrm1 ) );
+    vmathV4ScalarMul( &result->col2, tfrm0, vmathV4GetZ( tfrm1 ) );
+    vmathV4ScalarMul( &result->col3, tfrm0, vmathV4GetW( tfrm1 ) );
+}
+
+static inline void vmathV3RowMul( VmathVector3 *result, const VmathVector3 *vec, const VmathMatrix3 *mat )
+{   /* Row-vector times matrix: element i of *result is the dot product of *vec with column i of *mat. */
+    /* All three components are computed before *result is written. */
+    const float outX = ( ( ( vec->x * mat->col0.x ) + ( vec->y * mat->col0.y ) ) + ( vec->z * mat->col0.z ) );
+    const float outY = ( ( ( vec->x * mat->col1.x ) + ( vec->y * mat->col1.y ) ) + ( vec->z * mat->col1.z ) );
+    const float outZ = ( ( ( vec->x * mat->col2.x ) + ( vec->y * mat->col2.y ) ) + ( vec->z * mat->col2.z ) );
+    vmathV3MakeFromElems( result, outX, outY, outZ );
+}
+
+static inline void vmathV3CrossMatrix( VmathMatrix3 *result, const VmathVector3 *vec )
+{   /* Skew-symmetric matrix of *vec: multiplying it by a column vector w yields vec x w. */
+    vmathV3MakeFromElems( &result->col0, 0.0f, vec->z, -vec->y );
+    vmathV3MakeFromElems( &result->col1, -vec->z, 0.0f, vec->x );
+    vmathV3MakeFromElems( &result->col2, vec->y, -vec->x, 0.0f );
+}
+
+static inline void vmathV3CrossMatrixMul( VmathMatrix3 *result, const VmathVector3 *vec, const VmathMatrix3 *mat )
+{   /* Column i of *result is vmathV3Cross( vec, column i of mat ); all three are computed before *result is written. */
+    VmathVector3 crossCol0, crossCol1, crossCol2;
+    vmathV3Cross( &crossCol0, vec, &mat->col0 );
+    vmathV3Cross( &crossCol1, vec, &mat->col1 );
+    vmathV3Cross( &crossCol2, vec, &mat->col2 );
+    vmathM3MakeFromCols( result, &crossCol0, &crossCol1, &crossCol2 );
+}
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/scalar/c/mat_aos_v.h b/Extras/vectormathlibrary/include/vectormath/scalar/c/mat_aos_v.h
index 270ffcb50..6d1c3cd7c 100644
--- a/Extras/vectormathlibrary/include/vectormath/scalar/c/mat_aos_v.h
+++ b/Extras/vectormathlibrary/include/vectormath/scalar/c/mat_aos_v.h
@@ -1,1006 +1,1006 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_MAT_AOS_V_C_H
-#define _VECTORMATH_MAT_AOS_V_C_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/*-----------------------------------------------------------------------------
- * Constants
- */
-#define _VECTORMATH_PI_OVER_2 1.570796327f
-
-/*-----------------------------------------------------------------------------
- * Definitions
- */
-static inline VmathMatrix3 vmathM3MakeFromScalar_V( float scalar )
-{
-    VmathMatrix3 result;
-    vmathM3MakeFromScalar(&result, scalar);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3MakeFromQ_V( VmathQuat unitQuat )
-{
-    VmathMatrix3 result;
-    vmathM3MakeFromQ(&result, &unitQuat);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3MakeFromCols_V( VmathVector3 _col0, VmathVector3 _col1, VmathVector3 _col2 )
-{
-    VmathMatrix3 result;
-    vmathM3MakeFromCols(&result, &_col0, &_col1, &_col2);
-    return result;
-}
-
-static inline void vmathM3SetCol0_V( VmathMatrix3 *result, VmathVector3 _col0 )
-{
-    vmathM3SetCol0(result, &_col0);
-}
-
-static inline void vmathM3SetCol1_V( VmathMatrix3 *result, VmathVector3 _col1 )
-{
-    vmathM3SetCol1(result, &_col1);
-}
-
-static inline void vmathM3SetCol2_V( VmathMatrix3 *result, VmathVector3 _col2 )
-{
-    vmathM3SetCol2(result, &_col2);
-}
-
-static inline void vmathM3SetCol_V( VmathMatrix3 *result, int col, VmathVector3 vec )
-{
-    vmathM3SetCol(result, col, &vec);
-}
-
-static inline void vmathM3SetRow_V( VmathMatrix3 *result, int row, VmathVector3 vec )
-{
-    vmathM3SetRow(result, row, &vec);
-}
-
-static inline void vmathM3SetElem_V( VmathMatrix3 *result, int col, int row, float val )
-{
-    vmathM3SetElem(result, col, row, val);
-}
-
-static inline float vmathM3GetElem_V( VmathMatrix3 mat, int col, int row )
-{
-    return vmathM3GetElem(&mat, col, row);
-}
-
-static inline VmathVector3 vmathM3GetCol0_V( VmathMatrix3 mat )
-{
-    VmathVector3 result;
-    vmathM3GetCol0(&result, &mat);
-    return result;
-}
-
-static inline VmathVector3 vmathM3GetCol1_V( VmathMatrix3 mat )
-{
-    VmathVector3 result;
-    vmathM3GetCol1(&result, &mat);
-    return result;
-}
-
-static inline VmathVector3 vmathM3GetCol2_V( VmathMatrix3 mat )
-{
-    VmathVector3 result;
-    vmathM3GetCol2(&result, &mat);
-    return result;
-}
-
-static inline VmathVector3 vmathM3GetCol_V( VmathMatrix3 mat, int col )
-{
-    VmathVector3 result;
-    vmathM3GetCol(&result, &mat, col);
-    return result;
-}
-
-static inline VmathVector3 vmathM3GetRow_V( VmathMatrix3 mat, int row )
-{
-    VmathVector3 result;
-    vmathM3GetRow(&result, &mat, row);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3Transpose_V( VmathMatrix3 mat )
-{
-    VmathMatrix3 result;
-    vmathM3Transpose(&result, &mat);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3Inverse_V( VmathMatrix3 mat )
-{
-    VmathMatrix3 result;
-    vmathM3Inverse(&result, &mat);
-    return result;
-}
-
-static inline float vmathM3Determinant_V( VmathMatrix3 mat )
-{
-    return vmathM3Determinant(&mat);
-}
-
-static inline VmathMatrix3 vmathM3Add_V( VmathMatrix3 mat0, VmathMatrix3 mat1 )
-{
-    VmathMatrix3 result;
-    vmathM3Add(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3Sub_V( VmathMatrix3 mat0, VmathMatrix3 mat1 )
-{
-    VmathMatrix3 result;
-    vmathM3Sub(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3Neg_V( VmathMatrix3 mat )
-{
-    VmathMatrix3 result;
-    vmathM3Neg(&result, &mat);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3AbsPerElem_V( VmathMatrix3 mat )
-{
-    VmathMatrix3 result;
-    vmathM3AbsPerElem(&result, &mat);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3ScalarMul_V( VmathMatrix3 mat, float scalar )
-{
-    VmathMatrix3 result;
-    vmathM3ScalarMul(&result, &mat, scalar);
-    return result;
-}
-
-static inline VmathVector3 vmathM3MulV3_V( VmathMatrix3 mat, VmathVector3 vec )
-{
-    VmathVector3 result;
-    vmathM3MulV3(&result, &mat, &vec);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3Mul_V( VmathMatrix3 mat0, VmathMatrix3 mat1 )
-{
-    VmathMatrix3 result;
-    vmathM3Mul(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3MulPerElem_V( VmathMatrix3 mat0, VmathMatrix3 mat1 )
-{
-    VmathMatrix3 result;
-    vmathM3MulPerElem(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3MakeIdentity_V( )
-{
-    VmathMatrix3 result;
-    vmathM3MakeIdentity(&result);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3MakeRotationX_V( float radians )
-{
-    VmathMatrix3 result;
-    vmathM3MakeRotationX(&result, radians);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3MakeRotationY_V( float radians )
-{
-    VmathMatrix3 result;
-    vmathM3MakeRotationY(&result, radians);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3MakeRotationZ_V( float radians )
-{
-    VmathMatrix3 result;
-    vmathM3MakeRotationZ(&result, radians);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3MakeRotationZYX_V( VmathVector3 radiansXYZ )
-{
-    VmathMatrix3 result;
-    vmathM3MakeRotationZYX(&result, &radiansXYZ);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3MakeRotationAxis_V( float radians, VmathVector3 unitVec )
-{
-    VmathMatrix3 result;
-    vmathM3MakeRotationAxis(&result, radians, &unitVec);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3MakeRotationQ_V( VmathQuat unitQuat )
-{
-    VmathMatrix3 result;
-    vmathM3MakeRotationQ(&result, &unitQuat);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3MakeScale_V( VmathVector3 scaleVec )
-{
-    VmathMatrix3 result;
-    vmathM3MakeScale(&result, &scaleVec);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3AppendScale_V( VmathMatrix3 mat, VmathVector3 scaleVec )
-{
-    VmathMatrix3 result;
-    vmathM3AppendScale(&result, &mat, &scaleVec);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3PrependScale_V( VmathVector3 scaleVec, VmathMatrix3 mat )
-{
-    VmathMatrix3 result;
-    vmathM3PrependScale(&result, &scaleVec, &mat);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3Select_V( VmathMatrix3 mat0, VmathMatrix3 mat1, unsigned int select1 )
-{
-    VmathMatrix3 result;
-    vmathM3Select(&result, &mat0, &mat1, select1);
-    return result;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathM3Print_V( VmathMatrix3 mat )
-{
-    vmathM3Print(&mat);
-}
-
-static inline void vmathM3Prints_V( VmathMatrix3 mat, const char *name )
-{
-    vmathM3Prints(&mat, name);
-}
-
-#endif
-
-static inline VmathMatrix4 vmathM4MakeFromScalar_V( float scalar )
-{
-    VmathMatrix4 result;
-    vmathM4MakeFromScalar(&result, scalar);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeFromT3_V( VmathTransform3 mat )
-{
-    VmathMatrix4 result;
-    vmathM4MakeFromT3(&result, &mat);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeFromCols_V( VmathVector4 _col0, VmathVector4 _col1, VmathVector4 _col2, VmathVector4 _col3 )
-{
-    VmathMatrix4 result;
-    vmathM4MakeFromCols(&result, &_col0, &_col1, &_col2, &_col3);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeFromM3V3_V( VmathMatrix3 mat, VmathVector3 translateVec )
-{
-    VmathMatrix4 result;
-    vmathM4MakeFromM3V3(&result, &mat, &translateVec);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeFromQV3_V( VmathQuat unitQuat, VmathVector3 translateVec )
-{
-    VmathMatrix4 result;
-    vmathM4MakeFromQV3(&result, &unitQuat, &translateVec);
-    return result;
-}
-
-static inline void vmathM4SetCol0_V( VmathMatrix4 *result, VmathVector4 _col0 )
-{
-    vmathM4SetCol0(result, &_col0);
-}
-
-static inline void vmathM4SetCol1_V( VmathMatrix4 *result, VmathVector4 _col1 )
-{
-    vmathM4SetCol1(result, &_col1);
-}
-
-static inline void vmathM4SetCol2_V( VmathMatrix4 *result, VmathVector4 _col2 )
-{
-    vmathM4SetCol2(result, &_col2);
-}
-
-static inline void vmathM4SetCol3_V( VmathMatrix4 *result, VmathVector4 _col3 )
-{
-    vmathM4SetCol3(result, &_col3);
-}
-
-static inline void vmathM4SetCol_V( VmathMatrix4 *result, int col, VmathVector4 vec )
-{
-    vmathM4SetCol(result, col, &vec);
-}
-
-static inline void vmathM4SetRow_V( VmathMatrix4 *result, int row, VmathVector4 vec )
-{
-    vmathM4SetRow(result, row, &vec);
-}
-
-static inline void vmathM4SetElem_V( VmathMatrix4 *result, int col, int row, float val )
-{
-    vmathM4SetElem(result, col, row, val);
-}
-
-static inline float vmathM4GetElem_V( VmathMatrix4 mat, int col, int row )
-{
-    return vmathM4GetElem(&mat, col, row);
-}
-
-static inline VmathVector4 vmathM4GetCol0_V( VmathMatrix4 mat )
-{
-    VmathVector4 result;
-    vmathM4GetCol0(&result, &mat);
-    return result;
-}
-
-static inline VmathVector4 vmathM4GetCol1_V( VmathMatrix4 mat )
-{
-    VmathVector4 result;
-    vmathM4GetCol1(&result, &mat);
-    return result;
-}
-
-static inline VmathVector4 vmathM4GetCol2_V( VmathMatrix4 mat )
-{
-    VmathVector4 result;
-    vmathM4GetCol2(&result, &mat);
-    return result;
-}
-
-static inline VmathVector4 vmathM4GetCol3_V( VmathMatrix4 mat )
-{
-    VmathVector4 result;
-    vmathM4GetCol3(&result, &mat);
-    return result;
-}
-
-static inline VmathVector4 vmathM4GetCol_V( VmathMatrix4 mat, int col )
-{
-    VmathVector4 result;
-    vmathM4GetCol(&result, &mat, col);
-    return result;
-}
-
-static inline VmathVector4 vmathM4GetRow_V( VmathMatrix4 mat, int row )
-{
-    VmathVector4 result;
-    vmathM4GetRow(&result, &mat, row);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4Transpose_V( VmathMatrix4 mat )
-{
-    VmathMatrix4 result;
-    vmathM4Transpose(&result, &mat);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4Inverse_V( VmathMatrix4 mat )
-{
-    VmathMatrix4 result;
-    vmathM4Inverse(&result, &mat);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4AffineInverse_V( VmathMatrix4 mat )
-{
-    VmathMatrix4 result;
-    vmathM4AffineInverse(&result, &mat);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4OrthoInverse_V( VmathMatrix4 mat )
-{
-    VmathMatrix4 result;
-    vmathM4OrthoInverse(&result, &mat);
-    return result;
-}
-
-static inline float vmathM4Determinant_V( VmathMatrix4 mat )
-{
-    return vmathM4Determinant(&mat);
-}
-
-static inline VmathMatrix4 vmathM4Add_V( VmathMatrix4 mat0, VmathMatrix4 mat1 )
-{
-    VmathMatrix4 result;
-    vmathM4Add(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4Sub_V( VmathMatrix4 mat0, VmathMatrix4 mat1 )
-{
-    VmathMatrix4 result;
-    vmathM4Sub(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4Neg_V( VmathMatrix4 mat )
-{
-    VmathMatrix4 result;
-    vmathM4Neg(&result, &mat);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4AbsPerElem_V( VmathMatrix4 mat )
-{
-    VmathMatrix4 result;
-    vmathM4AbsPerElem(&result, &mat);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4ScalarMul_V( VmathMatrix4 mat, float scalar )
-{
-    VmathMatrix4 result;
-    vmathM4ScalarMul(&result, &mat, scalar);
-    return result;
-}
-
-static inline VmathVector4 vmathM4MulV4_V( VmathMatrix4 mat, VmathVector4 vec )
-{
-    VmathVector4 result;
-    vmathM4MulV4(&result, &mat, &vec);
-    return result;
-}
-
-static inline VmathVector4 vmathM4MulV3_V( VmathMatrix4 mat, VmathVector3 vec )
-{
-    VmathVector4 result;
-    vmathM4MulV3(&result, &mat, &vec);
-    return result;
-}
-
-static inline VmathVector4 vmathM4MulP3_V( VmathMatrix4 mat, VmathPoint3 pnt )
-{
-    VmathVector4 result;
-    vmathM4MulP3(&result, &mat, &pnt);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4Mul_V( VmathMatrix4 mat0, VmathMatrix4 mat1 )
-{
-    VmathMatrix4 result;
-    vmathM4Mul(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MulT3_V( VmathMatrix4 mat, VmathTransform3 tfrm1 )
-{
-    VmathMatrix4 result;
-    vmathM4MulT3(&result, &mat, &tfrm1);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MulPerElem_V( VmathMatrix4 mat0, VmathMatrix4 mat1 )
-{
-    VmathMatrix4 result;
-    vmathM4MulPerElem(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeIdentity_V( )
-{
-    VmathMatrix4 result;
-    vmathM4MakeIdentity(&result);
-    return result;
-}
-
-static inline void vmathM4SetUpper3x3_V( VmathMatrix4 *result, VmathMatrix3 mat3 )
-{
-    vmathM4SetUpper3x3(result, &mat3);
-}
-
-static inline VmathMatrix3 vmathM4GetUpper3x3_V( VmathMatrix4 mat )
-{
-    VmathMatrix3 result;
-    vmathM4GetUpper3x3(&result, &mat);
-    return result;
-}
-
-static inline void vmathM4SetTranslation_V( VmathMatrix4 *result, VmathVector3 translateVec )
-{
-    vmathM4SetTranslation(result, &translateVec);
-}
-
-static inline VmathVector3 vmathM4GetTranslation_V( VmathMatrix4 mat )
-{
-    VmathVector3 result;
-    vmathM4GetTranslation(&result, &mat);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeRotationX_V( float radians )
-{
-    VmathMatrix4 result;
-    vmathM4MakeRotationX(&result, radians);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeRotationY_V( float radians )
-{
-    VmathMatrix4 result;
-    vmathM4MakeRotationY(&result, radians);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeRotationZ_V( float radians )
-{
-    VmathMatrix4 result;
-    vmathM4MakeRotationZ(&result, radians);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeRotationZYX_V( VmathVector3 radiansXYZ )
-{
-    VmathMatrix4 result;
-    vmathM4MakeRotationZYX(&result, &radiansXYZ);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeRotationAxis_V( float radians, VmathVector3 unitVec )
-{
-    VmathMatrix4 result;
-    vmathM4MakeRotationAxis(&result, radians, &unitVec);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeRotationQ_V( VmathQuat unitQuat )
-{
-    VmathMatrix4 result;
-    vmathM4MakeRotationQ(&result, &unitQuat);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeScale_V( VmathVector3 scaleVec )
-{
-    VmathMatrix4 result;
-    vmathM4MakeScale(&result, &scaleVec);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4AppendScale_V( VmathMatrix4 mat, VmathVector3 scaleVec )
-{
-    VmathMatrix4 result;
-    vmathM4AppendScale(&result, &mat, &scaleVec);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4PrependScale_V( VmathVector3 scaleVec, VmathMatrix4 mat )
-{
-    VmathMatrix4 result;
-    vmathM4PrependScale(&result, &scaleVec, &mat);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeTranslation_V( VmathVector3 translateVec )
-{
-    VmathMatrix4 result;
-    vmathM4MakeTranslation(&result, &translateVec);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeLookAt_V( VmathPoint3 eyePos, VmathPoint3 lookAtPos, VmathVector3 upVec )
-{
-    VmathMatrix4 result;
-    vmathM4MakeLookAt(&result, &eyePos, &lookAtPos, &upVec);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakePerspective_V( float fovyRadians, float aspect, float zNear, float zFar )
-{
-    VmathMatrix4 result;
-    vmathM4MakePerspective(&result, fovyRadians, aspect, zNear, zFar);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeFrustum_V( float left, float right, float bottom, float top, float zNear, float zFar )
-{
-    VmathMatrix4 result;
-    vmathM4MakeFrustum(&result, left, right, bottom, top, zNear, zFar);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeOrthographic_V( float left, float right, float bottom, float top, float zNear, float zFar )
-{
-    VmathMatrix4 result;
-    vmathM4MakeOrthographic(&result, left, right, bottom, top, zNear, zFar);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4Select_V( VmathMatrix4 mat0, VmathMatrix4 mat1, unsigned int select1 )
-{
-    VmathMatrix4 result;
-    vmathM4Select(&result, &mat0, &mat1, select1);
-    return result;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathM4Print_V( VmathMatrix4 mat )
-{
-    vmathM4Print(&mat);
-}
-
-static inline void vmathM4Prints_V( VmathMatrix4 mat, const char *name )
-{
-    vmathM4Prints(&mat, name);
-}
-
-#endif
-
-static inline VmathTransform3 vmathT3MakeFromScalar_V( float scalar )
-{
-    VmathTransform3 result;
-    vmathT3MakeFromScalar(&result, scalar);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MakeFromCols_V( VmathVector3 _col0, VmathVector3 _col1, VmathVector3 _col2, VmathVector3 _col3 )
-{
-    VmathTransform3 result;
-    vmathT3MakeFromCols(&result, &_col0, &_col1, &_col2, &_col3);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MakeFromM3V3_V( VmathMatrix3 tfrm, VmathVector3 translateVec )
-{
-    VmathTransform3 result;
-    vmathT3MakeFromM3V3(&result, &tfrm, &translateVec);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MakeFromQV3_V( VmathQuat unitQuat, VmathVector3 translateVec )
-{
-    VmathTransform3 result;
-    vmathT3MakeFromQV3(&result, &unitQuat, &translateVec);
-    return result;
-}
-
-static inline void vmathT3SetCol0_V( VmathTransform3 *result, VmathVector3 _col0 )
-{
-    vmathT3SetCol0(result, &_col0);
-}
-
-static inline void vmathT3SetCol1_V( VmathTransform3 *result, VmathVector3 _col1 )
-{
-    vmathT3SetCol1(result, &_col1);
-}
-
-static inline void vmathT3SetCol2_V( VmathTransform3 *result, VmathVector3 _col2 )
-{
-    vmathT3SetCol2(result, &_col2);
-}
-
-static inline void vmathT3SetCol3_V( VmathTransform3 *result, VmathVector3 _col3 )
-{
-    vmathT3SetCol3(result, &_col3);
-}
-
-static inline void vmathT3SetCol_V( VmathTransform3 *result, int col, VmathVector3 vec )
-{
-    vmathT3SetCol(result, col, &vec);
-}
-
-static inline void vmathT3SetRow_V( VmathTransform3 *result, int row, VmathVector4 vec )
-{
-    vmathT3SetRow(result, row, &vec);
-}
-
-static inline void vmathT3SetElem_V( VmathTransform3 *result, int col, int row, float val )
-{
-    vmathT3SetElem(result, col, row, val);
-}
-
-static inline float vmathT3GetElem_V( VmathTransform3 tfrm, int col, int row )
-{
-    return vmathT3GetElem(&tfrm, col, row);
-}
-
-static inline VmathVector3 vmathT3GetCol0_V( VmathTransform3 tfrm )
-{
-    VmathVector3 result;
-    vmathT3GetCol0(&result, &tfrm);
-    return result;
-}
-
-static inline VmathVector3 vmathT3GetCol1_V( VmathTransform3 tfrm )
-{
-    VmathVector3 result;
-    vmathT3GetCol1(&result, &tfrm);
-    return result;
-}
-
-static inline VmathVector3 vmathT3GetCol2_V( VmathTransform3 tfrm )
-{
-    VmathVector3 result;
-    vmathT3GetCol2(&result, &tfrm);
-    return result;
-}
-
-static inline VmathVector3 vmathT3GetCol3_V( VmathTransform3 tfrm )
-{
-    VmathVector3 result;
-    vmathT3GetCol3(&result, &tfrm);
-    return result;
-}
-
-static inline VmathVector3 vmathT3GetCol_V( VmathTransform3 tfrm, int col )
-{
-    VmathVector3 result;
-    vmathT3GetCol(&result, &tfrm, col);
-    return result;
-}
-
-static inline VmathVector4 vmathT3GetRow_V( VmathTransform3 tfrm, int row )
-{
-    VmathVector4 result;
-    vmathT3GetRow(&result, &tfrm, row);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3Inverse_V( VmathTransform3 tfrm )
-{
-    VmathTransform3 result;
-    vmathT3Inverse(&result, &tfrm);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3OrthoInverse_V( VmathTransform3 tfrm )
-{
-    VmathTransform3 result;
-    vmathT3OrthoInverse(&result, &tfrm);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3AbsPerElem_V( VmathTransform3 tfrm )
-{
-    VmathTransform3 result;
-    vmathT3AbsPerElem(&result, &tfrm);
-    return result;
-}
-
-static inline VmathVector3 vmathT3MulV3_V( VmathTransform3 tfrm, VmathVector3 vec )
-{
-    VmathVector3 result;
-    vmathT3MulV3(&result, &tfrm, &vec);
-    return result;
-}
-
-static inline VmathPoint3 vmathT3MulP3_V( VmathTransform3 tfrm, VmathPoint3 pnt )
-{
-    VmathPoint3 result;
-    vmathT3MulP3(&result, &tfrm, &pnt);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3Mul_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1 )
-{
-    VmathTransform3 result;
-    vmathT3Mul(&result, &tfrm0, &tfrm1);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MulPerElem_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1 )
-{
-    VmathTransform3 result;
-    vmathT3MulPerElem(&result, &tfrm0, &tfrm1);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MakeIdentity_V( )
-{
-    VmathTransform3 result;
-    vmathT3MakeIdentity(&result);
-    return result;
-}
-
-static inline void vmathT3SetUpper3x3_V( VmathTransform3 *result, VmathMatrix3 tfrm )
-{
-    vmathT3SetUpper3x3(result, &tfrm);
-}
-
-static inline VmathMatrix3 vmathT3GetUpper3x3_V( VmathTransform3 tfrm )
-{
-    VmathMatrix3 result;
-    vmathT3GetUpper3x3(&result, &tfrm);
-    return result;
-}
-
-static inline void vmathT3SetTranslation_V( VmathTransform3 *result, VmathVector3 translateVec )
-{
-    vmathT3SetTranslation(result, &translateVec);
-}
-
-static inline VmathVector3 vmathT3GetTranslation_V( VmathTransform3 tfrm )
-{
-    VmathVector3 result;
-    vmathT3GetTranslation(&result, &tfrm);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MakeRotationX_V( float radians )
-{
-    VmathTransform3 result;
-    vmathT3MakeRotationX(&result, radians);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MakeRotationY_V( float radians )
-{
-    VmathTransform3 result;
-    vmathT3MakeRotationY(&result, radians);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MakeRotationZ_V( float radians )
-{
-    VmathTransform3 result;
-    vmathT3MakeRotationZ(&result, radians);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MakeRotationZYX_V( VmathVector3 radiansXYZ )
-{
-    VmathTransform3 result;
-    vmathT3MakeRotationZYX(&result, &radiansXYZ);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MakeRotationAxis_V( float radians, VmathVector3 unitVec )
-{
-    VmathTransform3 result;
-    vmathT3MakeRotationAxis(&result, radians, &unitVec);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MakeRotationQ_V( VmathQuat unitQuat )
-{
-    VmathTransform3 result;
-    vmathT3MakeRotationQ(&result, &unitQuat);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MakeScale_V( VmathVector3 scaleVec )
-{
-    VmathTransform3 result;
-    vmathT3MakeScale(&result, &scaleVec);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3AppendScale_V( VmathTransform3 tfrm, VmathVector3 scaleVec )
-{
-    VmathTransform3 result;
-    vmathT3AppendScale(&result, &tfrm, &scaleVec);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3PrependScale_V( VmathVector3 scaleVec, VmathTransform3 tfrm )
-{
-    VmathTransform3 result;
-    vmathT3PrependScale(&result, &scaleVec, &tfrm);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MakeTranslation_V( VmathVector3 translateVec )
-{
-    VmathTransform3 result;
-    vmathT3MakeTranslation(&result, &translateVec);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3Select_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1, unsigned int select1 )
-{
-    VmathTransform3 result;
-    vmathT3Select(&result, &tfrm0, &tfrm1, select1);
-    return result;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathT3Print_V( VmathTransform3 tfrm )
-{
-    vmathT3Print(&tfrm);
-}
-
-static inline void vmathT3Prints_V( VmathTransform3 tfrm, const char *name )
-{
-    vmathT3Prints(&tfrm, name);
-}
-
-#endif
-
-static inline VmathQuat vmathQMakeFromM3_V( VmathMatrix3 tfrm )
-{
-    VmathQuat result;
-    vmathQMakeFromM3(&result, &tfrm);
-    return result;
-}
-
-static inline VmathMatrix3 vmathV3Outer_V( VmathVector3 tfrm0, VmathVector3 tfrm1 )
-{
-    VmathMatrix3 result;
-    vmathV3Outer(&result, &tfrm0, &tfrm1);
-    return result;
-}
-
-static inline VmathMatrix4 vmathV4Outer_V( VmathVector4 tfrm0, VmathVector4 tfrm1 )
-{
-    VmathMatrix4 result;
-    vmathV4Outer(&result, &tfrm0, &tfrm1);
-    return result;
-}
-
-static inline VmathVector3 vmathV3RowMul_V( VmathVector3 vec, VmathMatrix3 mat )
-{
-    VmathVector3 result;
-    vmathV3RowMul(&result, &vec, &mat);
-    return result;
-}
-
-static inline VmathMatrix3 vmathV3CrossMatrix_V( VmathVector3 vec )
-{
-    VmathMatrix3 result;
-    vmathV3CrossMatrix(&result, &vec);
-    return result;
-}
-
-static inline VmathMatrix3 vmathV3CrossMatrixMul_V( VmathVector3 vec, VmathMatrix3 mat )
-{
-    VmathMatrix3 result;
-    vmathV3CrossMatrixMul(&result, &vec, &mat);
-    return result;
-}
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_MAT_AOS_V_C_H
+#define _VECTORMATH_MAT_AOS_V_C_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*-----------------------------------------------------------------------------
+ * Constants
+ */
+#define _VECTORMATH_PI_OVER_2 1.570796327f
+
+/*-----------------------------------------------------------------------------
+ * Definitions: by-value (_V) wrappers that forward to the pointer-based vmath* functions
+ */
+static inline VmathMatrix3 vmathM3MakeFromScalar_V( float scalar )
+{
+    VmathMatrix3 result;
+    vmathM3MakeFromScalar(&result, scalar);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3MakeFromQ_V( VmathQuat unitQuat )
+{
+    VmathMatrix3 result;
+    vmathM3MakeFromQ(&result, &unitQuat);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3MakeFromCols_V( VmathVector3 _col0, VmathVector3 _col1, VmathVector3 _col2 )
+{
+    VmathMatrix3 result;
+    vmathM3MakeFromCols(&result, &_col0, &_col1, &_col2);
+    return result;
+}
+
+static inline void vmathM3SetCol0_V( VmathMatrix3 *result, VmathVector3 _col0 )
+{
+    vmathM3SetCol0(result, &_col0);
+}
+
+static inline void vmathM3SetCol1_V( VmathMatrix3 *result, VmathVector3 _col1 )
+{
+    vmathM3SetCol1(result, &_col1);
+}
+
+static inline void vmathM3SetCol2_V( VmathMatrix3 *result, VmathVector3 _col2 )
+{
+    vmathM3SetCol2(result, &_col2);
+}
+
+static inline void vmathM3SetCol_V( VmathMatrix3 *result, int col, VmathVector3 vec )
+{
+    vmathM3SetCol(result, col, &vec);
+}
+
+static inline void vmathM3SetRow_V( VmathMatrix3 *result, int row, VmathVector3 vec )
+{
+    vmathM3SetRow(result, row, &vec);
+}
+
+static inline void vmathM3SetElem_V( VmathMatrix3 *result, int col, int row, float val )
+{
+    vmathM3SetElem(result, col, row, val);
+}
+
+static inline float vmathM3GetElem_V( VmathMatrix3 mat, int col, int row )
+{
+    return vmathM3GetElem(&mat, col, row);
+}
+
+static inline VmathVector3 vmathM3GetCol0_V( VmathMatrix3 mat )
+{
+    VmathVector3 result;
+    vmathM3GetCol0(&result, &mat);
+    return result;
+}
+
+static inline VmathVector3 vmathM3GetCol1_V( VmathMatrix3 mat )
+{
+    VmathVector3 result;
+    vmathM3GetCol1(&result, &mat);
+    return result;
+}
+
+static inline VmathVector3 vmathM3GetCol2_V( VmathMatrix3 mat )
+{
+    VmathVector3 result;
+    vmathM3GetCol2(&result, &mat);
+    return result;
+}
+
+static inline VmathVector3 vmathM3GetCol_V( VmathMatrix3 mat, int col )
+{
+    VmathVector3 result;
+    vmathM3GetCol(&result, &mat, col);
+    return result;
+}
+
+static inline VmathVector3 vmathM3GetRow_V( VmathMatrix3 mat, int row )
+{
+    VmathVector3 result;
+    vmathM3GetRow(&result, &mat, row);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3Transpose_V( VmathMatrix3 mat )
+{
+    VmathMatrix3 result;
+    vmathM3Transpose(&result, &mat);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3Inverse_V( VmathMatrix3 mat )
+{
+    VmathMatrix3 result;
+    vmathM3Inverse(&result, &mat);
+    return result;
+}
+
+static inline float vmathM3Determinant_V( VmathMatrix3 mat )
+{
+    return vmathM3Determinant(&mat);
+}
+
+static inline VmathMatrix3 vmathM3Add_V( VmathMatrix3 mat0, VmathMatrix3 mat1 )
+{
+    VmathMatrix3 result;
+    vmathM3Add(&result, &mat0, &mat1);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3Sub_V( VmathMatrix3 mat0, VmathMatrix3 mat1 )
+{
+    VmathMatrix3 result;
+    vmathM3Sub(&result, &mat0, &mat1);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3Neg_V( VmathMatrix3 mat )
+{
+    VmathMatrix3 result;
+    vmathM3Neg(&result, &mat);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3AbsPerElem_V( VmathMatrix3 mat )
+{
+    VmathMatrix3 result;
+    vmathM3AbsPerElem(&result, &mat);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3ScalarMul_V( VmathMatrix3 mat, float scalar )
+{
+    VmathMatrix3 result;
+    vmathM3ScalarMul(&result, &mat, scalar);
+    return result;
+}
+
+static inline VmathVector3 vmathM3MulV3_V( VmathMatrix3 mat, VmathVector3 vec )
+{
+    VmathVector3 result;
+    vmathM3MulV3(&result, &mat, &vec);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3Mul_V( VmathMatrix3 mat0, VmathMatrix3 mat1 )
+{
+    VmathMatrix3 result;
+    vmathM3Mul(&result, &mat0, &mat1);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3MulPerElem_V( VmathMatrix3 mat0, VmathMatrix3 mat1 )
+{
+    VmathMatrix3 result;
+    vmathM3MulPerElem(&result, &mat0, &mat1);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3MakeIdentity_V( void ) /* by-value form of vmathM3MakeIdentity; (void) gives C a real prototype */
+{
+    VmathMatrix3 result;          /* local that the pointer-based call writes into */
+    vmathM3MakeIdentity(&result); /* forward to the out-pointer API */
+    return result;                /* hand the filled matrix back by value */
+}
+
+static inline VmathMatrix3 vmathM3MakeRotationX_V( float radians )
+{
+    VmathMatrix3 result;
+    vmathM3MakeRotationX(&result, radians);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3MakeRotationY_V( float radians )
+{
+    VmathMatrix3 result;
+    vmathM3MakeRotationY(&result, radians);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3MakeRotationZ_V( float radians )
+{
+    VmathMatrix3 result;
+    vmathM3MakeRotationZ(&result, radians);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3MakeRotationZYX_V( VmathVector3 radiansXYZ )
+{
+    VmathMatrix3 result;
+    vmathM3MakeRotationZYX(&result, &radiansXYZ);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3MakeRotationAxis_V( float radians, VmathVector3 unitVec )
+{
+    VmathMatrix3 result;
+    vmathM3MakeRotationAxis(&result, radians, &unitVec);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3MakeRotationQ_V( VmathQuat unitQuat )
+{
+    VmathMatrix3 result;
+    vmathM3MakeRotationQ(&result, &unitQuat);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3MakeScale_V( VmathVector3 scaleVec )
+{
+    VmathMatrix3 result;
+    vmathM3MakeScale(&result, &scaleVec);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3AppendScale_V( VmathMatrix3 mat, VmathVector3 scaleVec )
+{
+    VmathMatrix3 result;
+    vmathM3AppendScale(&result, &mat, &scaleVec);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3PrependScale_V( VmathVector3 scaleVec, VmathMatrix3 mat )
+{
+    VmathMatrix3 result;
+    vmathM3PrependScale(&result, &scaleVec, &mat);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3Select_V( VmathMatrix3 mat0, VmathMatrix3 mat1, unsigned int select1 )
+{
+    VmathMatrix3 result;
+    vmathM3Select(&result, &mat0, &mat1, select1);
+    return result;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathM3Print_V( VmathMatrix3 mat )
+{
+    vmathM3Print(&mat);
+}
+
+static inline void vmathM3Prints_V( VmathMatrix3 mat, const char *name )
+{
+    vmathM3Prints(&mat, name);
+}
+
+#endif
+
+static inline VmathMatrix4 vmathM4MakeFromScalar_V( float scalar )
+{
+    VmathMatrix4 result;
+    vmathM4MakeFromScalar(&result, scalar);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeFromT3_V( VmathTransform3 mat )
+{
+    VmathMatrix4 result;
+    vmathM4MakeFromT3(&result, &mat);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeFromCols_V( VmathVector4 _col0, VmathVector4 _col1, VmathVector4 _col2, VmathVector4 _col3 )
+{
+    VmathMatrix4 result;
+    vmathM4MakeFromCols(&result, &_col0, &_col1, &_col2, &_col3);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeFromM3V3_V( VmathMatrix3 mat, VmathVector3 translateVec )
+{
+    VmathMatrix4 result;
+    vmathM4MakeFromM3V3(&result, &mat, &translateVec);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeFromQV3_V( VmathQuat unitQuat, VmathVector3 translateVec )
+{
+    VmathMatrix4 result;
+    vmathM4MakeFromQV3(&result, &unitQuat, &translateVec);
+    return result;
+}
+
+static inline void vmathM4SetCol0_V( VmathMatrix4 *result, VmathVector4 _col0 )
+{
+    vmathM4SetCol0(result, &_col0);
+}
+
+static inline void vmathM4SetCol1_V( VmathMatrix4 *result, VmathVector4 _col1 )
+{
+    vmathM4SetCol1(result, &_col1);
+}
+
+static inline void vmathM4SetCol2_V( VmathMatrix4 *result, VmathVector4 _col2 )
+{
+    vmathM4SetCol2(result, &_col2);
+}
+
+static inline void vmathM4SetCol3_V( VmathMatrix4 *result, VmathVector4 _col3 )
+{
+    vmathM4SetCol3(result, &_col3);
+}
+
+static inline void vmathM4SetCol_V( VmathMatrix4 *result, int col, VmathVector4 vec )
+{
+    vmathM4SetCol(result, col, &vec);
+}
+
+static inline void vmathM4SetRow_V( VmathMatrix4 *result, int row, VmathVector4 vec )
+{
+    vmathM4SetRow(result, row, &vec);
+}
+
+static inline void vmathM4SetElem_V( VmathMatrix4 *result, int col, int row, float val )
+{
+    vmathM4SetElem(result, col, row, val);
+}
+
+static inline float vmathM4GetElem_V( VmathMatrix4 mat, int col, int row )
+{
+    return vmathM4GetElem(&mat, col, row);
+}
+
+static inline VmathVector4 vmathM4GetCol0_V( VmathMatrix4 mat )
+{
+    VmathVector4 result;
+    vmathM4GetCol0(&result, &mat);
+    return result;
+}
+
+static inline VmathVector4 vmathM4GetCol1_V( VmathMatrix4 mat )
+{
+    VmathVector4 result;
+    vmathM4GetCol1(&result, &mat);
+    return result;
+}
+
+static inline VmathVector4 vmathM4GetCol2_V( VmathMatrix4 mat )
+{
+    VmathVector4 result;
+    vmathM4GetCol2(&result, &mat);
+    return result;
+}
+
+static inline VmathVector4 vmathM4GetCol3_V( VmathMatrix4 mat )
+{
+    VmathVector4 result;
+    vmathM4GetCol3(&result, &mat);
+    return result;
+}
+
+static inline VmathVector4 vmathM4GetCol_V( VmathMatrix4 mat, int col )
+{
+    VmathVector4 result;
+    vmathM4GetCol(&result, &mat, col);
+    return result;
+}
+
+static inline VmathVector4 vmathM4GetRow_V( VmathMatrix4 mat, int row )
+{
+    VmathVector4 result;
+    vmathM4GetRow(&result, &mat, row);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4Transpose_V( VmathMatrix4 mat )
+{
+    VmathMatrix4 result;
+    vmathM4Transpose(&result, &mat);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4Inverse_V( VmathMatrix4 mat )
+{
+    VmathMatrix4 result;
+    vmathM4Inverse(&result, &mat);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4AffineInverse_V( VmathMatrix4 mat )
+{
+    VmathMatrix4 result;
+    vmathM4AffineInverse(&result, &mat);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4OrthoInverse_V( VmathMatrix4 mat )
+{
+    VmathMatrix4 result;
+    vmathM4OrthoInverse(&result, &mat);
+    return result;
+}
+
+static inline float vmathM4Determinant_V( VmathMatrix4 mat )
+{
+    return vmathM4Determinant(&mat);
+}
+
+static inline VmathMatrix4 vmathM4Add_V( VmathMatrix4 mat0, VmathMatrix4 mat1 )
+{
+    VmathMatrix4 result;
+    vmathM4Add(&result, &mat0, &mat1);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4Sub_V( VmathMatrix4 mat0, VmathMatrix4 mat1 )
+{
+    VmathMatrix4 result;
+    vmathM4Sub(&result, &mat0, &mat1);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4Neg_V( VmathMatrix4 mat )
+{
+    VmathMatrix4 result;
+    vmathM4Neg(&result, &mat);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4AbsPerElem_V( VmathMatrix4 mat )
+{
+    VmathMatrix4 result;
+    vmathM4AbsPerElem(&result, &mat);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4ScalarMul_V( VmathMatrix4 mat, float scalar )
+{
+    VmathMatrix4 result;
+    vmathM4ScalarMul(&result, &mat, scalar);
+    return result;
+}
+
+static inline VmathVector4 vmathM4MulV4_V( VmathMatrix4 mat, VmathVector4 vec )
+{
+    VmathVector4 result;
+    vmathM4MulV4(&result, &mat, &vec);
+    return result;
+}
+
+static inline VmathVector4 vmathM4MulV3_V( VmathMatrix4 mat, VmathVector3 vec )
+{
+    VmathVector4 result;
+    vmathM4MulV3(&result, &mat, &vec);
+    return result;
+}
+
+static inline VmathVector4 vmathM4MulP3_V( VmathMatrix4 mat, VmathPoint3 pnt )
+{
+    VmathVector4 result;
+    vmathM4MulP3(&result, &mat, &pnt);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4Mul_V( VmathMatrix4 mat0, VmathMatrix4 mat1 )
+{
+    VmathMatrix4 result;
+    vmathM4Mul(&result, &mat0, &mat1);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MulT3_V( VmathMatrix4 mat, VmathTransform3 tfrm1 )
+{
+    VmathMatrix4 result;
+    vmathM4MulT3(&result, &mat, &tfrm1);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MulPerElem_V( VmathMatrix4 mat0, VmathMatrix4 mat1 )
+{
+    VmathMatrix4 result;
+    vmathM4MulPerElem(&result, &mat0, &mat1);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeIdentity_V( void ) /* by-value form of vmathM4MakeIdentity; (void) gives C a real prototype */
+{
+    VmathMatrix4 result;          /* local that the pointer-based call writes into */
+    vmathM4MakeIdentity(&result); /* forward to the out-pointer API */
+    return result;                /* hand the filled matrix back by value */
+}
+
+static inline void vmathM4SetUpper3x3_V( VmathMatrix4 *result, VmathMatrix3 mat3 )
+{
+    vmathM4SetUpper3x3(result, &mat3);
+}
+
+static inline VmathMatrix3 vmathM4GetUpper3x3_V( VmathMatrix4 mat )
+{
+    VmathMatrix3 result;
+    vmathM4GetUpper3x3(&result, &mat);
+    return result;
+}
+
+static inline void vmathM4SetTranslation_V( VmathMatrix4 *result, VmathVector3 translateVec )
+{
+    vmathM4SetTranslation(result, &translateVec);
+}
+
+static inline VmathVector3 vmathM4GetTranslation_V( VmathMatrix4 mat )
+{
+    VmathVector3 result;
+    vmathM4GetTranslation(&result, &mat);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeRotationX_V( float radians )
+{
+    VmathMatrix4 result;
+    vmathM4MakeRotationX(&result, radians);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeRotationY_V( float radians )
+{
+    VmathMatrix4 result;
+    vmathM4MakeRotationY(&result, radians);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeRotationZ_V( float radians )
+{
+    VmathMatrix4 result;
+    vmathM4MakeRotationZ(&result, radians);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeRotationZYX_V( VmathVector3 radiansXYZ )
+{
+    VmathMatrix4 result;
+    vmathM4MakeRotationZYX(&result, &radiansXYZ);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeRotationAxis_V( float radians, VmathVector3 unitVec )
+{
+    VmathMatrix4 result;
+    vmathM4MakeRotationAxis(&result, radians, &unitVec);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeRotationQ_V( VmathQuat unitQuat )
+{
+    VmathMatrix4 result;
+    vmathM4MakeRotationQ(&result, &unitQuat);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeScale_V( VmathVector3 scaleVec )
+{
+    VmathMatrix4 result;
+    vmathM4MakeScale(&result, &scaleVec);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4AppendScale_V( VmathMatrix4 mat, VmathVector3 scaleVec )
+{
+    VmathMatrix4 result;
+    vmathM4AppendScale(&result, &mat, &scaleVec);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4PrependScale_V( VmathVector3 scaleVec, VmathMatrix4 mat )
+{
+    VmathMatrix4 result;
+    vmathM4PrependScale(&result, &scaleVec, &mat);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeTranslation_V( VmathVector3 translateVec )
+{
+    VmathMatrix4 result;
+    vmathM4MakeTranslation(&result, &translateVec);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeLookAt_V( VmathPoint3 eyePos, VmathPoint3 lookAtPos, VmathVector3 upVec )
+{
+    VmathMatrix4 result;
+    vmathM4MakeLookAt(&result, &eyePos, &lookAtPos, &upVec);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakePerspective_V( float fovyRadians, float aspect, float zNear, float zFar )
+{
+    VmathMatrix4 result;
+    vmathM4MakePerspective(&result, fovyRadians, aspect, zNear, zFar);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeFrustum_V( float left, float right, float bottom, float top, float zNear, float zFar )
+{
+    VmathMatrix4 result;
+    vmathM4MakeFrustum(&result, left, right, bottom, top, zNear, zFar);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeOrthographic_V( float left, float right, float bottom, float top, float zNear, float zFar )
+{
+    VmathMatrix4 result;
+    vmathM4MakeOrthographic(&result, left, right, bottom, top, zNear, zFar);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4Select_V( VmathMatrix4 mat0, VmathMatrix4 mat1, unsigned int select1 )
+{
+    VmathMatrix4 result;
+    vmathM4Select(&result, &mat0, &mat1, select1);
+    return result;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathM4Print_V( VmathMatrix4 mat )
+{
+    vmathM4Print(&mat);
+}
+
+static inline void vmathM4Prints_V( VmathMatrix4 mat, const char *name )
+{
+    vmathM4Prints(&mat, name);
+}
+
+#endif
+
+static inline VmathTransform3 vmathT3MakeFromScalar_V( float scalar )
+{
+    VmathTransform3 result;
+    vmathT3MakeFromScalar(&result, scalar);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3MakeFromCols_V( VmathVector3 _col0, VmathVector3 _col1, VmathVector3 _col2, VmathVector3 _col3 )
+{
+    VmathTransform3 result;
+    vmathT3MakeFromCols(&result, &_col0, &_col1, &_col2, &_col3);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3MakeFromM3V3_V( VmathMatrix3 tfrm, VmathVector3 translateVec )
+{
+    VmathTransform3 result;
+    vmathT3MakeFromM3V3(&result, &tfrm, &translateVec);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3MakeFromQV3_V( VmathQuat unitQuat, VmathVector3 translateVec )
+{
+    VmathTransform3 result;
+    vmathT3MakeFromQV3(&result, &unitQuat, &translateVec);
+    return result;
+}
+
+static inline void vmathT3SetCol0_V( VmathTransform3 *result, VmathVector3 _col0 )
+{
+    vmathT3SetCol0(result, &_col0);
+}
+
+static inline void vmathT3SetCol1_V( VmathTransform3 *result, VmathVector3 _col1 )
+{
+    vmathT3SetCol1(result, &_col1);
+}
+
+static inline void vmathT3SetCol2_V( VmathTransform3 *result, VmathVector3 _col2 )
+{
+    vmathT3SetCol2(result, &_col2);
+}
+
+static inline void vmathT3SetCol3_V( VmathTransform3 *result, VmathVector3 _col3 )
+{
+    vmathT3SetCol3(result, &_col3);
+}
+
+static inline void vmathT3SetCol_V( VmathTransform3 *result, int col, VmathVector3 vec )
+{
+    vmathT3SetCol(result, col, &vec);
+}
+
+static inline void vmathT3SetRow_V( VmathTransform3 *result, int row, VmathVector4 vec )
+{
+    vmathT3SetRow(result, row, &vec);
+}
+
+static inline void vmathT3SetElem_V( VmathTransform3 *result, int col, int row, float val )
+{
+    vmathT3SetElem(result, col, row, val);
+}
+
+static inline float vmathT3GetElem_V( VmathTransform3 tfrm, int col, int row )
+{
+    return vmathT3GetElem(&tfrm, col, row);
+}
+
+static inline VmathVector3 vmathT3GetCol0_V( VmathTransform3 tfrm )
+{
+    VmathVector3 result;
+    vmathT3GetCol0(&result, &tfrm);
+    return result;
+}
+
+static inline VmathVector3 vmathT3GetCol1_V( VmathTransform3 tfrm )
+{
+    VmathVector3 result;
+    vmathT3GetCol1(&result, &tfrm);
+    return result;
+}
+
+static inline VmathVector3 vmathT3GetCol2_V( VmathTransform3 tfrm )
+{
+    VmathVector3 result;
+    vmathT3GetCol2(&result, &tfrm);
+    return result;
+}
+
+static inline VmathVector3 vmathT3GetCol3_V( VmathTransform3 tfrm )
+{
+    VmathVector3 result;
+    vmathT3GetCol3(&result, &tfrm);
+    return result;
+}
+
+static inline VmathVector3 vmathT3GetCol_V( VmathTransform3 tfrm, int col )
+{
+    VmathVector3 result;
+    vmathT3GetCol(&result, &tfrm, col);
+    return result;
+}
+
+static inline VmathVector4 vmathT3GetRow_V( VmathTransform3 tfrm, int row )
+{
+    VmathVector4 result;
+    vmathT3GetRow(&result, &tfrm, row);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3Inverse_V( VmathTransform3 tfrm )
+{
+    VmathTransform3 result;
+    vmathT3Inverse(&result, &tfrm);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3OrthoInverse_V( VmathTransform3 tfrm )
+{
+    VmathTransform3 result;
+    vmathT3OrthoInverse(&result, &tfrm);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3AbsPerElem_V( VmathTransform3 tfrm )
+{
+    VmathTransform3 result;
+    vmathT3AbsPerElem(&result, &tfrm);
+    return result;
+}
+
+static inline VmathVector3 vmathT3MulV3_V( VmathTransform3 tfrm, VmathVector3 vec )
+{
+    VmathVector3 result;
+    vmathT3MulV3(&result, &tfrm, &vec);
+    return result;
+}
+
+static inline VmathPoint3 vmathT3MulP3_V( VmathTransform3 tfrm, VmathPoint3 pnt )
+{
+    VmathPoint3 result;
+    vmathT3MulP3(&result, &tfrm, &pnt);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3Mul_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1 )
+{
+    VmathTransform3 result;
+    vmathT3Mul(&result, &tfrm0, &tfrm1);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3MulPerElem_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1 )
+{
+    VmathTransform3 result;
+    vmathT3MulPerElem(&result, &tfrm0, &tfrm1);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3MakeIdentity_V( void ) /* by-value form of vmathT3MakeIdentity; "( void )" gives a real prototype in C, where "( )" leaves the parameters unspecified */
+{
+    VmathTransform3 result;
+    vmathT3MakeIdentity(&result); /* the pointer-based routine writes into the local */
+    return result; /* handed back to the caller by value */
+}
+
+static inline void vmathT3SetUpper3x3_V( VmathTransform3 *result, VmathMatrix3 tfrm )
+{
+    vmathT3SetUpper3x3(result, &tfrm);
+}
+
+static inline VmathMatrix3 vmathT3GetUpper3x3_V( VmathTransform3 tfrm )
+{
+    VmathMatrix3 result;
+    vmathT3GetUpper3x3(&result, &tfrm);
+    return result;
+}
+
+static inline void vmathT3SetTranslation_V( VmathTransform3 *result, VmathVector3 translateVec )
+{
+    vmathT3SetTranslation(result, &translateVec);
+}
+
+static inline VmathVector3 vmathT3GetTranslation_V( VmathTransform3 tfrm )
+{
+    VmathVector3 result;
+    vmathT3GetTranslation(&result, &tfrm);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3MakeRotationX_V( float radians )
+{
+    VmathTransform3 result;
+    vmathT3MakeRotationX(&result, radians);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3MakeRotationY_V( float radians )
+{
+    VmathTransform3 result;
+    vmathT3MakeRotationY(&result, radians);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3MakeRotationZ_V( float radians )
+{
+    VmathTransform3 result;
+    vmathT3MakeRotationZ(&result, radians);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3MakeRotationZYX_V( VmathVector3 radiansXYZ )
+{
+    VmathTransform3 result;
+    vmathT3MakeRotationZYX(&result, &radiansXYZ);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3MakeRotationAxis_V( float radians, VmathVector3 unitVec )
+{
+    VmathTransform3 result;
+    vmathT3MakeRotationAxis(&result, radians, &unitVec);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3MakeRotationQ_V( VmathQuat unitQuat )
+{
+    VmathTransform3 result;
+    vmathT3MakeRotationQ(&result, &unitQuat);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3MakeScale_V( VmathVector3 scaleVec )
+{
+    VmathTransform3 result;
+    vmathT3MakeScale(&result, &scaleVec);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3AppendScale_V( VmathTransform3 tfrm, VmathVector3 scaleVec )
+{
+    VmathTransform3 result;
+    vmathT3AppendScale(&result, &tfrm, &scaleVec);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3PrependScale_V( VmathVector3 scaleVec, VmathTransform3 tfrm )
+{
+    VmathTransform3 result;
+    vmathT3PrependScale(&result, &scaleVec, &tfrm);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3MakeTranslation_V( VmathVector3 translateVec )
+{
+    VmathTransform3 result;
+    vmathT3MakeTranslation(&result, &translateVec);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3Select_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1, unsigned int select1 )
+{
+    VmathTransform3 result;
+    vmathT3Select(&result, &tfrm0, &tfrm1, select1);
+    return result;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathT3Print_V( VmathTransform3 tfrm )
+{
+    vmathT3Print(&tfrm);
+}
+
+static inline void vmathT3Prints_V( VmathTransform3 tfrm, const char *name )
+{
+    vmathT3Prints(&tfrm, name);
+}
+
+#endif
+
+static inline VmathQuat vmathQMakeFromM3_V( VmathMatrix3 tfrm )
+{
+    VmathQuat result;
+    vmathQMakeFromM3(&result, &tfrm);
+    return result;
+}
+
+static inline VmathMatrix3 vmathV3Outer_V( VmathVector3 tfrm0, VmathVector3 tfrm1 )
+{
+    VmathMatrix3 result;
+    vmathV3Outer(&result, &tfrm0, &tfrm1);
+    return result;
+}
+
+static inline VmathMatrix4 vmathV4Outer_V( VmathVector4 tfrm0, VmathVector4 tfrm1 )
+{
+    VmathMatrix4 result;
+    vmathV4Outer(&result, &tfrm0, &tfrm1);
+    return result;
+}
+
+static inline VmathVector3 vmathV3RowMul_V( VmathVector3 vec, VmathMatrix3 mat )
+{
+    VmathVector3 result;
+    vmathV3RowMul(&result, &vec, &mat);
+    return result;
+}
+
+static inline VmathMatrix3 vmathV3CrossMatrix_V( VmathVector3 vec )
+{
+    VmathMatrix3 result;
+    vmathV3CrossMatrix(&result, &vec);
+    return result;
+}
+
+static inline VmathMatrix3 vmathV3CrossMatrixMul_V( VmathVector3 vec, VmathMatrix3 mat )
+{
+    VmathMatrix3 result;
+    vmathV3CrossMatrixMul(&result, &vec, &mat);
+    return result;
+}
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/scalar/c/quat_aos.h b/Extras/vectormathlibrary/include/vectormath/scalar/c/quat_aos.h
index c886ab669..2b114e257 100644
--- a/Extras/vectormathlibrary/include/vectormath/scalar/c/quat_aos.h
+++ b/Extras/vectormathlibrary/include/vectormath/scalar/c/quat_aos.h
@@ -1,368 +1,368 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_QUAT_AOS_C_H
-#define _VECTORMATH_QUAT_AOS_C_H
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/*-----------------------------------------------------------------------------
- * Definitions
- */
-#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
-#define _VECTORMATH_INTERNAL_FUNCTIONS
-
-#endif
-
-static inline void vmathQCopy( VmathQuat *result, const VmathQuat *quat )
-{
-    result->x = quat->x;
-    result->y = quat->y;
-    result->z = quat->z;
-    result->w = quat->w;
-}
-
-static inline void vmathQMakeFromElems( VmathQuat *result, float _x, float _y, float _z, float _w )
-{
-    result->x = _x;
-    result->y = _y;
-    result->z = _z;
-    result->w = _w;
-}
-
-static inline void vmathQMakeFromV3Scalar( VmathQuat *result, const VmathVector3 *xyz, float _w )
-{
-    vmathQSetXYZ( result, xyz );
-    vmathQSetW( result, _w );
-}
-
-static inline void vmathQMakeFromV4( VmathQuat *result, const VmathVector4 *vec )
-{
-    result->x = vec->x;
-    result->y = vec->y;
-    result->z = vec->z;
-    result->w = vec->w;
-}
-
-static inline void vmathQMakeFromScalar( VmathQuat *result, float scalar )
-{
-    result->x = scalar;
-    result->y = scalar;
-    result->z = scalar;
-    result->w = scalar;
-}
-
-static inline void vmathQMakeIdentity( VmathQuat *result )
-{
-    vmathQMakeFromElems( result, 0.0f, 0.0f, 0.0f, 1.0f );
-}
-
-static inline void vmathQLerp( VmathQuat *result, float t, const VmathQuat *quat0, const VmathQuat *quat1 )
-{
-    VmathQuat tmpQ_0, tmpQ_1;
-    vmathQSub( &tmpQ_0, quat1, quat0 );
-    vmathQScalarMul( &tmpQ_1, &tmpQ_0, t );
-    vmathQAdd( result, quat0, &tmpQ_1 );
-}
-
-static inline void vmathQSlerp( VmathQuat *result, float t, const VmathQuat *unitQuat0, const VmathQuat *unitQuat1 )
-{
-    VmathQuat start, tmpQ_0, tmpQ_1;
-    float recipSinAngle, scale0, scale1, cosAngle, angle;
-    cosAngle = vmathQDot( unitQuat0, unitQuat1 );
-    if ( cosAngle < 0.0f ) {
-        cosAngle = -cosAngle;
-        vmathQNeg( &start, unitQuat0 );
-    } else {
-        vmathQCopy( &start, unitQuat0 );
-    }
-    if ( cosAngle < _VECTORMATH_SLERP_TOL ) {
-        angle = acosf( cosAngle );
-        recipSinAngle = ( 1.0f / sinf( angle ) );
-        scale0 = ( sinf( ( ( 1.0f - t ) * angle ) ) * recipSinAngle );
-        scale1 = ( sinf( ( t * angle ) ) * recipSinAngle );
-    } else {
-        scale0 = ( 1.0f - t );
-        scale1 = t;
-    }
-    vmathQScalarMul( &tmpQ_0, &start, scale0 );
-    vmathQScalarMul( &tmpQ_1, unitQuat1, scale1 );
-    vmathQAdd( result, &tmpQ_0, &tmpQ_1 );
-}
-
-static inline void vmathQSquad( VmathQuat *result, float t, const VmathQuat *unitQuat0, const VmathQuat *unitQuat1, const VmathQuat *unitQuat2, const VmathQuat *unitQuat3 )
-{
-    VmathQuat tmp0, tmp1;
-    vmathQSlerp( &tmp0, t, unitQuat0, unitQuat3 );
-    vmathQSlerp( &tmp1, t, unitQuat1, unitQuat2 );
-    vmathQSlerp( result, ( ( 2.0f * t ) * ( 1.0f - t ) ), &tmp0, &tmp1 );
-}
-
-static inline void vmathQSetXYZ( VmathQuat *result, const VmathVector3 *vec )
-{
-    result->x = vec->x;
-    result->y = vec->y;
-    result->z = vec->z;
-}
-
-static inline void vmathQGetXYZ( VmathVector3 *result, const VmathQuat *quat )
-{
-    vmathV3MakeFromElems( result, quat->x, quat->y, quat->z );
-}
-
-static inline void vmathQSetX( VmathQuat *result, float _x )
-{
-    result->x = _x;
-}
-
-static inline float vmathQGetX( const VmathQuat *quat )
-{
-    return quat->x;
-}
-
-static inline void vmathQSetY( VmathQuat *result, float _y )
-{
-    result->y = _y;
-}
-
-static inline float vmathQGetY( const VmathQuat *quat )
-{
-    return quat->y;
-}
-
-static inline void vmathQSetZ( VmathQuat *result, float _z )
-{
-    result->z = _z;
-}
-
-static inline float vmathQGetZ( const VmathQuat *quat )
-{
-    return quat->z;
-}
-
-static inline void vmathQSetW( VmathQuat *result, float _w )
-{
-    result->w = _w;
-}
-
-static inline float vmathQGetW( const VmathQuat *quat )
-{
-    return quat->w;
-}
-
-static inline void vmathQSetElem( VmathQuat *result, int idx, float value )
-{
-    *(&result->x + idx) = value;
-}
-
-static inline float vmathQGetElem( const VmathQuat *quat, int idx )
-{
-    return *(&quat->x + idx);
-}
-
-static inline void vmathQAdd( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1 )
-{
-    result->x = ( quat0->x + quat1->x );
-    result->y = ( quat0->y + quat1->y );
-    result->z = ( quat0->z + quat1->z );
-    result->w = ( quat0->w + quat1->w );
-}
-
-static inline void vmathQSub( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1 )
-{
-    result->x = ( quat0->x - quat1->x );
-    result->y = ( quat0->y - quat1->y );
-    result->z = ( quat0->z - quat1->z );
-    result->w = ( quat0->w - quat1->w );
-}
-
-static inline void vmathQScalarMul( VmathQuat *result, const VmathQuat *quat, float scalar )
-{
-    result->x = ( quat->x * scalar );
-    result->y = ( quat->y * scalar );
-    result->z = ( quat->z * scalar );
-    result->w = ( quat->w * scalar );
-}
-
-static inline void vmathQScalarDiv( VmathQuat *result, const VmathQuat *quat, float scalar )
-{
-    result->x = ( quat->x / scalar );
-    result->y = ( quat->y / scalar );
-    result->z = ( quat->z / scalar );
-    result->w = ( quat->w / scalar );
-}
-
-static inline void vmathQNeg( VmathQuat *result, const VmathQuat *quat )
-{
-    result->x = -quat->x;
-    result->y = -quat->y;
-    result->z = -quat->z;
-    result->w = -quat->w;
-}
-
-static inline float vmathQDot( const VmathQuat *quat0, const VmathQuat *quat1 )
-{
-    float result;
-    result = ( quat0->x * quat1->x );
-    result = ( result + ( quat0->y * quat1->y ) );
-    result = ( result + ( quat0->z * quat1->z ) );
-    result = ( result + ( quat0->w * quat1->w ) );
-    return result;
-}
-
-static inline float vmathQNorm( const VmathQuat *quat )
-{
-    float result;
-    result = ( quat->x * quat->x );
-    result = ( result + ( quat->y * quat->y ) );
-    result = ( result + ( quat->z * quat->z ) );
-    result = ( result + ( quat->w * quat->w ) );
-    return result;
-}
-
-static inline float vmathQLength( const VmathQuat *quat )
-{
-    return sqrtf( vmathQNorm( quat ) );
-}
-
-static inline void vmathQNormalize( VmathQuat *result, const VmathQuat *quat )
-{
-    float lenSqr, lenInv;
-    lenSqr = vmathQNorm( quat );
-    lenInv = ( 1.0f / sqrtf( lenSqr ) );
-    result->x = ( quat->x * lenInv );
-    result->y = ( quat->y * lenInv );
-    result->z = ( quat->z * lenInv );
-    result->w = ( quat->w * lenInv );
-}
-
-static inline void vmathQMakeRotationArc( VmathQuat *result, const VmathVector3 *unitVec0, const VmathVector3 *unitVec1 )
-{
-    VmathVector3 tmpV3_0, tmpV3_1;
-    float cosHalfAngleX2, recipCosHalfAngleX2;
-    cosHalfAngleX2 = sqrtf( ( 2.0f * ( 1.0f + vmathV3Dot( unitVec0, unitVec1 ) ) ) );
-    recipCosHalfAngleX2 = ( 1.0f / cosHalfAngleX2 );
-    vmathV3Cross( &tmpV3_0, unitVec0, unitVec1 );
-    vmathV3ScalarMul( &tmpV3_1, &tmpV3_0, recipCosHalfAngleX2 );
-    vmathQMakeFromV3Scalar( result, &tmpV3_1, ( cosHalfAngleX2 * 0.5f ) );
-}
-
-static inline void vmathQMakeRotationAxis( VmathQuat *result, float radians, const VmathVector3 *unitVec )
-{
-    VmathVector3 tmpV3_0;
-    float s, c, angle;
-    angle = ( radians * 0.5f );
-    s = sinf( angle );
-    c = cosf( angle );
-    vmathV3ScalarMul( &tmpV3_0, unitVec, s );
-    vmathQMakeFromV3Scalar( result, &tmpV3_0, c );
-}
-
-static inline void vmathQMakeRotationX( VmathQuat *result, float radians )
-{
-    float s, c, angle;
-    angle = ( radians * 0.5f );
-    s = sinf( angle );
-    c = cosf( angle );
-    vmathQMakeFromElems( result, s, 0.0f, 0.0f, c );
-}
-
-static inline void vmathQMakeRotationY( VmathQuat *result, float radians )
-{
-    float s, c, angle;
-    angle = ( radians * 0.5f );
-    s = sinf( angle );
-    c = cosf( angle );
-    vmathQMakeFromElems( result, 0.0f, s, 0.0f, c );
-}
-
-static inline void vmathQMakeRotationZ( VmathQuat *result, float radians )
-{
-    float s, c, angle;
-    angle = ( radians * 0.5f );
-    s = sinf( angle );
-    c = cosf( angle );
-    vmathQMakeFromElems( result, 0.0f, 0.0f, s, c );
-}
-
-static inline void vmathQMul( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1 )
-{
-    float tmpX, tmpY, tmpZ, tmpW;
-    tmpX = ( ( ( ( quat0->w * quat1->x ) + ( quat0->x * quat1->w ) ) + ( quat0->y * quat1->z ) ) - ( quat0->z * quat1->y ) );
-    tmpY = ( ( ( ( quat0->w * quat1->y ) + ( quat0->y * quat1->w ) ) + ( quat0->z * quat1->x ) ) - ( quat0->x * quat1->z ) );
-    tmpZ = ( ( ( ( quat0->w * quat1->z ) + ( quat0->z * quat1->w ) ) + ( quat0->x * quat1->y ) ) - ( quat0->y * quat1->x ) );
-    tmpW = ( ( ( ( quat0->w * quat1->w ) - ( quat0->x * quat1->x ) ) - ( quat0->y * quat1->y ) ) - ( quat0->z * quat1->z ) );
-    vmathQMakeFromElems( result, tmpX, tmpY, tmpZ, tmpW );
-}
-
-static inline void vmathQRotate( VmathVector3 *result, const VmathQuat *quat, const VmathVector3 *vec )
-{
-    float tmpX, tmpY, tmpZ, tmpW;
-    tmpX = ( ( ( quat->w * vec->x ) + ( quat->y * vec->z ) ) - ( quat->z * vec->y ) );
-    tmpY = ( ( ( quat->w * vec->y ) + ( quat->z * vec->x ) ) - ( quat->x * vec->z ) );
-    tmpZ = ( ( ( quat->w * vec->z ) + ( quat->x * vec->y ) ) - ( quat->y * vec->x ) );
-    tmpW = ( ( ( quat->x * vec->x ) + ( quat->y * vec->y ) ) + ( quat->z * vec->z ) );
-    result->x = ( ( ( ( tmpW * quat->x ) + ( tmpX * quat->w ) ) - ( tmpY * quat->z ) ) + ( tmpZ * quat->y ) );
-    result->y = ( ( ( ( tmpW * quat->y ) + ( tmpY * quat->w ) ) - ( tmpZ * quat->x ) ) + ( tmpX * quat->z ) );
-    result->z = ( ( ( ( tmpW * quat->z ) + ( tmpZ * quat->w ) ) - ( tmpX * quat->y ) ) + ( tmpY * quat->x ) );
-}
-
-static inline void vmathQConj( VmathQuat *result, const VmathQuat *quat )
-{
-    vmathQMakeFromElems( result, -quat->x, -quat->y, -quat->z, quat->w );
-}
-
-static inline void vmathQSelect( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1, unsigned int select1 )
-{
-    result->x = ( select1 )? quat1->x : quat0->x;
-    result->y = ( select1 )? quat1->y : quat0->y;
-    result->z = ( select1 )? quat1->z : quat0->z;
-    result->w = ( select1 )? quat1->w : quat0->w;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathQPrint( const VmathQuat *quat )
-{
-    printf( "( %f %f %f %f )\n", quat->x, quat->y, quat->z, quat->w );
-}
-
-static inline void vmathQPrints( const VmathQuat *quat, const char *name )
-{
-    printf( "%s: ( %f %f %f %f )\n", name, quat->x, quat->y, quat->z, quat->w );
-}
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_QUAT_AOS_C_H
+#define _VECTORMATH_QUAT_AOS_C_H
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*-----------------------------------------------------------------------------
+ * Definitions
+ */
+#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
+#define _VECTORMATH_INTERNAL_FUNCTIONS
+
+#endif
+
+static inline void vmathQCopy( VmathQuat *result, const VmathQuat *quat )
+{
+    result->x = quat->x;
+    result->y = quat->y;
+    result->z = quat->z;
+    result->w = quat->w;
+}
+
+static inline void vmathQMakeFromElems( VmathQuat *result, float _x, float _y, float _z, float _w )
+{
+    result->x = _x;
+    result->y = _y;
+    result->z = _z;
+    result->w = _w;
+}
+
+static inline void vmathQMakeFromV3Scalar( VmathQuat *result, const VmathVector3 *xyz, float _w )
+{
+    vmathQSetXYZ( result, xyz );
+    vmathQSetW( result, _w );
+}
+
+static inline void vmathQMakeFromV4( VmathQuat *result, const VmathVector4 *vec )
+{
+    result->x = vec->x;
+    result->y = vec->y;
+    result->z = vec->z;
+    result->w = vec->w;
+}
+
+static inline void vmathQMakeFromScalar( VmathQuat *result, float scalar )
+{
+    result->x = scalar;
+    result->y = scalar;
+    result->z = scalar;
+    result->w = scalar;
+}
+
+static inline void vmathQMakeIdentity( VmathQuat *result )
+{
+    vmathQMakeFromElems( result, 0.0f, 0.0f, 0.0f, 1.0f );
+}
+
+static inline void vmathQLerp( VmathQuat *result, float t, const VmathQuat *quat0, const VmathQuat *quat1 ) /* result = quat0 + t * ( quat1 - quat0 ), component-wise */
+{
+    VmathQuat tmpQ_0, tmpQ_1;
+    vmathQSub( &tmpQ_0, quat1, quat0 ); /* tmpQ_0 = quat1 - quat0 */
+    vmathQScalarMul( &tmpQ_1, &tmpQ_0, t ); /* tmpQ_1 = t * ( quat1 - quat0 ) */
+    vmathQAdd( result, quat0, &tmpQ_1 ); /* result is written only here; t is not clamped and the sum is not renormalized */
+}
+
+static inline void vmathQSlerp( VmathQuat *result, float t, const VmathQuat *unitQuat0, const VmathQuat *unitQuat1 ) /* result = scale0 * ( +/- unitQuat0 ) + scale1 * unitQuat1, with the weights chosen below */
+{
+    VmathQuat start, tmpQ_0, tmpQ_1;
+    float recipSinAngle, scale0, scale1, cosAngle, angle;
+    cosAngle = vmathQDot( unitQuat0, unitQuat1 ); /* 4-component dot product of the two inputs */
+    if ( cosAngle < 0.0f ) {
+        cosAngle = -cosAngle; /* negative dot: flip its sign and use the negated unitQuat0 as the start */
+        vmathQNeg( &start, unitQuat0 );
+    } else {
+        vmathQCopy( &start, unitQuat0 );
+    }
+    if ( cosAngle < _VECTORMATH_SLERP_TOL ) { /* _VECTORMATH_SLERP_TOL is not defined in this part of the header */
+        angle = acosf( cosAngle );
+        recipSinAngle = ( 1.0f / sinf( angle ) ); /* common divisor of both sine-based weights */
+        scale0 = ( sinf( ( ( 1.0f - t ) * angle ) ) * recipSinAngle );
+        scale1 = ( sinf( ( t * angle ) ) * recipSinAngle );
+    } else {
+        scale0 = ( 1.0f - t ); /* at or above the threshold: plain linear weights, so no division by sinf( angle ) */
+        scale1 = t;
+    }
+    vmathQScalarMul( &tmpQ_0, &start, scale0 );
+    vmathQScalarMul( &tmpQ_1, unitQuat1, scale1 );
+    vmathQAdd( result, &tmpQ_0, &tmpQ_1 ); /* result is written only here, after every read of the inputs */
+}
+
+static inline void vmathQSquad( VmathQuat *result, float t, const VmathQuat *unitQuat0, const VmathQuat *unitQuat1, const VmathQuat *unitQuat2, const VmathQuat *unitQuat3 ) /* three chained vmathQSlerp calls */
+{
+    VmathQuat tmp0, tmp1;
+    vmathQSlerp( &tmp0, t, unitQuat0, unitQuat3 ); /* tmp0: slerp between unitQuat0 and unitQuat3 at t */
+    vmathQSlerp( &tmp1, t, unitQuat1, unitQuat2 ); /* tmp1: slerp between unitQuat1 and unitQuat2 at t */
+    vmathQSlerp( result, ( ( 2.0f * t ) * ( 1.0f - t ) ), &tmp0, &tmp1 ); /* final slerp between tmp0 and tmp1 with parameter 2 * t * ( 1 - t ) */
+}
+
+static inline void vmathQSetXYZ( VmathQuat *result, const VmathVector3 *vec )
+{
+    result->x = vec->x;
+    result->y = vec->y;
+    result->z = vec->z;
+}
+
+static inline void vmathQGetXYZ( VmathVector3 *result, const VmathQuat *quat )
+{
+    vmathV3MakeFromElems( result, quat->x, quat->y, quat->z );
+}
+
+static inline void vmathQSetX( VmathQuat *result, float _x )
+{
+    result->x = _x;
+}
+
+static inline float vmathQGetX( const VmathQuat *quat )
+{
+    return quat->x;
+}
+
+static inline void vmathQSetY( VmathQuat *result, float _y )
+{
+    result->y = _y;
+}
+
+static inline float vmathQGetY( const VmathQuat *quat )
+{
+    return quat->y;
+}
+
+static inline void vmathQSetZ( VmathQuat *result, float _z )
+{
+    result->z = _z;
+}
+
+static inline float vmathQGetZ( const VmathQuat *quat )
+{
+    return quat->z;
+}
+
+static inline void vmathQSetW( VmathQuat *result, float _w )
+{
+    result->w = _w;
+}
+
+static inline float vmathQGetW( const VmathQuat *quat )
+{
+    return quat->w;
+}
+
+static inline void vmathQSetElem( VmathQuat *result, int idx, float value ) /* stores value in the idx-th float counted from member x */
+{
+    *(&result->x + idx) = value; /* idx is not range-checked; NOTE(review): presumably relies on x, y, z, w being consecutive floats in VmathQuat - confirm against the struct definition */
+}
+
+static inline float vmathQGetElem( const VmathQuat *quat, int idx ) /* returns the idx-th float counted from member x */
+{
+    return *(&quat->x + idx); /* idx is not range-checked; NOTE(review): presumably relies on x, y, z, w being consecutive floats in VmathQuat - confirm against the struct definition */
+}
+
+static inline void vmathQAdd( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1 )
+{
+    result->x = ( quat0->x + quat1->x );
+    result->y = ( quat0->y + quat1->y );
+    result->z = ( quat0->z + quat1->z );
+    result->w = ( quat0->w + quat1->w );
+}
+
+static inline void vmathQSub( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1 )
+{
+    result->x = ( quat0->x - quat1->x );
+    result->y = ( quat0->y - quat1->y );
+    result->z = ( quat0->z - quat1->z );
+    result->w = ( quat0->w - quat1->w );
+}
+
+static inline void vmathQScalarMul( VmathQuat *result, const VmathQuat *quat, float scalar )
+{
+    result->x = ( quat->x * scalar );
+    result->y = ( quat->y * scalar );
+    result->z = ( quat->z * scalar );
+    result->w = ( quat->w * scalar );
+}
+
+static inline void vmathQScalarDiv( VmathQuat *result, const VmathQuat *quat, float scalar ) /* result = quat / scalar, component-wise; scalar is not checked against zero */
+{
+    result->x = ( quat->x / scalar );
+    result->y = ( quat->y / scalar );
+    result->z = ( quat->z / scalar );
+    result->w = ( quat->w / scalar );
+}
+
+static inline void vmathQNeg( VmathQuat *result, const VmathQuat *quat )
+{
+    result->x = -quat->x;
+    result->y = -quat->y;
+    result->z = -quat->z;
+    result->w = -quat->w;
+}
+
+static inline float vmathQDot( const VmathQuat *quat0, const VmathQuat *quat1 )
+{
+    float result;
+    result = ( quat0->x * quat1->x );
+    result = ( result + ( quat0->y * quat1->y ) );
+    result = ( result + ( quat0->z * quat1->z ) );
+    result = ( result + ( quat0->w * quat1->w ) );
+    return result;
+}
+
+static inline float vmathQNorm( const VmathQuat *quat ) /* returns x*x + y*y + z*z + w*w, i.e. the squared length, not the length */
+{
+    float result;
+    result = ( quat->x * quat->x );
+    result = ( result + ( quat->y * quat->y ) ); /* terms are accumulated one at a time in x, y, z, w order */
+    result = ( result + ( quat->z * quat->z ) );
+    result = ( result + ( quat->w * quat->w ) );
+    return result;
+}
+
+static inline float vmathQLength( const VmathQuat *quat )
+{
+    return sqrtf( vmathQNorm( quat ) );
+}
+
+static inline void vmathQNormalize( VmathQuat *result, const VmathQuat *quat ) /* result = quat scaled by 1 / sqrtf( squared length ) */
+{
+    float lenSqr, lenInv;
+    lenSqr = vmathQNorm( quat ); /* sum of the squared components */
+    lenInv = ( 1.0f / sqrtf( lenSqr ) ); /* no guard: an all-zero quat makes this a division by zero */
+    result->x = ( quat->x * lenInv );
+    result->y = ( quat->y * lenInv );
+    result->z = ( quat->z * lenInv );
+    result->w = ( quat->w * lenInv );
+}
+
+static inline void vmathQMakeRotationArc( VmathQuat *result, const VmathVector3 *unitVec0, const VmathVector3 *unitVec1 ) /* builds a quaternion from the dot and cross products of the two input vectors */
+{
+    VmathVector3 tmpV3_0, tmpV3_1;
+    float cosHalfAngleX2, recipCosHalfAngleX2;
+    cosHalfAngleX2 = sqrtf( ( 2.0f * ( 1.0f + vmathV3Dot( unitVec0, unitVec1 ) ) ) ); /* sqrt( 2 * ( 1 + dot ) ) */
+    recipCosHalfAngleX2 = ( 1.0f / cosHalfAngleX2 ); /* no guard: a dot product of -1 makes cosHalfAngleX2 zero and this a division by zero */
+    vmathV3Cross( &tmpV3_0, unitVec0, unitVec1 );
+    vmathV3ScalarMul( &tmpV3_1, &tmpV3_0, recipCosHalfAngleX2 ); /* tmpV3_1 becomes the xyz part */
+    vmathQMakeFromV3Scalar( result, &tmpV3_1, ( cosHalfAngleX2 * 0.5f ) ); /* w = cosHalfAngleX2 / 2 */
+}
+
+static inline void vmathQMakeRotationAxis( VmathQuat *result, float radians, const VmathVector3 *unitVec )
+{
+    VmathVector3 tmpV3_0;
+    float s, c, angle;
+    angle = ( radians * 0.5f );
+    s = sinf( angle );
+    c = cosf( angle );
+    vmathV3ScalarMul( &tmpV3_0, unitVec, s );
+    vmathQMakeFromV3Scalar( result, &tmpV3_0, c );
+}
+
+static inline void vmathQMakeRotationX( VmathQuat *result, float radians )
+{
+    float s, c, angle;
+    angle = ( radians * 0.5f );
+    s = sinf( angle );
+    c = cosf( angle );
+    vmathQMakeFromElems( result, s, 0.0f, 0.0f, c );
+}
+
+static inline void vmathQMakeRotationY( VmathQuat *result, float radians )
+{
+    float s, c, angle;
+    angle = ( radians * 0.5f );
+    s = sinf( angle );
+    c = cosf( angle );
+    vmathQMakeFromElems( result, 0.0f, s, 0.0f, c );
+}
+
+static inline void vmathQMakeRotationZ( VmathQuat *result, float radians )
+{
+    float s, c, angle;
+    angle = ( radians * 0.5f );
+    s = sinf( angle );
+    c = cosf( angle );
+    vmathQMakeFromElems( result, 0.0f, 0.0f, s, c );
+}
+
+static inline void vmathQMul( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1 )
+{
+    float tmpX, tmpY, tmpZ, tmpW;
+    tmpX = ( ( ( ( quat0->w * quat1->x ) + ( quat0->x * quat1->w ) ) + ( quat0->y * quat1->z ) ) - ( quat0->z * quat1->y ) );
+    tmpY = ( ( ( ( quat0->w * quat1->y ) + ( quat0->y * quat1->w ) ) + ( quat0->z * quat1->x ) ) - ( quat0->x * quat1->z ) );
+    tmpZ = ( ( ( ( quat0->w * quat1->z ) + ( quat0->z * quat1->w ) ) + ( quat0->x * quat1->y ) ) - ( quat0->y * quat1->x ) );
+    tmpW = ( ( ( ( quat0->w * quat1->w ) - ( quat0->x * quat1->x ) ) - ( quat0->y * quat1->y ) ) - ( quat0->z * quat1->z ) );
+    vmathQMakeFromElems( result, tmpX, tmpY, tmpZ, tmpW );
+}
+
+static inline void vmathQRotate( VmathVector3 *result, const VmathQuat *quat, const VmathVector3 *vec )
+{
+    float tmpX, tmpY, tmpZ, tmpW;
+    tmpX = ( ( ( quat->w * vec->x ) + ( quat->y * vec->z ) ) - ( quat->z * vec->y ) );
+    tmpY = ( ( ( quat->w * vec->y ) + ( quat->z * vec->x ) ) - ( quat->x * vec->z ) );
+    tmpZ = ( ( ( quat->w * vec->z ) + ( quat->x * vec->y ) ) - ( quat->y * vec->x ) );
+    tmpW = ( ( ( quat->x * vec->x ) + ( quat->y * vec->y ) ) + ( quat->z * vec->z ) );
+    result->x = ( ( ( ( tmpW * quat->x ) + ( tmpX * quat->w ) ) - ( tmpY * quat->z ) ) + ( tmpZ * quat->y ) );
+    result->y = ( ( ( ( tmpW * quat->y ) + ( tmpY * quat->w ) ) - ( tmpZ * quat->x ) ) + ( tmpX * quat->z ) );
+    result->z = ( ( ( ( tmpW * quat->z ) + ( tmpZ * quat->w ) ) - ( tmpX * quat->y ) ) + ( tmpY * quat->x ) );
+}
+
+static inline void vmathQConj( VmathQuat *result, const VmathQuat *quat )
+{
+    vmathQMakeFromElems( result, -quat->x, -quat->y, -quat->z, quat->w );
+}
+
+static inline void vmathQSelect( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1, unsigned int select1 )
+{
+    result->x = ( select1 )? quat1->x : quat0->x;
+    result->y = ( select1 )? quat1->y : quat0->y;
+    result->z = ( select1 )? quat1->z : quat0->z;
+    result->w = ( select1 )? quat1->w : quat0->w;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathQPrint( const VmathQuat *quat )
+{
+    printf( "( %f %f %f %f )\n", quat->x, quat->y, quat->z, quat->w );
+}
+
+static inline void vmathQPrints( const VmathQuat *quat, const char *name )
+{
+    printf( "%s: ( %f %f %f %f )\n", name, quat->x, quat->y, quat->z, quat->w );
+}
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/scalar/c/quat_aos_v.h b/Extras/vectormathlibrary/include/vectormath/scalar/c/quat_aos_v.h
index b5a9e248d..8ccf0789b 100644
--- a/Extras/vectormathlibrary/include/vectormath/scalar/c/quat_aos_v.h
+++ b/Extras/vectormathlibrary/include/vectormath/scalar/c/quat_aos_v.h
@@ -1,300 +1,300 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_QUAT_AOS_V_C_H
-#define _VECTORMATH_QUAT_AOS_V_C_H
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/*-----------------------------------------------------------------------------
- * Definitions
- */
-#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
-#define _VECTORMATH_INTERNAL_FUNCTIONS
-
-#endif
-
-static inline VmathQuat vmathQMakeFromElems_V( float _x, float _y, float _z, float _w )
-{
-    VmathQuat result;
-    vmathQMakeFromElems(&result, _x, _y, _z, _w);
-    return result;
-}
-
-static inline VmathQuat vmathQMakeFromV3Scalar_V( VmathVector3 xyz, float _w )
-{
-    VmathQuat result;
-    vmathQMakeFromV3Scalar(&result, &xyz, _w);
-    return result;
-}
-
-static inline VmathQuat vmathQMakeFromV4_V( VmathVector4 vec )
-{
-    VmathQuat result;
-    vmathQMakeFromV4(&result, &vec);
-    return result;
-}
-
-static inline VmathQuat vmathQMakeFromScalar_V( float scalar )
-{
-    VmathQuat result;
-    vmathQMakeFromScalar(&result, scalar);
-    return result;
-}
-
-static inline VmathQuat vmathQMakeIdentity_V( )
-{
-    VmathQuat result;
-    vmathQMakeIdentity(&result);
-    return result;
-}
-
-static inline VmathQuat vmathQLerp_V( float t, VmathQuat quat0, VmathQuat quat1 )
-{
-    VmathQuat result;
-    vmathQLerp(&result, t, &quat0, &quat1);
-    return result;
-}
-
-static inline VmathQuat vmathQSlerp_V( float t, VmathQuat unitQuat0, VmathQuat unitQuat1 )
-{
-    VmathQuat result;
-    vmathQSlerp(&result, t, &unitQuat0, &unitQuat1);
-    return result;
-}
-
-static inline VmathQuat vmathQSquad_V( float t, VmathQuat unitQuat0, VmathQuat unitQuat1, VmathQuat unitQuat2, VmathQuat unitQuat3 )
-{
-    VmathQuat result;
-    vmathQSquad(&result, t, &unitQuat0, &unitQuat1, &unitQuat2, &unitQuat3);
-    return result;
-}
-
-static inline void vmathQSetXYZ_V( VmathQuat *result, VmathVector3 vec )
-{
-    vmathQSetXYZ(result, &vec);
-}
-
-static inline VmathVector3 vmathQGetXYZ_V( VmathQuat quat )
-{
-    VmathVector3 result;
-    vmathQGetXYZ(&result, &quat);
-    return result;
-}
-
-static inline void vmathQSetX_V( VmathQuat *result, float _x )
-{
-    vmathQSetX(result, _x);
-}
-
-static inline float vmathQGetX_V( VmathQuat quat )
-{
-    return vmathQGetX(&quat);
-}
-
-static inline void vmathQSetY_V( VmathQuat *result, float _y )
-{
-    vmathQSetY(result, _y);
-}
-
-static inline float vmathQGetY_V( VmathQuat quat )
-{
-    return vmathQGetY(&quat);
-}
-
-static inline void vmathQSetZ_V( VmathQuat *result, float _z )
-{
-    vmathQSetZ(result, _z);
-}
-
-static inline float vmathQGetZ_V( VmathQuat quat )
-{
-    return vmathQGetZ(&quat);
-}
-
-static inline void vmathQSetW_V( VmathQuat *result, float _w )
-{
-    vmathQSetW(result, _w);
-}
-
-static inline float vmathQGetW_V( VmathQuat quat )
-{
-    return vmathQGetW(&quat);
-}
-
-static inline void vmathQSetElem_V( VmathQuat *result, int idx, float value )
-{
-    vmathQSetElem(result, idx, value);
-}
-
-static inline float vmathQGetElem_V( VmathQuat quat, int idx )
-{
-    return vmathQGetElem(&quat, idx);
-}
-
-static inline VmathQuat vmathQAdd_V( VmathQuat quat0, VmathQuat quat1 )
-{
-    VmathQuat result;
-    vmathQAdd(&result, &quat0, &quat1);
-    return result;
-}
-
-static inline VmathQuat vmathQSub_V( VmathQuat quat0, VmathQuat quat1 )
-{
-    VmathQuat result;
-    vmathQSub(&result, &quat0, &quat1);
-    return result;
-}
-
-static inline VmathQuat vmathQScalarMul_V( VmathQuat quat, float scalar )
-{
-    VmathQuat result;
-    vmathQScalarMul(&result, &quat, scalar);
-    return result;
-}
-
-static inline VmathQuat vmathQScalarDiv_V( VmathQuat quat, float scalar )
-{
-    VmathQuat result;
-    vmathQScalarDiv(&result, &quat, scalar);
-    return result;
-}
-
-static inline VmathQuat vmathQNeg_V( VmathQuat quat )
-{
-    VmathQuat result;
-    vmathQNeg(&result, &quat);
-    return result;
-}
-
-static inline float vmathQDot_V( VmathQuat quat0, VmathQuat quat1 )
-{
-    return vmathQDot(&quat0, &quat1);
-}
-
-static inline float vmathQNorm_V( VmathQuat quat )
-{
-    return vmathQNorm(&quat);
-}
-
-static inline float vmathQLength_V( VmathQuat quat )
-{
-    return vmathQLength(&quat);
-}
-
-static inline VmathQuat vmathQNormalize_V( VmathQuat quat )
-{
-    VmathQuat result;
-    vmathQNormalize(&result, &quat);
-    return result;
-}
-
-static inline VmathQuat vmathQMakeRotationArc_V( VmathVector3 unitVec0, VmathVector3 unitVec1 )
-{
-    VmathQuat result;
-    vmathQMakeRotationArc(&result, &unitVec0, &unitVec1);
-    return result;
-}
-
-static inline VmathQuat vmathQMakeRotationAxis_V( float radians, VmathVector3 unitVec )
-{
-    VmathQuat result;
-    vmathQMakeRotationAxis(&result, radians, &unitVec);
-    return result;
-}
-
-static inline VmathQuat vmathQMakeRotationX_V( float radians )
-{
-    VmathQuat result;
-    vmathQMakeRotationX(&result, radians);
-    return result;
-}
-
-static inline VmathQuat vmathQMakeRotationY_V( float radians )
-{
-    VmathQuat result;
-    vmathQMakeRotationY(&result, radians);
-    return result;
-}
-
-static inline VmathQuat vmathQMakeRotationZ_V( float radians )
-{
-    VmathQuat result;
-    vmathQMakeRotationZ(&result, radians);
-    return result;
-}
-
-static inline VmathQuat vmathQMul_V( VmathQuat quat0, VmathQuat quat1 )
-{
-    VmathQuat result;
-    vmathQMul(&result, &quat0, &quat1);
-    return result;
-}
-
-static inline VmathVector3 vmathQRotate_V( VmathQuat quat, VmathVector3 vec )
-{
-    VmathVector3 result;
-    vmathQRotate(&result, &quat, &vec);
-    return result;
-}
-
-static inline VmathQuat vmathQConj_V( VmathQuat quat )
-{
-    VmathQuat result;
-    vmathQConj(&result, &quat);
-    return result;
-}
-
-static inline VmathQuat vmathQSelect_V( VmathQuat quat0, VmathQuat quat1, unsigned int select1 )
-{
-    VmathQuat result;
-    vmathQSelect(&result, &quat0, &quat1, select1);
-    return result;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathQPrint_V( VmathQuat quat )
-{
-    vmathQPrint(&quat);
-}
-
-static inline void vmathQPrints_V( VmathQuat quat, const char *name )
-{
-    vmathQPrints(&quat, name);
-}
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_QUAT_AOS_V_C_H
+#define _VECTORMATH_QUAT_AOS_V_C_H
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*-----------------------------------------------------------------------------
+ * Definitions
+ */
+#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
+#define _VECTORMATH_INTERNAL_FUNCTIONS
+
+#endif
+
+static inline VmathQuat vmathQMakeFromElems_V( float _x, float _y, float _z, float _w )
+{
+    VmathQuat result;
+    vmathQMakeFromElems(&result, _x, _y, _z, _w);
+    return result;
+}
+
+static inline VmathQuat vmathQMakeFromV3Scalar_V( VmathVector3 xyz, float _w )
+{
+    VmathQuat result;
+    vmathQMakeFromV3Scalar(&result, &xyz, _w);
+    return result;
+}
+
+static inline VmathQuat vmathQMakeFromV4_V( VmathVector4 vec )
+{
+    VmathQuat result;
+    vmathQMakeFromV4(&result, &vec);
+    return result;
+}
+
+static inline VmathQuat vmathQMakeFromScalar_V( float scalar )
+{
+    VmathQuat result;
+    vmathQMakeFromScalar(&result, scalar);
+    return result;
+}
+
+static inline VmathQuat vmathQMakeIdentity_V( )
+{
+    VmathQuat result;
+    vmathQMakeIdentity(&result);
+    return result;
+}
+
+static inline VmathQuat vmathQLerp_V( float t, VmathQuat quat0, VmathQuat quat1 )
+{
+    VmathQuat result;
+    vmathQLerp(&result, t, &quat0, &quat1);
+    return result;
+}
+
+static inline VmathQuat vmathQSlerp_V( float t, VmathQuat unitQuat0, VmathQuat unitQuat1 )
+{
+    VmathQuat result;
+    vmathQSlerp(&result, t, &unitQuat0, &unitQuat1);
+    return result;
+}
+
+static inline VmathQuat vmathQSquad_V( float t, VmathQuat unitQuat0, VmathQuat unitQuat1, VmathQuat unitQuat2, VmathQuat unitQuat3 )
+{
+    VmathQuat result;
+    vmathQSquad(&result, t, &unitQuat0, &unitQuat1, &unitQuat2, &unitQuat3);
+    return result;
+}
+
+static inline void vmathQSetXYZ_V( VmathQuat *result, VmathVector3 vec )
+{
+    vmathQSetXYZ(result, &vec);
+}
+
+static inline VmathVector3 vmathQGetXYZ_V( VmathQuat quat )
+{
+    VmathVector3 result;
+    vmathQGetXYZ(&result, &quat);
+    return result;
+}
+
+static inline void vmathQSetX_V( VmathQuat *result, float _x )
+{
+    vmathQSetX(result, _x);
+}
+
+static inline float vmathQGetX_V( VmathQuat quat )
+{
+    return vmathQGetX(&quat);
+}
+
+static inline void vmathQSetY_V( VmathQuat *result, float _y )
+{
+    vmathQSetY(result, _y);
+}
+
+static inline float vmathQGetY_V( VmathQuat quat )
+{
+    return vmathQGetY(&quat);
+}
+
+static inline void vmathQSetZ_V( VmathQuat *result, float _z )
+{
+    vmathQSetZ(result, _z);
+}
+
+static inline float vmathQGetZ_V( VmathQuat quat )
+{
+    return vmathQGetZ(&quat);
+}
+
+static inline void vmathQSetW_V( VmathQuat *result, float _w )
+{
+    vmathQSetW(result, _w);
+}
+
+static inline float vmathQGetW_V( VmathQuat quat )
+{
+    return vmathQGetW(&quat);
+}
+
+static inline void vmathQSetElem_V( VmathQuat *result, int idx, float value )
+{
+    vmathQSetElem(result, idx, value);
+}
+
+static inline float vmathQGetElem_V( VmathQuat quat, int idx )
+{
+    return vmathQGetElem(&quat, idx);
+}
+
+static inline VmathQuat vmathQAdd_V( VmathQuat quat0, VmathQuat quat1 )
+{
+    VmathQuat result;
+    vmathQAdd(&result, &quat0, &quat1);
+    return result;
+}
+
+static inline VmathQuat vmathQSub_V( VmathQuat quat0, VmathQuat quat1 )
+{
+    VmathQuat result;
+    vmathQSub(&result, &quat0, &quat1);
+    return result;
+}
+
+static inline VmathQuat vmathQScalarMul_V( VmathQuat quat, float scalar )
+{
+    VmathQuat result;
+    vmathQScalarMul(&result, &quat, scalar);
+    return result;
+}
+
+static inline VmathQuat vmathQScalarDiv_V( VmathQuat quat, float scalar )
+{
+    VmathQuat result;
+    vmathQScalarDiv(&result, &quat, scalar);
+    return result;
+}
+
+static inline VmathQuat vmathQNeg_V( VmathQuat quat )
+{
+    VmathQuat result;
+    vmathQNeg(&result, &quat);
+    return result;
+}
+
+static inline float vmathQDot_V( VmathQuat quat0, VmathQuat quat1 )
+{
+    return vmathQDot(&quat0, &quat1);
+}
+
+static inline float vmathQNorm_V( VmathQuat quat )
+{
+    return vmathQNorm(&quat);
+}
+
+static inline float vmathQLength_V( VmathQuat quat )
+{
+    return vmathQLength(&quat);
+}
+
+static inline VmathQuat vmathQNormalize_V( VmathQuat quat )
+{
+    VmathQuat result;
+    vmathQNormalize(&result, &quat);
+    return result;
+}
+
+static inline VmathQuat vmathQMakeRotationArc_V( VmathVector3 unitVec0, VmathVector3 unitVec1 )
+{
+    VmathQuat result;
+    vmathQMakeRotationArc(&result, &unitVec0, &unitVec1);
+    return result;
+}
+
+static inline VmathQuat vmathQMakeRotationAxis_V( float radians, VmathVector3 unitVec )
+{
+    VmathQuat result;
+    vmathQMakeRotationAxis(&result, radians, &unitVec);
+    return result;
+}
+
+static inline VmathQuat vmathQMakeRotationX_V( float radians )
+{
+    VmathQuat result;
+    vmathQMakeRotationX(&result, radians);
+    return result;
+}
+
+static inline VmathQuat vmathQMakeRotationY_V( float radians )
+{
+    VmathQuat result;
+    vmathQMakeRotationY(&result, radians);
+    return result;
+}
+
+static inline VmathQuat vmathQMakeRotationZ_V( float radians )
+{
+    VmathQuat result;
+    vmathQMakeRotationZ(&result, radians);
+    return result;
+}
+
+static inline VmathQuat vmathQMul_V( VmathQuat quat0, VmathQuat quat1 )
+{
+    VmathQuat result;
+    vmathQMul(&result, &quat0, &quat1);
+    return result;
+}
+
+static inline VmathVector3 vmathQRotate_V( VmathQuat quat, VmathVector3 vec )
+{
+    VmathVector3 result;
+    vmathQRotate(&result, &quat, &vec);
+    return result;
+}
+
+static inline VmathQuat vmathQConj_V( VmathQuat quat )
+{
+    VmathQuat result;
+    vmathQConj(&result, &quat);
+    return result;
+}
+
+static inline VmathQuat vmathQSelect_V( VmathQuat quat0, VmathQuat quat1, unsigned int select1 )
+{
+    VmathQuat result;
+    vmathQSelect(&result, &quat0, &quat1, select1);
+    return result;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathQPrint_V( VmathQuat quat )
+{
+    vmathQPrint(&quat);
+}
+
+static inline void vmathQPrints_V( VmathQuat quat, const char *name )
+{
+    vmathQPrints(&quat, name);
+}
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/scalar/c/vec_aos.h b/Extras/vectormathlibrary/include/vectormath/scalar/c/vec_aos.h
index f0b0f437d..80d974f99 100644
--- a/Extras/vectormathlibrary/include/vectormath/scalar/c/vec_aos.h
+++ b/Extras/vectormathlibrary/include/vectormath/scalar/c/vec_aos.h
@@ -1,971 +1,971 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_VEC_AOS_C_H
-#define _VECTORMATH_VEC_AOS_C_H
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/*-----------------------------------------------------------------------------
- * Constants
- */
-#define _VECTORMATH_SLERP_TOL 0.999f
-
-/*-----------------------------------------------------------------------------
- * Definitions
- */
-#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
-#define _VECTORMATH_INTERNAL_FUNCTIONS
-
-#endif
-
-static inline void vmathV3Copy( VmathVector3 *result, const VmathVector3 *vec )
-{
-    result->x = vec->x;
-    result->y = vec->y;
-    result->z = vec->z;
-}
-
-static inline void vmathV3MakeFromElems( VmathVector3 *result, float _x, float _y, float _z )
-{
-    result->x = _x;
-    result->y = _y;
-    result->z = _z;
-}
-
-static inline void vmathV3MakeFromP3( VmathVector3 *result, const VmathPoint3 *pnt )
-{
-    result->x = pnt->x;
-    result->y = pnt->y;
-    result->z = pnt->z;
-}
-
-static inline void vmathV3MakeFromScalar( VmathVector3 *result, float scalar )
-{
-    result->x = scalar;
-    result->y = scalar;
-    result->z = scalar;
-}
-
-static inline void vmathV3MakeXAxis( VmathVector3 *result )
-{
-    vmathV3MakeFromElems( result, 1.0f, 0.0f, 0.0f );
-}
-
-static inline void vmathV3MakeYAxis( VmathVector3 *result )
-{
-    vmathV3MakeFromElems( result, 0.0f, 1.0f, 0.0f );
-}
-
-static inline void vmathV3MakeZAxis( VmathVector3 *result )
-{
-    vmathV3MakeFromElems( result, 0.0f, 0.0f, 1.0f );
-}
-
-static inline void vmathV3Lerp( VmathVector3 *result, float t, const VmathVector3 *vec0, const VmathVector3 *vec1 )
-{
-    VmathVector3 tmpV3_0, tmpV3_1;
-    vmathV3Sub( &tmpV3_0, vec1, vec0 );
-    vmathV3ScalarMul( &tmpV3_1, &tmpV3_0, t );
-    vmathV3Add( result, vec0, &tmpV3_1 );
-}
-
-static inline void vmathV3Slerp( VmathVector3 *result, float t, const VmathVector3 *unitVec0, const VmathVector3 *unitVec1 )
-{
-    VmathVector3 tmpV3_0, tmpV3_1;
-    float recipSinAngle, scale0, scale1, cosAngle, angle;
-    cosAngle = vmathV3Dot( unitVec0, unitVec1 );
-    if ( cosAngle < _VECTORMATH_SLERP_TOL ) {
-        angle = acosf( cosAngle );
-        recipSinAngle = ( 1.0f / sinf( angle ) );
-        scale0 = ( sinf( ( ( 1.0f - t ) * angle ) ) * recipSinAngle );
-        scale1 = ( sinf( ( t * angle ) ) * recipSinAngle );
-    } else {
-        scale0 = ( 1.0f - t );
-        scale1 = t;
-    }
-    vmathV3ScalarMul( &tmpV3_0, unitVec0, scale0 );
-    vmathV3ScalarMul( &tmpV3_1, unitVec1, scale1 );
-    vmathV3Add( result, &tmpV3_0, &tmpV3_1 );
-}
-
-static inline void vmathV3SetX( VmathVector3 *result, float _x )
-{
-    result->x = _x;
-}
-
-static inline float vmathV3GetX( const VmathVector3 *vec )
-{
-    return vec->x;
-}
-
-static inline void vmathV3SetY( VmathVector3 *result, float _y )
-{
-    result->y = _y;
-}
-
-static inline float vmathV3GetY( const VmathVector3 *vec )
-{
-    return vec->y;
-}
-
-static inline void vmathV3SetZ( VmathVector3 *result, float _z )
-{
-    result->z = _z;
-}
-
-static inline float vmathV3GetZ( const VmathVector3 *vec )
-{
-    return vec->z;
-}
-
-static inline void vmathV3SetElem( VmathVector3 *result, int idx, float value )
-{
-    *(&result->x + idx) = value;
-}
-
-static inline float vmathV3GetElem( const VmathVector3 *vec, int idx )
-{
-    return *(&vec->x + idx);
-}
-
-static inline void vmathV3Add( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
-{
-    result->x = ( vec0->x + vec1->x );
-    result->y = ( vec0->y + vec1->y );
-    result->z = ( vec0->z + vec1->z );
-}
-
-static inline void vmathV3Sub( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
-{
-    result->x = ( vec0->x - vec1->x );
-    result->y = ( vec0->y - vec1->y );
-    result->z = ( vec0->z - vec1->z );
-}
-
-static inline void vmathV3AddP3( VmathPoint3 *result, const VmathVector3 *vec, const VmathPoint3 *pnt1 )
-{
-    result->x = ( vec->x + pnt1->x );
-    result->y = ( vec->y + pnt1->y );
-    result->z = ( vec->z + pnt1->z );
-}
-
-static inline void vmathV3ScalarMul( VmathVector3 *result, const VmathVector3 *vec, float scalar )
-{
-    result->x = ( vec->x * scalar );
-    result->y = ( vec->y * scalar );
-    result->z = ( vec->z * scalar );
-}
-
-static inline void vmathV3ScalarDiv( VmathVector3 *result, const VmathVector3 *vec, float scalar )
-{
-    result->x = ( vec->x / scalar );
-    result->y = ( vec->y / scalar );
-    result->z = ( vec->z / scalar );
-}
-
-static inline void vmathV3Neg( VmathVector3 *result, const VmathVector3 *vec )
-{
-    result->x = -vec->x;
-    result->y = -vec->y;
-    result->z = -vec->z;
-}
-
-static inline void vmathV3MulPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
-{
-    result->x = ( vec0->x * vec1->x );
-    result->y = ( vec0->y * vec1->y );
-    result->z = ( vec0->z * vec1->z );
-}
-
-static inline void vmathV3DivPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
-{
-    result->x = ( vec0->x / vec1->x );
-    result->y = ( vec0->y / vec1->y );
-    result->z = ( vec0->z / vec1->z );
-}
-
-static inline void vmathV3RecipPerElem( VmathVector3 *result, const VmathVector3 *vec )
-{
-    result->x = ( 1.0f / vec->x );
-    result->y = ( 1.0f / vec->y );
-    result->z = ( 1.0f / vec->z );
-}
-
-static inline void vmathV3SqrtPerElem( VmathVector3 *result, const VmathVector3 *vec )
-{
-    result->x = sqrtf( vec->x );
-    result->y = sqrtf( vec->y );
-    result->z = sqrtf( vec->z );
-}
-
-static inline void vmathV3RsqrtPerElem( VmathVector3 *result, const VmathVector3 *vec )
-{
-    result->x = ( 1.0f / sqrtf( vec->x ) );
-    result->y = ( 1.0f / sqrtf( vec->y ) );
-    result->z = ( 1.0f / sqrtf( vec->z ) );
-}
-
-static inline void vmathV3AbsPerElem( VmathVector3 *result, const VmathVector3 *vec )
-{
-    result->x = fabsf( vec->x );
-    result->y = fabsf( vec->y );
-    result->z = fabsf( vec->z );
-}
-
-static inline void vmathV3CopySignPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
-{
-    result->x = ( vec1->x < 0.0f )? -fabsf( vec0->x ) : fabsf( vec0->x );
-    result->y = ( vec1->y < 0.0f )? -fabsf( vec0->y ) : fabsf( vec0->y );
-    result->z = ( vec1->z < 0.0f )? -fabsf( vec0->z ) : fabsf( vec0->z );
-}
-
-static inline void vmathV3MaxPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
-{
-    result->x = (vec0->x > vec1->x)? vec0->x : vec1->x;
-    result->y = (vec0->y > vec1->y)? vec0->y : vec1->y;
-    result->z = (vec0->z > vec1->z)? vec0->z : vec1->z;
-}
-
-static inline float vmathV3MaxElem( const VmathVector3 *vec )
-{
-    float result;
-    result = (vec->x > vec->y)? vec->x : vec->y;
-    result = (vec->z > result)? vec->z : result;
-    return result;
-}
-
-static inline void vmathV3MinPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
-{
-    result->x = (vec0->x < vec1->x)? vec0->x : vec1->x;
-    result->y = (vec0->y < vec1->y)? vec0->y : vec1->y;
-    result->z = (vec0->z < vec1->z)? vec0->z : vec1->z;
-}
-
-static inline float vmathV3MinElem( const VmathVector3 *vec )
-{
-    float result;
-    result = (vec->x < vec->y)? vec->x : vec->y;
-    result = (vec->z < result)? vec->z : result;
-    return result;
-}
-
-static inline float vmathV3Sum( const VmathVector3 *vec )
-{
-    float result;
-    result = ( vec->x + vec->y );
-    result = ( result + vec->z );
-    return result;
-}
-
-static inline float vmathV3Dot( const VmathVector3 *vec0, const VmathVector3 *vec1 )
-{
-    float result;
-    result = ( vec0->x * vec1->x );
-    result = ( result + ( vec0->y * vec1->y ) );
-    result = ( result + ( vec0->z * vec1->z ) );
-    return result;
-}
-
-static inline float vmathV3LengthSqr( const VmathVector3 *vec )
-{
-    float result;
-    result = ( vec->x * vec->x );
-    result = ( result + ( vec->y * vec->y ) );
-    result = ( result + ( vec->z * vec->z ) );
-    return result;
-}
-
-static inline float vmathV3Length( const VmathVector3 *vec )
-{
-    return sqrtf( vmathV3LengthSqr( vec ) );
-}
-
-static inline void vmathV3Normalize( VmathVector3 *result, const VmathVector3 *vec )
-{
-    float lenSqr, lenInv;
-    lenSqr = vmathV3LengthSqr( vec );
-    lenInv = ( 1.0f / sqrtf( lenSqr ) );
-    result->x = ( vec->x * lenInv );
-    result->y = ( vec->y * lenInv );
-    result->z = ( vec->z * lenInv );
-}
-
-static inline void vmathV3Cross( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
-{
-    float tmpX, tmpY, tmpZ;
-    tmpX = ( ( vec0->y * vec1->z ) - ( vec0->z * vec1->y ) );
-    tmpY = ( ( vec0->z * vec1->x ) - ( vec0->x * vec1->z ) );
-    tmpZ = ( ( vec0->x * vec1->y ) - ( vec0->y * vec1->x ) );
-    vmathV3MakeFromElems( result, tmpX, tmpY, tmpZ );
-}
-
-static inline void vmathV3Select( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1, unsigned int select1 )
-{
-    result->x = ( select1 )? vec1->x : vec0->x;
-    result->y = ( select1 )? vec1->y : vec0->y;
-    result->z = ( select1 )? vec1->z : vec0->z;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathV3Print( const VmathVector3 *vec )
-{
-    printf( "( %f %f %f )\n", vec->x, vec->y, vec->z );
-}
-
-static inline void vmathV3Prints( const VmathVector3 *vec, const char *name )
-{
-    printf( "%s: ( %f %f %f )\n", name, vec->x, vec->y, vec->z );
-}
-
-#endif
-
-static inline void vmathV4Copy( VmathVector4 *result, const VmathVector4 *vec )
-{
-    result->x = vec->x;
-    result->y = vec->y;
-    result->z = vec->z;
-    result->w = vec->w;
-}
-
-static inline void vmathV4MakeFromElems( VmathVector4 *result, float _x, float _y, float _z, float _w )
-{
-    result->x = _x;
-    result->y = _y;
-    result->z = _z;
-    result->w = _w;
-}
-
-static inline void vmathV4MakeFromV3Scalar( VmathVector4 *result, const VmathVector3 *xyz, float _w )
-{
-    vmathV4SetXYZ( result, xyz );
-    vmathV4SetW( result, _w );
-}
-
-static inline void vmathV4MakeFromV3( VmathVector4 *result, const VmathVector3 *vec )
-{
-    result->x = vec->x;
-    result->y = vec->y;
-    result->z = vec->z;
-    result->w = 0.0f;
-}
-
-static inline void vmathV4MakeFromP3( VmathVector4 *result, const VmathPoint3 *pnt )
-{
-    result->x = pnt->x;
-    result->y = pnt->y;
-    result->z = pnt->z;
-    result->w = 1.0f;
-}
-
-static inline void vmathV4MakeFromQ( VmathVector4 *result, const VmathQuat *quat )
-{
-    result->x = quat->x;
-    result->y = quat->y;
-    result->z = quat->z;
-    result->w = quat->w;
-}
-
-static inline void vmathV4MakeFromScalar( VmathVector4 *result, float scalar )
-{
-    result->x = scalar;
-    result->y = scalar;
-    result->z = scalar;
-    result->w = scalar;
-}
-
-static inline void vmathV4MakeXAxis( VmathVector4 *result )
-{
-    vmathV4MakeFromElems( result, 1.0f, 0.0f, 0.0f, 0.0f );
-}
-
-static inline void vmathV4MakeYAxis( VmathVector4 *result )
-{
-    vmathV4MakeFromElems( result, 0.0f, 1.0f, 0.0f, 0.0f );
-}
-
-static inline void vmathV4MakeZAxis( VmathVector4 *result )
-{
-    vmathV4MakeFromElems( result, 0.0f, 0.0f, 1.0f, 0.0f );
-}
-
-static inline void vmathV4MakeWAxis( VmathVector4 *result )
-{
-    vmathV4MakeFromElems( result, 0.0f, 0.0f, 0.0f, 1.0f );
-}
-
-static inline void vmathV4Lerp( VmathVector4 *result, float t, const VmathVector4 *vec0, const VmathVector4 *vec1 )
-{
-    VmathVector4 tmpV4_0, tmpV4_1;
-    vmathV4Sub( &tmpV4_0, vec1, vec0 );
-    vmathV4ScalarMul( &tmpV4_1, &tmpV4_0, t );
-    vmathV4Add( result, vec0, &tmpV4_1 );
-}
-
-static inline void vmathV4Slerp( VmathVector4 *result, float t, const VmathVector4 *unitVec0, const VmathVector4 *unitVec1 )
-{
-    VmathVector4 tmpV4_0, tmpV4_1;
-    float recipSinAngle, scale0, scale1, cosAngle, angle;
-    cosAngle = vmathV4Dot( unitVec0, unitVec1 );
-    if ( cosAngle < _VECTORMATH_SLERP_TOL ) {
-        angle = acosf( cosAngle );
-        recipSinAngle = ( 1.0f / sinf( angle ) );
-        scale0 = ( sinf( ( ( 1.0f - t ) * angle ) ) * recipSinAngle );
-        scale1 = ( sinf( ( t * angle ) ) * recipSinAngle );
-    } else {
-        scale0 = ( 1.0f - t );
-        scale1 = t;
-    }
-    vmathV4ScalarMul( &tmpV4_0, unitVec0, scale0 );
-    vmathV4ScalarMul( &tmpV4_1, unitVec1, scale1 );
-    vmathV4Add( result, &tmpV4_0, &tmpV4_1 );
-}
-
-static inline void vmathV4SetXYZ( VmathVector4 *result, const VmathVector3 *vec )
-{
-    result->x = vec->x;
-    result->y = vec->y;
-    result->z = vec->z;
-}
-
-static inline void vmathV4GetXYZ( VmathVector3 *result, const VmathVector4 *vec )
-{
-    vmathV3MakeFromElems( result, vec->x, vec->y, vec->z );
-}
-
-static inline void vmathV4SetX( VmathVector4 *result, float _x )
-{
-    result->x = _x;
-}
-
-static inline float vmathV4GetX( const VmathVector4 *vec )
-{
-    return vec->x;
-}
-
-static inline void vmathV4SetY( VmathVector4 *result, float _y )
-{
-    result->y = _y;
-}
-
-static inline float vmathV4GetY( const VmathVector4 *vec )
-{
-    return vec->y;
-}
-
-static inline void vmathV4SetZ( VmathVector4 *result, float _z )
-{
-    result->z = _z;
-}
-
-static inline float vmathV4GetZ( const VmathVector4 *vec )
-{
-    return vec->z;
-}
-
-static inline void vmathV4SetW( VmathVector4 *result, float _w )
-{
-    result->w = _w;
-}
-
-static inline float vmathV4GetW( const VmathVector4 *vec )
-{
-    return vec->w;
-}
-
-static inline void vmathV4SetElem( VmathVector4 *result, int idx, float value )
-{
-    *(&result->x + idx) = value;
-}
-
-static inline float vmathV4GetElem( const VmathVector4 *vec, int idx )
-{
-    return *(&vec->x + idx);
-}
-
-static inline void vmathV4Add( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
-{
-    result->x = ( vec0->x + vec1->x );
-    result->y = ( vec0->y + vec1->y );
-    result->z = ( vec0->z + vec1->z );
-    result->w = ( vec0->w + vec1->w );
-}
-
-static inline void vmathV4Sub( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
-{
-    result->x = ( vec0->x - vec1->x );
-    result->y = ( vec0->y - vec1->y );
-    result->z = ( vec0->z - vec1->z );
-    result->w = ( vec0->w - vec1->w );
-}
-
-static inline void vmathV4ScalarMul( VmathVector4 *result, const VmathVector4 *vec, float scalar )
-{
-    result->x = ( vec->x * scalar );
-    result->y = ( vec->y * scalar );
-    result->z = ( vec->z * scalar );
-    result->w = ( vec->w * scalar );
-}
-
-static inline void vmathV4ScalarDiv( VmathVector4 *result, const VmathVector4 *vec, float scalar )
-{
-    result->x = ( vec->x / scalar );
-    result->y = ( vec->y / scalar );
-    result->z = ( vec->z / scalar );
-    result->w = ( vec->w / scalar );
-}
-
-static inline void vmathV4Neg( VmathVector4 *result, const VmathVector4 *vec )
-{
-    result->x = -vec->x;
-    result->y = -vec->y;
-    result->z = -vec->z;
-    result->w = -vec->w;
-}
-
-static inline void vmathV4MulPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
-{
-    result->x = ( vec0->x * vec1->x );
-    result->y = ( vec0->y * vec1->y );
-    result->z = ( vec0->z * vec1->z );
-    result->w = ( vec0->w * vec1->w );
-}
-
-static inline void vmathV4DivPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
-{
-    result->x = ( vec0->x / vec1->x );
-    result->y = ( vec0->y / vec1->y );
-    result->z = ( vec0->z / vec1->z );
-    result->w = ( vec0->w / vec1->w );
-}
-
-static inline void vmathV4RecipPerElem( VmathVector4 *result, const VmathVector4 *vec )
-{
-    result->x = ( 1.0f / vec->x );
-    result->y = ( 1.0f / vec->y );
-    result->z = ( 1.0f / vec->z );
-    result->w = ( 1.0f / vec->w );
-}
-
-static inline void vmathV4SqrtPerElem( VmathVector4 *result, const VmathVector4 *vec )
-{
-    result->x = sqrtf( vec->x );
-    result->y = sqrtf( vec->y );
-    result->z = sqrtf( vec->z );
-    result->w = sqrtf( vec->w );
-}
-
-static inline void vmathV4RsqrtPerElem( VmathVector4 *result, const VmathVector4 *vec )
-{
-    result->x = ( 1.0f / sqrtf( vec->x ) );
-    result->y = ( 1.0f / sqrtf( vec->y ) );
-    result->z = ( 1.0f / sqrtf( vec->z ) );
-    result->w = ( 1.0f / sqrtf( vec->w ) );
-}
-
-static inline void vmathV4AbsPerElem( VmathVector4 *result, const VmathVector4 *vec )
-{
-    result->x = fabsf( vec->x );
-    result->y = fabsf( vec->y );
-    result->z = fabsf( vec->z );
-    result->w = fabsf( vec->w );
-}
-
-static inline void vmathV4CopySignPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
-{
-    result->x = ( vec1->x < 0.0f )? -fabsf( vec0->x ) : fabsf( vec0->x );
-    result->y = ( vec1->y < 0.0f )? -fabsf( vec0->y ) : fabsf( vec0->y );
-    result->z = ( vec1->z < 0.0f )? -fabsf( vec0->z ) : fabsf( vec0->z );
-    result->w = ( vec1->w < 0.0f )? -fabsf( vec0->w ) : fabsf( vec0->w );
-}
-
-static inline void vmathV4MaxPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
-{
-    result->x = (vec0->x > vec1->x)? vec0->x : vec1->x;
-    result->y = (vec0->y > vec1->y)? vec0->y : vec1->y;
-    result->z = (vec0->z > vec1->z)? vec0->z : vec1->z;
-    result->w = (vec0->w > vec1->w)? vec0->w : vec1->w;
-}
-
-static inline float vmathV4MaxElem( const VmathVector4 *vec )
-{
-    float result;
-    result = (vec->x > vec->y)? vec->x : vec->y;
-    result = (vec->z > result)? vec->z : result;
-    result = (vec->w > result)? vec->w : result;
-    return result;
-}
-
-static inline void vmathV4MinPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
-{
-    result->x = (vec0->x < vec1->x)? vec0->x : vec1->x;
-    result->y = (vec0->y < vec1->y)? vec0->y : vec1->y;
-    result->z = (vec0->z < vec1->z)? vec0->z : vec1->z;
-    result->w = (vec0->w < vec1->w)? vec0->w : vec1->w;
-}
-
-static inline float vmathV4MinElem( const VmathVector4 *vec )
-{
-    float result;
-    result = (vec->x < vec->y)? vec->x : vec->y;
-    result = (vec->z < result)? vec->z : result;
-    result = (vec->w < result)? vec->w : result;
-    return result;
-}
-
-static inline float vmathV4Sum( const VmathVector4 *vec )
-{
-    float result;
-    result = ( vec->x + vec->y );
-    result = ( result + vec->z );
-    result = ( result + vec->w );
-    return result;
-}
-
-static inline float vmathV4Dot( const VmathVector4 *vec0, const VmathVector4 *vec1 )
-{
-    float result;
-    result = ( vec0->x * vec1->x );
-    result = ( result + ( vec0->y * vec1->y ) );
-    result = ( result + ( vec0->z * vec1->z ) );
-    result = ( result + ( vec0->w * vec1->w ) );
-    return result;
-}
-
-static inline float vmathV4LengthSqr( const VmathVector4 *vec )
-{
-    float result;
-    result = ( vec->x * vec->x );
-    result = ( result + ( vec->y * vec->y ) );
-    result = ( result + ( vec->z * vec->z ) );
-    result = ( result + ( vec->w * vec->w ) );
-    return result;
-}
-
-static inline float vmathV4Length( const VmathVector4 *vec )
-{
-    return sqrtf( vmathV4LengthSqr( vec ) );
-}
-
-static inline void vmathV4Normalize( VmathVector4 *result, const VmathVector4 *vec )
-{
-    float lenSqr, lenInv;
-    lenSqr = vmathV4LengthSqr( vec );
-    lenInv = ( 1.0f / sqrtf( lenSqr ) );
-    result->x = ( vec->x * lenInv );
-    result->y = ( vec->y * lenInv );
-    result->z = ( vec->z * lenInv );
-    result->w = ( vec->w * lenInv );
-}
-
-static inline void vmathV4Select( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1, unsigned int select1 )
-{
-    result->x = ( select1 )? vec1->x : vec0->x;
-    result->y = ( select1 )? vec1->y : vec0->y;
-    result->z = ( select1 )? vec1->z : vec0->z;
-    result->w = ( select1 )? vec1->w : vec0->w;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathV4Print( const VmathVector4 *vec )
-{
-    printf( "( %f %f %f %f )\n", vec->x, vec->y, vec->z, vec->w );
-}
-
-static inline void vmathV4Prints( const VmathVector4 *vec, const char *name )
-{
-    printf( "%s: ( %f %f %f %f )\n", name, vec->x, vec->y, vec->z, vec->w );
-}
-
-#endif
-
-static inline void vmathP3Copy( VmathPoint3 *result, const VmathPoint3 *pnt )
-{
-    result->x = pnt->x;
-    result->y = pnt->y;
-    result->z = pnt->z;
-}
-
-static inline void vmathP3MakeFromElems( VmathPoint3 *result, float _x, float _y, float _z )
-{
-    result->x = _x;
-    result->y = _y;
-    result->z = _z;
-}
-
-static inline void vmathP3MakeFromV3( VmathPoint3 *result, const VmathVector3 *vec )
-{
-    result->x = vec->x;
-    result->y = vec->y;
-    result->z = vec->z;
-}
-
-static inline void vmathP3MakeFromScalar( VmathPoint3 *result, float scalar )
-{
-    result->x = scalar;
-    result->y = scalar;
-    result->z = scalar;
-}
-
-static inline void vmathP3Lerp( VmathPoint3 *result, float t, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
-{
-    VmathVector3 tmpV3_0, tmpV3_1;
-    vmathP3Sub( &tmpV3_0, pnt1, pnt0 );
-    vmathV3ScalarMul( &tmpV3_1, &tmpV3_0, t );
-    vmathP3AddV3( result, pnt0, &tmpV3_1 );
-}
-
-static inline void vmathP3SetX( VmathPoint3 *result, float _x )
-{
-    result->x = _x;
-}
-
-static inline float vmathP3GetX( const VmathPoint3 *pnt )
-{
-    return pnt->x;
-}
-
-static inline void vmathP3SetY( VmathPoint3 *result, float _y )
-{
-    result->y = _y;
-}
-
-static inline float vmathP3GetY( const VmathPoint3 *pnt )
-{
-    return pnt->y;
-}
-
-static inline void vmathP3SetZ( VmathPoint3 *result, float _z )
-{
-    result->z = _z;
-}
-
-static inline float vmathP3GetZ( const VmathPoint3 *pnt )
-{
-    return pnt->z;
-}
-
-static inline void vmathP3SetElem( VmathPoint3 *result, int idx, float value )
-{
-    *(&result->x + idx) = value;
-}
-
-static inline float vmathP3GetElem( const VmathPoint3 *pnt, int idx )
-{
-    return *(&pnt->x + idx);
-}
-
-static inline void vmathP3Sub( VmathVector3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
-{
-    result->x = ( pnt0->x - pnt1->x );
-    result->y = ( pnt0->y - pnt1->y );
-    result->z = ( pnt0->z - pnt1->z );
-}
-
-static inline void vmathP3AddV3( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *vec1 )
-{
-    result->x = ( pnt->x + vec1->x );
-    result->y = ( pnt->y + vec1->y );
-    result->z = ( pnt->z + vec1->z );
-}
-
-static inline void vmathP3SubV3( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *vec1 )
-{
-    result->x = ( pnt->x - vec1->x );
-    result->y = ( pnt->y - vec1->y );
-    result->z = ( pnt->z - vec1->z );
-}
-
-static inline void vmathP3MulPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
-{
-    result->x = ( pnt0->x * pnt1->x );
-    result->y = ( pnt0->y * pnt1->y );
-    result->z = ( pnt0->z * pnt1->z );
-}
-
-static inline void vmathP3DivPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
-{
-    result->x = ( pnt0->x / pnt1->x );
-    result->y = ( pnt0->y / pnt1->y );
-    result->z = ( pnt0->z / pnt1->z );
-}
-
-static inline void vmathP3RecipPerElem( VmathPoint3 *result, const VmathPoint3 *pnt )
-{
-    result->x = ( 1.0f / pnt->x );
-    result->y = ( 1.0f / pnt->y );
-    result->z = ( 1.0f / pnt->z );
-}
-
-static inline void vmathP3SqrtPerElem( VmathPoint3 *result, const VmathPoint3 *pnt )
-{
-    result->x = sqrtf( pnt->x );
-    result->y = sqrtf( pnt->y );
-    result->z = sqrtf( pnt->z );
-}
-
-static inline void vmathP3RsqrtPerElem( VmathPoint3 *result, const VmathPoint3 *pnt )
-{
-    result->x = ( 1.0f / sqrtf( pnt->x ) );
-    result->y = ( 1.0f / sqrtf( pnt->y ) );
-    result->z = ( 1.0f / sqrtf( pnt->z ) );
-}
-
-static inline void vmathP3AbsPerElem( VmathPoint3 *result, const VmathPoint3 *pnt )
-{
-    result->x = fabsf( pnt->x );
-    result->y = fabsf( pnt->y );
-    result->z = fabsf( pnt->z );
-}
-
-static inline void vmathP3CopySignPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
-{
-    result->x = ( pnt1->x < 0.0f )? -fabsf( pnt0->x ) : fabsf( pnt0->x );
-    result->y = ( pnt1->y < 0.0f )? -fabsf( pnt0->y ) : fabsf( pnt0->y );
-    result->z = ( pnt1->z < 0.0f )? -fabsf( pnt0->z ) : fabsf( pnt0->z );
-}
-
-static inline void vmathP3MaxPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
-{
-    result->x = (pnt0->x > pnt1->x)? pnt0->x : pnt1->x;
-    result->y = (pnt0->y > pnt1->y)? pnt0->y : pnt1->y;
-    result->z = (pnt0->z > pnt1->z)? pnt0->z : pnt1->z;
-}
-
-static inline float vmathP3MaxElem( const VmathPoint3 *pnt )
-{
-    float result;
-    result = (pnt->x > pnt->y)? pnt->x : pnt->y;
-    result = (pnt->z > result)? pnt->z : result;
-    return result;
-}
-
-static inline void vmathP3MinPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
-{
-    result->x = (pnt0->x < pnt1->x)? pnt0->x : pnt1->x;
-    result->y = (pnt0->y < pnt1->y)? pnt0->y : pnt1->y;
-    result->z = (pnt0->z < pnt1->z)? pnt0->z : pnt1->z;
-}
-
-static inline float vmathP3MinElem( const VmathPoint3 *pnt )
-{
-    float result;
-    result = (pnt->x < pnt->y)? pnt->x : pnt->y;
-    result = (pnt->z < result)? pnt->z : result;
-    return result;
-}
-
-static inline float vmathP3Sum( const VmathPoint3 *pnt )
-{
-    float result;
-    result = ( pnt->x + pnt->y );
-    result = ( result + pnt->z );
-    return result;
-}
-
-static inline void vmathP3Scale( VmathPoint3 *result, const VmathPoint3 *pnt, float scaleVal )
-{
-    VmathPoint3 tmpP3_0;
-    vmathP3MakeFromScalar( &tmpP3_0, scaleVal );
-    vmathP3MulPerElem( result, pnt, &tmpP3_0 );
-}
-
-static inline void vmathP3NonUniformScale( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *scaleVec )
-{
-    VmathPoint3 tmpP3_0;
-    vmathP3MakeFromV3( &tmpP3_0, scaleVec );
-    vmathP3MulPerElem( result, pnt, &tmpP3_0 );
-}
-
-static inline float vmathP3Projection( const VmathPoint3 *pnt, const VmathVector3 *unitVec )
-{
-    float result;
-    result = ( pnt->x * unitVec->x );
-    result = ( result + ( pnt->y * unitVec->y ) );
-    result = ( result + ( pnt->z * unitVec->z ) );
-    return result;
-}
-
-static inline float vmathP3DistSqrFromOrigin( const VmathPoint3 *pnt )
-{
-    VmathVector3 tmpV3_0;
-    vmathV3MakeFromP3( &tmpV3_0, pnt );
-    return vmathV3LengthSqr( &tmpV3_0 );
-}
-
-static inline float vmathP3DistFromOrigin( const VmathPoint3 *pnt )
-{
-    VmathVector3 tmpV3_0;
-    vmathV3MakeFromP3( &tmpV3_0, pnt );
-    return vmathV3Length( &tmpV3_0 );
-}
-
-static inline float vmathP3DistSqr( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
-{
-    VmathVector3 tmpV3_0;
-    vmathP3Sub( &tmpV3_0, pnt1, pnt0 );
-    return vmathV3LengthSqr( &tmpV3_0 );
-}
-
-static inline float vmathP3Dist( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
-{
-    VmathVector3 tmpV3_0;
-    vmathP3Sub( &tmpV3_0, pnt1, pnt0 );
-    return vmathV3Length( &tmpV3_0 );
-}
-
-static inline void vmathP3Select( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, unsigned int select1 )
-{
-    result->x = ( select1 )? pnt1->x : pnt0->x;
-    result->y = ( select1 )? pnt1->y : pnt0->y;
-    result->z = ( select1 )? pnt1->z : pnt0->z;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathP3Print( const VmathPoint3 *pnt )
-{
-    printf( "( %f %f %f )\n", pnt->x, pnt->y, pnt->z );
-}
-
-static inline void vmathP3Prints( const VmathPoint3 *pnt, const char *name )
-{
-    printf( "%s: ( %f %f %f )\n", name, pnt->x, pnt->y, pnt->z );
-}
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_VEC_AOS_C_H
+#define _VECTORMATH_VEC_AOS_C_H
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*-----------------------------------------------------------------------------
+ * Constants
+ */
+#define _VECTORMATH_SLERP_TOL 0.999f
+
+/*-----------------------------------------------------------------------------
+ * Definitions
+ */
+#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
+#define _VECTORMATH_INTERNAL_FUNCTIONS
+
+#endif
+
+static inline void vmathV3Copy( VmathVector3 *result, const VmathVector3 *vec )
+{
+    result->x = vec->x;
+    result->y = vec->y;
+    result->z = vec->z;
+}
+
+static inline void vmathV3MakeFromElems( VmathVector3 *result, float _x, float _y, float _z )
+{
+    result->x = _x;
+    result->y = _y;
+    result->z = _z;
+}
+
+static inline void vmathV3MakeFromP3( VmathVector3 *result, const VmathPoint3 *pnt )
+{
+    result->x = pnt->x;
+    result->y = pnt->y;
+    result->z = pnt->z;
+}
+
+static inline void vmathV3MakeFromScalar( VmathVector3 *result, float scalar )
+{
+    result->x = scalar;
+    result->y = scalar;
+    result->z = scalar;
+}
+
+static inline void vmathV3MakeXAxis( VmathVector3 *result )
+{
+    vmathV3MakeFromElems( result, 1.0f, 0.0f, 0.0f );
+}
+
+static inline void vmathV3MakeYAxis( VmathVector3 *result )
+{
+    vmathV3MakeFromElems( result, 0.0f, 1.0f, 0.0f );
+}
+
+static inline void vmathV3MakeZAxis( VmathVector3 *result )
+{
+    vmathV3MakeFromElems( result, 0.0f, 0.0f, 1.0f );
+}
+
+static inline void vmathV3Lerp( VmathVector3 *result, float t, const VmathVector3 *vec0, const VmathVector3 *vec1 )
+{
+    VmathVector3 tmpV3_0, tmpV3_1;
+    vmathV3Sub( &tmpV3_0, vec1, vec0 );
+    vmathV3ScalarMul( &tmpV3_1, &tmpV3_0, t );
+    vmathV3Add( result, vec0, &tmpV3_1 );
+}
+
+static inline void vmathV3Slerp( VmathVector3 *result, float t, const VmathVector3 *unitVec0, const VmathVector3 *unitVec1 )
+{
+    VmathVector3 tmpV3_0, tmpV3_1;
+    float recipSinAngle, scale0, scale1, cosAngle, angle;
+    cosAngle = vmathV3Dot( unitVec0, unitVec1 );
+    if ( cosAngle < _VECTORMATH_SLERP_TOL ) {
+        angle = acosf( cosAngle );
+        recipSinAngle = ( 1.0f / sinf( angle ) );
+        scale0 = ( sinf( ( ( 1.0f - t ) * angle ) ) * recipSinAngle );
+        scale1 = ( sinf( ( t * angle ) ) * recipSinAngle );
+    } else {
+        scale0 = ( 1.0f - t );
+        scale1 = t;
+    }
+    vmathV3ScalarMul( &tmpV3_0, unitVec0, scale0 );
+    vmathV3ScalarMul( &tmpV3_1, unitVec1, scale1 );
+    vmathV3Add( result, &tmpV3_0, &tmpV3_1 );
+}
+
+static inline void vmathV3SetX( VmathVector3 *result, float _x )
+{
+    result->x = _x;
+}
+
+static inline float vmathV3GetX( const VmathVector3 *vec )
+{
+    return vec->x;
+}
+
+static inline void vmathV3SetY( VmathVector3 *result, float _y )
+{
+    result->y = _y;
+}
+
+static inline float vmathV3GetY( const VmathVector3 *vec )
+{
+    return vec->y;
+}
+
+static inline void vmathV3SetZ( VmathVector3 *result, float _z )
+{
+    result->z = _z;
+}
+
+static inline float vmathV3GetZ( const VmathVector3 *vec )
+{
+    return vec->z;
+}
+
+static inline void vmathV3SetElem( VmathVector3 *result, int idx, float value )
+{
+    *(&result->x + idx) = value;
+}
+
+static inline float vmathV3GetElem( const VmathVector3 *vec, int idx )
+{
+    return *(&vec->x + idx);
+}
+
+static inline void vmathV3Add( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
+{
+    result->x = ( vec0->x + vec1->x );
+    result->y = ( vec0->y + vec1->y );
+    result->z = ( vec0->z + vec1->z );
+}
+
+static inline void vmathV3Sub( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
+{
+    result->x = ( vec0->x - vec1->x );
+    result->y = ( vec0->y - vec1->y );
+    result->z = ( vec0->z - vec1->z );
+}
+
+static inline void vmathV3AddP3( VmathPoint3 *result, const VmathVector3 *vec, const VmathPoint3 *pnt1 )
+{
+    result->x = ( vec->x + pnt1->x );
+    result->y = ( vec->y + pnt1->y );
+    result->z = ( vec->z + pnt1->z );
+}
+
+static inline void vmathV3ScalarMul( VmathVector3 *result, const VmathVector3 *vec, float scalar )
+{
+    result->x = ( vec->x * scalar );
+    result->y = ( vec->y * scalar );
+    result->z = ( vec->z * scalar );
+}
+
+static inline void vmathV3ScalarDiv( VmathVector3 *result, const VmathVector3 *vec, float scalar )
+{
+    result->x = ( vec->x / scalar );
+    result->y = ( vec->y / scalar );
+    result->z = ( vec->z / scalar );
+}
+
+static inline void vmathV3Neg( VmathVector3 *result, const VmathVector3 *vec )
+{
+    result->x = -vec->x;
+    result->y = -vec->y;
+    result->z = -vec->z;
+}
+
+static inline void vmathV3MulPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
+{
+    result->x = ( vec0->x * vec1->x );
+    result->y = ( vec0->y * vec1->y );
+    result->z = ( vec0->z * vec1->z );
+}
+
+static inline void vmathV3DivPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
+{
+    result->x = ( vec0->x / vec1->x );
+    result->y = ( vec0->y / vec1->y );
+    result->z = ( vec0->z / vec1->z );
+}
+
+static inline void vmathV3RecipPerElem( VmathVector3 *result, const VmathVector3 *vec )
+{
+    result->x = ( 1.0f / vec->x );
+    result->y = ( 1.0f / vec->y );
+    result->z = ( 1.0f / vec->z );
+}
+
+static inline void vmathV3SqrtPerElem( VmathVector3 *result, const VmathVector3 *vec )
+{
+    result->x = sqrtf( vec->x );
+    result->y = sqrtf( vec->y );
+    result->z = sqrtf( vec->z );
+}
+
+static inline void vmathV3RsqrtPerElem( VmathVector3 *result, const VmathVector3 *vec )
+{
+    result->x = ( 1.0f / sqrtf( vec->x ) );
+    result->y = ( 1.0f / sqrtf( vec->y ) );
+    result->z = ( 1.0f / sqrtf( vec->z ) );
+}
+
+static inline void vmathV3AbsPerElem( VmathVector3 *result, const VmathVector3 *vec )
+{
+    result->x = fabsf( vec->x );
+    result->y = fabsf( vec->y );
+    result->z = fabsf( vec->z );
+}
+
+static inline void vmathV3CopySignPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
+{
+    result->x = ( vec1->x < 0.0f )? -fabsf( vec0->x ) : fabsf( vec0->x );
+    result->y = ( vec1->y < 0.0f )? -fabsf( vec0->y ) : fabsf( vec0->y );
+    result->z = ( vec1->z < 0.0f )? -fabsf( vec0->z ) : fabsf( vec0->z );
+}
+
+static inline void vmathV3MaxPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
+{
+    result->x = (vec0->x > vec1->x)? vec0->x : vec1->x;
+    result->y = (vec0->y > vec1->y)? vec0->y : vec1->y;
+    result->z = (vec0->z > vec1->z)? vec0->z : vec1->z;
+}
+
+static inline float vmathV3MaxElem( const VmathVector3 *vec )
+{
+    float result;
+    result = (vec->x > vec->y)? vec->x : vec->y;
+    result = (vec->z > result)? vec->z : result;
+    return result;
+}
+
+static inline void vmathV3MinPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
+{
+    result->x = (vec0->x < vec1->x)? vec0->x : vec1->x;
+    result->y = (vec0->y < vec1->y)? vec0->y : vec1->y;
+    result->z = (vec0->z < vec1->z)? vec0->z : vec1->z;
+}
+
+static inline float vmathV3MinElem( const VmathVector3 *vec )
+{
+    float result;
+    result = (vec->x < vec->y)? vec->x : vec->y;
+    result = (vec->z < result)? vec->z : result;
+    return result;
+}
+
+static inline float vmathV3Sum( const VmathVector3 *vec )
+{
+    float result;
+    result = ( vec->x + vec->y );
+    result = ( result + vec->z );
+    return result;
+}
+
+static inline float vmathV3Dot( const VmathVector3 *vec0, const VmathVector3 *vec1 )
+{
+    float result;
+    result = ( vec0->x * vec1->x );
+    result = ( result + ( vec0->y * vec1->y ) );
+    result = ( result + ( vec0->z * vec1->z ) );
+    return result;
+}
+
+static inline float vmathV3LengthSqr( const VmathVector3 *vec )
+{
+    float result;
+    result = ( vec->x * vec->x );
+    result = ( result + ( vec->y * vec->y ) );
+    result = ( result + ( vec->z * vec->z ) );
+    return result;
+}
+
+static inline float vmathV3Length( const VmathVector3 *vec )
+{
+    return sqrtf( vmathV3LengthSqr( vec ) );
+}
+
+static inline void vmathV3Normalize( VmathVector3 *result, const VmathVector3 *vec )
+{
+    float lenSqr, lenInv;
+    lenSqr = vmathV3LengthSqr( vec );
+    lenInv = ( 1.0f / sqrtf( lenSqr ) );
+    result->x = ( vec->x * lenInv );
+    result->y = ( vec->y * lenInv );
+    result->z = ( vec->z * lenInv );
+}
+
+static inline void vmathV3Cross( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
+{
+    float tmpX, tmpY, tmpZ;
+    tmpX = ( ( vec0->y * vec1->z ) - ( vec0->z * vec1->y ) );
+    tmpY = ( ( vec0->z * vec1->x ) - ( vec0->x * vec1->z ) );
+    tmpZ = ( ( vec0->x * vec1->y ) - ( vec0->y * vec1->x ) );
+    vmathV3MakeFromElems( result, tmpX, tmpY, tmpZ );
+}
+
+static inline void vmathV3Select( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1, unsigned int select1 )
+{
+    result->x = ( select1 )? vec1->x : vec0->x;
+    result->y = ( select1 )? vec1->y : vec0->y;
+    result->z = ( select1 )? vec1->z : vec0->z;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathV3Print( const VmathVector3 *vec )
+{
+    printf( "( %f %f %f )\n", vec->x, vec->y, vec->z );
+}
+
+static inline void vmathV3Prints( const VmathVector3 *vec, const char *name )
+{
+    printf( "%s: ( %f %f %f )\n", name, vec->x, vec->y, vec->z );
+}
+
+#endif
+
+static inline void vmathV4Copy( VmathVector4 *result, const VmathVector4 *vec )
+{
+    result->x = vec->x;
+    result->y = vec->y;
+    result->z = vec->z;
+    result->w = vec->w;
+}
+
+static inline void vmathV4MakeFromElems( VmathVector4 *result, float _x, float _y, float _z, float _w )
+{
+    result->x = _x;
+    result->y = _y;
+    result->z = _z;
+    result->w = _w;
+}
+
+static inline void vmathV4MakeFromV3Scalar( VmathVector4 *result, const VmathVector3 *xyz, float _w )
+{
+    vmathV4SetXYZ( result, xyz );
+    vmathV4SetW( result, _w );
+}
+
+static inline void vmathV4MakeFromV3( VmathVector4 *result, const VmathVector3 *vec )
+{
+    result->x = vec->x;
+    result->y = vec->y;
+    result->z = vec->z;
+    result->w = 0.0f;
+}
+
+static inline void vmathV4MakeFromP3( VmathVector4 *result, const VmathPoint3 *pnt )
+{
+    result->x = pnt->x;
+    result->y = pnt->y;
+    result->z = pnt->z;
+    result->w = 1.0f;
+}
+
+static inline void vmathV4MakeFromQ( VmathVector4 *result, const VmathQuat *quat )
+{
+    result->x = quat->x;
+    result->y = quat->y;
+    result->z = quat->z;
+    result->w = quat->w;
+}
+
+static inline void vmathV4MakeFromScalar( VmathVector4 *result, float scalar )
+{
+    result->x = scalar;
+    result->y = scalar;
+    result->z = scalar;
+    result->w = scalar;
+}
+
+static inline void vmathV4MakeXAxis( VmathVector4 *result )
+{
+    vmathV4MakeFromElems( result, 1.0f, 0.0f, 0.0f, 0.0f );
+}
+
+static inline void vmathV4MakeYAxis( VmathVector4 *result )
+{
+    vmathV4MakeFromElems( result, 0.0f, 1.0f, 0.0f, 0.0f );
+}
+
+static inline void vmathV4MakeZAxis( VmathVector4 *result )
+{
+    vmathV4MakeFromElems( result, 0.0f, 0.0f, 1.0f, 0.0f );
+}
+
+static inline void vmathV4MakeWAxis( VmathVector4 *result )
+{
+    vmathV4MakeFromElems( result, 0.0f, 0.0f, 0.0f, 1.0f );
+}
+
+static inline void vmathV4Lerp( VmathVector4 *result, float t, const VmathVector4 *vec0, const VmathVector4 *vec1 )
+{
+    VmathVector4 tmpV4_0, tmpV4_1;
+    vmathV4Sub( &tmpV4_0, vec1, vec0 );
+    vmathV4ScalarMul( &tmpV4_1, &tmpV4_0, t );
+    vmathV4Add( result, vec0, &tmpV4_1 );
+}
+
+static inline void vmathV4Slerp( VmathVector4 *result, float t, const VmathVector4 *unitVec0, const VmathVector4 *unitVec1 )
+{   /* result = scale0 * unitVec0 + scale1 * unitVec1; sine weights below the tolerance, ( 1 - t, t ) otherwise */
+    VmathVector4 tmpV4_0, tmpV4_1;
+    float recipSinAngle, scale0, scale1, angle, cosAngle = vmathV4Dot( unitVec0, unitVec1 );
+    if ( cosAngle < _VECTORMATH_SLERP_TOL ) {
+        /* Rounding can push the dot product of two unit vectors below -1, where acosf() returns NaN: clamp it. */
+        angle = acosf( ( cosAngle < -1.0f )? -1.0f : cosAngle );
+        recipSinAngle = ( 1.0f / sinf( angle ) );
+        scale0 = ( sinf( ( ( 1.0f - t ) * angle ) ) * recipSinAngle );
+        scale1 = ( sinf( ( t * angle ) ) * recipSinAngle );
+    } else {
+        scale0 = ( 1.0f - t );
+        scale1 = t;
+    }
+    vmathV4ScalarMul( &tmpV4_0, unitVec0, scale0 );
+    vmathV4ScalarMul( &tmpV4_1, unitVec1, scale1 );
+    vmathV4Add( result, &tmpV4_0, &tmpV4_1 );
+}
+
+static inline void vmathV4SetXYZ( VmathVector4 *result, const VmathVector3 *vec )
+{   /* Overwrite x, y and z from vec; result->w is left as it was. */
+    result->x = vec->x;
+    result->y = vec->y;
+    result->z = vec->z;
+}
+
+static inline void vmathV4GetXYZ( VmathVector3 *result, const VmathVector4 *vec )
+{   /* Builds result from vec's x, y, z via vmathV3MakeFromElems; vec->w is not read. */
+    vmathV3MakeFromElems( result, vec->x, vec->y, vec->z );
+}
+
+static inline void vmathV4SetX( VmathVector4 *result, float _x )
+{   /* Overwrite only the x component of result. */
+    result->x = _x;
+}
+
+static inline float vmathV4GetX( const VmathVector4 *vec )
+{   /* Return the x component of vec. */
+    return vec->x;
+}
+
+static inline void vmathV4SetY( VmathVector4 *result, float _y )
+{   /* Overwrite only the y component of result. */
+    result->y = _y;
+}
+
+static inline float vmathV4GetY( const VmathVector4 *vec )
+{   /* Return the y component of vec. */
+    return vec->y;
+}
+
+static inline void vmathV4SetZ( VmathVector4 *result, float _z )
+{   /* Overwrite only the z component of result. */
+    result->z = _z;
+}
+
+static inline float vmathV4GetZ( const VmathVector4 *vec )
+{   /* Return the z component of vec. */
+    return vec->z;
+}
+
+static inline void vmathV4SetW( VmathVector4 *result, float _w )
+{   /* Overwrite only the w component of result. */
+    result->w = _w;
+}
+
+static inline float vmathV4GetW( const VmathVector4 *vec )
+{   /* Return the w component of vec. */
+    return vec->w;
+}
+
+static inline void vmathV4SetElem( VmathVector4 *result, int idx, float value )
+{   /* Store value idx floats past result->x; idx is not range-checked here. */
+    ( &result->x )[ idx ] = value;
+}
+
+static inline float vmathV4GetElem( const VmathVector4 *vec, int idx )
+{   /* Read the float idx positions past vec->x; idx is not range-checked here. */
+    return ( &vec->x )[ idx ];
+}
+
+static inline void vmathV4Add( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
+{   /* result = vec0 + vec1, one component at a time in x, y, z, w order */
+    result->x = vec0->x + vec1->x;
+    result->y = vec0->y + vec1->y;
+    result->z = vec0->z + vec1->z;
+    result->w = vec0->w + vec1->w;
+}
+
+static inline void vmathV4Sub( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
+{   /* result = vec0 - vec1, one component at a time in x, y, z, w order */
+    result->x = vec0->x - vec1->x;
+    result->y = vec0->y - vec1->y;
+    result->z = vec0->z - vec1->z;
+    result->w = vec0->w - vec1->w;
+}
+
+static inline void vmathV4ScalarMul( VmathVector4 *result, const VmathVector4 *vec, float scalar )
+{   /* result = vec * scalar: every component is multiplied by the same factor */
+    result->x = vec->x * scalar;
+    result->y = vec->y * scalar;
+    result->z = vec->z * scalar;
+    result->w = vec->w * scalar;
+}
+
+static inline void vmathV4ScalarDiv( VmathVector4 *result, const VmathVector4 *vec, float scalar )
+{   /* result = vec / scalar: a true division per component, with no check for a zero scalar */
+    result->x = vec->x / scalar;
+    result->y = vec->y / scalar;
+    result->z = vec->z / scalar;
+    result->w = vec->w / scalar;
+}
+
+static inline void vmathV4Neg( VmathVector4 *result, const VmathVector4 *vec )
+{   /* result = -vec: each component of vec is negated. */
+    result->x = -vec->x;
+    result->y = -vec->y;
+    result->z = -vec->z;
+    result->w = -vec->w;
+}
+
+static inline void vmathV4MulPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
+{   /* component-wise product: result.c = vec0.c * vec1.c for c in x, y, z, w */
+    result->x = vec0->x * vec1->x;
+    result->y = vec0->y * vec1->y;
+    result->z = vec0->z * vec1->z;
+    result->w = vec0->w * vec1->w;
+}
+
+static inline void vmathV4DivPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
+{   /* component-wise quotient: result.c = vec0.c / vec1.c, with no check for zero divisors */
+    result->x = vec0->x / vec1->x;
+    result->y = vec0->y / vec1->y;
+    result->z = vec0->z / vec1->z;
+    result->w = vec0->w / vec1->w;
+}
+
+static inline void vmathV4RecipPerElem( VmathVector4 *result, const VmathVector4 *vec )
+{   /* component-wise reciprocal: result.c = 1 / vec.c, with no check for zero components */
+    result->x = 1.0f / vec->x;
+    result->y = 1.0f / vec->y;
+    result->z = 1.0f / vec->z;
+    result->w = 1.0f / vec->w;
+}
+
+static inline void vmathV4SqrtPerElem( VmathVector4 *result, const VmathVector4 *vec )
+{   /* Component-wise square root: result.c = sqrtf( vec.c ). */
+    result->x = sqrtf( vec->x );
+    result->y = sqrtf( vec->y );
+    result->z = sqrtf( vec->z );
+    result->w = sqrtf( vec->w );
+}
+
+static inline void vmathV4RsqrtPerElem( VmathVector4 *result, const VmathVector4 *vec )
+{   /* component-wise reciprocal square root: result.c = 1 / sqrtf( vec.c ) */
+    result->x = 1.0f / sqrtf( vec->x );
+    result->y = 1.0f / sqrtf( vec->y );
+    result->z = 1.0f / sqrtf( vec->z );
+    result->w = 1.0f / sqrtf( vec->w );
+}
+
+static inline void vmathV4AbsPerElem( VmathVector4 *result, const VmathVector4 *vec )
+{   /* Component-wise absolute value: result.c = fabsf( vec.c ). */
+    result->x = fabsf( vec->x );
+    result->y = fabsf( vec->y );
+    result->z = fabsf( vec->z );
+    result->w = fabsf( vec->w );
+}
+
+static inline void vmathV4CopySignPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
+{   /* |vec0.c|, negated only where vec1.c compares below zero (so -0.0f in vec1 counts as positive) */
+    result->x = ( 0.0f > vec1->x ) ? -fabsf( vec0->x ) : fabsf( vec0->x );
+    result->y = ( 0.0f > vec1->y ) ? -fabsf( vec0->y ) : fabsf( vec0->y );
+    result->z = ( 0.0f > vec1->z ) ? -fabsf( vec0->z ) : fabsf( vec0->z );
+    result->w = ( 0.0f > vec1->w ) ? -fabsf( vec0->w ) : fabsf( vec0->w );
+}
+
+static inline void vmathV4MaxPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
+{   /* component-wise maximum; vec1's component is taken unless vec0's is strictly greater */
+    result->x = ( vec1->x < vec0->x ) ? vec0->x : vec1->x;
+    result->y = ( vec1->y < vec0->y ) ? vec0->y : vec1->y;
+    result->z = ( vec1->z < vec0->z ) ? vec0->z : vec1->z;
+    result->w = ( vec1->w < vec0->w ) ? vec0->w : vec1->w;
+}
+
+static inline float vmathV4MaxElem( const VmathVector4 *vec )
+{
+    /* Running maximum: start from x versus y, then fold in z and w. */
+    float best = ( vec->y < vec->x ) ? vec->x : vec->y;
+    best = ( best < vec->z ) ? vec->z : best;
+    best = ( best < vec->w ) ? vec->w : best;
+    return best;
+}
+
+static inline void vmathV4MinPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
+{   /* component-wise minimum; vec1's component is taken unless vec0's is strictly smaller */
+    result->x = ( vec1->x > vec0->x ) ? vec0->x : vec1->x;
+    result->y = ( vec1->y > vec0->y ) ? vec0->y : vec1->y;
+    result->z = ( vec1->z > vec0->z ) ? vec0->z : vec1->z;
+    result->w = ( vec1->w > vec0->w ) ? vec0->w : vec1->w;
+}
+
+static inline float vmathV4MinElem( const VmathVector4 *vec )
+{
+    /* Running minimum: start from x versus y, then fold in z and w. */
+    float least = ( vec->y > vec->x ) ? vec->x : vec->y;
+    least = ( least > vec->z ) ? vec->z : least;
+    least = ( least > vec->w ) ? vec->w : least;
+    return least;
+}
+
+static inline float vmathV4Sum( const VmathVector4 *vec )
+{
+    /* Accumulate left to right: ( ( x + y ) + z ) + w. */
+    float total = vec->x + vec->y;
+    total += vec->z;
+    total += vec->w;
+    return total;
+}
+
+static inline float vmathV4Dot( const VmathVector4 *vec0, const VmathVector4 *vec1 )
+{
+    /* Sum of the component products, accumulated in x, y, z, w order. */
+    float acc = vec0->x * vec1->x;
+    acc += vec0->y * vec1->y;
+    acc += vec0->z * vec1->z;
+    acc += vec0->w * vec1->w;
+    return acc;
+}
+
+static inline float vmathV4LengthSqr( const VmathVector4 *vec )
+{
+    /* Sum of the squared components, accumulated in x, y, z, w order. */
+    float acc = vec->x * vec->x;
+    acc += vec->y * vec->y;
+    acc += vec->z * vec->z;
+    acc += vec->w * vec->w;
+    return acc;
+}
+
+static inline float vmathV4Length( const VmathVector4 *vec )
+{   /* Square root of vmathV4LengthSqr( vec ). */
+    return sqrtf( vmathV4LengthSqr( vec ) );
+}
+
+static inline void vmathV4Normalize( VmathVector4 *result, const VmathVector4 *vec )
+{
+    /* Scale every component by 1 / length.  A zero-length vec is not special-cased. */
+    const float normSq = vmathV4LengthSqr( vec );
+    const float invNorm = 1.0f / sqrtf( normSq );
+    result->x = vec->x * invNorm;
+    result->y = vec->y * invNorm;
+    result->z = vec->z * invNorm;
+    result->w = vec->w * invNorm;
+}
+
+static inline void vmathV4Select( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1, unsigned int select1 )
+{
+    /* A non-zero select1 copies vec1, zero copies vec0; all four components share one source. */
+    const VmathVector4 *picked = ( select1 ) ? vec1 : vec0;
+    result->x = picked->x;  result->y = picked->y;
+    result->z = picked->z;  result->w = picked->w;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathV4Print( const VmathVector4 *vec )
+{   /* Debug helper: print the four components on one stdout line. */
+    printf( "( %f %f %f %f )\n", vec->x, vec->y, vec->z, vec->w );
+}
+
+static inline void vmathV4Prints( const VmathVector4 *vec, const char *name )
+{   /* Debug helper: print name, a colon, then the four components on one stdout line. */
+    printf( "%s: ( %f %f %f %f )\n", name, vec->x, vec->y, vec->z, vec->w );
+}
+
+#endif
+
+static inline void vmathP3Copy( VmathPoint3 *result, const VmathPoint3 *pnt )
+{   /* Copy the x, y and z fields of pnt into result, field by field. */
+    result->x = pnt->x;
+    result->y = pnt->y;
+    result->z = pnt->z;
+}
+
+static inline void vmathP3MakeFromElems( VmathPoint3 *result, float _x, float _y, float _z )
+{   /* Set the point's x, y and z from the three scalar arguments. */
+    result->x = _x;
+    result->y = _y;
+    result->z = _z;
+}
+
+static inline void vmathP3MakeFromV3( VmathPoint3 *result, const VmathVector3 *vec )
+{   /* Set the point's x, y and z to the vector's x, y and z. */
+    result->x = vec->x;
+    result->y = vec->y;
+    result->z = vec->z;
+}
+
+static inline void vmathP3MakeFromScalar( VmathPoint3 *result, float scalar )
+{
+    /* Broadcast: x, y and z all receive the same scalar. */
+    result->x = result->y = scalar;
+    result->z = scalar;
+}
+
+static inline void vmathP3Lerp( VmathPoint3 *result, float t, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
+{   /* result = pnt0 + ( pnt1 - pnt0 ) * t, composed from P3Sub, V3ScalarMul and P3AddV3 */
+    VmathVector3 delta, step;
+    vmathP3Sub( &delta, pnt1, pnt0 );
+    vmathV3ScalarMul( &step, &delta, t );
+    vmathP3AddV3( result, pnt0, &step );
+}
+
+static inline void vmathP3SetX( VmathPoint3 *result, float _x )
+{   /* Overwrite only the x component of result. */
+    result->x = _x;
+}
+
+static inline float vmathP3GetX( const VmathPoint3 *pnt )
+{   /* Return the x component of pnt. */
+    return pnt->x;
+}
+
+static inline void vmathP3SetY( VmathPoint3 *result, float _y )
+{   /* Overwrite only the y component of result. */
+    result->y = _y;
+}
+
+static inline float vmathP3GetY( const VmathPoint3 *pnt )
+{   /* Return the y component of pnt. */
+    return pnt->y;
+}
+
+static inline void vmathP3SetZ( VmathPoint3 *result, float _z )
+{   /* Overwrite only the z component of result. */
+    result->z = _z;
+}
+
+static inline float vmathP3GetZ( const VmathPoint3 *pnt )
+{   /* Return the z component of pnt. */
+    return pnt->z;
+}
+
+static inline void vmathP3SetElem( VmathPoint3 *result, int idx, float value )
+{   /* Store value idx floats past result->x; idx is not range-checked here. */
+    ( &result->x )[ idx ] = value;
+}
+
+static inline float vmathP3GetElem( const VmathPoint3 *pnt, int idx )
+{   /* Read the float idx positions past pnt->x; idx is not range-checked here. */
+    return ( &pnt->x )[ idx ];
+}
+
+static inline void vmathP3Sub( VmathVector3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
+{   /* the difference of two points is a vector: result = pnt0 - pnt1, per component */
+    result->x = pnt0->x - pnt1->x;
+    result->y = pnt0->y - pnt1->y;
+    result->z = pnt0->z - pnt1->z;
+}
+
+static inline void vmathP3AddV3( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *vec1 )
+{   /* offset a point by a vector: result = pnt + vec1, per component */
+    result->x = pnt->x + vec1->x;
+    result->y = pnt->y + vec1->y;
+    result->z = pnt->z + vec1->z;
+}
+
+static inline void vmathP3SubV3( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *vec1 )
+{   /* offset a point by a negated vector: result = pnt - vec1, per component */
+    result->x = pnt->x - vec1->x;
+    result->y = pnt->y - vec1->y;
+    result->z = pnt->z - vec1->z;
+}
+
+static inline void vmathP3MulPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
+{   /* component-wise product: result.c = pnt0.c * pnt1.c for c in x, y, z */
+    result->x = pnt0->x * pnt1->x;
+    result->y = pnt0->y * pnt1->y;
+    result->z = pnt0->z * pnt1->z;
+}
+
+static inline void vmathP3DivPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
+{   /* component-wise quotient: result.c = pnt0.c / pnt1.c, with no check for zero divisors */
+    result->x = pnt0->x / pnt1->x;
+    result->y = pnt0->y / pnt1->y;
+    result->z = pnt0->z / pnt1->z;
+}
+
+static inline void vmathP3RecipPerElem( VmathPoint3 *result, const VmathPoint3 *pnt )
+{   /* component-wise reciprocal: result.c = 1 / pnt.c, with no check for zero components */
+    result->x = 1.0f / pnt->x;
+    result->y = 1.0f / pnt->y;
+    result->z = 1.0f / pnt->z;
+}
+
+static inline void vmathP3SqrtPerElem( VmathPoint3 *result, const VmathPoint3 *pnt )
+{   /* Component-wise square root: result.c = sqrtf( pnt.c ). */
+    result->x = sqrtf( pnt->x );
+    result->y = sqrtf( pnt->y );
+    result->z = sqrtf( pnt->z );
+}
+
+static inline void vmathP3RsqrtPerElem( VmathPoint3 *result, const VmathPoint3 *pnt )
+{   /* component-wise reciprocal square root: result.c = 1 / sqrtf( pnt.c ) */
+    result->x = 1.0f / sqrtf( pnt->x );
+    result->y = 1.0f / sqrtf( pnt->y );
+    result->z = 1.0f / sqrtf( pnt->z );
+}
+
+static inline void vmathP3AbsPerElem( VmathPoint3 *result, const VmathPoint3 *pnt )
+{   /* Component-wise absolute value: result.c = fabsf( pnt.c ). */
+    result->x = fabsf( pnt->x );
+    result->y = fabsf( pnt->y );
+    result->z = fabsf( pnt->z );
+}
+
+static inline void vmathP3CopySignPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
+{   /* |pnt0.c|, negated only where pnt1.c compares below zero (so -0.0f in pnt1 counts as positive) */
+    result->x = ( 0.0f > pnt1->x ) ? -fabsf( pnt0->x ) : fabsf( pnt0->x );
+    result->y = ( 0.0f > pnt1->y ) ? -fabsf( pnt0->y ) : fabsf( pnt0->y );
+    result->z = ( 0.0f > pnt1->z ) ? -fabsf( pnt0->z ) : fabsf( pnt0->z );
+}
+
+static inline void vmathP3MaxPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
+{   /* component-wise maximum; pnt1's component is taken unless pnt0's is strictly greater */
+    result->x = ( pnt1->x < pnt0->x ) ? pnt0->x : pnt1->x;
+    result->y = ( pnt1->y < pnt0->y ) ? pnt0->y : pnt1->y;
+    result->z = ( pnt1->z < pnt0->z ) ? pnt0->z : pnt1->z;
+}
+
+static inline float vmathP3MaxElem( const VmathPoint3 *pnt )
+{
+    /* Running maximum: start from x versus y, then fold in z. */
+    float best = ( pnt->y < pnt->x ) ? pnt->x : pnt->y;
+    best = ( best < pnt->z ) ? pnt->z : best;
+    return best;
+}
+
+static inline void vmathP3MinPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
+{   /* component-wise minimum; pnt1's component is taken unless pnt0's is strictly smaller */
+    result->x = ( pnt1->x > pnt0->x ) ? pnt0->x : pnt1->x;
+    result->y = ( pnt1->y > pnt0->y ) ? pnt0->y : pnt1->y;
+    result->z = ( pnt1->z > pnt0->z ) ? pnt0->z : pnt1->z;
+}
+
+static inline float vmathP3MinElem( const VmathPoint3 *pnt )
+{
+    /* Running minimum: start from x versus y, then fold in z. */
+    float least = ( pnt->y > pnt->x ) ? pnt->x : pnt->y;
+    least = ( least > pnt->z ) ? pnt->z : least;
+    return least;
+}
+
+static inline float vmathP3Sum( const VmathPoint3 *pnt )
+{
+    /* Accumulate left to right: ( x + y ) + z. */
+    float total = pnt->x + pnt->y;
+    total += pnt->z;
+    return total;
+}
+
+static inline void vmathP3Scale( VmathPoint3 *result, const VmathPoint3 *pnt, float scaleVal )
+{
+    /* Uniform scale: each coordinate of pnt is multiplied by scaleVal. */
+    result->x = pnt->x * scaleVal;
+    result->y = pnt->y * scaleVal;
+    result->z = pnt->z * scaleVal;
+}
+
+static inline void vmathP3NonUniformScale( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *scaleVec )
+{
+    /* Per-axis scale: each coordinate of pnt is multiplied by the matching component of scaleVec. */
+    result->x = pnt->x * scaleVec->x;
+    result->y = pnt->y * scaleVec->y;
+    result->z = pnt->z * scaleVec->z;
+}
+
+static inline float vmathP3Projection( const VmathPoint3 *pnt, const VmathVector3 *unitVec )
+{
+    /* Dot product of the point's coordinates with unitVec, accumulated in x, y, z order. */
+    float acc = pnt->x * unitVec->x;
+    acc += pnt->y * unitVec->y;
+    acc += pnt->z * unitVec->z;
+    return acc;
+}
+
+static inline float vmathP3DistSqrFromOrigin( const VmathPoint3 *pnt )
+{   /* convert pnt with vmathV3MakeFromP3, then return that vector's vmathV3LengthSqr */
+    VmathVector3 asVec;
+    vmathV3MakeFromP3( &asVec, pnt );
+    return vmathV3LengthSqr( &asVec );
+}
+
+static inline float vmathP3DistFromOrigin( const VmathPoint3 *pnt )
+{   /* convert pnt with vmathV3MakeFromP3, then return that vector's vmathV3Length */
+    VmathVector3 asVec;
+    vmathV3MakeFromP3( &asVec, pnt );
+    return vmathV3Length( &asVec );
+}
+
+static inline float vmathP3DistSqr( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
+{   /* vmathV3LengthSqr of the offset pnt1 - pnt0 */
+    VmathVector3 offset;
+    vmathP3Sub( &offset, pnt1, pnt0 );
+    return vmathV3LengthSqr( &offset );
+}
+
+static inline float vmathP3Dist( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
+{   /* vmathV3Length of the offset pnt1 - pnt0 */
+    VmathVector3 offset;
+    vmathP3Sub( &offset, pnt1, pnt0 );
+    return vmathV3Length( &offset );
+}
+
+static inline void vmathP3Select( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, unsigned int select1 )
+{   /* a non-zero select1 copies pnt1, zero copies pnt0; all three coordinates share one source */
+    const VmathPoint3 *picked = ( select1 ) ? pnt1 : pnt0;
+    result->x = picked->x;  result->y = picked->y;
+    result->z = picked->z;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathP3Print( const VmathPoint3 *pnt )
+{   /* Debug helper: print the three coordinates on one stdout line. */
+    printf( "( %f %f %f )\n", pnt->x, pnt->y, pnt->z );
+}
+
+static inline void vmathP3Prints( const VmathPoint3 *pnt, const char *name )
+{   /* Debug helper: print name, a colon, then the three coordinates on one stdout line. */
+    printf( "%s: ( %f %f %f )\n", name, pnt->x, pnt->y, pnt->z );
+}
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/scalar/c/vec_aos_v.h b/Extras/vectormathlibrary/include/vectormath/scalar/c/vec_aos_v.h
index 75c49370d..de8adb5ad 100644
--- a/Extras/vectormathlibrary/include/vectormath/scalar/c/vec_aos_v.h
+++ b/Extras/vectormathlibrary/include/vectormath/scalar/c/vec_aos_v.h
@@ -1,848 +1,848 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_VEC_AOS_V_C_H
-#define _VECTORMATH_VEC_AOS_V_C_H
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/*-----------------------------------------------------------------------------
- * Constants
- */
-#define _VECTORMATH_SLERP_TOL 0.999f
-
-/*-----------------------------------------------------------------------------
- * Definitions
- */
-#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
-#define _VECTORMATH_INTERNAL_FUNCTIONS
-
-#endif
-
-static inline VmathVector3 vmathV3MakeFromElems_V( float _x, float _y, float _z )
-{
-    VmathVector3 result;
-    vmathV3MakeFromElems(&result, _x, _y, _z);
-    return result;
-}
-
-static inline VmathVector3 vmathV3MakeFromP3_V( VmathPoint3 pnt )
-{
-    VmathVector3 result;
-    vmathV3MakeFromP3(&result, &pnt);
-    return result;
-}
-
-static inline VmathVector3 vmathV3MakeFromScalar_V( float scalar )
-{
-    VmathVector3 result;
-    vmathV3MakeFromScalar(&result, scalar);
-    return result;
-}
-
-static inline VmathVector3 vmathV3MakeXAxis_V( )
-{
-    VmathVector3 result;
-    vmathV3MakeXAxis(&result);
-    return result;
-}
-
-static inline VmathVector3 vmathV3MakeYAxis_V( )
-{
-    VmathVector3 result;
-    vmathV3MakeYAxis(&result);
-    return result;
-}
-
-static inline VmathVector3 vmathV3MakeZAxis_V( )
-{
-    VmathVector3 result;
-    vmathV3MakeZAxis(&result);
-    return result;
-}
-
-static inline VmathVector3 vmathV3Lerp_V( float t, VmathVector3 vec0, VmathVector3 vec1 )
-{
-    VmathVector3 result;
-    vmathV3Lerp(&result, t, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathVector3 vmathV3Slerp_V( float t, VmathVector3 unitVec0, VmathVector3 unitVec1 )
-{
-    VmathVector3 result;
-    vmathV3Slerp(&result, t, &unitVec0, &unitVec1);
-    return result;
-}
-
-static inline void vmathV3SetX_V( VmathVector3 *result, float _x )
-{
-    vmathV3SetX(result, _x);
-}
-
-static inline float vmathV3GetX_V( VmathVector3 vec )
-{
-    return vmathV3GetX(&vec);
-}
-
-static inline void vmathV3SetY_V( VmathVector3 *result, float _y )
-{
-    vmathV3SetY(result, _y);
-}
-
-static inline float vmathV3GetY_V( VmathVector3 vec )
-{
-    return vmathV3GetY(&vec);
-}
-
-static inline void vmathV3SetZ_V( VmathVector3 *result, float _z )
-{
-    vmathV3SetZ(result, _z);
-}
-
-static inline float vmathV3GetZ_V( VmathVector3 vec )
-{
-    return vmathV3GetZ(&vec);
-}
-
-static inline void vmathV3SetElem_V( VmathVector3 *result, int idx, float value )
-{
-    vmathV3SetElem(result, idx, value);
-}
-
-static inline float vmathV3GetElem_V( VmathVector3 vec, int idx )
-{
-    return vmathV3GetElem(&vec, idx);
-}
-
-static inline VmathVector3 vmathV3Add_V( VmathVector3 vec0, VmathVector3 vec1 )
-{
-    VmathVector3 result;
-    vmathV3Add(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathVector3 vmathV3Sub_V( VmathVector3 vec0, VmathVector3 vec1 )
-{
-    VmathVector3 result;
-    vmathV3Sub(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathPoint3 vmathV3AddP3_V( VmathVector3 vec, VmathPoint3 pnt1 )
-{
-    VmathPoint3 result;
-    vmathV3AddP3(&result, &vec, &pnt1);
-    return result;
-}
-
-static inline VmathVector3 vmathV3ScalarMul_V( VmathVector3 vec, float scalar )
-{
-    VmathVector3 result;
-    vmathV3ScalarMul(&result, &vec, scalar);
-    return result;
-}
-
-static inline VmathVector3 vmathV3ScalarDiv_V( VmathVector3 vec, float scalar )
-{
-    VmathVector3 result;
-    vmathV3ScalarDiv(&result, &vec, scalar);
-    return result;
-}
-
-static inline VmathVector3 vmathV3Neg_V( VmathVector3 vec )
-{
-    VmathVector3 result;
-    vmathV3Neg(&result, &vec);
-    return result;
-}
-
-static inline VmathVector3 vmathV3MulPerElem_V( VmathVector3 vec0, VmathVector3 vec1 )
-{
-    VmathVector3 result;
-    vmathV3MulPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathVector3 vmathV3DivPerElem_V( VmathVector3 vec0, VmathVector3 vec1 )
-{
-    VmathVector3 result;
-    vmathV3DivPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathVector3 vmathV3RecipPerElem_V( VmathVector3 vec )
-{
-    VmathVector3 result;
-    vmathV3RecipPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathVector3 vmathV3SqrtPerElem_V( VmathVector3 vec )
-{
-    VmathVector3 result;
-    vmathV3SqrtPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathVector3 vmathV3RsqrtPerElem_V( VmathVector3 vec )
-{
-    VmathVector3 result;
-    vmathV3RsqrtPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathVector3 vmathV3AbsPerElem_V( VmathVector3 vec )
-{
-    VmathVector3 result;
-    vmathV3AbsPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathVector3 vmathV3CopySignPerElem_V( VmathVector3 vec0, VmathVector3 vec1 )
-{
-    VmathVector3 result;
-    vmathV3CopySignPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathVector3 vmathV3MaxPerElem_V( VmathVector3 vec0, VmathVector3 vec1 )
-{
-    VmathVector3 result;
-    vmathV3MaxPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline float vmathV3MaxElem_V( VmathVector3 vec )
-{
-    return vmathV3MaxElem(&vec);
-}
-
-static inline VmathVector3 vmathV3MinPerElem_V( VmathVector3 vec0, VmathVector3 vec1 )
-{
-    VmathVector3 result;
-    vmathV3MinPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline float vmathV3MinElem_V( VmathVector3 vec )
-{
-    return vmathV3MinElem(&vec);
-}
-
-static inline float vmathV3Sum_V( VmathVector3 vec )
-{
-    return vmathV3Sum(&vec);
-}
-
-static inline float vmathV3Dot_V( VmathVector3 vec0, VmathVector3 vec1 )
-{
-    return vmathV3Dot(&vec0, &vec1);
-}
-
-static inline float vmathV3LengthSqr_V( VmathVector3 vec )
-{
-    return vmathV3LengthSqr(&vec);
-}
-
-static inline float vmathV3Length_V( VmathVector3 vec )
-{
-    return vmathV3Length(&vec);
-}
-
-static inline VmathVector3 vmathV3Normalize_V( VmathVector3 vec )
-{
-    VmathVector3 result;
-    vmathV3Normalize(&result, &vec);
-    return result;
-}
-
-static inline VmathVector3 vmathV3Cross_V( VmathVector3 vec0, VmathVector3 vec1 )
-{
-    VmathVector3 result;
-    vmathV3Cross(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathVector3 vmathV3Select_V( VmathVector3 vec0, VmathVector3 vec1, unsigned int select1 )
-{
-    VmathVector3 result;
-    vmathV3Select(&result, &vec0, &vec1, select1);
-    return result;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathV3Print_V( VmathVector3 vec )
-{
-    vmathV3Print(&vec);
-}
-
-static inline void vmathV3Prints_V( VmathVector3 vec, const char *name )
-{
-    vmathV3Prints(&vec, name);
-}
-
-#endif
-
-static inline VmathVector4 vmathV4MakeFromElems_V( float _x, float _y, float _z, float _w )
-{
-    VmathVector4 result;
-    vmathV4MakeFromElems(&result, _x, _y, _z, _w);
-    return result;
-}
-
-static inline VmathVector4 vmathV4MakeFromV3Scalar_V( VmathVector3 xyz, float _w )
-{
-    VmathVector4 result;
-    vmathV4MakeFromV3Scalar(&result, &xyz, _w);
-    return result;
-}
-
-static inline VmathVector4 vmathV4MakeFromV3_V( VmathVector3 vec )
-{
-    VmathVector4 result;
-    vmathV4MakeFromV3(&result, &vec);
-    return result;
-}
-
-static inline VmathVector4 vmathV4MakeFromP3_V( VmathPoint3 pnt )
-{
-    VmathVector4 result;
-    vmathV4MakeFromP3(&result, &pnt);
-    return result;
-}
-
-static inline VmathVector4 vmathV4MakeFromQ_V( VmathQuat quat )
-{
-    VmathVector4 result;
-    vmathV4MakeFromQ(&result, &quat);
-    return result;
-}
-
-static inline VmathVector4 vmathV4MakeFromScalar_V( float scalar )
-{
-    VmathVector4 result;
-    vmathV4MakeFromScalar(&result, scalar);
-    return result;
-}
-
-static inline VmathVector4 vmathV4MakeXAxis_V( )
-{
-    VmathVector4 result;
-    vmathV4MakeXAxis(&result);
-    return result;
-}
-
-static inline VmathVector4 vmathV4MakeYAxis_V( )
-{
-    VmathVector4 result;
-    vmathV4MakeYAxis(&result);
-    return result;
-}
-
-static inline VmathVector4 vmathV4MakeZAxis_V( )
-{
-    VmathVector4 result;
-    vmathV4MakeZAxis(&result);
-    return result;
-}
-
-static inline VmathVector4 vmathV4MakeWAxis_V( )
-{
-    VmathVector4 result;
-    vmathV4MakeWAxis(&result);
-    return result;
-}
-
-static inline VmathVector4 vmathV4Lerp_V( float t, VmathVector4 vec0, VmathVector4 vec1 )
-{
-    VmathVector4 result;
-    vmathV4Lerp(&result, t, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathVector4 vmathV4Slerp_V( float t, VmathVector4 unitVec0, VmathVector4 unitVec1 )
-{
-    VmathVector4 result;
-    vmathV4Slerp(&result, t, &unitVec0, &unitVec1);
-    return result;
-}
-
-static inline void vmathV4SetXYZ_V( VmathVector4 *result, VmathVector3 vec )
-{
-    vmathV4SetXYZ(result, &vec);
-}
-
-static inline VmathVector3 vmathV4GetXYZ_V( VmathVector4 vec )
-{
-    VmathVector3 result;
-    vmathV4GetXYZ(&result, &vec);
-    return result;
-}
-
-static inline void vmathV4SetX_V( VmathVector4 *result, float _x )
-{
-    vmathV4SetX(result, _x);
-}
-
-static inline float vmathV4GetX_V( VmathVector4 vec )
-{
-    return vmathV4GetX(&vec);
-}
-
-static inline void vmathV4SetY_V( VmathVector4 *result, float _y )
-{
-    vmathV4SetY(result, _y);
-}
-
-static inline float vmathV4GetY_V( VmathVector4 vec )
-{
-    return vmathV4GetY(&vec);
-}
-
-static inline void vmathV4SetZ_V( VmathVector4 *result, float _z )
-{
-    vmathV4SetZ(result, _z);
-}
-
-static inline float vmathV4GetZ_V( VmathVector4 vec )
-{
-    return vmathV4GetZ(&vec);
-}
-
-static inline void vmathV4SetW_V( VmathVector4 *result, float _w )
-{
-    vmathV4SetW(result, _w);
-}
-
-static inline float vmathV4GetW_V( VmathVector4 vec )
-{
-    return vmathV4GetW(&vec);
-}
-
-static inline void vmathV4SetElem_V( VmathVector4 *result, int idx, float value )
-{
-    vmathV4SetElem(result, idx, value);
-}
-
-static inline float vmathV4GetElem_V( VmathVector4 vec, int idx )
-{
-    return vmathV4GetElem(&vec, idx);
-}
-
-static inline VmathVector4 vmathV4Add_V( VmathVector4 vec0, VmathVector4 vec1 )
-{
-    VmathVector4 result;
-    vmathV4Add(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathVector4 vmathV4Sub_V( VmathVector4 vec0, VmathVector4 vec1 )
-{
-    VmathVector4 result;
-    vmathV4Sub(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathVector4 vmathV4ScalarMul_V( VmathVector4 vec, float scalar )
-{
-    VmathVector4 result;
-    vmathV4ScalarMul(&result, &vec, scalar);
-    return result;
-}
-
-static inline VmathVector4 vmathV4ScalarDiv_V( VmathVector4 vec, float scalar )
-{
-    VmathVector4 result;
-    vmathV4ScalarDiv(&result, &vec, scalar);
-    return result;
-}
-
-static inline VmathVector4 vmathV4Neg_V( VmathVector4 vec )
-{
-    VmathVector4 result;
-    vmathV4Neg(&result, &vec);
-    return result;
-}
-
-static inline VmathVector4 vmathV4MulPerElem_V( VmathVector4 vec0, VmathVector4 vec1 )
-{
-    VmathVector4 result;
-    vmathV4MulPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathVector4 vmathV4DivPerElem_V( VmathVector4 vec0, VmathVector4 vec1 )
-{
-    VmathVector4 result;
-    vmathV4DivPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathVector4 vmathV4RecipPerElem_V( VmathVector4 vec )
-{
-    VmathVector4 result;
-    vmathV4RecipPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathVector4 vmathV4SqrtPerElem_V( VmathVector4 vec )
-{
-    VmathVector4 result;
-    vmathV4SqrtPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathVector4 vmathV4RsqrtPerElem_V( VmathVector4 vec )
-{
-    VmathVector4 result;
-    vmathV4RsqrtPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathVector4 vmathV4AbsPerElem_V( VmathVector4 vec )
-{
-    VmathVector4 result;
-    vmathV4AbsPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathVector4 vmathV4CopySignPerElem_V( VmathVector4 vec0, VmathVector4 vec1 )
-{
-    VmathVector4 result;
-    vmathV4CopySignPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathVector4 vmathV4MaxPerElem_V( VmathVector4 vec0, VmathVector4 vec1 )
-{
-    VmathVector4 result;
-    vmathV4MaxPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline float vmathV4MaxElem_V( VmathVector4 vec )
-{
-    return vmathV4MaxElem(&vec);
-}
-
-static inline VmathVector4 vmathV4MinPerElem_V( VmathVector4 vec0, VmathVector4 vec1 )
-{
-    VmathVector4 result;
-    vmathV4MinPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline float vmathV4MinElem_V( VmathVector4 vec )
-{
-    return vmathV4MinElem(&vec);
-}
-
-static inline float vmathV4Sum_V( VmathVector4 vec )
-{
-    return vmathV4Sum(&vec);
-}
-
-static inline float vmathV4Dot_V( VmathVector4 vec0, VmathVector4 vec1 )
-{
-    return vmathV4Dot(&vec0, &vec1);
-}
-
-static inline float vmathV4LengthSqr_V( VmathVector4 vec )
-{
-    return vmathV4LengthSqr(&vec);
-}
-
-static inline float vmathV4Length_V( VmathVector4 vec )
-{
-    return vmathV4Length(&vec);
-}
-
-static inline VmathVector4 vmathV4Normalize_V( VmathVector4 vec )
-{
-    VmathVector4 result;
-    vmathV4Normalize(&result, &vec);
-    return result;
-}
-
-static inline VmathVector4 vmathV4Select_V( VmathVector4 vec0, VmathVector4 vec1, unsigned int select1 )
-{
-    VmathVector4 result;
-    vmathV4Select(&result, &vec0, &vec1, select1);
-    return result;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathV4Print_V( VmathVector4 vec )
-{
-    vmathV4Print(&vec);
-}
-
-static inline void vmathV4Prints_V( VmathVector4 vec, const char *name )
-{
-    vmathV4Prints(&vec, name);
-}
-
-#endif
-
-static inline VmathPoint3 vmathP3MakeFromElems_V( float _x, float _y, float _z )
-{
-    VmathPoint3 result;
-    vmathP3MakeFromElems(&result, _x, _y, _z);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3MakeFromV3_V( VmathVector3 vec )
-{
-    VmathPoint3 result;
-    vmathP3MakeFromV3(&result, &vec);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3MakeFromScalar_V( float scalar )
-{
-    VmathPoint3 result;
-    vmathP3MakeFromScalar(&result, scalar);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3Lerp_V( float t, VmathPoint3 pnt0, VmathPoint3 pnt1 )
-{
-    VmathPoint3 result;
-    vmathP3Lerp(&result, t, &pnt0, &pnt1);
-    return result;
-}
-
-static inline void vmathP3SetX_V( VmathPoint3 *result, float _x )
-{
-    vmathP3SetX(result, _x);
-}
-
-static inline float vmathP3GetX_V( VmathPoint3 pnt )
-{
-    return vmathP3GetX(&pnt);
-}
-
-static inline void vmathP3SetY_V( VmathPoint3 *result, float _y )
-{
-    vmathP3SetY(result, _y);
-}
-
-static inline float vmathP3GetY_V( VmathPoint3 pnt )
-{
-    return vmathP3GetY(&pnt);
-}
-
-static inline void vmathP3SetZ_V( VmathPoint3 *result, float _z )
-{
-    vmathP3SetZ(result, _z);
-}
-
-static inline float vmathP3GetZ_V( VmathPoint3 pnt )
-{
-    return vmathP3GetZ(&pnt);
-}
-
-static inline void vmathP3SetElem_V( VmathPoint3 *result, int idx, float value )
-{
-    vmathP3SetElem(result, idx, value);
-}
-
-static inline float vmathP3GetElem_V( VmathPoint3 pnt, int idx )
-{
-    return vmathP3GetElem(&pnt, idx);
-}
-
-static inline VmathVector3 vmathP3Sub_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
-{
-    VmathVector3 result;
-    vmathP3Sub(&result, &pnt0, &pnt1);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3AddV3_V( VmathPoint3 pnt, VmathVector3 vec1 )
-{
-    VmathPoint3 result;
-    vmathP3AddV3(&result, &pnt, &vec1);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3SubV3_V( VmathPoint3 pnt, VmathVector3 vec1 )
-{
-    VmathPoint3 result;
-    vmathP3SubV3(&result, &pnt, &vec1);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3MulPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
-{
-    VmathPoint3 result;
-    vmathP3MulPerElem(&result, &pnt0, &pnt1);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3DivPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
-{
-    VmathPoint3 result;
-    vmathP3DivPerElem(&result, &pnt0, &pnt1);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3RecipPerElem_V( VmathPoint3 pnt )
-{
-    VmathPoint3 result;
-    vmathP3RecipPerElem(&result, &pnt);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3SqrtPerElem_V( VmathPoint3 pnt )
-{
-    VmathPoint3 result;
-    vmathP3SqrtPerElem(&result, &pnt);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3RsqrtPerElem_V( VmathPoint3 pnt )
-{
-    VmathPoint3 result;
-    vmathP3RsqrtPerElem(&result, &pnt);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3AbsPerElem_V( VmathPoint3 pnt )
-{
-    VmathPoint3 result;
-    vmathP3AbsPerElem(&result, &pnt);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3CopySignPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
-{
-    VmathPoint3 result;
-    vmathP3CopySignPerElem(&result, &pnt0, &pnt1);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3MaxPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
-{
-    VmathPoint3 result;
-    vmathP3MaxPerElem(&result, &pnt0, &pnt1);
-    return result;
-}
-
-static inline float vmathP3MaxElem_V( VmathPoint3 pnt )
-{
-    return vmathP3MaxElem(&pnt);
-}
-
-static inline VmathPoint3 vmathP3MinPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
-{
-    VmathPoint3 result;
-    vmathP3MinPerElem(&result, &pnt0, &pnt1);
-    return result;
-}
-
-static inline float vmathP3MinElem_V( VmathPoint3 pnt )
-{
-    return vmathP3MinElem(&pnt);
-}
-
-static inline float vmathP3Sum_V( VmathPoint3 pnt )
-{
-    return vmathP3Sum(&pnt);
-}
-
-static inline VmathPoint3 vmathP3Scale_V( VmathPoint3 pnt, float scaleVal )
-{
-    VmathPoint3 result;
-    vmathP3Scale(&result, &pnt, scaleVal);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3NonUniformScale_V( VmathPoint3 pnt, VmathVector3 scaleVec )
-{
-    VmathPoint3 result;
-    vmathP3NonUniformScale(&result, &pnt, &scaleVec);
-    return result;
-}
-
-static inline float vmathP3Projection_V( VmathPoint3 pnt, VmathVector3 unitVec )
-{
-    return vmathP3Projection(&pnt, &unitVec);
-}
-
-static inline float vmathP3DistSqrFromOrigin_V( VmathPoint3 pnt )
-{
-    return vmathP3DistSqrFromOrigin(&pnt);
-}
-
-static inline float vmathP3DistFromOrigin_V( VmathPoint3 pnt )
-{
-    return vmathP3DistFromOrigin(&pnt);
-}
-
-static inline float vmathP3DistSqr_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
-{
-    return vmathP3DistSqr(&pnt0, &pnt1);
-}
-
-static inline float vmathP3Dist_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
-{
-    return vmathP3Dist(&pnt0, &pnt1);
-}
-
-static inline VmathPoint3 vmathP3Select_V( VmathPoint3 pnt0, VmathPoint3 pnt1, unsigned int select1 )
-{
-    VmathPoint3 result;
-    vmathP3Select(&result, &pnt0, &pnt1, select1);
-    return result;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathP3Print_V( VmathPoint3 pnt )
-{
-    vmathP3Print(&pnt);
-}
-
-static inline void vmathP3Prints_V( VmathPoint3 pnt, const char *name )
-{
-    vmathP3Prints(&pnt, name);
-}
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_VEC_AOS_V_C_H
+#define _VECTORMATH_VEC_AOS_V_C_H
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*-----------------------------------------------------------------------------
+ * Constants
+ */
+#define _VECTORMATH_SLERP_TOL 0.999f
+
+/*-----------------------------------------------------------------------------
+ * Definitions
+ */
+#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
+#define _VECTORMATH_INTERNAL_FUNCTIONS
+
+#endif
+
+static inline VmathVector3 vmathV3MakeFromElems_V( float _x, float _y, float _z )
+{
+    VmathVector3 result;
+    vmathV3MakeFromElems(&result, _x, _y, _z);
+    return result;
+}
+
+static inline VmathVector3 vmathV3MakeFromP3_V( VmathPoint3 pnt )
+{
+    VmathVector3 result;
+    vmathV3MakeFromP3(&result, &pnt);
+    return result;
+}
+
+static inline VmathVector3 vmathV3MakeFromScalar_V( float scalar )
+{
+    VmathVector3 result;
+    vmathV3MakeFromScalar(&result, scalar);
+    return result;
+}
+
+static inline VmathVector3 vmathV3MakeXAxis_V( void ) /* (void): a real C prototype, so stray arguments are rejected */
+{   /* By-value form of vmathV3MakeXAxis(): builds the vector in a local and returns it. */
+    VmathVector3 result;
+    vmathV3MakeXAxis(&result);   /* the pointer-based API writes into result */
+    return result;
+}
+
+static inline VmathVector3 vmathV3MakeYAxis_V( void ) /* (void): a real C prototype, so stray arguments are rejected */
+{   /* By-value form of vmathV3MakeYAxis(): builds the vector in a local and returns it. */
+    VmathVector3 result;
+    vmathV3MakeYAxis(&result);   /* the pointer-based API writes into result */
+    return result;
+}
+
+static inline VmathVector3 vmathV3MakeZAxis_V( void ) /* (void): a real C prototype, so stray arguments are rejected */
+{   /* By-value form of vmathV3MakeZAxis(): builds the vector in a local and returns it. */
+    VmathVector3 result;
+    vmathV3MakeZAxis(&result);   /* the pointer-based API writes into result */
+    return result;
+}
+
+static inline VmathVector3 vmathV3Lerp_V( float t, VmathVector3 vec0, VmathVector3 vec1 )
+{
+    VmathVector3 result;
+    vmathV3Lerp(&result, t, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathVector3 vmathV3Slerp_V( float t, VmathVector3 unitVec0, VmathVector3 unitVec1 )
+{
+    VmathVector3 result;
+    vmathV3Slerp(&result, t, &unitVec0, &unitVec1);
+    return result;
+}
+
+static inline void vmathV3SetX_V( VmathVector3 *result, float _x )
+{
+    vmathV3SetX(result, _x);
+}
+
+static inline float vmathV3GetX_V( VmathVector3 vec )
+{
+    return vmathV3GetX(&vec);
+}
+
+static inline void vmathV3SetY_V( VmathVector3 *result, float _y )
+{
+    vmathV3SetY(result, _y);
+}
+
+static inline float vmathV3GetY_V( VmathVector3 vec )
+{
+    return vmathV3GetY(&vec);
+}
+
+static inline void vmathV3SetZ_V( VmathVector3 *result, float _z )
+{
+    vmathV3SetZ(result, _z);
+}
+
+static inline float vmathV3GetZ_V( VmathVector3 vec )
+{
+    return vmathV3GetZ(&vec);
+}
+
+static inline void vmathV3SetElem_V( VmathVector3 *result, int idx, float value )
+{
+    vmathV3SetElem(result, idx, value);
+}
+
+static inline float vmathV3GetElem_V( VmathVector3 vec, int idx )
+{
+    return vmathV3GetElem(&vec, idx);
+}
+
+static inline VmathVector3 vmathV3Add_V( VmathVector3 vec0, VmathVector3 vec1 )
+{
+    VmathVector3 result;
+    vmathV3Add(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathVector3 vmathV3Sub_V( VmathVector3 vec0, VmathVector3 vec1 )
+{
+    VmathVector3 result;
+    vmathV3Sub(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathPoint3 vmathV3AddP3_V( VmathVector3 vec, VmathPoint3 pnt1 )
+{
+    VmathPoint3 result;
+    vmathV3AddP3(&result, &vec, &pnt1);
+    return result;
+}
+
+static inline VmathVector3 vmathV3ScalarMul_V( VmathVector3 vec, float scalar )
+{
+    VmathVector3 result;
+    vmathV3ScalarMul(&result, &vec, scalar);
+    return result;
+}
+
+static inline VmathVector3 vmathV3ScalarDiv_V( VmathVector3 vec, float scalar )
+{
+    VmathVector3 result;
+    vmathV3ScalarDiv(&result, &vec, scalar);
+    return result;
+}
+
+static inline VmathVector3 vmathV3Neg_V( VmathVector3 vec )
+{
+    VmathVector3 result;
+    vmathV3Neg(&result, &vec);
+    return result;
+}
+
+static inline VmathVector3 vmathV3MulPerElem_V( VmathVector3 vec0, VmathVector3 vec1 )
+{
+    VmathVector3 result;
+    vmathV3MulPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathVector3 vmathV3DivPerElem_V( VmathVector3 vec0, VmathVector3 vec1 )
+{
+    VmathVector3 result;
+    vmathV3DivPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathVector3 vmathV3RecipPerElem_V( VmathVector3 vec )
+{
+    VmathVector3 result;
+    vmathV3RecipPerElem(&result, &vec);
+    return result;
+}
+
+static inline VmathVector3 vmathV3SqrtPerElem_V( VmathVector3 vec )
+{
+    VmathVector3 result;
+    vmathV3SqrtPerElem(&result, &vec);
+    return result;
+}
+
+static inline VmathVector3 vmathV3RsqrtPerElem_V( VmathVector3 vec )
+{
+    VmathVector3 result;
+    vmathV3RsqrtPerElem(&result, &vec);
+    return result;
+}
+
+static inline VmathVector3 vmathV3AbsPerElem_V( VmathVector3 vec )
+{
+    VmathVector3 result;
+    vmathV3AbsPerElem(&result, &vec);
+    return result;
+}
+
+static inline VmathVector3 vmathV3CopySignPerElem_V( VmathVector3 vec0, VmathVector3 vec1 )
+{
+    VmathVector3 result;
+    vmathV3CopySignPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathVector3 vmathV3MaxPerElem_V( VmathVector3 vec0, VmathVector3 vec1 )
+{
+    VmathVector3 result;
+    vmathV3MaxPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline float vmathV3MaxElem_V( VmathVector3 vec )
+{
+    return vmathV3MaxElem(&vec);
+}
+
+static inline VmathVector3 vmathV3MinPerElem_V( VmathVector3 vec0, VmathVector3 vec1 )
+{
+    VmathVector3 result;
+    vmathV3MinPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline float vmathV3MinElem_V( VmathVector3 vec )
+{
+    return vmathV3MinElem(&vec);
+}
+
+static inline float vmathV3Sum_V( VmathVector3 vec )
+{
+    return vmathV3Sum(&vec);
+}
+
+static inline float vmathV3Dot_V( VmathVector3 vec0, VmathVector3 vec1 )
+{
+    return vmathV3Dot(&vec0, &vec1);
+}
+
+static inline float vmathV3LengthSqr_V( VmathVector3 vec )
+{
+    return vmathV3LengthSqr(&vec);
+}
+
+static inline float vmathV3Length_V( VmathVector3 vec )
+{
+    return vmathV3Length(&vec);
+}
+
+static inline VmathVector3 vmathV3Normalize_V( VmathVector3 vec )
+{
+    VmathVector3 result;
+    vmathV3Normalize(&result, &vec);
+    return result;
+}
+
+static inline VmathVector3 vmathV3Cross_V( VmathVector3 vec0, VmathVector3 vec1 )
+{
+    VmathVector3 result;
+    vmathV3Cross(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathVector3 vmathV3Select_V( VmathVector3 vec0, VmathVector3 vec1, unsigned int select1 )
+{
+    VmathVector3 result;
+    vmathV3Select(&result, &vec0, &vec1, select1);
+    return result;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathV3Print_V( VmathVector3 vec )
+{
+    vmathV3Print(&vec);
+}
+
+static inline void vmathV3Prints_V( VmathVector3 vec, const char *name )
+{
+    vmathV3Prints(&vec, name);
+}
+
+#endif
+
+static inline VmathVector4 vmathV4MakeFromElems_V( float _x, float _y, float _z, float _w )
+{
+    VmathVector4 result;
+    vmathV4MakeFromElems(&result, _x, _y, _z, _w);
+    return result;
+}
+
+static inline VmathVector4 vmathV4MakeFromV3Scalar_V( VmathVector3 xyz, float _w )
+{
+    VmathVector4 result;
+    vmathV4MakeFromV3Scalar(&result, &xyz, _w);
+    return result;
+}
+
+static inline VmathVector4 vmathV4MakeFromV3_V( VmathVector3 vec )
+{
+    VmathVector4 result;
+    vmathV4MakeFromV3(&result, &vec);
+    return result;
+}
+
+static inline VmathVector4 vmathV4MakeFromP3_V( VmathPoint3 pnt )
+{
+    VmathVector4 result;
+    vmathV4MakeFromP3(&result, &pnt);
+    return result;
+}
+
+static inline VmathVector4 vmathV4MakeFromQ_V( VmathQuat quat )
+{
+    VmathVector4 result;
+    vmathV4MakeFromQ(&result, &quat);
+    return result;
+}
+
+static inline VmathVector4 vmathV4MakeFromScalar_V( float scalar )
+{
+    VmathVector4 result;
+    vmathV4MakeFromScalar(&result, scalar);
+    return result;
+}
+
+static inline VmathVector4 vmathV4MakeXAxis_V( void ) /* (void): a real C prototype, so stray arguments are rejected */
+{   /* By-value form of vmathV4MakeXAxis(): builds the vector in a local and returns it. */
+    VmathVector4 result;
+    vmathV4MakeXAxis(&result);   /* the pointer-based API writes into result */
+    return result;
+}
+
+static inline VmathVector4 vmathV4MakeYAxis_V( void ) /* (void): a real C prototype, so stray arguments are rejected */
+{   /* By-value form of vmathV4MakeYAxis(): builds the vector in a local and returns it. */
+    VmathVector4 result;
+    vmathV4MakeYAxis(&result);   /* the pointer-based API writes into result */
+    return result;
+}
+
+static inline VmathVector4 vmathV4MakeZAxis_V( void ) /* (void): a real C prototype, so stray arguments are rejected */
+{   /* By-value form of vmathV4MakeZAxis(): builds the vector in a local and returns it. */
+    VmathVector4 result;
+    vmathV4MakeZAxis(&result);   /* the pointer-based API writes into result */
+    return result;
+}
+
+static inline VmathVector4 vmathV4MakeWAxis_V( void ) /* (void): a real C prototype, so stray arguments are rejected */
+{   /* By-value form of vmathV4MakeWAxis(): builds the vector in a local and returns it. */
+    VmathVector4 result;
+    vmathV4MakeWAxis(&result);   /* the pointer-based API writes into result */
+    return result;
+}
+
+static inline VmathVector4 vmathV4Lerp_V( float t, VmathVector4 vec0, VmathVector4 vec1 )
+{
+    VmathVector4 result;
+    vmathV4Lerp(&result, t, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathVector4 vmathV4Slerp_V( float t, VmathVector4 unitVec0, VmathVector4 unitVec1 )
+{
+    VmathVector4 result;
+    vmathV4Slerp(&result, t, &unitVec0, &unitVec1);
+    return result;
+}
+
+static inline void vmathV4SetXYZ_V( VmathVector4 *result, VmathVector3 vec )
+{
+    vmathV4SetXYZ(result, &vec);
+}
+
+static inline VmathVector3 vmathV4GetXYZ_V( VmathVector4 vec )
+{
+    VmathVector3 result;
+    vmathV4GetXYZ(&result, &vec);
+    return result;
+}
+
+static inline void vmathV4SetX_V( VmathVector4 *result, float _x )
+{
+    vmathV4SetX(result, _x);
+}
+
+static inline float vmathV4GetX_V( VmathVector4 vec )
+{
+    return vmathV4GetX(&vec);
+}
+
+static inline void vmathV4SetY_V( VmathVector4 *result, float _y )
+{
+    vmathV4SetY(result, _y);
+}
+
+static inline float vmathV4GetY_V( VmathVector4 vec )
+{
+    return vmathV4GetY(&vec);
+}
+
+static inline void vmathV4SetZ_V( VmathVector4 *result, float _z )
+{
+    vmathV4SetZ(result, _z);
+}
+
+static inline float vmathV4GetZ_V( VmathVector4 vec )
+{
+    return vmathV4GetZ(&vec);
+}
+
+static inline void vmathV4SetW_V( VmathVector4 *result, float _w )
+{
+    vmathV4SetW(result, _w);
+}
+
+static inline float vmathV4GetW_V( VmathVector4 vec )
+{
+    return vmathV4GetW(&vec);
+}
+
+static inline void vmathV4SetElem_V( VmathVector4 *result, int idx, float value )
+{
+    vmathV4SetElem(result, idx, value);
+}
+
+static inline float vmathV4GetElem_V( VmathVector4 vec, int idx )
+{
+    return vmathV4GetElem(&vec, idx);
+}
+
+static inline VmathVector4 vmathV4Add_V( VmathVector4 vec0, VmathVector4 vec1 )
+{
+    VmathVector4 result;
+    vmathV4Add(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathVector4 vmathV4Sub_V( VmathVector4 vec0, VmathVector4 vec1 )
+{
+    VmathVector4 result;
+    vmathV4Sub(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathVector4 vmathV4ScalarMul_V( VmathVector4 vec, float scalar )
+{
+    VmathVector4 result;
+    vmathV4ScalarMul(&result, &vec, scalar);
+    return result;
+}
+
+static inline VmathVector4 vmathV4ScalarDiv_V( VmathVector4 vec, float scalar )
+{
+    VmathVector4 result;
+    vmathV4ScalarDiv(&result, &vec, scalar);
+    return result;
+}
+
+static inline VmathVector4 vmathV4Neg_V( VmathVector4 vec )
+{
+    VmathVector4 result;
+    vmathV4Neg(&result, &vec);
+    return result;
+}
+
+static inline VmathVector4 vmathV4MulPerElem_V( VmathVector4 vec0, VmathVector4 vec1 )
+{
+    VmathVector4 result;
+    vmathV4MulPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathVector4 vmathV4DivPerElem_V( VmathVector4 vec0, VmathVector4 vec1 )
+{
+    VmathVector4 result;
+    vmathV4DivPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathVector4 vmathV4RecipPerElem_V( VmathVector4 vec )
+{
+    VmathVector4 result;
+    vmathV4RecipPerElem(&result, &vec);
+    return result;
+}
+
+static inline VmathVector4 vmathV4SqrtPerElem_V( VmathVector4 vec )
+{
+    VmathVector4 result;
+    vmathV4SqrtPerElem(&result, &vec);
+    return result;
+}
+
+static inline VmathVector4 vmathV4RsqrtPerElem_V( VmathVector4 vec )
+{
+    VmathVector4 result;
+    vmathV4RsqrtPerElem(&result, &vec);
+    return result;
+}
+
+static inline VmathVector4 vmathV4AbsPerElem_V( VmathVector4 vec )
+{
+    VmathVector4 result;
+    vmathV4AbsPerElem(&result, &vec);
+    return result;
+}
+
+static inline VmathVector4 vmathV4CopySignPerElem_V( VmathVector4 vec0, VmathVector4 vec1 )
+{
+    VmathVector4 result;
+    vmathV4CopySignPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathVector4 vmathV4MaxPerElem_V( VmathVector4 vec0, VmathVector4 vec1 )
+{
+    VmathVector4 result;
+    vmathV4MaxPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline float vmathV4MaxElem_V( VmathVector4 vec )
+{
+    return vmathV4MaxElem(&vec);
+}
+
+static inline VmathVector4 vmathV4MinPerElem_V( VmathVector4 vec0, VmathVector4 vec1 )
+{
+    VmathVector4 result;
+    vmathV4MinPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline float vmathV4MinElem_V( VmathVector4 vec )
+{
+    return vmathV4MinElem(&vec);
+}
+
+static inline float vmathV4Sum_V( VmathVector4 vec )
+{
+    return vmathV4Sum(&vec);
+}
+
+static inline float vmathV4Dot_V( VmathVector4 vec0, VmathVector4 vec1 )
+{
+    return vmathV4Dot(&vec0, &vec1);
+}
+
+static inline float vmathV4LengthSqr_V( VmathVector4 vec )
+{
+    return vmathV4LengthSqr(&vec);
+}
+
+static inline float vmathV4Length_V( VmathVector4 vec )
+{
+    return vmathV4Length(&vec);
+}
+
+static inline VmathVector4 vmathV4Normalize_V( VmathVector4 vec )
+{
+    VmathVector4 result;
+    vmathV4Normalize(&result, &vec);
+    return result;
+}
+
+static inline VmathVector4 vmathV4Select_V( VmathVector4 vec0, VmathVector4 vec1, unsigned int select1 )
+{
+    VmathVector4 result;
+    vmathV4Select(&result, &vec0, &vec1, select1);
+    return result;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathV4Print_V( VmathVector4 vec )
+{
+    vmathV4Print(&vec);
+}
+
+static inline void vmathV4Prints_V( VmathVector4 vec, const char *name )
+{
+    vmathV4Prints(&vec, name);
+}
+
+#endif
+
+static inline VmathPoint3 vmathP3MakeFromElems_V( float _x, float _y, float _z )
+{
+    VmathPoint3 result;
+    vmathP3MakeFromElems(&result, _x, _y, _z);
+    return result;
+}
+
+static inline VmathPoint3 vmathP3MakeFromV3_V( VmathVector3 vec )
+{
+    VmathPoint3 result;
+    vmathP3MakeFromV3(&result, &vec);
+    return result;
+}
+
+static inline VmathPoint3 vmathP3MakeFromScalar_V( float scalar )
+{
+    VmathPoint3 result;
+    vmathP3MakeFromScalar(&result, scalar);
+    return result;
+}
+
+static inline VmathPoint3 vmathP3Lerp_V( float t, VmathPoint3 pnt0, VmathPoint3 pnt1 )
+{
+    VmathPoint3 result;
+    vmathP3Lerp(&result, t, &pnt0, &pnt1);
+    return result;
+}
+
+static inline void vmathP3SetX_V( VmathPoint3 *result, float _x )
+{
+    vmathP3SetX(result, _x);
+}
+
+static inline float vmathP3GetX_V( VmathPoint3 pnt )
+{
+    return vmathP3GetX(&pnt);
+}
+
+static inline void vmathP3SetY_V( VmathPoint3 *result, float _y )
+{
+    vmathP3SetY(result, _y);
+}
+
+static inline float vmathP3GetY_V( VmathPoint3 pnt )
+{
+    return vmathP3GetY(&pnt);
+}
+
+static inline void vmathP3SetZ_V( VmathPoint3 *result, float _z )
+{
+    vmathP3SetZ(result, _z);
+}
+
+static inline float vmathP3GetZ_V( VmathPoint3 pnt )
+{
+    return vmathP3GetZ(&pnt);
+}
+
+static inline void vmathP3SetElem_V( VmathPoint3 *result, int idx, float value )
+{
+    vmathP3SetElem(result, idx, value);
+}
+
+static inline float vmathP3GetElem_V( VmathPoint3 pnt, int idx )
+{
+    return vmathP3GetElem(&pnt, idx);
+}
+
+static inline VmathVector3 vmathP3Sub_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
+{
+    VmathVector3 result;
+    vmathP3Sub(&result, &pnt0, &pnt1);
+    return result;
+}
+
+static inline VmathPoint3 vmathP3AddV3_V( VmathPoint3 pnt, VmathVector3 vec1 )
+{
+    VmathPoint3 result;
+    vmathP3AddV3(&result, &pnt, &vec1);
+    return result;
+}
+
+static inline VmathPoint3 vmathP3SubV3_V( VmathPoint3 pnt, VmathVector3 vec1 )
+{
+    VmathPoint3 result;
+    vmathP3SubV3(&result, &pnt, &vec1);
+    return result;
+}
+
+static inline VmathPoint3 vmathP3MulPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
+{
+    VmathPoint3 result;
+    vmathP3MulPerElem(&result, &pnt0, &pnt1);
+    return result;
+}
+
+static inline VmathPoint3 vmathP3DivPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
+{
+    VmathPoint3 result;
+    vmathP3DivPerElem(&result, &pnt0, &pnt1);
+    return result;
+}
+
+static inline VmathPoint3 vmathP3RecipPerElem_V( VmathPoint3 pnt )
+{
+    VmathPoint3 result;
+    vmathP3RecipPerElem(&result, &pnt);
+    return result;
+}
+
+static inline VmathPoint3 vmathP3SqrtPerElem_V( VmathPoint3 pnt )
+{
+    VmathPoint3 result;
+    vmathP3SqrtPerElem(&result, &pnt);
+    return result;
+}
+
+static inline VmathPoint3 vmathP3RsqrtPerElem_V( VmathPoint3 pnt )
+{
+    VmathPoint3 result;
+    vmathP3RsqrtPerElem(&result, &pnt);
+    return result;
+}
+
+static inline VmathPoint3 vmathP3AbsPerElem_V( VmathPoint3 pnt )
+{
+    VmathPoint3 result;
+    vmathP3AbsPerElem(&result, &pnt);
+    return result;
+}
+
+static inline VmathPoint3 vmathP3CopySignPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
+{
+    VmathPoint3 result;
+    vmathP3CopySignPerElem(&result, &pnt0, &pnt1);
+    return result;
+}
+
+static inline VmathPoint3 vmathP3MaxPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
+{
+    VmathPoint3 result;
+    vmathP3MaxPerElem(&result, &pnt0, &pnt1);
+    return result;
+}
+
+static inline float vmathP3MaxElem_V( VmathPoint3 pnt )
+{
+    return vmathP3MaxElem(&pnt);
+}
+
+static inline VmathPoint3 vmathP3MinPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
+{
+    VmathPoint3 result;
+    vmathP3MinPerElem(&result, &pnt0, &pnt1);
+    return result;
+}
+
+static inline float vmathP3MinElem_V( VmathPoint3 pnt )
+{
+    return vmathP3MinElem(&pnt);
+}
+
+static inline float vmathP3Sum_V( VmathPoint3 pnt )
+{
+    return vmathP3Sum(&pnt);
+}
+
+static inline VmathPoint3 vmathP3Scale_V( VmathPoint3 pnt, float scaleVal )
+{
+    VmathPoint3 result;
+    vmathP3Scale(&result, &pnt, scaleVal);
+    return result;
+}
+
+static inline VmathPoint3 vmathP3NonUniformScale_V( VmathPoint3 pnt, VmathVector3 scaleVec )
+{
+    VmathPoint3 result;
+    vmathP3NonUniformScale(&result, &pnt, &scaleVec);
+    return result;
+}
+
+static inline float vmathP3Projection_V( VmathPoint3 pnt, VmathVector3 unitVec )
+{
+    return vmathP3Projection(&pnt, &unitVec);
+}
+
+static inline float vmathP3DistSqrFromOrigin_V( VmathPoint3 pnt )
+{
+    return vmathP3DistSqrFromOrigin(&pnt);
+}
+
+static inline float vmathP3DistFromOrigin_V( VmathPoint3 pnt )
+{
+    return vmathP3DistFromOrigin(&pnt);
+}
+
+static inline float vmathP3DistSqr_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
+{
+    return vmathP3DistSqr(&pnt0, &pnt1);
+}
+
+static inline float vmathP3Dist_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
+{
+    return vmathP3Dist(&pnt0, &pnt1);
+}
+
+static inline VmathPoint3 vmathP3Select_V( VmathPoint3 pnt0, VmathPoint3 pnt1, unsigned int select1 )
+{
+    VmathPoint3 result;
+    vmathP3Select(&result, &pnt0, &pnt1, select1);
+    return result;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathP3Print_V( VmathPoint3 pnt )
+{
+    vmathP3Print(&pnt);
+}
+
+static inline void vmathP3Prints_V( VmathPoint3 pnt, const char *name )
+{
+    vmathP3Prints(&pnt, name);
+}
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/scalar/c/vectormath_aos.h b/Extras/vectormathlibrary/include/vectormath/scalar/c/vectormath_aos.h
index 33afb9577..73dfc8a54 100644
--- a/Extras/vectormathlibrary/include/vectormath/scalar/c/vectormath_aos.h
+++ b/Extras/vectormathlibrary/include/vectormath/scalar/c/vectormath_aos.h
@@ -1,1879 +1,1879 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_AOS_C_H
-#define _VECTORMATH_AOS_C_H
-
-#include <math.h>
-
-#ifdef _VECTORMATH_DEBUG
-#include <stdio.h>
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-#ifndef _VECTORMATH_AOS_C_TYPES_H
-#define _VECTORMATH_AOS_C_TYPES_H
-
-/* A 3-D vector in array-of-structures format
- */
-typedef struct _VmathVector3
-{
-    float x;
-    float y;
-    float z;
-#ifndef __GNUC__
-    float d;
-#endif
-}
-#ifdef __GNUC__
-__attribute__ ((aligned(16)))
-#endif
-VmathVector3;
-
-/* A 4-D vector in array-of-structures format
- */
-typedef struct _VmathVector4
-{
-    float x;
-    float y;
-    float z;
-    float w;
-}
-#ifdef __GNUC__
-__attribute__ ((aligned(16)))
-#endif
-VmathVector4;
-
-/* A 3-D point in array-of-structures format
- */
-typedef struct _VmathPoint3
-{
-    float x;
-    float y;
-    float z;
-#ifndef __GNUC__
-    float d;
-#endif
-}
-#ifdef __GNUC__
-__attribute__ ((aligned(16)))
-#endif
-VmathPoint3;
-
-/* A quaternion in array-of-structures format
- */
-typedef struct _VmathQuat
-{
-    float x;
-    float y;
-    float z;
-    float w;
-}
-#ifdef __GNUC__
-__attribute__ ((aligned(16)))
-#endif
-VmathQuat;
-
-/* A 3x3 matrix in array-of-structures format
- */
-typedef struct _VmathMatrix3
-{
-    VmathVector3 col0;
-    VmathVector3 col1;
-    VmathVector3 col2;
-} VmathMatrix3;
-
-/* A 4x4 matrix in array-of-structures format
- */
-typedef struct _VmathMatrix4
-{
-    VmathVector4 col0;
-    VmathVector4 col1;
-    VmathVector4 col2;
-    VmathVector4 col3;
-} VmathMatrix4;
-
-/* A 3x4 transformation matrix in array-of-structures format
- */
-typedef struct _VmathTransform3
-{
-    VmathVector3 col0;
-    VmathVector3 col1;
-    VmathVector3 col2;
-    VmathVector3 col3;
-} VmathTransform3;
-
-#endif
-
-/*
- * Copy a 3-D vector
- */
-static inline void vmathV3Copy( VmathVector3 *result, const VmathVector3 *vec );
-
-/*
- * Construct a 3-D vector from x, y, and z elements
- */
-static inline void vmathV3MakeFromElems( VmathVector3 *result, float x, float y, float z );
-
-/*
- * Copy elements from a 3-D point into a 3-D vector
- */
-static inline void vmathV3MakeFromP3( VmathVector3 *result, const VmathPoint3 *pnt );
-
-/*
- * Set all elements of a 3-D vector to the same scalar value
- */
-static inline void vmathV3MakeFromScalar( VmathVector3 *result, float scalar );
-
-/*
- * Set the x element of a 3-D vector
- */
-static inline void vmathV3SetX( VmathVector3 *result, float x );
-
-/*
- * Set the y element of a 3-D vector
- */
-static inline void vmathV3SetY( VmathVector3 *result, float y );
-
-/*
- * Set the z element of a 3-D vector
- */
-static inline void vmathV3SetZ( VmathVector3 *result, float z );
-
-/*
- * Get the x element of a 3-D vector
- */
-static inline float vmathV3GetX( const VmathVector3 *vec );
-
-/*
- * Get the y element of a 3-D vector
- */
-static inline float vmathV3GetY( const VmathVector3 *vec );
-
-/*
- * Get the z element of a 3-D vector
- */
-static inline float vmathV3GetZ( const VmathVector3 *vec );
-
-/*
- * Set an x, y, or z element of a 3-D vector by index
- */
-static inline void vmathV3SetElem( VmathVector3 *result, int idx, float value );
-
-/*
- * Get an x, y, or z element of a 3-D vector by index
- */
-static inline float vmathV3GetElem( const VmathVector3 *vec, int idx );
-
-/*
- * Add two 3-D vectors
- */
-static inline void vmathV3Add( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
-
-/*
- * Subtract a 3-D vector from another 3-D vector
- */
-static inline void vmathV3Sub( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
-
-/*
- * Add a 3-D vector to a 3-D point
- */
-static inline void vmathV3AddP3( VmathPoint3 *result, const VmathVector3 *vec, const VmathPoint3 *pnt );
-
-/*
- * Multiply a 3-D vector by a scalar
- */
-static inline void vmathV3ScalarMul( VmathVector3 *result, const VmathVector3 *vec, float scalar );
-
-/*
- * Divide a 3-D vector by a scalar
- */
-static inline void vmathV3ScalarDiv( VmathVector3 *result, const VmathVector3 *vec, float scalar );
-
-/*
- * Negate all elements of a 3-D vector
- */
-static inline void vmathV3Neg( VmathVector3 *result, const VmathVector3 *vec );
-
-/*
- * Construct x axis
- */
-static inline void vmathV3MakeXAxis( VmathVector3 *result );
-
-/*
- * Construct y axis
- */
-static inline void vmathV3MakeYAxis( VmathVector3 *result );
-
-/*
- * Construct z axis
- */
-static inline void vmathV3MakeZAxis( VmathVector3 *result );
-
-/*
- * Multiply two 3-D vectors per element
- */
-static inline void vmathV3MulPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
-
-/*
- * Divide two 3-D vectors per element
- * NOTE: 
- * Floating-point behavior matches standard library function divf4.
- */
-static inline void vmathV3DivPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
-
-/*
- * Compute the reciprocal of a 3-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function recipf4.
- */
-static inline void vmathV3RecipPerElem( VmathVector3 *result, const VmathVector3 *vec );
-
-/*
- * Compute the square root of a 3-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function sqrtf4.
- */
-static inline void vmathV3SqrtPerElem( VmathVector3 *result, const VmathVector3 *vec );
-
-/*
- * Compute the reciprocal square root of a 3-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function rsqrtf4.
- */
-static inline void vmathV3RsqrtPerElem( VmathVector3 *result, const VmathVector3 *vec );
-
-/*
- * Compute the absolute value of a 3-D vector per element
- */
-static inline void vmathV3AbsPerElem( VmathVector3 *result, const VmathVector3 *vec );
-
-/*
- * Copy sign from one 3-D vector to another, per element
- */
-static inline void vmathV3CopySignPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
-
-/*
- * Maximum of two 3-D vectors per element
- */
-static inline void vmathV3MaxPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
-
-/*
- * Minimum of two 3-D vectors per element
- */
-static inline void vmathV3MinPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
-
-/*
- * Maximum element of a 3-D vector
- */
-static inline float vmathV3MaxElem( const VmathVector3 *vec );
-
-/*
- * Minimum element of a 3-D vector
- */
-static inline float vmathV3MinElem( const VmathVector3 *vec );
-
-/*
- * Compute the sum of all elements of a 3-D vector
- */
-static inline float vmathV3Sum( const VmathVector3 *vec );
-
-/*
- * Compute the dot product of two 3-D vectors
- */
-static inline float vmathV3Dot( const VmathVector3 *vec0, const VmathVector3 *vec1 );
-
-/*
- * Compute the square of the length of a 3-D vector
- */
-static inline float vmathV3LengthSqr( const VmathVector3 *vec );
-
-/*
- * Compute the length of a 3-D vector
- */
-static inline float vmathV3Length( const VmathVector3 *vec );
-
-/*
- * Normalize a 3-D vector
- * NOTE: 
- * The result is unpredictable when all elements of vec are at or near zero.
- */
-static inline void vmathV3Normalize( VmathVector3 *result, const VmathVector3 *vec );
-
-/*
- * Compute cross product of two 3-D vectors
- */
-static inline void vmathV3Cross( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
-
-/*
- * Outer product of two 3-D vectors
- */
-static inline void vmathV3Outer( VmathMatrix3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
-
-/*
- * Pre-multiply a row vector by a 3x3 matrix
- */
-static inline void vmathV3RowMul( VmathVector3 *result, const VmathVector3 *vec, const VmathMatrix3 *mat );
-
-/*
- * Cross-product matrix of a 3-D vector
- */
-static inline void vmathV3CrossMatrix( VmathMatrix3 *result, const VmathVector3 *vec );
-
-/*
- * Create cross-product matrix and multiply
- * NOTE: 
- * Faster than separately creating a cross-product matrix and multiplying.
- */
-static inline void vmathV3CrossMatrixMul( VmathMatrix3 *result, const VmathVector3 *vec, const VmathMatrix3 *mat );
-
-/*
- * Linear interpolation between two 3-D vectors
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline void vmathV3Lerp( VmathVector3 *result, float t, const VmathVector3 *vec0, const VmathVector3 *vec1 );
-
-/*
- * Spherical linear interpolation between two 3-D vectors
- * NOTE: 
- * The result is unpredictable if the vectors point in opposite directions.
- * Does not clamp t between 0 and 1.
- */
-static inline void vmathV3Slerp( VmathVector3 *result, float t, const VmathVector3 *unitVec0, const VmathVector3 *unitVec1 );
-
-/*
- * Conditionally select between two 3-D vectors
- */
-static inline void vmathV3Select( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1, unsigned int select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3-D vector
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathV3Print( const VmathVector3 *vec );
-
-/*
- * Print a 3-D vector and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathV3Prints( const VmathVector3 *vec, const char *name );
-
-#endif
-
-/*
- * Copy a 4-D vector
- */
-static inline void vmathV4Copy( VmathVector4 *result, const VmathVector4 *vec );
-
-/*
- * Construct a 4-D vector from x, y, z, and w elements
- */
-static inline void vmathV4MakeFromElems( VmathVector4 *result, float x, float y, float z, float w );
-
-/*
- * Construct a 4-D vector from a 3-D vector and a scalar
- */
-static inline void vmathV4MakeFromV3Scalar( VmathVector4 *result, const VmathVector3 *xyz, float w );
-
-/*
- * Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0
- */
-static inline void vmathV4MakeFromV3( VmathVector4 *result, const VmathVector3 *vec );
-
-/*
- * Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1
- */
-static inline void vmathV4MakeFromP3( VmathVector4 *result, const VmathPoint3 *pnt );
-
-/*
- * Copy elements from a quaternion into a 4-D vector
- */
-static inline void vmathV4MakeFromQ( VmathVector4 *result, const VmathQuat *quat );
-
-/*
- * Set all elements of a 4-D vector to the same scalar value
- */
-static inline void vmathV4MakeFromScalar( VmathVector4 *result, float scalar );
-
-/*
- * Set the x, y, and z elements of a 4-D vector
- * NOTE: 
- * This function does not change the w element.
- */
-static inline void vmathV4SetXYZ( VmathVector4 *result, const VmathVector3 *vec );
-
-/*
- * Get the x, y, and z elements of a 4-D vector
- */
-static inline void vmathV4GetXYZ( VmathVector3 *result, const VmathVector4 *vec );
-
-/*
- * Set the x element of a 4-D vector
- */
-static inline void vmathV4SetX( VmathVector4 *result, float x );
-
-/*
- * Set the y element of a 4-D vector
- */
-static inline void vmathV4SetY( VmathVector4 *result, float y );
-
-/*
- * Set the z element of a 4-D vector
- */
-static inline void vmathV4SetZ( VmathVector4 *result, float z );
-
-/*
- * Set the w element of a 4-D vector
- */
-static inline void vmathV4SetW( VmathVector4 *result, float w );
-
-/*
- * Get the x element of a 4-D vector
- */
-static inline float vmathV4GetX( const VmathVector4 *vec );
-
-/*
- * Get the y element of a 4-D vector
- */
-static inline float vmathV4GetY( const VmathVector4 *vec );
-
-/*
- * Get the z element of a 4-D vector
- */
-static inline float vmathV4GetZ( const VmathVector4 *vec );
-
-/*
- * Get the w element of a 4-D vector
- */
-static inline float vmathV4GetW( const VmathVector4 *vec );
-
-/*
- * Set an x, y, z, or w element of a 4-D vector by index
- */
-static inline void vmathV4SetElem( VmathVector4 *result, int idx, float value );
-
-/*
- * Get an x, y, z, or w element of a 4-D vector by index
- */
-static inline float vmathV4GetElem( const VmathVector4 *vec, int idx );
-
-/*
- * Add two 4-D vectors
- */
-static inline void vmathV4Add( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
-
-/*
- * Subtract a 4-D vector from another 4-D vector
- */
-static inline void vmathV4Sub( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
-
-/*
- * Multiply a 4-D vector by a scalar
- */
-static inline void vmathV4ScalarMul( VmathVector4 *result, const VmathVector4 *vec, float scalar );
-
-/*
- * Divide a 4-D vector by a scalar
- */
-static inline void vmathV4ScalarDiv( VmathVector4 *result, const VmathVector4 *vec, float scalar );
-
-/*
- * Negate all elements of a 4-D vector
- */
-static inline void vmathV4Neg( VmathVector4 *result, const VmathVector4 *vec );
-
-/*
- * Construct x axis
- */
-static inline void vmathV4MakeXAxis( VmathVector4 *result );
-
-/*
- * Construct y axis
- */
-static inline void vmathV4MakeYAxis( VmathVector4 *result );
-
-/*
- * Construct z axis
- */
-static inline void vmathV4MakeZAxis( VmathVector4 *result );
-
-/*
- * Construct w axis
- */
-static inline void vmathV4MakeWAxis( VmathVector4 *result );
-
-/*
- * Multiply two 4-D vectors per element
- */
-static inline void vmathV4MulPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
-
-/*
- * Divide two 4-D vectors per element
- * NOTE: 
- * Floating-point behavior matches standard library function divf4.
- */
-static inline void vmathV4DivPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
-
-/*
- * Compute the reciprocal of a 4-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function recipf4.
- */
-static inline void vmathV4RecipPerElem( VmathVector4 *result, const VmathVector4 *vec );
-
-/*
- * Compute the square root of a 4-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function sqrtf4.
- */
-static inline void vmathV4SqrtPerElem( VmathVector4 *result, const VmathVector4 *vec );
-
-/*
- * Compute the reciprocal square root of a 4-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function rsqrtf4.
- */
-static inline void vmathV4RsqrtPerElem( VmathVector4 *result, const VmathVector4 *vec );
-
-/*
- * Compute the absolute value of a 4-D vector per element
- */
-static inline void vmathV4AbsPerElem( VmathVector4 *result, const VmathVector4 *vec );
-
-/*
- * Copy sign from one 4-D vector to another, per element
- */
-static inline void vmathV4CopySignPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
-
-/*
- * Maximum of two 4-D vectors per element
- */
-static inline void vmathV4MaxPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
-
-/*
- * Minimum of two 4-D vectors per element
- */
-static inline void vmathV4MinPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
-
-/*
- * Maximum element of a 4-D vector
- */
-static inline float vmathV4MaxElem( const VmathVector4 *vec );
-
-/*
- * Minimum element of a 4-D vector
- */
-static inline float vmathV4MinElem( const VmathVector4 *vec );
-
-/*
- * Compute the sum of all elements of a 4-D vector
- */
-static inline float vmathV4Sum( const VmathVector4 *vec );
-
-/*
- * Compute the dot product of two 4-D vectors
- */
-static inline float vmathV4Dot( const VmathVector4 *vec0, const VmathVector4 *vec1 );
-
-/*
- * Compute the square of the length of a 4-D vector
- */
-static inline float vmathV4LengthSqr( const VmathVector4 *vec );
-
-/*
- * Compute the length of a 4-D vector
- */
-static inline float vmathV4Length( const VmathVector4 *vec );
-
-/*
- * Normalize a 4-D vector
- * NOTE: 
- * The result is unpredictable when all elements of vec are at or near zero.
- */
-static inline void vmathV4Normalize( VmathVector4 *result, const VmathVector4 *vec );
-
-/*
- * Outer product of two 4-D vectors
- */
-static inline void vmathV4Outer( VmathMatrix4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
-
-/*
- * Linear interpolation between two 4-D vectors
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline void vmathV4Lerp( VmathVector4 *result, float t, const VmathVector4 *vec0, const VmathVector4 *vec1 );
-
-/*
- * Spherical linear interpolation between two 4-D vectors
- * NOTE: 
- * The result is unpredictable if the vectors point in opposite directions.
- * Does not clamp t between 0 and 1.
- */
-static inline void vmathV4Slerp( VmathVector4 *result, float t, const VmathVector4 *unitVec0, const VmathVector4 *unitVec1 );
-
-/*
- * Conditionally select between two 4-D vectors
- */
-static inline void vmathV4Select( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1, unsigned int select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 4-D vector
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathV4Print( const VmathVector4 *vec );
-
-/*
- * Print a 4-D vector and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathV4Prints( const VmathVector4 *vec, const char *name );
-
-#endif
-
-/*
- * Copy a 3-D point
- */
-static inline void vmathP3Copy( VmathPoint3 *result, const VmathPoint3 *pnt );
-
-/*
- * Construct a 3-D point from x, y, and z elements
- */
-static inline void vmathP3MakeFromElems( VmathPoint3 *result, float x, float y, float z );
-
-/*
- * Copy elements from a 3-D vector into a 3-D point
- */
-static inline void vmathP3MakeFromV3( VmathPoint3 *result, const VmathVector3 *vec );
-
-/*
- * Set all elements of a 3-D point to the same scalar value
- */
-static inline void vmathP3MakeFromScalar( VmathPoint3 *result, float scalar );
-
-/*
- * Set the x element of a 3-D point
- */
-static inline void vmathP3SetX( VmathPoint3 *result, float x );
-
-/*
- * Set the y element of a 3-D point
- */
-static inline void vmathP3SetY( VmathPoint3 *result, float y );
-
-/*
- * Set the z element of a 3-D point
- */
-static inline void vmathP3SetZ( VmathPoint3 *result, float z );
-
-/*
- * Get the x element of a 3-D point
- */
-static inline float vmathP3GetX( const VmathPoint3 *pnt );
-
-/*
- * Get the y element of a 3-D point
- */
-static inline float vmathP3GetY( const VmathPoint3 *pnt );
-
-/*
- * Get the z element of a 3-D point
- */
-static inline float vmathP3GetZ( const VmathPoint3 *pnt );
-
-/*
- * Set an x, y, or z element of a 3-D point by index
- */
-static inline void vmathP3SetElem( VmathPoint3 *result, int idx, float value );
-
-/*
- * Get an x, y, or z element of a 3-D point by index
- */
-static inline float vmathP3GetElem( const VmathPoint3 *pnt, int idx );
-
-/*
- * Subtract a 3-D point from another 3-D point
- */
-static inline void vmathP3Sub( VmathVector3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
-
-/*
- * Add a 3-D point to a 3-D vector
- */
-static inline void vmathP3AddV3( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *vec );
-
-/*
- * Subtract a 3-D vector from a 3-D point
- */
-static inline void vmathP3SubV3( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *vec );
-
-/*
- * Multiply two 3-D points per element
- */
-static inline void vmathP3MulPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
-
-/*
- * Divide two 3-D points per element
- * NOTE: 
- * Floating-point behavior matches standard library function divf4.
- */
-static inline void vmathP3DivPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
-
-/*
- * Compute the reciprocal of a 3-D point per element
- * NOTE: 
- * Floating-point behavior matches standard library function recipf4.
- */
-static inline void vmathP3RecipPerElem( VmathPoint3 *result, const VmathPoint3 *pnt );
-
-/*
- * Compute the square root of a 3-D point per element
- * NOTE: 
- * Floating-point behavior matches standard library function sqrtf4.
- */
-static inline void vmathP3SqrtPerElem( VmathPoint3 *result, const VmathPoint3 *pnt );
-
-/*
- * Compute the reciprocal square root of a 3-D point per element
- * NOTE: 
- * Floating-point behavior matches standard library function rsqrtf4.
- */
-static inline void vmathP3RsqrtPerElem( VmathPoint3 *result, const VmathPoint3 *pnt );
-
-/*
- * Compute the absolute value of a 3-D point per element
- */
-static inline void vmathP3AbsPerElem( VmathPoint3 *result, const VmathPoint3 *pnt );
-
-/*
- * Copy sign from one 3-D point to another, per element
- */
-static inline void vmathP3CopySignPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
-
-/*
- * Maximum of two 3-D points per element
- */
-static inline void vmathP3MaxPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
-
-/*
- * Minimum of two 3-D points per element
- */
-static inline void vmathP3MinPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
-
-/*
- * Maximum element of a 3-D point
- */
-static inline float vmathP3MaxElem( const VmathPoint3 *pnt );
-
-/*
- * Minimum element of a 3-D point
- */
-static inline float vmathP3MinElem( const VmathPoint3 *pnt );
-
-/*
- * Compute the sum of all elements of a 3-D point
- */
-static inline float vmathP3Sum( const VmathPoint3 *pnt );
-
-/*
- * Apply uniform scale to a 3-D point
- */
-static inline void vmathP3Scale( VmathPoint3 *result, const VmathPoint3 *pnt, float scaleVal );
-
-/*
- * Apply non-uniform scale to a 3-D point
- */
-static inline void vmathP3NonUniformScale( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *scaleVec );
-
-/*
- * Scalar projection of a 3-D point on a unit-length 3-D vector
- */
-static inline float vmathP3Projection( const VmathPoint3 *pnt, const VmathVector3 *unitVec );
-
-/*
- * Compute the square of the distance of a 3-D point from the coordinate-system origin
- */
-static inline float vmathP3DistSqrFromOrigin( const VmathPoint3 *pnt );
-
-/*
- * Compute the distance of a 3-D point from the coordinate-system origin
- */
-static inline float vmathP3DistFromOrigin( const VmathPoint3 *pnt );
-
-/*
- * Compute the square of the distance between two 3-D points
- */
-static inline float vmathP3DistSqr( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
-
-/*
- * Compute the distance between two 3-D points
- */
-static inline float vmathP3Dist( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
-
-/*
- * Linear interpolation between two 3-D points
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline void vmathP3Lerp( VmathPoint3 *result, float t, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
-
-/*
- * Conditionally select between two 3-D points
- */
-static inline void vmathP3Select( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, unsigned int select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3-D point
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathP3Print( const VmathPoint3 *pnt );
-
-/*
- * Print a 3-D point and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathP3Prints( const VmathPoint3 *pnt, const char *name );
-
-#endif
-
-/*
- * Copy a quaternion
- */
-static inline void vmathQCopy( VmathQuat *result, const VmathQuat *quat );
-
-/*
- * Construct a quaternion from x, y, z, and w elements
- */
-static inline void vmathQMakeFromElems( VmathQuat *result, float x, float y, float z, float w );
-
-/*
- * Construct a quaternion from a 3-D vector and a scalar
- */
-static inline void vmathQMakeFromV3Scalar( VmathQuat *result, const VmathVector3 *xyz, float w );
-
-/*
- * Copy elements from a 4-D vector into a quaternion
- */
-static inline void vmathQMakeFromV4( VmathQuat *result, const VmathVector4 *vec );
-
-/*
- * Convert a rotation matrix to a unit-length quaternion
- */
-static inline void vmathQMakeFromM3( VmathQuat *result, const VmathMatrix3 *rotMat );
-
-/*
- * Set all elements of a quaternion to the same scalar value
- */
-static inline void vmathQMakeFromScalar( VmathQuat *result, float scalar );
-
-/*
- * Set the x, y, and z elements of a quaternion
- * NOTE: 
- * This function does not change the w element.
- */
-static inline void vmathQSetXYZ( VmathQuat *result, const VmathVector3 *vec );
-
-/*
- * Get the x, y, and z elements of a quaternion
- */
-static inline void vmathQGetXYZ( VmathVector3 *result, const VmathQuat *quat );
-
-/*
- * Set the x element of a quaternion
- */
-static inline void vmathQSetX( VmathQuat *result, float x );
-
-/*
- * Set the y element of a quaternion
- */
-static inline void vmathQSetY( VmathQuat *result, float y );
-
-/*
- * Set the z element of a quaternion
- */
-static inline void vmathQSetZ( VmathQuat *result, float z );
-
-/*
- * Set the w element of a quaternion
- */
-static inline void vmathQSetW( VmathQuat *result, float w );
-
-/*
- * Get the x element of a quaternion
- */
-static inline float vmathQGetX( const VmathQuat *quat );
-
-/*
- * Get the y element of a quaternion
- */
-static inline float vmathQGetY( const VmathQuat *quat );
-
-/*
- * Get the z element of a quaternion
- */
-static inline float vmathQGetZ( const VmathQuat *quat );
-
-/*
- * Get the w element of a quaternion
- */
-static inline float vmathQGetW( const VmathQuat *quat );
-
-/*
- * Set an x, y, z, or w element of a quaternion by index
- */
-static inline void vmathQSetElem( VmathQuat *result, int idx, float value );
-
-/*
- * Get an x, y, z, or w element of a quaternion by index
- */
-static inline float vmathQGetElem( const VmathQuat *quat, int idx );
-
-/*
- * Add two quaternions
- */
-static inline void vmathQAdd( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1 );
-
-/*
- * Subtract a quaternion from another quaternion
- */
-static inline void vmathQSub( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1 );
-
-/*
- * Multiply two quaternions
- */
-static inline void vmathQMul( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1 );
-
-/*
- * Multiply a quaternion by a scalar
- */
-static inline void vmathQScalarMul( VmathQuat *result, const VmathQuat *quat, float scalar );
-
-/*
- * Divide a quaternion by a scalar
- */
-static inline void vmathQScalarDiv( VmathQuat *result, const VmathQuat *quat, float scalar );
-
-/*
- * Negate all elements of a quaternion
- */
-static inline void vmathQNeg( VmathQuat *result, const VmathQuat *quat );
-
-/*
- * Construct an identity quaternion
- */
-static inline void vmathQMakeIdentity( VmathQuat *result );
-
-/*
- * Construct a quaternion to rotate between two unit-length 3-D vectors
- * NOTE: 
- * The result is unpredictable if unitVec0 and unitVec1 point in opposite directions.
- */
-static inline void vmathQMakeRotationArc( VmathQuat *result, const VmathVector3 *unitVec0, const VmathVector3 *unitVec1 );
-
-/*
- * Construct a quaternion to rotate around a unit-length 3-D vector
- */
-static inline void vmathQMakeRotationAxis( VmathQuat *result, float radians, const VmathVector3 *unitVec );
-
-/*
- * Construct a quaternion to rotate around the x axis
- */
-static inline void vmathQMakeRotationX( VmathQuat *result, float radians );
-
-/*
- * Construct a quaternion to rotate around the y axis
- */
-static inline void vmathQMakeRotationY( VmathQuat *result, float radians );
-
-/*
- * Construct a quaternion to rotate around the z axis
- */
-static inline void vmathQMakeRotationZ( VmathQuat *result, float radians );
-
-/*
- * Compute the conjugate of a quaternion
- */
-static inline void vmathQConj( VmathQuat *result, const VmathQuat *quat );
-
-/*
- * Use a unit-length quaternion to rotate a 3-D vector
- */
-static inline void vmathQRotate( VmathVector3 *result, const VmathQuat *unitQuat, const VmathVector3 *vec );
-
-/*
- * Compute the dot product of two quaternions
- */
-static inline float vmathQDot( const VmathQuat *quat0, const VmathQuat *quat1 );
-
-/*
- * Compute the norm of a quaternion
- */
-static inline float vmathQNorm( const VmathQuat *quat );
-
-/*
- * Compute the length of a quaternion
- */
-static inline float vmathQLength( const VmathQuat *quat );
-
-/*
- * Normalize a quaternion
- * NOTE: 
- * The result is unpredictable when all elements of quat are at or near zero.
- */
-static inline void vmathQNormalize( VmathQuat *result, const VmathQuat *quat );
-
-/*
- * Linear interpolation between two quaternions
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline void vmathQLerp( VmathQuat *result, float t, const VmathQuat *quat0, const VmathQuat *quat1 );
-
-/*
- * Spherical linear interpolation between two quaternions
- * NOTE: 
- * Interpolates along the shortest path between orientations.
- * Does not clamp t between 0 and 1.
- */
-static inline void vmathQSlerp( VmathQuat *result, float t, const VmathQuat *unitQuat0, const VmathQuat *unitQuat1 );
-
-/*
- * Spherical quadrangle interpolation
- */
-static inline void vmathQSquad( VmathQuat *result, float t, const VmathQuat *unitQuat0, const VmathQuat *unitQuat1, const VmathQuat *unitQuat2, const VmathQuat *unitQuat3 );
-
-/*
- * Conditionally select between two quaternions
- */
-static inline void vmathQSelect( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1, unsigned int select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a quaternion
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathQPrint( const VmathQuat *quat );
-
-/*
- * Print a quaternion and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathQPrints( const VmathQuat *quat, const char *name );
-
-#endif
-
-/*
- * Copy a 3x3 matrix
- */
-static inline void vmathM3Copy( VmathMatrix3 *result, const VmathMatrix3 *mat );
-
-/*
- * Construct a 3x3 matrix containing the specified columns
- */
-static inline void vmathM3MakeFromCols( VmathMatrix3 *result, const VmathVector3 *col0, const VmathVector3 *col1, const VmathVector3 *col2 );
-
-/*
- * Construct a 3x3 rotation matrix from a unit-length quaternion
- */
-static inline void vmathM3MakeFromQ( VmathMatrix3 *result, const VmathQuat *unitQuat );
-
-/*
- * Set all elements of a 3x3 matrix to the same scalar value
- */
-static inline void vmathM3MakeFromScalar( VmathMatrix3 *result, float scalar );
-
-/*
- * Set column 0 of a 3x3 matrix
- */
-static inline void vmathM3SetCol0( VmathMatrix3 *result, const VmathVector3 *col0 );
-
-/*
- * Set column 1 of a 3x3 matrix
- */
-static inline void vmathM3SetCol1( VmathMatrix3 *result, const VmathVector3 *col1 );
-
-/*
- * Set column 2 of a 3x3 matrix
- */
-static inline void vmathM3SetCol2( VmathMatrix3 *result, const VmathVector3 *col2 );
-
-/*
- * Get column 0 of a 3x3 matrix
- */
-static inline void vmathM3GetCol0( VmathVector3 *result, const VmathMatrix3 *mat );
-
-/*
- * Get column 1 of a 3x3 matrix
- */
-static inline void vmathM3GetCol1( VmathVector3 *result, const VmathMatrix3 *mat );
-
-/*
- * Get column 2 of a 3x3 matrix
- */
-static inline void vmathM3GetCol2( VmathVector3 *result, const VmathMatrix3 *mat );
-
-/*
- * Set the column of a 3x3 matrix referred to by the specified index
- */
-static inline void vmathM3SetCol( VmathMatrix3 *result, int col, const VmathVector3 *vec );
-
-/*
- * Set the row of a 3x3 matrix referred to by the specified index
- */
-static inline void vmathM3SetRow( VmathMatrix3 *result, int row, const VmathVector3 *vec );
-
-/*
- * Get the column of a 3x3 matrix referred to by the specified index
- */
-static inline void vmathM3GetCol( VmathVector3 *result, const VmathMatrix3 *mat, int col );
-
-/*
- * Get the row of a 3x3 matrix referred to by the specified index
- */
-static inline void vmathM3GetRow( VmathVector3 *result, const VmathMatrix3 *mat, int row );
-
-/*
- * Set the element of a 3x3 matrix referred to by column and row indices
- */
-static inline void vmathM3SetElem( VmathMatrix3 *result, int col, int row, float val );
-
-/*
- * Get the element of a 3x3 matrix referred to by column and row indices
- */
-static inline float vmathM3GetElem( const VmathMatrix3 *mat, int col, int row );
-
-/*
- * Add two 3x3 matrices
- */
-static inline void vmathM3Add( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 );
-
-/*
- * Subtract a 3x3 matrix from another 3x3 matrix
- */
-static inline void vmathM3Sub( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 );
-
-/*
- * Negate all elements of a 3x3 matrix
- */
-static inline void vmathM3Neg( VmathMatrix3 *result, const VmathMatrix3 *mat );
-
-/*
- * Multiply a 3x3 matrix by a scalar
- */
-static inline void vmathM3ScalarMul( VmathMatrix3 *result, const VmathMatrix3 *mat, float scalar );
-
-/*
- * Multiply a 3x3 matrix by a 3-D vector
- */
-static inline void vmathM3MulV3( VmathVector3 *result, const VmathMatrix3 *mat, const VmathVector3 *vec );
-
-/*
- * Multiply two 3x3 matrices
- */
-static inline void vmathM3Mul( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 );
-
-/*
- * Construct an identity 3x3 matrix
- */
-static inline void vmathM3MakeIdentity( VmathMatrix3 *result );
-
-/*
- * Construct a 3x3 matrix to rotate around the x axis
- */
-static inline void vmathM3MakeRotationX( VmathMatrix3 *result, float radians );
-
-/*
- * Construct a 3x3 matrix to rotate around the y axis
- */
-static inline void vmathM3MakeRotationY( VmathMatrix3 *result, float radians );
-
-/*
- * Construct a 3x3 matrix to rotate around the z axis
- */
-static inline void vmathM3MakeRotationZ( VmathMatrix3 *result, float radians );
-
-/*
- * Construct a 3x3 matrix to rotate around the x, y, and z axes
- */
-static inline void vmathM3MakeRotationZYX( VmathMatrix3 *result, const VmathVector3 *radiansXYZ );
-
-/*
- * Construct a 3x3 matrix to rotate around a unit-length 3-D vector
- */
-static inline void vmathM3MakeRotationAxis( VmathMatrix3 *result, float radians, const VmathVector3 *unitVec );
-
-/*
- * Construct a rotation matrix from a unit-length quaternion
- */
-static inline void vmathM3MakeRotationQ( VmathMatrix3 *result, const VmathQuat *unitQuat );
-
-/*
- * Construct a 3x3 matrix to perform scaling
- */
-static inline void vmathM3MakeScale( VmathMatrix3 *result, const VmathVector3 *scaleVec );
-
-/*
- * Append (post-multiply) a scale transformation to a 3x3 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline void vmathM3AppendScale( VmathMatrix3 *result, const VmathMatrix3 *mat, const VmathVector3 *scaleVec );
-
-/*
- * Prepend (pre-multiply) a scale transformation to a 3x3 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline void vmathM3PrependScale( VmathMatrix3 *result, const VmathVector3 *scaleVec, const VmathMatrix3 *mat );
-
-/*
- * Multiply two 3x3 matrices per element
- */
-static inline void vmathM3MulPerElem( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 );
-
-/*
- * Compute the absolute value of a 3x3 matrix per element
- */
-static inline void vmathM3AbsPerElem( VmathMatrix3 *result, const VmathMatrix3 *mat );
-
-/*
- * Transpose of a 3x3 matrix
- */
-static inline void vmathM3Transpose( VmathMatrix3 *result, const VmathMatrix3 *mat );
-
-/*
- * Compute the inverse of a 3x3 matrix
- * NOTE: 
- * Result is unpredictable when the determinant of mat is equal to or near 0.
- */
-static inline void vmathM3Inverse( VmathMatrix3 *result, const VmathMatrix3 *mat );
-
-/*
- * Determinant of a 3x3 matrix
- */
-static inline float vmathM3Determinant( const VmathMatrix3 *mat );
-
-/*
- * Conditionally select between two 3x3 matrices
- */
-static inline void vmathM3Select( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1, unsigned int select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3x3 matrix
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathM3Print( const VmathMatrix3 *mat );
-
-/*
- * Print a 3x3 matrix and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathM3Prints( const VmathMatrix3 *mat, const char *name );
-
-#endif
-
-/*
- * Copy a 4x4 matrix
- */
-static inline void vmathM4Copy( VmathMatrix4 *result, const VmathMatrix4 *mat );
-
-/*
- * Construct a 4x4 matrix containing the specified columns
- */
-static inline void vmathM4MakeFromCols( VmathMatrix4 *result, const VmathVector4 *col0, const VmathVector4 *col1, const VmathVector4 *col2, const VmathVector4 *col3 );
-
-/*
- * Construct a 4x4 matrix from a 3x4 transformation matrix
- */
-static inline void vmathM4MakeFromT3( VmathMatrix4 *result, const VmathTransform3 *mat );
-
-/*
- * Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector
- */
-static inline void vmathM4MakeFromM3V3( VmathMatrix4 *result, const VmathMatrix3 *mat, const VmathVector3 *translateVec );
-
-/*
- * Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector
- */
-static inline void vmathM4MakeFromQV3( VmathMatrix4 *result, const VmathQuat *unitQuat, const VmathVector3 *translateVec );
-
-/*
- * Set all elements of a 4x4 matrix to the same scalar value
- */
-static inline void vmathM4MakeFromScalar( VmathMatrix4 *result, float scalar );
-
-/*
- * Set the upper-left 3x3 submatrix
- * NOTE: 
- * This function does not change the bottom row elements.
- */
-static inline void vmathM4SetUpper3x3( VmathMatrix4 *result, const VmathMatrix3 *mat3 );
-
-/*
- * Get the upper-left 3x3 submatrix of a 4x4 matrix
- */
-static inline void vmathM4GetUpper3x3( VmathMatrix3 *result, const VmathMatrix4 *mat );
-
-/*
- * Set translation component
- * NOTE: 
- * This function does not change the bottom row elements.
- */
-static inline void vmathM4SetTranslation( VmathMatrix4 *result, const VmathVector3 *translateVec );
-
-/*
- * Get the translation component of a 4x4 matrix
- */
-static inline void vmathM4GetTranslation( VmathVector3 *result, const VmathMatrix4 *mat );
-
-/*
- * Set column 0 of a 4x4 matrix
- */
-static inline void vmathM4SetCol0( VmathMatrix4 *result, const VmathVector4 *col0 );
-
-/*
- * Set column 1 of a 4x4 matrix
- */
-static inline void vmathM4SetCol1( VmathMatrix4 *result, const VmathVector4 *col1 );
-
-/*
- * Set column 2 of a 4x4 matrix
- */
-static inline void vmathM4SetCol2( VmathMatrix4 *result, const VmathVector4 *col2 );
-
-/*
- * Set column 3 of a 4x4 matrix
- */
-static inline void vmathM4SetCol3( VmathMatrix4 *result, const VmathVector4 *col3 );
-
-/*
- * Get column 0 of a 4x4 matrix
- */
-static inline void vmathM4GetCol0( VmathVector4 *result, const VmathMatrix4 *mat );
-
-/*
- * Get column 1 of a 4x4 matrix
- */
-static inline void vmathM4GetCol1( VmathVector4 *result, const VmathMatrix4 *mat );
-
-/*
- * Get column 2 of a 4x4 matrix
- */
-static inline void vmathM4GetCol2( VmathVector4 *result, const VmathMatrix4 *mat );
-
-/*
- * Get column 3 of a 4x4 matrix
- */
-static inline void vmathM4GetCol3( VmathVector4 *result, const VmathMatrix4 *mat );
-
-/*
- * Set the column of a 4x4 matrix referred to by the specified index
- */
-static inline void vmathM4SetCol( VmathMatrix4 *result, int col, const VmathVector4 *vec );
-
-/*
- * Set the row of a 4x4 matrix referred to by the specified index
- */
-static inline void vmathM4SetRow( VmathMatrix4 *result, int row, const VmathVector4 *vec );
-
-/*
- * Get the column of a 4x4 matrix referred to by the specified index
- */
-static inline void vmathM4GetCol( VmathVector4 *result, const VmathMatrix4 *mat, int col );
-
-/*
- * Get the row of a 4x4 matrix referred to by the specified index
- */
-static inline void vmathM4GetRow( VmathVector4 *result, const VmathMatrix4 *mat, int row );
-
-/*
- * Set the element of a 4x4 matrix referred to by column and row indices
- */
-static inline void vmathM4SetElem( VmathMatrix4 *result, int col, int row, float val );
-
-/*
- * Get the element of a 4x4 matrix referred to by column and row indices
- */
-static inline float vmathM4GetElem( const VmathMatrix4 *mat, int col, int row );
-
-/*
- * Add two 4x4 matrices
- */
-static inline void vmathM4Add( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 );
-
-/*
- * Subtract a 4x4 matrix from another 4x4 matrix
- */
-static inline void vmathM4Sub( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 );
-
-/*
- * Negate all elements of a 4x4 matrix
- */
-static inline void vmathM4Neg( VmathMatrix4 *result, const VmathMatrix4 *mat );
-
-/*
- * Multiply a 4x4 matrix by a scalar
- */
-static inline void vmathM4ScalarMul( VmathMatrix4 *result, const VmathMatrix4 *mat, float scalar );
-
-/*
- * Multiply a 4x4 matrix by a 4-D vector
- */
-static inline void vmathM4MulV4( VmathVector4 *result, const VmathMatrix4 *mat, const VmathVector4 *vec );
-
-/*
- * Multiply a 4x4 matrix by a 3-D vector
- */
-static inline void vmathM4MulV3( VmathVector4 *result, const VmathMatrix4 *mat, const VmathVector3 *vec );
-
-/*
- * Multiply a 4x4 matrix by a 3-D point
- */
-static inline void vmathM4MulP3( VmathVector4 *result, const VmathMatrix4 *mat, const VmathPoint3 *pnt );
-
-/*
- * Multiply two 4x4 matrices
- */
-static inline void vmathM4Mul( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 );
-
-/*
- * Multiply a 4x4 matrix by a 3x4 transformation matrix
- */
-static inline void vmathM4MulT3( VmathMatrix4 *result, const VmathMatrix4 *mat, const VmathTransform3 *tfrm );
-
-/*
- * Construct an identity 4x4 matrix
- */
-static inline void vmathM4MakeIdentity( VmathMatrix4 *result );
-
-/*
- * Construct a 4x4 matrix to rotate around the x axis
- */
-static inline void vmathM4MakeRotationX( VmathMatrix4 *result, float radians );
-
-/*
- * Construct a 4x4 matrix to rotate around the y axis
- */
-static inline void vmathM4MakeRotationY( VmathMatrix4 *result, float radians );
-
-/*
- * Construct a 4x4 matrix to rotate around the z axis
- */
-static inline void vmathM4MakeRotationZ( VmathMatrix4 *result, float radians );
-
-/*
- * Construct a 4x4 matrix to rotate around the x, y, and z axes
- */
-static inline void vmathM4MakeRotationZYX( VmathMatrix4 *result, const VmathVector3 *radiansXYZ );
-
-/*
- * Construct a 4x4 matrix to rotate around a unit-length 3-D vector
- */
-static inline void vmathM4MakeRotationAxis( VmathMatrix4 *result, float radians, const VmathVector3 *unitVec );
-
-/*
- * Construct a rotation matrix from a unit-length quaternion
- */
-static inline void vmathM4MakeRotationQ( VmathMatrix4 *result, const VmathQuat *unitQuat );
-
-/*
- * Construct a 4x4 matrix to perform scaling
- */
-static inline void vmathM4MakeScale( VmathMatrix4 *result, const VmathVector3 *scaleVec );
-
-/*
- * Construct a 4x4 matrix to perform translation
- */
-static inline void vmathM4MakeTranslation( VmathMatrix4 *result, const VmathVector3 *translateVec );
-
-/*
- * Construct viewing matrix based on eye position, position looked at, and up direction
- */
-static inline void vmathM4MakeLookAt( VmathMatrix4 *result, const VmathPoint3 *eyePos, const VmathPoint3 *lookAtPos, const VmathVector3 *upVec );
-
-/*
- * Construct a perspective projection matrix
- */
-static inline void vmathM4MakePerspective( VmathMatrix4 *result, float fovyRadians, float aspect, float zNear, float zFar );
-
-/*
- * Construct a perspective projection matrix based on frustum
- */
-static inline void vmathM4MakeFrustum( VmathMatrix4 *result, float left, float right, float bottom, float top, float zNear, float zFar );
-
-/*
- * Construct an orthographic projection matrix
- */
-static inline void vmathM4MakeOrthographic( VmathMatrix4 *result, float left, float right, float bottom, float top, float zNear, float zFar );
-
-/*
- * Append (post-multiply) a scale transformation to a 4x4 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline void vmathM4AppendScale( VmathMatrix4 *result, const VmathMatrix4 *mat, const VmathVector3 *scaleVec );
-
-/*
- * Prepend (pre-multiply) a scale transformation to a 4x4 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline void vmathM4PrependScale( VmathMatrix4 *result, const VmathVector3 *scaleVec, const VmathMatrix4 *mat );
-
-/*
- * Multiply two 4x4 matrices per element
- */
-static inline void vmathM4MulPerElem( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 );
-
-/*
- * Compute the absolute value of a 4x4 matrix per element
- */
-static inline void vmathM4AbsPerElem( VmathMatrix4 *result, const VmathMatrix4 *mat );
-
-/*
- * Transpose of a 4x4 matrix
- */
-static inline void vmathM4Transpose( VmathMatrix4 *result, const VmathMatrix4 *mat );
-
-/*
- * Compute the inverse of a 4x4 matrix
- * NOTE: 
- * Result is unpredictable when the determinant of mat is equal to or near 0.
- */
-static inline void vmathM4Inverse( VmathMatrix4 *result, const VmathMatrix4 *mat );
-
-/*
- * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix
- * NOTE: 
- * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.  The result is unpredictable when the determinant of mat is equal to or near 0.
- */
-static inline void vmathM4AffineInverse( VmathMatrix4 *result, const VmathMatrix4 *mat );
-
-/*
- * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix
- * NOTE: 
- * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.
- */
-static inline void vmathM4OrthoInverse( VmathMatrix4 *result, const VmathMatrix4 *mat );
-
-/*
- * Determinant of a 4x4 matrix
- */
-static inline float vmathM4Determinant( const VmathMatrix4 *mat );
-
-/*
- * Conditionally select between two 4x4 matrices
- */
-static inline void vmathM4Select( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1, unsigned int select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 4x4 matrix
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathM4Print( const VmathMatrix4 *mat );
-
-/*
- * Print a 4x4 matrix and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathM4Prints( const VmathMatrix4 *mat, const char *name );
-
-#endif
-
-/*
- * Copy a 3x4 transformation matrix
- */
-static inline void vmathT3Copy( VmathTransform3 *result, const VmathTransform3 *tfrm );
-
-/*
- * Construct a 3x4 transformation matrix containing the specified columns
- */
-static inline void vmathT3MakeFromCols( VmathTransform3 *result, const VmathVector3 *col0, const VmathVector3 *col1, const VmathVector3 *col2, const VmathVector3 *col3 );
-
-/*
- * Construct a 3x4 transformation matrix from a 3x3 matrix and a 3-D vector
- */
-static inline void vmathT3MakeFromM3V3( VmathTransform3 *result, const VmathMatrix3 *tfrm, const VmathVector3 *translateVec );
-
-/*
- * Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector
- */
-static inline void vmathT3MakeFromQV3( VmathTransform3 *result, const VmathQuat *unitQuat, const VmathVector3 *translateVec );
-
-/*
- * Set all elements of a 3x4 transformation matrix to the same scalar value
- */
-static inline void vmathT3MakeFromScalar( VmathTransform3 *result, float scalar );
-
-/*
- * Set the upper-left 3x3 submatrix
- */
-static inline void vmathT3SetUpper3x3( VmathTransform3 *result, const VmathMatrix3 *mat3 );
-
-/*
- * Get the upper-left 3x3 submatrix of a 3x4 transformation matrix
- */
-static inline void vmathT3GetUpper3x3( VmathMatrix3 *result, const VmathTransform3 *tfrm );
-
-/*
- * Set translation component
- */
-static inline void vmathT3SetTranslation( VmathTransform3 *result, const VmathVector3 *translateVec );
-
-/*
- * Get the translation component of a 3x4 transformation matrix
- */
-static inline void vmathT3GetTranslation( VmathVector3 *result, const VmathTransform3 *tfrm );
-
-/*
- * Set column 0 of a 3x4 transformation matrix
- */
-static inline void vmathT3SetCol0( VmathTransform3 *result, const VmathVector3 *col0 );
-
-/*
- * Set column 1 of a 3x4 transformation matrix
- */
-static inline void vmathT3SetCol1( VmathTransform3 *result, const VmathVector3 *col1 );
-
-/*
- * Set column 2 of a 3x4 transformation matrix
- */
-static inline void vmathT3SetCol2( VmathTransform3 *result, const VmathVector3 *col2 );
-
-/*
- * Set column 3 of a 3x4 transformation matrix
- */
-static inline void vmathT3SetCol3( VmathTransform3 *result, const VmathVector3 *col3 );
-
-/*
- * Get column 0 of a 3x4 transformation matrix
- */
-static inline void vmathT3GetCol0( VmathVector3 *result, const VmathTransform3 *tfrm );
-
-/*
- * Get column 1 of a 3x4 transformation matrix
- */
-static inline void vmathT3GetCol1( VmathVector3 *result, const VmathTransform3 *tfrm );
-
-/*
- * Get column 2 of a 3x4 transformation matrix
- */
-static inline void vmathT3GetCol2( VmathVector3 *result, const VmathTransform3 *tfrm );
-
-/*
- * Get column 3 of a 3x4 transformation matrix
- */
-static inline void vmathT3GetCol3( VmathVector3 *result, const VmathTransform3 *tfrm );
-
-/*
- * Set the column of a 3x4 transformation matrix referred to by the specified index
- */
-static inline void vmathT3SetCol( VmathTransform3 *result, int col, const VmathVector3 *vec );
-
-/*
- * Set the row of a 3x4 transformation matrix referred to by the specified index
- */
-static inline void vmathT3SetRow( VmathTransform3 *result, int row, const VmathVector4 *vec );
-
-/*
- * Get the column of a 3x4 transformation matrix referred to by the specified index
- */
-static inline void vmathT3GetCol( VmathVector3 *result, const VmathTransform3 *tfrm, int col );
-
-/*
- * Get the row of a 3x4 transformation matrix referred to by the specified index
- */
-static inline void vmathT3GetRow( VmathVector4 *result, const VmathTransform3 *tfrm, int row );
-
-/*
- * Set the element of a 3x4 transformation matrix referred to by column and row indices
- */
-static inline void vmathT3SetElem( VmathTransform3 *result, int col, int row, float val );
-
-/*
- * Get the element of a 3x4 transformation matrix referred to by column and row indices
- */
-static inline float vmathT3GetElem( const VmathTransform3 *tfrm, int col, int row );
-
-/*
- * Multiply a 3x4 transformation matrix by a 3-D vector
- */
-static inline void vmathT3MulV3( VmathVector3 *result, const VmathTransform3 *tfrm, const VmathVector3 *vec );
-
-/*
- * Multiply a 3x4 transformation matrix by a 3-D point
- */
-static inline void vmathT3MulP3( VmathPoint3 *result, const VmathTransform3 *tfrm, const VmathPoint3 *pnt );
-
-/*
- * Multiply two 3x4 transformation matrices
- */
-static inline void vmathT3Mul( VmathTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1 );
-
-/*
- * Construct an identity 3x4 transformation matrix
- */
-static inline void vmathT3MakeIdentity( VmathTransform3 *result );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the x axis
- */
-static inline void vmathT3MakeRotationX( VmathTransform3 *result, float radians );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the y axis
- */
-static inline void vmathT3MakeRotationY( VmathTransform3 *result, float radians );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the z axis
- */
-static inline void vmathT3MakeRotationZ( VmathTransform3 *result, float radians );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the x, y, and z axes
- */
-static inline void vmathT3MakeRotationZYX( VmathTransform3 *result, const VmathVector3 *radiansXYZ );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector
- */
-static inline void vmathT3MakeRotationAxis( VmathTransform3 *result, float radians, const VmathVector3 *unitVec );
-
-/*
- * Construct a rotation matrix from a unit-length quaternion
- */
-static inline void vmathT3MakeRotationQ( VmathTransform3 *result, const VmathQuat *unitQuat );
-
-/*
- * Construct a 3x4 transformation matrix to perform scaling
- */
-static inline void vmathT3MakeScale( VmathTransform3 *result, const VmathVector3 *scaleVec );
-
-/*
- * Construct a 3x4 transformation matrix to perform translation
- */
-static inline void vmathT3MakeTranslation( VmathTransform3 *result, const VmathVector3 *translateVec );
-
-/*
- * Append (post-multiply) a scale transformation to a 3x4 transformation matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline void vmathT3AppendScale( VmathTransform3 *result, const VmathTransform3 *tfrm, const VmathVector3 *scaleVec );
-
-/*
- * Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline void vmathT3PrependScale( VmathTransform3 *result, const VmathVector3 *scaleVec, const VmathTransform3 *tfrm );
-
-/*
- * Multiply two 3x4 transformation matrices per element
- */
-static inline void vmathT3MulPerElem( VmathTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1 );
-
-/*
- * Compute the absolute value of a 3x4 transformation matrix per element
- */
-static inline void vmathT3AbsPerElem( VmathTransform3 *result, const VmathTransform3 *tfrm );
-
-/*
- * Inverse of a 3x4 transformation matrix
- * NOTE: 
- * Result is unpredictable when the determinant of the left 3x3 submatrix is equal to or near 0.
- */
-static inline void vmathT3Inverse( VmathTransform3 *result, const VmathTransform3 *tfrm );
-
-/*
- * Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix
- * NOTE: 
- * This can be used to achieve better performance than a general inverse when the specified 3x4 transformation matrix meets the given restrictions.
- */
-static inline void vmathT3OrthoInverse( VmathTransform3 *result, const VmathTransform3 *tfrm );
-
-/*
- * Conditionally select between two 3x4 transformation matrices
- */
-static inline void vmathT3Select( VmathTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1, unsigned int select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3x4 transformation matrix
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathT3Print( const VmathTransform3 *tfrm );
-
-/*
- * Print a 3x4 transformation matrix and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathT3Prints( const VmathTransform3 *tfrm, const char *name );
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#include "vec_aos.h"
-#include "quat_aos.h"
-#include "mat_aos.h"
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_AOS_C_SCALAR_H
+#define _VECTORMATH_AOS_C_SCALAR_H
+
+#include <math.h>
+
+#ifdef _VECTORMATH_DEBUG
+#include <stdio.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+#ifndef _VECTORMATH_AOS_C_TYPES_H
+#define _VECTORMATH_AOS_C_TYPES_H
+
+/* A 3-D vector in array-of-structures format: three float elements x, y and z.
+ * With __GNUC__ defined the type is aligned to 16 bytes; otherwise it carries an extra float, d. */
+typedef struct _VmathVector3
+{
+    float x;
+    float y;
+    float z;
+#ifndef __GNUC__
+    float d; /* only present without __GNUC__; presumably padding to four floats - TODO confirm */
+#endif
+}
+#ifdef __GNUC__
+__attribute__ ((aligned(16))) /* GNU attribute syntax: 16-byte alignment for the type */
+#endif
+VmathVector3;
+
+/* A 4-D vector in array-of-structures format: four float elements x, y, z and w.
+ * With __GNUC__ defined the type is aligned to 16 bytes; no alignment is requested otherwise. */
+typedef struct _VmathVector4
+{
+    float x;
+    float y;
+    float z;
+    float w;
+}
+#ifdef __GNUC__
+__attribute__ ((aligned(16))) /* GNU attribute syntax: 16-byte alignment for the type */
+#endif
+VmathVector4;
+
+/* A 3-D point in array-of-structures format: three float elements x, y and z.
+ * Same field layout as VmathVector3, but a distinct type; aligned to 16 bytes when __GNUC__ is defined. */
+typedef struct _VmathPoint3
+{
+    float x;
+    float y;
+    float z;
+#ifndef __GNUC__
+    float d; /* only present without __GNUC__; presumably padding to four floats - TODO confirm */
+#endif
+}
+#ifdef __GNUC__
+__attribute__ ((aligned(16))) /* GNU attribute syntax: 16-byte alignment for the type */
+#endif
+VmathPoint3;
+
+/* A quaternion in array-of-structures format: four float elements x, y, z and w.
+ * Same field layout as VmathVector4, but a distinct type; aligned to 16 bytes when __GNUC__ is defined. */
+typedef struct _VmathQuat
+{
+    float x;
+    float y;
+    float z;
+    float w;
+}
+#ifdef __GNUC__
+__attribute__ ((aligned(16))) /* GNU attribute syntax: 16-byte alignment for the type */
+#endif
+VmathQuat;
+
+/* A 3x3 matrix in array-of-structures format
+ * Stored as three VmathVector3 members, col0 through col2, one per column. */
+typedef struct _VmathMatrix3
+{
+    VmathVector3 col0;
+    VmathVector3 col1;
+    VmathVector3 col2;
+} VmathMatrix3;
+
+/* A 4x4 matrix in array-of-structures format
+ * Stored as four VmathVector4 members, col0 through col3, one per column. */
+typedef struct _VmathMatrix4
+{
+    VmathVector4 col0;
+    VmathVector4 col1;
+    VmathVector4 col2;
+    VmathVector4 col3;
+} VmathMatrix4;
+
+/* A 3x4 transformation matrix in array-of-structures format
+ * Stored as four VmathVector3 members, col0 through col3 (three rows by four columns). */
+typedef struct _VmathTransform3
+{
+    VmathVector3 col0;
+    VmathVector3 col1;
+    VmathVector3 col2;
+    VmathVector3 col3;
+} VmathTransform3;
+
+#endif
+
+/*
+ * Copy a 3-D vector
+ */
+static inline void vmathV3Copy( VmathVector3 *result, const VmathVector3 *vec );
+
+/*
+ * Construct a 3-D vector from x, y, and z elements
+ */
+static inline void vmathV3MakeFromElems( VmathVector3 *result, float x, float y, float z );
+
+/*
+ * Copy elements from a 3-D point into a 3-D vector
+ */
+static inline void vmathV3MakeFromP3( VmathVector3 *result, const VmathPoint3 *pnt );
+
+/*
+ * Set all elements of a 3-D vector to the same scalar value
+ */
+static inline void vmathV3MakeFromScalar( VmathVector3 *result, float scalar );
+
+/*
+ * Set the x element of a 3-D vector
+ */
+static inline void vmathV3SetX( VmathVector3 *result, float x );
+
+/*
+ * Set the y element of a 3-D vector
+ */
+static inline void vmathV3SetY( VmathVector3 *result, float y );
+
+/*
+ * Set the z element of a 3-D vector
+ */
+static inline void vmathV3SetZ( VmathVector3 *result, float z );
+
+/*
+ * Get the x element of a 3-D vector
+ */
+static inline float vmathV3GetX( const VmathVector3 *vec );
+
+/*
+ * Get the y element of a 3-D vector
+ */
+static inline float vmathV3GetY( const VmathVector3 *vec );
+
+/*
+ * Get the z element of a 3-D vector
+ */
+static inline float vmathV3GetZ( const VmathVector3 *vec );
+
+/*
+ * Set an x, y, or z element of a 3-D vector by index
+ */
+static inline void vmathV3SetElem( VmathVector3 *result, int idx, float value );
+
+/*
+ * Get an x, y, or z element of a 3-D vector by index
+ */
+static inline float vmathV3GetElem( const VmathVector3 *vec, int idx );
+
+/*
+ * Add two 3-D vectors
+ */
+static inline void vmathV3Add( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
+
+/*
+ * Subtract a 3-D vector from another 3-D vector
+ */
+static inline void vmathV3Sub( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
+
+/*
+ * Add a 3-D vector to a 3-D point
+ */
+static inline void vmathV3AddP3( VmathPoint3 *result, const VmathVector3 *vec, const VmathPoint3 *pnt );
+
+/*
+ * Multiply a 3-D vector by a scalar
+ */
+static inline void vmathV3ScalarMul( VmathVector3 *result, const VmathVector3 *vec, float scalar );
+
+/*
+ * Divide a 3-D vector by a scalar
+ */
+static inline void vmathV3ScalarDiv( VmathVector3 *result, const VmathVector3 *vec, float scalar );
+
+/*
+ * Negate all elements of a 3-D vector
+ */
+static inline void vmathV3Neg( VmathVector3 *result, const VmathVector3 *vec );
+
+/*
+ * Construct x axis
+ */
+static inline void vmathV3MakeXAxis( VmathVector3 *result );
+
+/*
+ * Construct y axis
+ */
+static inline void vmathV3MakeYAxis( VmathVector3 *result );
+
+/*
+ * Construct z axis
+ */
+static inline void vmathV3MakeZAxis( VmathVector3 *result );
+
+/*
+ * Multiply two 3-D vectors per element
+ */
+static inline void vmathV3MulPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
+
+/*
+ * Divide two 3-D vectors per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function divf4.
+ */
+static inline void vmathV3DivPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
+
+/*
+ * Compute the reciprocal of a 3-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function recipf4.
+ */
+static inline void vmathV3RecipPerElem( VmathVector3 *result, const VmathVector3 *vec );
+
+/*
+ * Compute the square root of a 3-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function sqrtf4.
+ */
+static inline void vmathV3SqrtPerElem( VmathVector3 *result, const VmathVector3 *vec );
+
+/*
+ * Compute the reciprocal square root of a 3-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function rsqrtf4.
+ */
+static inline void vmathV3RsqrtPerElem( VmathVector3 *result, const VmathVector3 *vec );
+
+/*
+ * Compute the absolute value of a 3-D vector per element
+ */
+static inline void vmathV3AbsPerElem( VmathVector3 *result, const VmathVector3 *vec );
+
+/*
+ * Copy sign from one 3-D vector to another, per element
+ */
+static inline void vmathV3CopySignPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
+
+/*
+ * Maximum of two 3-D vectors per element
+ */
+static inline void vmathV3MaxPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
+
+/*
+ * Minimum of two 3-D vectors per element
+ */
+static inline void vmathV3MinPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
+
+/*
+ * Maximum element of a 3-D vector
+ */
+static inline float vmathV3MaxElem( const VmathVector3 *vec );
+
+/*
+ * Minimum element of a 3-D vector
+ */
+static inline float vmathV3MinElem( const VmathVector3 *vec );
+
+/*
+ * Compute the sum of all elements of a 3-D vector
+ */
+static inline float vmathV3Sum( const VmathVector3 *vec );
+
+/*
+ * Compute the dot product of two 3-D vectors
+ */
+static inline float vmathV3Dot( const VmathVector3 *vec0, const VmathVector3 *vec1 );
+
+/*
+ * Compute the square of the length of a 3-D vector
+ */
+static inline float vmathV3LengthSqr( const VmathVector3 *vec );
+
+/*
+ * Compute the length of a 3-D vector
+ */
+static inline float vmathV3Length( const VmathVector3 *vec );
+
+/*
+ * Normalize a 3-D vector
+ * NOTE: 
+ * The result is unpredictable when all elements of vec are at or near zero.
+ */
+static inline void vmathV3Normalize( VmathVector3 *result, const VmathVector3 *vec );
+
+/*
+ * Compute cross product of two 3-D vectors
+ */
+static inline void vmathV3Cross( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
+
+/*
+ * Outer product of two 3-D vectors
+ */
+static inline void vmathV3Outer( VmathMatrix3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
+
+/*
+ * Pre-multiply a row vector by a 3x3 matrix
+ */
+static inline void vmathV3RowMul( VmathVector3 *result, const VmathVector3 *vec, const VmathMatrix3 *mat );
+
+/*
+ * Cross-product matrix of a 3-D vector
+ */
+static inline void vmathV3CrossMatrix( VmathMatrix3 *result, const VmathVector3 *vec );
+
+/*
+ * Create cross-product matrix and multiply
+ * NOTE: 
+ * Faster than separately creating a cross-product matrix and multiplying.
+ */
+static inline void vmathV3CrossMatrixMul( VmathMatrix3 *result, const VmathVector3 *vec, const VmathMatrix3 *mat );
+
+/*
+ * Linear interpolation between two 3-D vectors
+ * NOTE: 
+ * Does not clamp t between 0 and 1.
+ */
+static inline void vmathV3Lerp( VmathVector3 *result, float t, const VmathVector3 *vec0, const VmathVector3 *vec1 );
+
+/*
+ * Spherical linear interpolation between two 3-D vectors
+ * NOTE: 
+ * The result is unpredictable if the vectors point in opposite directions.
+ * Does not clamp t between 0 and 1.
+ */
+static inline void vmathV3Slerp( VmathVector3 *result, float t, const VmathVector3 *unitVec0, const VmathVector3 *unitVec1 );
+
+/*
+ * Conditionally select between two 3-D vectors
+ */
+static inline void vmathV3Select( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1, unsigned int select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3-D vector
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathV3Print( const VmathVector3 *vec );
+
+/*
+ * Print a 3-D vector and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathV3Prints( const VmathVector3 *vec, const char *name );
+
+#endif
+
+/*
+ * Copy a 4-D vector
+ */
+static inline void vmathV4Copy( VmathVector4 *result, const VmathVector4 *vec );
+
+/*
+ * Construct a 4-D vector from x, y, z, and w elements
+ */
+static inline void vmathV4MakeFromElems( VmathVector4 *result, float x, float y, float z, float w );
+
+/*
+ * Construct a 4-D vector from a 3-D vector and a scalar
+ */
+static inline void vmathV4MakeFromV3Scalar( VmathVector4 *result, const VmathVector3 *xyz, float w );
+
+/*
+ * Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0
+ */
+static inline void vmathV4MakeFromV3( VmathVector4 *result, const VmathVector3 *vec );
+
+/*
+ * Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1
+ */
+static inline void vmathV4MakeFromP3( VmathVector4 *result, const VmathPoint3 *pnt );
+
+/*
+ * Copy elements from a quaternion into a 4-D vector
+ */
+static inline void vmathV4MakeFromQ( VmathVector4 *result, const VmathQuat *quat );
+
+/*
+ * Set all elements of a 4-D vector to the same scalar value
+ */
+static inline void vmathV4MakeFromScalar( VmathVector4 *result, float scalar );
+
+/*
+ * Set the x, y, and z elements of a 4-D vector
+ * NOTE: 
+ * This function does not change the w element.
+ */
+static inline void vmathV4SetXYZ( VmathVector4 *result, const VmathVector3 *vec );
+
+/*
+ * Get the x, y, and z elements of a 4-D vector
+ */
+static inline void vmathV4GetXYZ( VmathVector3 *result, const VmathVector4 *vec );
+
+/*
+ * Set the x element of a 4-D vector
+ */
+static inline void vmathV4SetX( VmathVector4 *result, float x );
+
+/*
+ * Set the y element of a 4-D vector
+ */
+static inline void vmathV4SetY( VmathVector4 *result, float y );
+
+/*
+ * Set the z element of a 4-D vector
+ */
+static inline void vmathV4SetZ( VmathVector4 *result, float z );
+
+/*
+ * Set the w element of a 4-D vector
+ */
+static inline void vmathV4SetW( VmathVector4 *result, float w );
+
+/*
+ * Get the x element of a 4-D vector
+ */
+static inline float vmathV4GetX( const VmathVector4 *vec );
+
+/*
+ * Get the y element of a 4-D vector
+ */
+static inline float vmathV4GetY( const VmathVector4 *vec );
+
+/*
+ * Get the z element of a 4-D vector
+ */
+static inline float vmathV4GetZ( const VmathVector4 *vec );
+
+/*
+ * Get the w element of a 4-D vector
+ */
+static inline float vmathV4GetW( const VmathVector4 *vec );
+
+/*
+ * Set an x, y, z, or w element of a 4-D vector by index
+ */
+static inline void vmathV4SetElem( VmathVector4 *result, int idx, float value );
+
+/*
+ * Get an x, y, z, or w element of a 4-D vector by index
+ */
+static inline float vmathV4GetElem( const VmathVector4 *vec, int idx );
+
+/*
+ * Add two 4-D vectors
+ */
+static inline void vmathV4Add( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
+
+/*
+ * Subtract a 4-D vector from another 4-D vector
+ */
+static inline void vmathV4Sub( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
+
+/*
+ * Multiply a 4-D vector by a scalar
+ */
+static inline void vmathV4ScalarMul( VmathVector4 *result, const VmathVector4 *vec, float scalar );
+
+/*
+ * Divide a 4-D vector by a scalar
+ */
+static inline void vmathV4ScalarDiv( VmathVector4 *result, const VmathVector4 *vec, float scalar );
+
+/*
+ * Negate all elements of a 4-D vector
+ */
+static inline void vmathV4Neg( VmathVector4 *result, const VmathVector4 *vec );
+
+/*
+ * Construct the x axis as a 4-D vector
+ */
+static inline void vmathV4MakeXAxis( VmathVector4 *result );
+
+/*
+ * Construct the y axis as a 4-D vector
+ */
+static inline void vmathV4MakeYAxis( VmathVector4 *result );
+
+/*
+ * Construct the z axis as a 4-D vector
+ */
+static inline void vmathV4MakeZAxis( VmathVector4 *result );
+
+/*
+ * Construct the w axis as a 4-D vector
+ */
+static inline void vmathV4MakeWAxis( VmathVector4 *result );
+
+/*
+ * Multiply two 4-D vectors per element
+ */
+static inline void vmathV4MulPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
+
+/*
+ * Divide two 4-D vectors per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function divf4.
+ */
+static inline void vmathV4DivPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
+
+/*
+ * Compute the reciprocal of a 4-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function recipf4.
+ */
+static inline void vmathV4RecipPerElem( VmathVector4 *result, const VmathVector4 *vec );
+
+/*
+ * Compute the square root of a 4-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function sqrtf4.
+ */
+static inline void vmathV4SqrtPerElem( VmathVector4 *result, const VmathVector4 *vec );
+
+/*
+ * Compute the reciprocal square root of a 4-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function rsqrtf4.
+ */
+static inline void vmathV4RsqrtPerElem( VmathVector4 *result, const VmathVector4 *vec );
+
+/*
+ * Compute the absolute value of a 4-D vector per element
+ */
+static inline void vmathV4AbsPerElem( VmathVector4 *result, const VmathVector4 *vec );
+
+/*
+ * Copy sign from one 4-D vector to another, per element
+ */
+static inline void vmathV4CopySignPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
+
+/*
+ * Maximum of two 4-D vectors per element
+ */
+static inline void vmathV4MaxPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
+
+/*
+ * Minimum of two 4-D vectors per element
+ */
+static inline void vmathV4MinPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
+
+/*
+ * Maximum element of a 4-D vector
+ */
+static inline float vmathV4MaxElem( const VmathVector4 *vec );
+
+/*
+ * Minimum element of a 4-D vector
+ */
+static inline float vmathV4MinElem( const VmathVector4 *vec );
+
+/*
+ * Compute the sum of all elements of a 4-D vector
+ */
+static inline float vmathV4Sum( const VmathVector4 *vec );
+
+/*
+ * Compute the dot product of two 4-D vectors
+ */
+static inline float vmathV4Dot( const VmathVector4 *vec0, const VmathVector4 *vec1 );
+
+/*
+ * Compute the square of the length of a 4-D vector
+ */
+static inline float vmathV4LengthSqr( const VmathVector4 *vec );
+
+/*
+ * Compute the length of a 4-D vector
+ */
+static inline float vmathV4Length( const VmathVector4 *vec );
+
+/*
+ * Normalize a 4-D vector
+ * NOTE: 
+ * The result is unpredictable when all elements of vec are at or near zero.
+ */
+static inline void vmathV4Normalize( VmathVector4 *result, const VmathVector4 *vec );
+
+/*
+ * Outer product of two 4-D vectors
+ */
+static inline void vmathV4Outer( VmathMatrix4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
+
+/*
+ * Linear interpolation between two 4-D vectors
+ * NOTE: 
+ * Does not clamp t between 0 and 1.
+ */
+static inline void vmathV4Lerp( VmathVector4 *result, float t, const VmathVector4 *vec0, const VmathVector4 *vec1 );
+
+/*
+ * Spherical linear interpolation between two 4-D vectors
+ * NOTE: 
+ * The result is unpredictable if the vectors point in opposite directions.
+ * Does not clamp t between 0 and 1.
+ */
+static inline void vmathV4Slerp( VmathVector4 *result, float t, const VmathVector4 *unitVec0, const VmathVector4 *unitVec1 );
+
+/*
+ * Conditionally select between two 4-D vectors
+ */
+static inline void vmathV4Select( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1, unsigned int select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 4-D vector
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathV4Print( const VmathVector4 *vec );
+
+/*
+ * Print a 4-D vector and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathV4Prints( const VmathVector4 *vec, const char *name );
+
+#endif
+
+/*
+ * Copy a 3-D point
+ */
+static inline void vmathP3Copy( VmathPoint3 *result, const VmathPoint3 *pnt );
+
+/*
+ * Construct a 3-D point from x, y, and z elements
+ */
+static inline void vmathP3MakeFromElems( VmathPoint3 *result, float x, float y, float z );
+
+/*
+ * Copy elements from a 3-D vector into a 3-D point
+ */
+static inline void vmathP3MakeFromV3( VmathPoint3 *result, const VmathVector3 *vec );
+
+/*
+ * Set all elements of a 3-D point to the same scalar value
+ */
+static inline void vmathP3MakeFromScalar( VmathPoint3 *result, float scalar );
+
+/*
+ * Set the x element of a 3-D point
+ */
+static inline void vmathP3SetX( VmathPoint3 *result, float x );
+
+/*
+ * Set the y element of a 3-D point
+ */
+static inline void vmathP3SetY( VmathPoint3 *result, float y );
+
+/*
+ * Set the z element of a 3-D point
+ */
+static inline void vmathP3SetZ( VmathPoint3 *result, float z );
+
+/*
+ * Get the x element of a 3-D point
+ */
+static inline float vmathP3GetX( const VmathPoint3 *pnt );
+
+/*
+ * Get the y element of a 3-D point
+ */
+static inline float vmathP3GetY( const VmathPoint3 *pnt );
+
+/*
+ * Get the z element of a 3-D point
+ */
+static inline float vmathP3GetZ( const VmathPoint3 *pnt );
+
+/*
+ * Set an x, y, or z element of a 3-D point by index
+ */
+static inline void vmathP3SetElem( VmathPoint3 *result, int idx, float value );
+
+/*
+ * Get an x, y, or z element of a 3-D point by index
+ */
+static inline float vmathP3GetElem( const VmathPoint3 *pnt, int idx );
+
+/*
+ * Subtract a 3-D point from another 3-D point, producing a 3-D vector
+ */
+static inline void vmathP3Sub( VmathVector3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
+
+/*
+ * Add a 3-D point to a 3-D vector
+ */
+static inline void vmathP3AddV3( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *vec );
+
+/*
+ * Subtract a 3-D vector from a 3-D point
+ */
+static inline void vmathP3SubV3( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *vec );
+
+/*
+ * Multiply two 3-D points per element
+ */
+static inline void vmathP3MulPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
+
+/*
+ * Divide two 3-D points per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function divf4.
+ */
+static inline void vmathP3DivPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
+
+/*
+ * Compute the reciprocal of a 3-D point per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function recipf4.
+ */
+static inline void vmathP3RecipPerElem( VmathPoint3 *result, const VmathPoint3 *pnt );
+
+/*
+ * Compute the square root of a 3-D point per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function sqrtf4.
+ */
+static inline void vmathP3SqrtPerElem( VmathPoint3 *result, const VmathPoint3 *pnt );
+
+/*
+ * Compute the reciprocal square root of a 3-D point per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function rsqrtf4.
+ */
+static inline void vmathP3RsqrtPerElem( VmathPoint3 *result, const VmathPoint3 *pnt );
+
+/*
+ * Compute the absolute value of a 3-D point per element
+ */
+static inline void vmathP3AbsPerElem( VmathPoint3 *result, const VmathPoint3 *pnt );
+
+/*
+ * Copy sign from one 3-D point to another, per element
+ */
+static inline void vmathP3CopySignPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
+
+/*
+ * Maximum of two 3-D points per element
+ */
+static inline void vmathP3MaxPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
+
+/*
+ * Minimum of two 3-D points per element
+ */
+static inline void vmathP3MinPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
+
+/*
+ * Maximum element of a 3-D point
+ */
+static inline float vmathP3MaxElem( const VmathPoint3 *pnt );
+
+/*
+ * Minimum element of a 3-D point
+ */
+static inline float vmathP3MinElem( const VmathPoint3 *pnt );
+
+/*
+ * Compute the sum of all elements of a 3-D point
+ */
+static inline float vmathP3Sum( const VmathPoint3 *pnt );
+
+/*
+ * Apply uniform scale to a 3-D point
+ */
+static inline void vmathP3Scale( VmathPoint3 *result, const VmathPoint3 *pnt, float scaleVal );
+
+/*
+ * Apply non-uniform scale to a 3-D point
+ */
+static inline void vmathP3NonUniformScale( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *scaleVec );
+
+/*
+ * Scalar projection of a 3-D point on a unit-length 3-D vector
+ */
+static inline float vmathP3Projection( const VmathPoint3 *pnt, const VmathVector3 *unitVec );
+
+/*
+ * Compute the square of the distance of a 3-D point from the coordinate-system origin
+ */
+static inline float vmathP3DistSqrFromOrigin( const VmathPoint3 *pnt );
+
+/*
+ * Compute the distance of a 3-D point from the coordinate-system origin
+ */
+static inline float vmathP3DistFromOrigin( const VmathPoint3 *pnt );
+
+/*
+ * Compute the square of the distance between two 3-D points
+ */
+static inline float vmathP3DistSqr( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
+
+/*
+ * Compute the distance between two 3-D points
+ */
+static inline float vmathP3Dist( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
+
+/*
+ * Linear interpolation between two 3-D points
+ * NOTE: 
+ * Does not clamp t between 0 and 1.
+ */
+static inline void vmathP3Lerp( VmathPoint3 *result, float t, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
+
+/*
+ * Conditionally select between two 3-D points
+ */
+static inline void vmathP3Select( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, unsigned int select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3-D point
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathP3Print( const VmathPoint3 *pnt );
+
+/*
+ * Print a 3-D point and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathP3Prints( const VmathPoint3 *pnt, const char *name );
+
+#endif
+
+/*
+ * Copy a quaternion
+ */
+static inline void vmathQCopy( VmathQuat *result, const VmathQuat *quat );
+
+/*
+ * Construct a quaternion from x, y, z, and w elements
+ */
+static inline void vmathQMakeFromElems( VmathQuat *result, float x, float y, float z, float w );
+
+/*
+ * Construct a quaternion from a 3-D vector and a scalar
+ */
+static inline void vmathQMakeFromV3Scalar( VmathQuat *result, const VmathVector3 *xyz, float w );
+
+/*
+ * Copy elements from a 4-D vector into a quaternion
+ */
+static inline void vmathQMakeFromV4( VmathQuat *result, const VmathVector4 *vec );
+
+/*
+ * Convert a 3x3 rotation matrix to a unit-length quaternion
+ */
+static inline void vmathQMakeFromM3( VmathQuat *result, const VmathMatrix3 *rotMat );
+
+/*
+ * Set all elements of a quaternion to the same scalar value
+ */
+static inline void vmathQMakeFromScalar( VmathQuat *result, float scalar );
+
+/*
+ * Set the x, y, and z elements of a quaternion
+ * NOTE: 
+ * This function does not change the w element.
+ */
+static inline void vmathQSetXYZ( VmathQuat *result, const VmathVector3 *vec );
+
+/*
+ * Get the x, y, and z elements of a quaternion
+ */
+static inline void vmathQGetXYZ( VmathVector3 *result, const VmathQuat *quat );
+
+/*
+ * Set the x element of a quaternion
+ */
+static inline void vmathQSetX( VmathQuat *result, float x );
+
+/*
+ * Set the y element of a quaternion
+ */
+static inline void vmathQSetY( VmathQuat *result, float y );
+
+/*
+ * Set the z element of a quaternion
+ */
+static inline void vmathQSetZ( VmathQuat *result, float z );
+
+/*
+ * Set the w element of a quaternion
+ */
+static inline void vmathQSetW( VmathQuat *result, float w );
+
+/*
+ * Get the x element of a quaternion
+ */
+static inline float vmathQGetX( const VmathQuat *quat );
+
+/*
+ * Get the y element of a quaternion
+ */
+static inline float vmathQGetY( const VmathQuat *quat );
+
+/*
+ * Get the z element of a quaternion
+ */
+static inline float vmathQGetZ( const VmathQuat *quat );
+
+/*
+ * Get the w element of a quaternion
+ */
+static inline float vmathQGetW( const VmathQuat *quat );
+
+/*
+ * Set an x, y, z, or w element of a quaternion by index
+ */
+static inline void vmathQSetElem( VmathQuat *result, int idx, float value );
+
+/*
+ * Get an x, y, z, or w element of a quaternion by index
+ */
+static inline float vmathQGetElem( const VmathQuat *quat, int idx );
+
+/*
+ * Add two quaternions
+ */
+static inline void vmathQAdd( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1 );
+
+/*
+ * Subtract a quaternion from another quaternion
+ */
+static inline void vmathQSub( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1 );
+
+/*
+ * Multiply two quaternions
+ */
+static inline void vmathQMul( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1 );
+
+/*
+ * Multiply a quaternion by a scalar
+ */
+static inline void vmathQScalarMul( VmathQuat *result, const VmathQuat *quat, float scalar );
+
+/*
+ * Divide a quaternion by a scalar
+ */
+static inline void vmathQScalarDiv( VmathQuat *result, const VmathQuat *quat, float scalar );
+
+/*
+ * Negate all elements of a quaternion
+ */
+static inline void vmathQNeg( VmathQuat *result, const VmathQuat *quat );
+
+/*
+ * Construct an identity quaternion
+ */
+static inline void vmathQMakeIdentity( VmathQuat *result );
+
+/*
+ * Construct a quaternion to rotate between two unit-length 3-D vectors
+ * NOTE: 
+ * The result is unpredictable if unitVec0 and unitVec1 point in opposite directions.
+ */
+static inline void vmathQMakeRotationArc( VmathQuat *result, const VmathVector3 *unitVec0, const VmathVector3 *unitVec1 );
+
+/*
+ * Construct a quaternion to rotate around a unit-length 3-D vector
+ */
+static inline void vmathQMakeRotationAxis( VmathQuat *result, float radians, const VmathVector3 *unitVec );
+
+/*
+ * Construct a quaternion to rotate around the x axis
+ */
+static inline void vmathQMakeRotationX( VmathQuat *result, float radians );
+
+/*
+ * Construct a quaternion to rotate around the y axis
+ */
+static inline void vmathQMakeRotationY( VmathQuat *result, float radians );
+
+/*
+ * Construct a quaternion to rotate around the z axis
+ */
+static inline void vmathQMakeRotationZ( VmathQuat *result, float radians );
+
+/*
+ * Compute the conjugate of a quaternion
+ */
+static inline void vmathQConj( VmathQuat *result, const VmathQuat *quat );
+
+/*
+ * Use a unit-length quaternion to rotate a 3-D vector
+ */
+static inline void vmathQRotate( VmathVector3 *result, const VmathQuat *unitQuat, const VmathVector3 *vec );
+
+/*
+ * Compute the dot product of two quaternions
+ */
+static inline float vmathQDot( const VmathQuat *quat0, const VmathQuat *quat1 );
+
+/*
+ * Compute the norm of a quaternion. NOTE(review): may be the squared length, since vmathQLength is separate; confirm.
+ */
+static inline float vmathQNorm( const VmathQuat *quat );
+
+/*
+ * Compute the length of a quaternion
+ */
+static inline float vmathQLength( const VmathQuat *quat );
+
+/*
+ * Normalize a quaternion
+ * NOTE: 
+ * The result is unpredictable when all elements of quat are at or near zero.
+ */
+static inline void vmathQNormalize( VmathQuat *result, const VmathQuat *quat );
+
+/*
+ * Linear interpolation between two quaternions
+ * NOTE: 
+ * Does not clamp t between 0 and 1.
+ */
+static inline void vmathQLerp( VmathQuat *result, float t, const VmathQuat *quat0, const VmathQuat *quat1 );
+
+/*
+ * Spherical linear interpolation between two quaternions
+ * NOTE: 
+ * Interpolates along the shortest path between orientations.
+ * Does not clamp t between 0 and 1.
+ */
+static inline void vmathQSlerp( VmathQuat *result, float t, const VmathQuat *unitQuat0, const VmathQuat *unitQuat1 );
+
+/*
+ * Spherical quadrangle interpolation
+ */
+static inline void vmathQSquad( VmathQuat *result, float t, const VmathQuat *unitQuat0, const VmathQuat *unitQuat1, const VmathQuat *unitQuat2, const VmathQuat *unitQuat3 );
+
+/*
+ * Conditionally select between two quaternions
+ */
+static inline void vmathQSelect( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1, unsigned int select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a quaternion
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathQPrint( const VmathQuat *quat );
+
+/*
+ * Print a quaternion and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathQPrints( const VmathQuat *quat, const char *name );
+
+#endif
+
+/*
+ * Copy a 3x3 matrix
+ */
+static inline void vmathM3Copy( VmathMatrix3 *result, const VmathMatrix3 *mat );
+
+/*
+ * Construct a 3x3 matrix containing the specified columns
+ */
+static inline void vmathM3MakeFromCols( VmathMatrix3 *result, const VmathVector3 *col0, const VmathVector3 *col1, const VmathVector3 *col2 );
+
+/*
+ * Construct a 3x3 rotation matrix from a unit-length quaternion
+ */
+static inline void vmathM3MakeFromQ( VmathMatrix3 *result, const VmathQuat *unitQuat );
+
+/*
+ * Set all elements of a 3x3 matrix to the same scalar value
+ */
+static inline void vmathM3MakeFromScalar( VmathMatrix3 *result, float scalar );
+
+/*
+ * Set column 0 of a 3x3 matrix
+ */
+static inline void vmathM3SetCol0( VmathMatrix3 *result, const VmathVector3 *col0 );
+
+/*
+ * Set column 1 of a 3x3 matrix
+ */
+static inline void vmathM3SetCol1( VmathMatrix3 *result, const VmathVector3 *col1 );
+
+/*
+ * Set column 2 of a 3x3 matrix
+ */
+static inline void vmathM3SetCol2( VmathMatrix3 *result, const VmathVector3 *col2 );
+
+/*
+ * Get column 0 of a 3x3 matrix
+ */
+static inline void vmathM3GetCol0( VmathVector3 *result, const VmathMatrix3 *mat );
+
+/*
+ * Get column 1 of a 3x3 matrix
+ */
+static inline void vmathM3GetCol1( VmathVector3 *result, const VmathMatrix3 *mat );
+
+/*
+ * Get column 2 of a 3x3 matrix
+ */
+static inline void vmathM3GetCol2( VmathVector3 *result, const VmathMatrix3 *mat );
+
+/*
+ * Set the column of a 3x3 matrix referred to by the specified index
+ */
+static inline void vmathM3SetCol( VmathMatrix3 *result, int col, const VmathVector3 *vec );
+
+/*
+ * Set the row of a 3x3 matrix referred to by the specified index
+ */
+static inline void vmathM3SetRow( VmathMatrix3 *result, int row, const VmathVector3 *vec );
+
+/*
+ * Get the column of a 3x3 matrix referred to by the specified index
+ */
+static inline void vmathM3GetCol( VmathVector3 *result, const VmathMatrix3 *mat, int col );
+
+/*
+ * Get the row of a 3x3 matrix referred to by the specified index
+ */
+static inline void vmathM3GetRow( VmathVector3 *result, const VmathMatrix3 *mat, int row );
+
+/*
+ * Set the element of a 3x3 matrix referred to by column and row indices
+ */
+static inline void vmathM3SetElem( VmathMatrix3 *result, int col, int row, float val );
+
+/*
+ * Get the element of a 3x3 matrix referred to by column and row indices
+ */
+static inline float vmathM3GetElem( const VmathMatrix3 *mat, int col, int row );
+
+/*
+ * Add two 3x3 matrices
+ */
+static inline void vmathM3Add( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 );
+
+/*
+ * Subtract a 3x3 matrix from another 3x3 matrix
+ */
+static inline void vmathM3Sub( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 );
+
+/*
+ * Negate all elements of a 3x3 matrix
+ */
+static inline void vmathM3Neg( VmathMatrix3 *result, const VmathMatrix3 *mat );
+
+/*
+ * Multiply a 3x3 matrix by a scalar
+ */
+static inline void vmathM3ScalarMul( VmathMatrix3 *result, const VmathMatrix3 *mat, float scalar );
+
+/*
+ * Multiply a 3x3 matrix by a 3-D vector
+ */
+static inline void vmathM3MulV3( VmathVector3 *result, const VmathMatrix3 *mat, const VmathVector3 *vec );
+
+/*
+ * Multiply two 3x3 matrices
+ */
+static inline void vmathM3Mul( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 );
+
+/*
+ * Construct an identity 3x3 matrix
+ */
+static inline void vmathM3MakeIdentity( VmathMatrix3 *result );
+
+/*
+ * Construct a 3x3 matrix to rotate around the x axis
+ */
+static inline void vmathM3MakeRotationX( VmathMatrix3 *result, float radians );
+
+/*
+ * Construct a 3x3 matrix to rotate around the y axis
+ */
+static inline void vmathM3MakeRotationY( VmathMatrix3 *result, float radians );
+
+/*
+ * Construct a 3x3 matrix to rotate around the z axis
+ */
+static inline void vmathM3MakeRotationZ( VmathMatrix3 *result, float radians );
+
+/*
+ * Construct a 3x3 matrix to rotate around the x, y, and z axes
+ */
+static inline void vmathM3MakeRotationZYX( VmathMatrix3 *result, const VmathVector3 *radiansXYZ );
+
+/*
+ * Construct a 3x3 matrix to rotate around a unit-length 3-D vector
+ */
+static inline void vmathM3MakeRotationAxis( VmathMatrix3 *result, float radians, const VmathVector3 *unitVec );
+
+/*
+ * Construct a 3x3 rotation matrix from a unit-length quaternion
+ */
+static inline void vmathM3MakeRotationQ( VmathMatrix3 *result, const VmathQuat *unitQuat );
+
+/*
+ * Construct a 3x3 matrix to perform scaling
+ */
+static inline void vmathM3MakeScale( VmathMatrix3 *result, const VmathVector3 *scaleVec );
+
+/*
+ * Append (post-multiply) a scale transformation to a 3x3 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline void vmathM3AppendScale( VmathMatrix3 *result, const VmathMatrix3 *mat, const VmathVector3 *scaleVec );
+
+/*
+ * Prepend (pre-multiply) a scale transformation to a 3x3 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline void vmathM3PrependScale( VmathMatrix3 *result, const VmathVector3 *scaleVec, const VmathMatrix3 *mat );
+
+/*
+ * Multiply two 3x3 matrices per element
+ */
+static inline void vmathM3MulPerElem( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 );
+
+/*
+ * Compute the absolute value of a 3x3 matrix per element
+ */
+static inline void vmathM3AbsPerElem( VmathMatrix3 *result, const VmathMatrix3 *mat );
+
+/*
+ * Transpose of a 3x3 matrix
+ */
+static inline void vmathM3Transpose( VmathMatrix3 *result, const VmathMatrix3 *mat );
+
+/*
+ * Compute the inverse of a 3x3 matrix
+ * NOTE: 
+ * Result is unpredictable when the determinant of mat is equal to or near 0.
+ */
+static inline void vmathM3Inverse( VmathMatrix3 *result, const VmathMatrix3 *mat );
+
+/*
+ * Determinant of a 3x3 matrix
+ */
+static inline float vmathM3Determinant( const VmathMatrix3 *mat );
+
+/*
+ * Conditionally select between two 3x3 matrices
+ */
+static inline void vmathM3Select( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1, unsigned int select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3x3 matrix
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathM3Print( const VmathMatrix3 *mat );
+
+/*
+ * Print a 3x3 matrix and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathM3Prints( const VmathMatrix3 *mat, const char *name );
+
+#endif
+
+/*
+ * Copy a 4x4 matrix
+ */
+static inline void vmathM4Copy( VmathMatrix4 *result, const VmathMatrix4 *mat );
+
+/*
+ * Construct a 4x4 matrix containing the specified columns
+ */
+static inline void vmathM4MakeFromCols( VmathMatrix4 *result, const VmathVector4 *col0, const VmathVector4 *col1, const VmathVector4 *col2, const VmathVector4 *col3 );
+
+/*
+ * Construct a 4x4 matrix from a 3x4 transformation matrix
+ */
+static inline void vmathM4MakeFromT3( VmathMatrix4 *result, const VmathTransform3 *mat );
+
+/*
+ * Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector
+ */
+static inline void vmathM4MakeFromM3V3( VmathMatrix4 *result, const VmathMatrix3 *mat, const VmathVector3 *translateVec );
+
+/*
+ * Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector
+ */
+static inline void vmathM4MakeFromQV3( VmathMatrix4 *result, const VmathQuat *unitQuat, const VmathVector3 *translateVec );
+
+/*
+ * Set all elements of a 4x4 matrix to the same scalar value
+ */
+static inline void vmathM4MakeFromScalar( VmathMatrix4 *result, float scalar );
+
+/*
+ * Set the upper-left 3x3 submatrix of a 4x4 matrix
+ * NOTE: 
+ * This function does not change the bottom row elements.
+ */
+static inline void vmathM4SetUpper3x3( VmathMatrix4 *result, const VmathMatrix3 *mat3 );
+
+/*
+ * Get the upper-left 3x3 submatrix of a 4x4 matrix
+ */
+static inline void vmathM4GetUpper3x3( VmathMatrix3 *result, const VmathMatrix4 *mat );
+
+/*
+ * Set the translation component of a 4x4 matrix
+ * NOTE: 
+ * This function does not change the bottom row elements.
+ */
+static inline void vmathM4SetTranslation( VmathMatrix4 *result, const VmathVector3 *translateVec );
+
+/*
+ * Get the translation component of a 4x4 matrix
+ */
+static inline void vmathM4GetTranslation( VmathVector3 *result, const VmathMatrix4 *mat );
+
+/*
+ * Set column 0 of a 4x4 matrix
+ */
+static inline void vmathM4SetCol0( VmathMatrix4 *result, const VmathVector4 *col0 );
+
+/*
+ * Set column 1 of a 4x4 matrix
+ */
+static inline void vmathM4SetCol1( VmathMatrix4 *result, const VmathVector4 *col1 );
+
+/*
+ * Set column 2 of a 4x4 matrix
+ */
+static inline void vmathM4SetCol2( VmathMatrix4 *result, const VmathVector4 *col2 );
+
+/*
+ * Set column 3 of a 4x4 matrix
+ */
+static inline void vmathM4SetCol3( VmathMatrix4 *result, const VmathVector4 *col3 );
+
+/*
+ * Get column 0 of a 4x4 matrix
+ */
+static inline void vmathM4GetCol0( VmathVector4 *result, const VmathMatrix4 *mat );
+
+/*
+ * Get column 1 of a 4x4 matrix
+ */
+static inline void vmathM4GetCol1( VmathVector4 *result, const VmathMatrix4 *mat );
+
+/*
+ * Get column 2 of a 4x4 matrix
+ */
+static inline void vmathM4GetCol2( VmathVector4 *result, const VmathMatrix4 *mat );
+
+/*
+ * Get column 3 of a 4x4 matrix
+ */
+static inline void vmathM4GetCol3( VmathVector4 *result, const VmathMatrix4 *mat );
+
+/*
+ * Set the column of a 4x4 matrix referred to by the specified index
+ */
+static inline void vmathM4SetCol( VmathMatrix4 *result, int col, const VmathVector4 *vec );
+
+/*
+ * Set the row of a 4x4 matrix referred to by the specified index
+ */
+static inline void vmathM4SetRow( VmathMatrix4 *result, int row, const VmathVector4 *vec );
+
+/*
+ * Get the column of a 4x4 matrix referred to by the specified index
+ */
+static inline void vmathM4GetCol( VmathVector4 *result, const VmathMatrix4 *mat, int col );
+
+/*
+ * Get the row of a 4x4 matrix referred to by the specified index
+ */
+static inline void vmathM4GetRow( VmathVector4 *result, const VmathMatrix4 *mat, int row );
+
+/*
+ * Set the element of a 4x4 matrix referred to by column and row indices
+ */
+static inline void vmathM4SetElem( VmathMatrix4 *result, int col, int row, float val );
+
+/*
+ * Get the element of a 4x4 matrix referred to by column and row indices
+ */
+static inline float vmathM4GetElem( const VmathMatrix4 *mat, int col, int row );
+
+/*
+ * Add two 4x4 matrices
+ */
+static inline void vmathM4Add( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 );
+
+/*
+ * Subtract a 4x4 matrix from another 4x4 matrix
+ */
+static inline void vmathM4Sub( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 );
+
+/*
+ * Negate all elements of a 4x4 matrix
+ */
+static inline void vmathM4Neg( VmathMatrix4 *result, const VmathMatrix4 *mat );
+
+/*
+ * Multiply a 4x4 matrix by a scalar
+ */
+static inline void vmathM4ScalarMul( VmathMatrix4 *result, const VmathMatrix4 *mat, float scalar );
+
+/*
+ * Multiply a 4x4 matrix by a 4-D vector
+ */
+static inline void vmathM4MulV4( VmathVector4 *result, const VmathMatrix4 *mat, const VmathVector4 *vec );
+
+/*
+ * Multiply a 4x4 matrix by a 3-D vector, producing a 4-D vector
+ */
+static inline void vmathM4MulV3( VmathVector4 *result, const VmathMatrix4 *mat, const VmathVector3 *vec );
+
+/*
+ * Multiply a 4x4 matrix by a 3-D point, producing a 4-D vector
+ */
+static inline void vmathM4MulP3( VmathVector4 *result, const VmathMatrix4 *mat, const VmathPoint3 *pnt );
+
+/*
+ * Multiply two 4x4 matrices
+ */
+static inline void vmathM4Mul( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 );
+
+/*
+ * Multiply a 4x4 matrix by a 3x4 transformation matrix
+ */
+static inline void vmathM4MulT3( VmathMatrix4 *result, const VmathMatrix4 *mat, const VmathTransform3 *tfrm );
+
+/*
+ * Construct an identity 4x4 matrix
+ */
+static inline void vmathM4MakeIdentity( VmathMatrix4 *result );
+
+/*
+ * Construct a 4x4 matrix to rotate around the x axis
+ */
+static inline void vmathM4MakeRotationX( VmathMatrix4 *result, float radians );
+
+/*
+ * Construct a 4x4 matrix to rotate around the y axis
+ */
+static inline void vmathM4MakeRotationY( VmathMatrix4 *result, float radians );
+
+/*
+ * Construct a 4x4 matrix to rotate around the z axis
+ */
+static inline void vmathM4MakeRotationZ( VmathMatrix4 *result, float radians );
+
+/*
+ * Construct a 4x4 matrix to rotate around the x, y, and z axes
+ */
+static inline void vmathM4MakeRotationZYX( VmathMatrix4 *result, const VmathVector3 *radiansXYZ );
+
+/*
+ * Construct a 4x4 matrix to rotate around a unit-length 3-D vector
+ */
+static inline void vmathM4MakeRotationAxis( VmathMatrix4 *result, float radians, const VmathVector3 *unitVec );
+
+/*
+ * Construct a 4x4 rotation matrix from a unit-length quaternion
+ */
+static inline void vmathM4MakeRotationQ( VmathMatrix4 *result, const VmathQuat *unitQuat );
+
+/*
+ * Construct a 4x4 matrix to perform scaling
+ */
+static inline void vmathM4MakeScale( VmathMatrix4 *result, const VmathVector3 *scaleVec );
+
+/*
+ * Construct a 4x4 matrix to perform translation
+ */
+static inline void vmathM4MakeTranslation( VmathMatrix4 *result, const VmathVector3 *translateVec );
+
+/*
+ * Construct a 4x4 viewing matrix based on eye position, position looked at, and up direction
+ */
+static inline void vmathM4MakeLookAt( VmathMatrix4 *result, const VmathPoint3 *eyePos, const VmathPoint3 *lookAtPos, const VmathVector3 *upVec );
+
+/*
+ * Construct a perspective projection matrix
+ */
+static inline void vmathM4MakePerspective( VmathMatrix4 *result, float fovyRadians, float aspect, float zNear, float zFar );
+
+/*
+ * Construct a perspective projection matrix based on frustum
+ */
+static inline void vmathM4MakeFrustum( VmathMatrix4 *result, float left, float right, float bottom, float top, float zNear, float zFar );
+
+/*
+ * Construct an orthographic projection matrix
+ */
+static inline void vmathM4MakeOrthographic( VmathMatrix4 *result, float left, float right, float bottom, float top, float zNear, float zFar );
+
+/*
+ * Append (post-multiply) a scale transformation to a 4x4 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline void vmathM4AppendScale( VmathMatrix4 *result, const VmathMatrix4 *mat, const VmathVector3 *scaleVec );
+
+/*
+ * Prepend (pre-multiply) a scale transformation to a 4x4 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline void vmathM4PrependScale( VmathMatrix4 *result, const VmathVector3 *scaleVec, const VmathMatrix4 *mat );
+
+/*
+ * Multiply two 4x4 matrices per element
+ */
+static inline void vmathM4MulPerElem( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 );
+
+/*
+ * Compute the absolute value of a 4x4 matrix per element
+ */
+static inline void vmathM4AbsPerElem( VmathMatrix4 *result, const VmathMatrix4 *mat );
+
+/*
+ * Compute the transpose of a 4x4 matrix
+ */
+static inline void vmathM4Transpose( VmathMatrix4 *result, const VmathMatrix4 *mat );
+
+/*
+ * Compute the inverse of a 4x4 matrix
+ * NOTE: 
+ * Result is unpredictable when the determinant of mat is equal to or near 0.
+ */
+static inline void vmathM4Inverse( VmathMatrix4 *result, const VmathMatrix4 *mat );
+
+/*
+ * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix
+ * NOTE: 
+ * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.  The result is unpredictable when the determinant of mat is equal to or near 0.
+ */
+static inline void vmathM4AffineInverse( VmathMatrix4 *result, const VmathMatrix4 *mat );
+
+/*
+ * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix
+ * NOTE: 
+ * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.
+ */
+static inline void vmathM4OrthoInverse( VmathMatrix4 *result, const VmathMatrix4 *mat );
+
+/*
+ * Compute the determinant of a 4x4 matrix
+ */
+static inline float vmathM4Determinant( const VmathMatrix4 *mat );
+
+/*
+ * Conditionally select between two 4x4 matrices
+ */
+static inline void vmathM4Select( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1, unsigned int select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 4x4 matrix
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathM4Print( const VmathMatrix4 *mat );
+
+/*
+ * Print a 4x4 matrix and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathM4Prints( const VmathMatrix4 *mat, const char *name );
+
+#endif
+
+/*
+ * Copy a 3x4 transformation matrix
+ */
+static inline void vmathT3Copy( VmathTransform3 *result, const VmathTransform3 *tfrm );
+
+/*
+ * Construct a 3x4 transformation matrix containing the specified columns
+ */
+static inline void vmathT3MakeFromCols( VmathTransform3 *result, const VmathVector3 *col0, const VmathVector3 *col1, const VmathVector3 *col2, const VmathVector3 *col3 );
+
+/*
+ * Construct a 3x4 transformation matrix from a 3x3 matrix and a 3-D vector
+ */
+static inline void vmathT3MakeFromM3V3( VmathTransform3 *result, const VmathMatrix3 *tfrm, const VmathVector3 *translateVec );
+
+/*
+ * Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector
+ */
+static inline void vmathT3MakeFromQV3( VmathTransform3 *result, const VmathQuat *unitQuat, const VmathVector3 *translateVec );
+
+/*
+ * Set all elements of a 3x4 transformation matrix to the same scalar value
+ */
+static inline void vmathT3MakeFromScalar( VmathTransform3 *result, float scalar );
+
+/*
+ * Set the upper-left 3x3 submatrix of a 3x4 transformation matrix
+ */
+static inline void vmathT3SetUpper3x3( VmathTransform3 *result, const VmathMatrix3 *mat3 );
+
+/*
+ * Get the upper-left 3x3 submatrix of a 3x4 transformation matrix
+ */
+static inline void vmathT3GetUpper3x3( VmathMatrix3 *result, const VmathTransform3 *tfrm );
+
+/*
+ * Set the translation component of a 3x4 transformation matrix
+ */
+static inline void vmathT3SetTranslation( VmathTransform3 *result, const VmathVector3 *translateVec );
+
+/*
+ * Get the translation component of a 3x4 transformation matrix
+ */
+static inline void vmathT3GetTranslation( VmathVector3 *result, const VmathTransform3 *tfrm );
+
+/*
+ * Set column 0 of a 3x4 transformation matrix
+ */
+static inline void vmathT3SetCol0( VmathTransform3 *result, const VmathVector3 *col0 );
+
+/*
+ * Set column 1 of a 3x4 transformation matrix
+ */
+static inline void vmathT3SetCol1( VmathTransform3 *result, const VmathVector3 *col1 );
+
+/*
+ * Set column 2 of a 3x4 transformation matrix
+ */
+static inline void vmathT3SetCol2( VmathTransform3 *result, const VmathVector3 *col2 );
+
+/*
+ * Set column 3 of a 3x4 transformation matrix
+ */
+static inline void vmathT3SetCol3( VmathTransform3 *result, const VmathVector3 *col3 );
+
+/*
+ * Get column 0 of a 3x4 transformation matrix
+ */
+static inline void vmathT3GetCol0( VmathVector3 *result, const VmathTransform3 *tfrm );
+
+/*
+ * Get column 1 of a 3x4 transformation matrix
+ */
+static inline void vmathT3GetCol1( VmathVector3 *result, const VmathTransform3 *tfrm );
+
+/*
+ * Get column 2 of a 3x4 transformation matrix
+ */
+static inline void vmathT3GetCol2( VmathVector3 *result, const VmathTransform3 *tfrm );
+
+/*
+ * Get column 3 of a 3x4 transformation matrix
+ */
+static inline void vmathT3GetCol3( VmathVector3 *result, const VmathTransform3 *tfrm );
+
+/*
+ * Set the column of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline void vmathT3SetCol( VmathTransform3 *result, int col, const VmathVector3 *vec );
+
+/*
+ * Set the row of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline void vmathT3SetRow( VmathTransform3 *result, int row, const VmathVector4 *vec );
+
+/*
+ * Get the column of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline void vmathT3GetCol( VmathVector3 *result, const VmathTransform3 *tfrm, int col );
+
+/*
+ * Get the row of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline void vmathT3GetRow( VmathVector4 *result, const VmathTransform3 *tfrm, int row );
+
+/*
+ * Set the element of a 3x4 transformation matrix referred to by column and row indices
+ */
+static inline void vmathT3SetElem( VmathTransform3 *result, int col, int row, float val );
+
+/*
+ * Get the element of a 3x4 transformation matrix referred to by column and row indices
+ */
+static inline float vmathT3GetElem( const VmathTransform3 *tfrm, int col, int row );
+
+/*
+ * Multiply a 3x4 transformation matrix by a 3-D vector
+ */
+static inline void vmathT3MulV3( VmathVector3 *result, const VmathTransform3 *tfrm, const VmathVector3 *vec );
+
+/*
+ * Multiply a 3x4 transformation matrix by a 3-D point
+ */
+static inline void vmathT3MulP3( VmathPoint3 *result, const VmathTransform3 *tfrm, const VmathPoint3 *pnt );
+
+/*
+ * Multiply two 3x4 transformation matrices
+ */
+static inline void vmathT3Mul( VmathTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1 );
+
+/*
+ * Construct an identity 3x4 transformation matrix
+ */
+static inline void vmathT3MakeIdentity( VmathTransform3 *result );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the x axis
+ */
+static inline void vmathT3MakeRotationX( VmathTransform3 *result, float radians );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the y axis
+ */
+static inline void vmathT3MakeRotationY( VmathTransform3 *result, float radians );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the z axis
+ */
+static inline void vmathT3MakeRotationZ( VmathTransform3 *result, float radians );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the x, y, and z axes
+ */
+static inline void vmathT3MakeRotationZYX( VmathTransform3 *result, const VmathVector3 *radiansXYZ );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector
+ */
+static inline void vmathT3MakeRotationAxis( VmathTransform3 *result, float radians, const VmathVector3 *unitVec );
+
+/*
+ * Construct a 3x4 rotation transformation matrix from a unit-length quaternion
+ */
+static inline void vmathT3MakeRotationQ( VmathTransform3 *result, const VmathQuat *unitQuat );
+
+/*
+ * Construct a 3x4 transformation matrix to perform scaling
+ */
+static inline void vmathT3MakeScale( VmathTransform3 *result, const VmathVector3 *scaleVec );
+
+/*
+ * Construct a 3x4 transformation matrix to perform translation
+ */
+static inline void vmathT3MakeTranslation( VmathTransform3 *result, const VmathVector3 *translateVec );
+
+/*
+ * Append (post-multiply) a scale transformation to a 3x4 transformation matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline void vmathT3AppendScale( VmathTransform3 *result, const VmathTransform3 *tfrm, const VmathVector3 *scaleVec );
+
+/*
+ * Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline void vmathT3PrependScale( VmathTransform3 *result, const VmathVector3 *scaleVec, const VmathTransform3 *tfrm );
+
+/*
+ * Multiply two 3x4 transformation matrices per element
+ */
+static inline void vmathT3MulPerElem( VmathTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1 );
+
+/*
+ * Compute the absolute value of a 3x4 transformation matrix per element
+ */
+static inline void vmathT3AbsPerElem( VmathTransform3 *result, const VmathTransform3 *tfrm );
+
+/*
+ * Compute the inverse of a 3x4 transformation matrix
+ * NOTE: 
+ * Result is unpredictable when the determinant of the upper-left 3x3 submatrix is equal to or near 0.
+ */
+static inline void vmathT3Inverse( VmathTransform3 *result, const VmathTransform3 *tfrm );
+
+/*
+ * Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix
+ * NOTE: 
+ * This can be used to achieve better performance than a general inverse when the specified 3x4 transformation matrix meets the given restrictions.
+ */
+static inline void vmathT3OrthoInverse( VmathTransform3 *result, const VmathTransform3 *tfrm );
+
+/*
+ * Conditionally select between two 3x4 transformation matrices
+ */
+static inline void vmathT3Select( VmathTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1, unsigned int select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3x4 transformation matrix
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathT3Print( const VmathTransform3 *tfrm );
+
+/*
+ * Print a 3x4 transformation matrix and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathT3Prints( const VmathTransform3 *tfrm, const char *name );
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#include "vec_aos.h"
+#include "quat_aos.h"
+#include "mat_aos.h"
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/scalar/c/vectormath_aos_v.h b/Extras/vectormathlibrary/include/vectormath/scalar/c/vectormath_aos_v.h
index a977c806d..e827260fb 100644
--- a/Extras/vectormathlibrary/include/vectormath/scalar/c/vectormath_aos_v.h
+++ b/Extras/vectormathlibrary/include/vectormath/scalar/c/vectormath_aos_v.h
@@ -1,1845 +1,1845 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_AOS_C_V_H
-#define _VECTORMATH_AOS_C_V_H
-
-#include <math.h>
-
-#ifdef _VECTORMATH_DEBUG
-#include <stdio.h>
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-#ifndef _VECTORMATH_AOS_C_TYPES_H
-#define _VECTORMATH_AOS_C_TYPES_H
-
-/* A 3-D vector in array-of-structures format
- */
-typedef struct _VmathVector3
-{
-    float x;
-    float y;
-    float z;
-#ifndef __GNUC__
-    float d;
-#endif
-}
-#ifdef __GNUC__
-__attribute__ ((aligned(16)))
-#endif
-VmathVector3;
-
-/* A 4-D vector in array-of-structures format
- */
-typedef struct _VmathVector4
-{
-    float x;
-    float y;
-    float z;
-    float w;
-}
-#ifdef __GNUC__
-__attribute__ ((aligned(16)))
-#endif
-VmathVector4;
-
-/* A 3-D point in array-of-structures format
- */
-typedef struct _VmathPoint3
-{
-    float x;
-    float y;
-    float z;
-#ifndef __GNUC__
-    float d;
-#endif
-}
-#ifdef __GNUC__
-__attribute__ ((aligned(16)))
-#endif
-VmathPoint3;
-
-/* A quaternion in array-of-structures format
- */
-typedef struct _VmathQuat
-{
-    float x;
-    float y;
-    float z;
-    float w;
-}
-#ifdef __GNUC__
-__attribute__ ((aligned(16)))
-#endif
-VmathQuat;
-
-/* A 3x3 matrix in array-of-structures format
- */
-typedef struct _VmathMatrix3
-{
-    VmathVector3 col0;
-    VmathVector3 col1;
-    VmathVector3 col2;
-} VmathMatrix3;
-
-/* A 4x4 matrix in array-of-structures format
- */
-typedef struct _VmathMatrix4
-{
-    VmathVector4 col0;
-    VmathVector4 col1;
-    VmathVector4 col2;
-    VmathVector4 col3;
-} VmathMatrix4;
-
-/* A 3x4 transformation matrix in array-of-structures format
- */
-typedef struct _VmathTransform3
-{
-    VmathVector3 col0;
-    VmathVector3 col1;
-    VmathVector3 col2;
-    VmathVector3 col3;
-} VmathTransform3;
-
-#endif
-
-/*
- * Construct a 3-D vector from x, y, and z elements
- */
-static inline VmathVector3 vmathV3MakeFromElems_V( float x, float y, float z );
-
-/*
- * Copy elements from a 3-D point into a 3-D vector
- */
-static inline VmathVector3 vmathV3MakeFromP3_V( VmathPoint3 pnt );
-
-/*
- * Set all elements of a 3-D vector to the same scalar value
- */
-static inline VmathVector3 vmathV3MakeFromScalar_V( float scalar );
-
-/*
- * Set the x element of a 3-D vector
- */
-static inline void vmathV3SetX_V( VmathVector3 *result, float x );
-
-/*
- * Set the y element of a 3-D vector
- */
-static inline void vmathV3SetY_V( VmathVector3 *result, float y );
-
-/*
- * Set the z element of a 3-D vector
- */
-static inline void vmathV3SetZ_V( VmathVector3 *result, float z );
-
-/*
- * Get the x element of a 3-D vector
- */
-static inline float vmathV3GetX_V( VmathVector3 vec );
-
-/*
- * Get the y element of a 3-D vector
- */
-static inline float vmathV3GetY_V( VmathVector3 vec );
-
-/*
- * Get the z element of a 3-D vector
- */
-static inline float vmathV3GetZ_V( VmathVector3 vec );
-
-/*
- * Set an x, y, or z element of a 3-D vector by index
- */
-static inline void vmathV3SetElem_V( VmathVector3 *result, int idx, float value );
-
-/*
- * Get an x, y, or z element of a 3-D vector by index
- */
-static inline float vmathV3GetElem_V( VmathVector3 vec, int idx );
-
-/*
- * Add two 3-D vectors
- */
-static inline VmathVector3 vmathV3Add_V( VmathVector3 vec0, VmathVector3 vec1 );
-
-/*
- * Subtract a 3-D vector from another 3-D vector
- */
-static inline VmathVector3 vmathV3Sub_V( VmathVector3 vec0, VmathVector3 vec1 );
-
-/*
- * Add a 3-D vector to a 3-D point
- */
-static inline VmathPoint3 vmathV3AddP3_V( VmathVector3 vec, VmathPoint3 pnt );
-
-/*
- * Multiply a 3-D vector by a scalar
- */
-static inline VmathVector3 vmathV3ScalarMul_V( VmathVector3 vec, float scalar );
-
-/*
- * Divide a 3-D vector by a scalar
- */
-static inline VmathVector3 vmathV3ScalarDiv_V( VmathVector3 vec, float scalar );
-
-/*
- * Negate all elements of a 3-D vector
- */
-static inline VmathVector3 vmathV3Neg_V( VmathVector3 vec );
-
-/*
- * Construct x axis
- */
-static inline VmathVector3 vmathV3MakeXAxis_V( );
-
-/*
- * Construct y axis
- */
-static inline VmathVector3 vmathV3MakeYAxis_V( );
-
-/*
- * Construct z axis
- */
-static inline VmathVector3 vmathV3MakeZAxis_V( );
-
-/*
- * Multiply two 3-D vectors per element
- */
-static inline VmathVector3 vmathV3MulPerElem_V( VmathVector3 vec0, VmathVector3 vec1 );
-
-/*
- * Divide two 3-D vectors per element
- * NOTE: 
- * Floating-point behavior matches standard library function divf4.
- */
-static inline VmathVector3 vmathV3DivPerElem_V( VmathVector3 vec0, VmathVector3 vec1 );
-
-/*
- * Compute the reciprocal of a 3-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function recipf4.
- */
-static inline VmathVector3 vmathV3RecipPerElem_V( VmathVector3 vec );
-
-/*
- * Compute the square root of a 3-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function sqrtf4.
- */
-static inline VmathVector3 vmathV3SqrtPerElem_V( VmathVector3 vec );
-
-/*
- * Compute the reciprocal square root of a 3-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function rsqrtf4.
- */
-static inline VmathVector3 vmathV3RsqrtPerElem_V( VmathVector3 vec );
-
-/*
- * Compute the absolute value of a 3-D vector per element
- */
-static inline VmathVector3 vmathV3AbsPerElem_V( VmathVector3 vec );
-
-/*
- * Copy sign from one 3-D vector to another, per element
- */
-static inline VmathVector3 vmathV3CopySignPerElem_V( VmathVector3 vec0, VmathVector3 vec1 );
-
-/*
- * Maximum of two 3-D vectors per element
- */
-static inline VmathVector3 vmathV3MaxPerElem_V( VmathVector3 vec0, VmathVector3 vec1 );
-
-/*
- * Minimum of two 3-D vectors per element
- */
-static inline VmathVector3 vmathV3MinPerElem_V( VmathVector3 vec0, VmathVector3 vec1 );
-
-/*
- * Maximum element of a 3-D vector
- */
-static inline float vmathV3MaxElem_V( VmathVector3 vec );
-
-/*
- * Minimum element of a 3-D vector
- */
-static inline float vmathV3MinElem_V( VmathVector3 vec );
-
-/*
- * Compute the sum of all elements of a 3-D vector
- */
-static inline float vmathV3Sum_V( VmathVector3 vec );
-
-/*
- * Compute the dot product of two 3-D vectors
- */
-static inline float vmathV3Dot_V( VmathVector3 vec0, VmathVector3 vec1 );
-
-/*
- * Compute the square of the length of a 3-D vector
- */
-static inline float vmathV3LengthSqr_V( VmathVector3 vec );
-
-/*
- * Compute the length of a 3-D vector
- */
-static inline float vmathV3Length_V( VmathVector3 vec );
-
-/*
- * Normalize a 3-D vector
- * NOTE: 
- * The result is unpredictable when all elements of vec are at or near zero.
- */
-static inline VmathVector3 vmathV3Normalize_V( VmathVector3 vec );
-
-/*
- * Compute cross product of two 3-D vectors
- */
-static inline VmathVector3 vmathV3Cross_V( VmathVector3 vec0, VmathVector3 vec1 );
-
-/*
- * Outer product of two 3-D vectors
- */
-static inline VmathMatrix3 vmathV3Outer_V( VmathVector3 vec0, VmathVector3 vec1 );
-
-/*
- * Pre-multiply a row vector by a 3x3 matrix
- */
-static inline VmathVector3 vmathV3RowMul_V( VmathVector3 vec, VmathMatrix3 mat );
-
-/*
- * Cross-product matrix of a 3-D vector
- */
-static inline VmathMatrix3 vmathV3CrossMatrix_V( VmathVector3 vec );
-
-/*
- * Create cross-product matrix and multiply
- * NOTE: 
- * Faster than separately creating a cross-product matrix and multiplying.
- */
-static inline VmathMatrix3 vmathV3CrossMatrixMul_V( VmathVector3 vec, VmathMatrix3 mat );
-
-/*
- * Linear interpolation between two 3-D vectors
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline VmathVector3 vmathV3Lerp_V( float t, VmathVector3 vec0, VmathVector3 vec1 );
-
-/*
- * Spherical linear interpolation between two 3-D vectors
- * NOTE: 
- * The result is unpredictable if the vectors point in opposite directions.
- * Does not clamp t between 0 and 1.
- */
-static inline VmathVector3 vmathV3Slerp_V( float t, VmathVector3 unitVec0, VmathVector3 unitVec1 );
-
-/*
- * Conditionally select between two 3-D vectors
- */
-static inline VmathVector3 vmathV3Select_V( VmathVector3 vec0, VmathVector3 vec1, unsigned int select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3-D vector
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathV3Print_V( VmathVector3 vec );
-
-/*
- * Print a 3-D vector and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathV3Prints_V( VmathVector3 vec, const char *name );
-
-#endif
-
-/*
- * Construct a 4-D vector from x, y, z, and w elements
- */
-static inline VmathVector4 vmathV4MakeFromElems_V( float x, float y, float z, float w );
-
-/*
- * Construct a 4-D vector from a 3-D vector and a scalar
- */
-static inline VmathVector4 vmathV4MakeFromV3Scalar_V( VmathVector3 xyz, float w );
-
-/*
- * Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0
- */
-static inline VmathVector4 vmathV4MakeFromV3_V( VmathVector3 vec );
-
-/*
- * Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1
- */
-static inline VmathVector4 vmathV4MakeFromP3_V( VmathPoint3 pnt );
-
-/*
- * Copy elements from a quaternion into a 4-D vector
- */
-static inline VmathVector4 vmathV4MakeFromQ_V( VmathQuat quat );
-
-/*
- * Set all elements of a 4-D vector to the same scalar value
- */
-static inline VmathVector4 vmathV4MakeFromScalar_V( float scalar );
-
-/*
- * Set the x, y, and z elements of a 4-D vector
- * NOTE: 
- * This function does not change the w element.
- */
-static inline void vmathV4SetXYZ_V( VmathVector4 *result, VmathVector3 vec );
-
-/*
- * Get the x, y, and z elements of a 4-D vector
- */
-static inline VmathVector3 vmathV4GetXYZ_V( VmathVector4 vec );
-
-/*
- * Set the x element of a 4-D vector
- */
-static inline void vmathV4SetX_V( VmathVector4 *result, float x );
-
-/*
- * Set the y element of a 4-D vector
- */
-static inline void vmathV4SetY_V( VmathVector4 *result, float y );
-
-/*
- * Set the z element of a 4-D vector
- */
-static inline void vmathV4SetZ_V( VmathVector4 *result, float z );
-
-/*
- * Set the w element of a 4-D vector
- */
-static inline void vmathV4SetW_V( VmathVector4 *result, float w );
-
-/*
- * Get the x element of a 4-D vector
- */
-static inline float vmathV4GetX_V( VmathVector4 vec );
-
-/*
- * Get the y element of a 4-D vector
- */
-static inline float vmathV4GetY_V( VmathVector4 vec );
-
-/*
- * Get the z element of a 4-D vector
- */
-static inline float vmathV4GetZ_V( VmathVector4 vec );
-
-/*
- * Get the w element of a 4-D vector
- */
-static inline float vmathV4GetW_V( VmathVector4 vec );
-
-/*
- * Set an x, y, z, or w element of a 4-D vector by index
- */
-static inline void vmathV4SetElem_V( VmathVector4 *result, int idx, float value );
-
-/*
- * Get an x, y, z, or w element of a 4-D vector by index
- */
-static inline float vmathV4GetElem_V( VmathVector4 vec, int idx );
-
-/*
- * Add two 4-D vectors
- */
-static inline VmathVector4 vmathV4Add_V( VmathVector4 vec0, VmathVector4 vec1 );
-
-/*
- * Subtract a 4-D vector from another 4-D vector
- */
-static inline VmathVector4 vmathV4Sub_V( VmathVector4 vec0, VmathVector4 vec1 );
-
-/*
- * Multiply a 4-D vector by a scalar
- */
-static inline VmathVector4 vmathV4ScalarMul_V( VmathVector4 vec, float scalar );
-
-/*
- * Divide a 4-D vector by a scalar
- */
-static inline VmathVector4 vmathV4ScalarDiv_V( VmathVector4 vec, float scalar );
-
-/*
- * Negate all elements of a 4-D vector
- */
-static inline VmathVector4 vmathV4Neg_V( VmathVector4 vec );
-
-/*
- * Construct x axis
- */
-static inline VmathVector4 vmathV4MakeXAxis_V( );
-
-/*
- * Construct y axis
- */
-static inline VmathVector4 vmathV4MakeYAxis_V( );
-
-/*
- * Construct z axis
- */
-static inline VmathVector4 vmathV4MakeZAxis_V( );
-
-/*
- * Construct w axis
- */
-static inline VmathVector4 vmathV4MakeWAxis_V( );
-
-/*
- * Multiply two 4-D vectors per element
- */
-static inline VmathVector4 vmathV4MulPerElem_V( VmathVector4 vec0, VmathVector4 vec1 );
-
-/*
- * Divide two 4-D vectors per element
- * NOTE: 
- * Floating-point behavior matches standard library function divf4.
- */
-static inline VmathVector4 vmathV4DivPerElem_V( VmathVector4 vec0, VmathVector4 vec1 );
-
-/*
- * Compute the reciprocal of a 4-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function recipf4.
- */
-static inline VmathVector4 vmathV4RecipPerElem_V( VmathVector4 vec );
-
-/*
- * Compute the square root of a 4-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function sqrtf4.
- */
-static inline VmathVector4 vmathV4SqrtPerElem_V( VmathVector4 vec );
-
-/*
- * Compute the reciprocal square root of a 4-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function rsqrtf4.
- */
-static inline VmathVector4 vmathV4RsqrtPerElem_V( VmathVector4 vec );
-
-/*
- * Compute the absolute value of a 4-D vector per element
- */
-static inline VmathVector4 vmathV4AbsPerElem_V( VmathVector4 vec );
-
-/*
- * Copy sign from one 4-D vector to another, per element
- */
-static inline VmathVector4 vmathV4CopySignPerElem_V( VmathVector4 vec0, VmathVector4 vec1 );
-
-/*
- * Maximum of two 4-D vectors per element
- */
-static inline VmathVector4 vmathV4MaxPerElem_V( VmathVector4 vec0, VmathVector4 vec1 );
-
-/*
- * Minimum of two 4-D vectors per element
- */
-static inline VmathVector4 vmathV4MinPerElem_V( VmathVector4 vec0, VmathVector4 vec1 );
-
-/*
- * Maximum element of a 4-D vector
- */
-static inline float vmathV4MaxElem_V( VmathVector4 vec );
-
-/*
- * Minimum element of a 4-D vector
- */
-static inline float vmathV4MinElem_V( VmathVector4 vec );
-
-/*
- * Compute the sum of all elements of a 4-D vector
- */
-static inline float vmathV4Sum_V( VmathVector4 vec );
-
-/*
- * Compute the dot product of two 4-D vectors
- */
-static inline float vmathV4Dot_V( VmathVector4 vec0, VmathVector4 vec1 );
-
-/*
- * Compute the square of the length of a 4-D vector
- */
-static inline float vmathV4LengthSqr_V( VmathVector4 vec );
-
-/*
- * Compute the length of a 4-D vector
- */
-static inline float vmathV4Length_V( VmathVector4 vec );
-
-/*
- * Normalize a 4-D vector
- * NOTE: 
- * The result is unpredictable when all elements of vec are at or near zero.
- */
-static inline VmathVector4 vmathV4Normalize_V( VmathVector4 vec );
-
-/*
- * Outer product of two 4-D vectors
- */
-static inline VmathMatrix4 vmathV4Outer_V( VmathVector4 vec0, VmathVector4 vec1 );
-
-/*
- * Linear interpolation between two 4-D vectors
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline VmathVector4 vmathV4Lerp_V( float t, VmathVector4 vec0, VmathVector4 vec1 );
-
-/*
- * Spherical linear interpolation between two 4-D vectors
- * NOTE: 
- * The result is unpredictable if the vectors point in opposite directions.
- * Does not clamp t between 0 and 1.
- */
-static inline VmathVector4 vmathV4Slerp_V( float t, VmathVector4 unitVec0, VmathVector4 unitVec1 );
-
-/*
- * Conditionally select between two 4-D vectors
- */
-static inline VmathVector4 vmathV4Select_V( VmathVector4 vec0, VmathVector4 vec1, unsigned int select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 4-D vector
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathV4Print_V( VmathVector4 vec );
-
-/*
- * Print a 4-D vector and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathV4Prints_V( VmathVector4 vec, const char *name );
-
-#endif
-
-/*
- * Construct a 3-D point from x, y, and z elements
- */
-static inline VmathPoint3 vmathP3MakeFromElems_V( float x, float y, float z );
-
-/*
- * Copy elements from a 3-D vector into a 3-D point
- */
-static inline VmathPoint3 vmathP3MakeFromV3_V( VmathVector3 vec );
-
-/*
- * Set all elements of a 3-D point to the same scalar value
- */
-static inline VmathPoint3 vmathP3MakeFromScalar_V( float scalar );
-
-/*
- * Set the x element of a 3-D point
- */
-static inline void vmathP3SetX_V( VmathPoint3 *result, float x );
-
-/*
- * Set the y element of a 3-D point
- */
-static inline void vmathP3SetY_V( VmathPoint3 *result, float y );
-
-/*
- * Set the z element of a 3-D point
- */
-static inline void vmathP3SetZ_V( VmathPoint3 *result, float z );
-
-/*
- * Get the x element of a 3-D point
- */
-static inline float vmathP3GetX_V( VmathPoint3 pnt );
-
-/*
- * Get the y element of a 3-D point
- */
-static inline float vmathP3GetY_V( VmathPoint3 pnt );
-
-/*
- * Get the z element of a 3-D point
- */
-static inline float vmathP3GetZ_V( VmathPoint3 pnt );
-
-/*
- * Set an x, y, or z element of a 3-D point by index
- */
-static inline void vmathP3SetElem_V( VmathPoint3 *result, int idx, float value );
-
-/*
- * Get an x, y, or z element of a 3-D point by index
- */
-static inline float vmathP3GetElem_V( VmathPoint3 pnt, int idx );
-
-/*
- * Subtract a 3-D point from another 3-D point
- */
-static inline VmathVector3 vmathP3Sub_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
-
-/*
- * Add a 3-D point to a 3-D vector
- */
-static inline VmathPoint3 vmathP3AddV3_V( VmathPoint3 pnt, VmathVector3 vec );
-
-/*
- * Subtract a 3-D vector from a 3-D point
- */
-static inline VmathPoint3 vmathP3SubV3_V( VmathPoint3 pnt, VmathVector3 vec );
-
-/*
- * Multiply two 3-D points per element
- */
-static inline VmathPoint3 vmathP3MulPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
-
-/*
- * Divide two 3-D points per element
- * NOTE: 
- * Floating-point behavior matches standard library function divf4.
- */
-static inline VmathPoint3 vmathP3DivPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
-
-/*
- * Compute the reciprocal of a 3-D point per element
- * NOTE: 
- * Floating-point behavior matches standard library function recipf4.
- */
-static inline VmathPoint3 vmathP3RecipPerElem_V( VmathPoint3 pnt );
-
-/*
- * Compute the square root of a 3-D point per element
- * NOTE: 
- * Floating-point behavior matches standard library function sqrtf4.
- */
-static inline VmathPoint3 vmathP3SqrtPerElem_V( VmathPoint3 pnt );
-
-/*
- * Compute the reciprocal square root of a 3-D point per element
- * NOTE: 
- * Floating-point behavior matches standard library function rsqrtf4.
- */
-static inline VmathPoint3 vmathP3RsqrtPerElem_V( VmathPoint3 pnt );
-
-/*
- * Compute the absolute value of a 3-D point per element
- */
-static inline VmathPoint3 vmathP3AbsPerElem_V( VmathPoint3 pnt );
-
-/*
- * Copy sign from one 3-D point to another, per element
- */
-static inline VmathPoint3 vmathP3CopySignPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
-
-/*
- * Maximum of two 3-D points per element
- */
-static inline VmathPoint3 vmathP3MaxPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
-
-/*
- * Minimum of two 3-D points per element
- */
-static inline VmathPoint3 vmathP3MinPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
-
-/*
- * Maximum element of a 3-D point
- */
-static inline float vmathP3MaxElem_V( VmathPoint3 pnt );
-
-/*
- * Minimum element of a 3-D point
- */
-static inline float vmathP3MinElem_V( VmathPoint3 pnt );
-
-/*
- * Compute the sum of all elements of a 3-D point
- */
-static inline float vmathP3Sum_V( VmathPoint3 pnt );
-
-/*
- * Apply uniform scale to a 3-D point
- */
-static inline VmathPoint3 vmathP3Scale_V( VmathPoint3 pnt, float scaleVal );
-
-/*
- * Apply non-uniform scale to a 3-D point
- */
-static inline VmathPoint3 vmathP3NonUniformScale_V( VmathPoint3 pnt, VmathVector3 scaleVec );
-
-/*
- * Scalar projection of a 3-D point on a unit-length 3-D vector
- */
-static inline float vmathP3Projection_V( VmathPoint3 pnt, VmathVector3 unitVec );
-
-/*
- * Compute the square of the distance of a 3-D point from the coordinate-system origin
- */
-static inline float vmathP3DistSqrFromOrigin_V( VmathPoint3 pnt );
-
-/*
- * Compute the distance of a 3-D point from the coordinate-system origin
- */
-static inline float vmathP3DistFromOrigin_V( VmathPoint3 pnt );
-
-/*
- * Compute the square of the distance between two 3-D points
- */
-static inline float vmathP3DistSqr_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
-
-/*
- * Compute the distance between two 3-D points
- */
-static inline float vmathP3Dist_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
-
-/*
- * Linear interpolation between two 3-D points
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline VmathPoint3 vmathP3Lerp_V( float t, VmathPoint3 pnt0, VmathPoint3 pnt1 );
-
-/*
- * Conditionally select between two 3-D points
- */
-static inline VmathPoint3 vmathP3Select_V( VmathPoint3 pnt0, VmathPoint3 pnt1, unsigned int select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3-D point
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathP3Print_V( VmathPoint3 pnt );
-
-/*
- * Print a 3-D point and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathP3Prints_V( VmathPoint3 pnt, const char *name );
-
-#endif
-
-/*
- * Construct a quaternion from x, y, z, and w elements
- */
-static inline VmathQuat vmathQMakeFromElems_V( float x, float y, float z, float w );
-
-/*
- * Construct a quaternion from a 3-D vector and a scalar
- */
-static inline VmathQuat vmathQMakeFromV3Scalar_V( VmathVector3 xyz, float w );
-
-/*
- * Copy elements from a 4-D vector into a quaternion
- */
-static inline VmathQuat vmathQMakeFromV4_V( VmathVector4 vec );
-
-/*
- * Convert a rotation matrix to a unit-length quaternion
- */
-static inline VmathQuat vmathQMakeFromM3_V( VmathMatrix3 rotMat );
-
-/*
- * Set all elements of a quaternion to the same scalar value
- */
-static inline VmathQuat vmathQMakeFromScalar_V( float scalar );
-
-/*
- * Set the x, y, and z elements of a quaternion
- * NOTE: 
- * This function does not change the w element.
- */
-static inline void vmathQSetXYZ_V( VmathQuat *result, VmathVector3 vec );
-
-/*
- * Get the x, y, and z elements of a quaternion
- */
-static inline VmathVector3 vmathQGetXYZ_V( VmathQuat quat );
-
-/*
- * Set the x element of a quaternion
- */
-static inline void vmathQSetX_V( VmathQuat *result, float x );
-
-/*
- * Set the y element of a quaternion
- */
-static inline void vmathQSetY_V( VmathQuat *result, float y );
-
-/*
- * Set the z element of a quaternion
- */
-static inline void vmathQSetZ_V( VmathQuat *result, float z );
-
-/*
- * Set the w element of a quaternion
- */
-static inline void vmathQSetW_V( VmathQuat *result, float w );
-
-/*
- * Get the x element of a quaternion
- */
-static inline float vmathQGetX_V( VmathQuat quat );
-
-/*
- * Get the y element of a quaternion
- */
-static inline float vmathQGetY_V( VmathQuat quat );
-
-/*
- * Get the z element of a quaternion
- */
-static inline float vmathQGetZ_V( VmathQuat quat );
-
-/*
- * Get the w element of a quaternion
- */
-static inline float vmathQGetW_V( VmathQuat quat );
-
-/*
- * Set an x, y, z, or w element of a quaternion by index
- */
-static inline void vmathQSetElem_V( VmathQuat *result, int idx, float value );
-
-/*
- * Get an x, y, z, or w element of a quaternion by index
- */
-static inline float vmathQGetElem_V( VmathQuat quat, int idx );
-
-/*
- * Add two quaternions
- */
-static inline VmathQuat vmathQAdd_V( VmathQuat quat0, VmathQuat quat1 );
-
-/*
- * Subtract a quaternion from another quaternion
- */
-static inline VmathQuat vmathQSub_V( VmathQuat quat0, VmathQuat quat1 );
-
-/*
- * Multiply two quaternions
- */
-static inline VmathQuat vmathQMul_V( VmathQuat quat0, VmathQuat quat1 );
-
-/*
- * Multiply a quaternion by a scalar
- */
-static inline VmathQuat vmathQScalarMul_V( VmathQuat quat, float scalar );
-
-/*
- * Divide a quaternion by a scalar
- */
-static inline VmathQuat vmathQScalarDiv_V( VmathQuat quat, float scalar );
-
-/*
- * Negate all elements of a quaternion
- */
-static inline VmathQuat vmathQNeg_V( VmathQuat quat );
-
-/*
- * Construct an identity quaternion
- */
-static inline VmathQuat vmathQMakeIdentity_V( );
-
-/*
- * Construct a quaternion to rotate between two unit-length 3-D vectors
- * NOTE: 
- * The result is unpredictable if unitVec0 and unitVec1 point in opposite directions.
- */
-static inline VmathQuat vmathQMakeRotationArc_V( VmathVector3 unitVec0, VmathVector3 unitVec1 );
-
-/*
- * Construct a quaternion to rotate around a unit-length 3-D vector
- */
-static inline VmathQuat vmathQMakeRotationAxis_V( float radians, VmathVector3 unitVec );
-
-/*
- * Construct a quaternion to rotate around the x axis
- */
-static inline VmathQuat vmathQMakeRotationX_V( float radians );
-
-/*
- * Construct a quaternion to rotate around the y axis
- */
-static inline VmathQuat vmathQMakeRotationY_V( float radians );
-
-/*
- * Construct a quaternion to rotate around the z axis
- */
-static inline VmathQuat vmathQMakeRotationZ_V( float radians );
-
-/*
- * Compute the conjugate of a quaternion
- */
-static inline VmathQuat vmathQConj_V( VmathQuat quat );
-
-/*
- * Use a unit-length quaternion to rotate a 3-D vector
- */
-static inline VmathVector3 vmathQRotate_V( VmathQuat unitQuat, VmathVector3 vec );
-
-/*
- * Compute the dot product of two quaternions
- */
-static inline float vmathQDot_V( VmathQuat quat0, VmathQuat quat1 );
-
-/*
- * Compute the norm of a quaternion
- */
-static inline float vmathQNorm_V( VmathQuat quat );
-
-/*
- * Compute the length of a quaternion
- */
-static inline float vmathQLength_V( VmathQuat quat );
-
-/*
- * Normalize a quaternion
- * NOTE: 
- * The result is unpredictable when all elements of quat are at or near zero.
- */
-static inline VmathQuat vmathQNormalize_V( VmathQuat quat );
-
-/*
- * Linear interpolation between two quaternions
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline VmathQuat vmathQLerp_V( float t, VmathQuat quat0, VmathQuat quat1 );
-
-/*
- * Spherical linear interpolation between two quaternions
- * NOTE: 
- * Interpolates along the shortest path between orientations.
- * Does not clamp t between 0 and 1.
- */
-static inline VmathQuat vmathQSlerp_V( float t, VmathQuat unitQuat0, VmathQuat unitQuat1 );
-
-/*
- * Spherical quadrangle interpolation
- */
-static inline VmathQuat vmathQSquad_V( float t, VmathQuat unitQuat0, VmathQuat unitQuat1, VmathQuat unitQuat2, VmathQuat unitQuat3 );
-
-/*
- * Conditionally select between two quaternions
- */
-static inline VmathQuat vmathQSelect_V( VmathQuat quat0, VmathQuat quat1, unsigned int select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a quaternion
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathQPrint_V( VmathQuat quat );
-
-/*
- * Print a quaternion and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathQPrints_V( VmathQuat quat, const char *name );
-
-#endif
-
-/*
- * Construct a 3x3 matrix containing the specified columns
- */
-static inline VmathMatrix3 vmathM3MakeFromCols_V( VmathVector3 col0, VmathVector3 col1, VmathVector3 col2 );
-
-/*
- * Construct a 3x3 rotation matrix from a unit-length quaternion
- */
-static inline VmathMatrix3 vmathM3MakeFromQ_V( VmathQuat unitQuat );
-
-/*
- * Set all elements of a 3x3 matrix to the same scalar value
- */
-static inline VmathMatrix3 vmathM3MakeFromScalar_V( float scalar );
-
-/*
- * Set column 0 of a 3x3 matrix
- */
-static inline void vmathM3SetCol0_V( VmathMatrix3 *result, VmathVector3 col0 );
-
-/*
- * Set column 1 of a 3x3 matrix
- */
-static inline void vmathM3SetCol1_V( VmathMatrix3 *result, VmathVector3 col1 );
-
-/*
- * Set column 2 of a 3x3 matrix
- */
-static inline void vmathM3SetCol2_V( VmathMatrix3 *result, VmathVector3 col2 );
-
-/*
- * Get column 0 of a 3x3 matrix
- */
-static inline VmathVector3 vmathM3GetCol0_V( VmathMatrix3 mat );
-
-/*
- * Get column 1 of a 3x3 matrix
- */
-static inline VmathVector3 vmathM3GetCol1_V( VmathMatrix3 mat );
-
-/*
- * Get column 2 of a 3x3 matrix
- */
-static inline VmathVector3 vmathM3GetCol2_V( VmathMatrix3 mat );
-
-/*
- * Set the column of a 3x3 matrix referred to by the specified index
- */
-static inline void vmathM3SetCol_V( VmathMatrix3 *result, int col, VmathVector3 vec );
-
-/*
- * Set the row of a 3x3 matrix referred to by the specified index
- */
-static inline void vmathM3SetRow_V( VmathMatrix3 *result, int row, VmathVector3 vec );
-
-/*
- * Get the column of a 3x3 matrix referred to by the specified index
- */
-static inline VmathVector3 vmathM3GetCol_V( VmathMatrix3 mat, int col );
-
-/*
- * Get the row of a 3x3 matrix referred to by the specified index
- */
-static inline VmathVector3 vmathM3GetRow_V( VmathMatrix3 mat, int row );
-
-/*
- * Set the element of a 3x3 matrix referred to by column and row indices
- */
-static inline void vmathM3SetElem_V( VmathMatrix3 *result, int col, int row, float val );
-
-/*
- * Get the element of a 3x3 matrix referred to by column and row indices
- */
-static inline float vmathM3GetElem_V( VmathMatrix3 mat, int col, int row );
-
-/*
- * Add two 3x3 matrices
- */
-static inline VmathMatrix3 vmathM3Add_V( VmathMatrix3 mat0, VmathMatrix3 mat1 );
-
-/*
- * Subtract a 3x3 matrix from another 3x3 matrix
- */
-static inline VmathMatrix3 vmathM3Sub_V( VmathMatrix3 mat0, VmathMatrix3 mat1 );
-
-/*
- * Negate all elements of a 3x3 matrix
- */
-static inline VmathMatrix3 vmathM3Neg_V( VmathMatrix3 mat );
-
-/*
- * Multiply a 3x3 matrix by a scalar
- */
-static inline VmathMatrix3 vmathM3ScalarMul_V( VmathMatrix3 mat, float scalar );
-
-/*
- * Multiply a 3x3 matrix by a 3-D vector
- */
-static inline VmathVector3 vmathM3MulV3_V( VmathMatrix3 mat, VmathVector3 vec );
-
-/*
- * Multiply two 3x3 matrices
- */
-static inline VmathMatrix3 vmathM3Mul_V( VmathMatrix3 mat0, VmathMatrix3 mat1 );
-
-/*
- * Construct an identity 3x3 matrix
- */
-static inline VmathMatrix3 vmathM3MakeIdentity_V( );
-
-/*
- * Construct a 3x3 matrix to rotate around the x axis
- */
-static inline VmathMatrix3 vmathM3MakeRotationX_V( float radians );
-
-/*
- * Construct a 3x3 matrix to rotate around the y axis
- */
-static inline VmathMatrix3 vmathM3MakeRotationY_V( float radians );
-
-/*
- * Construct a 3x3 matrix to rotate around the z axis
- */
-static inline VmathMatrix3 vmathM3MakeRotationZ_V( float radians );
-
-/*
- * Construct a 3x3 matrix to rotate around the x, y, and z axes
- */
-static inline VmathMatrix3 vmathM3MakeRotationZYX_V( VmathVector3 radiansXYZ );
-
-/*
- * Construct a 3x3 matrix to rotate around a unit-length 3-D vector
- */
-static inline VmathMatrix3 vmathM3MakeRotationAxis_V( float radians, VmathVector3 unitVec );
-
-/*
- * Construct a rotation matrix from a unit-length quaternion
- */
-static inline VmathMatrix3 vmathM3MakeRotationQ_V( VmathQuat unitQuat );
-
-/*
- * Construct a 3x3 matrix to perform scaling
- */
-static inline VmathMatrix3 vmathM3MakeScale_V( VmathVector3 scaleVec );
-
-/*
- * Append (post-multiply) a scale transformation to a 3x3 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline VmathMatrix3 vmathM3AppendScale_V( VmathMatrix3 mat, VmathVector3 scaleVec );
-
-/*
- * Prepend (pre-multiply) a scale transformation to a 3x3 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline VmathMatrix3 vmathM3PrependScale_V( VmathVector3 scaleVec, VmathMatrix3 mat );
-
-/*
- * Multiply two 3x3 matrices per element
- */
-static inline VmathMatrix3 vmathM3MulPerElem_V( VmathMatrix3 mat0, VmathMatrix3 mat1 );
-
-/*
- * Compute the absolute value of a 3x3 matrix per element
- */
-static inline VmathMatrix3 vmathM3AbsPerElem_V( VmathMatrix3 mat );
-
-/*
- * Transpose of a 3x3 matrix
- */
-static inline VmathMatrix3 vmathM3Transpose_V( VmathMatrix3 mat );
-
-/*
- * Compute the inverse of a 3x3 matrix
- * NOTE: 
- * Result is unpredictable when the determinant of mat is equal to or near 0.
- */
-static inline VmathMatrix3 vmathM3Inverse_V( VmathMatrix3 mat );
-
-/*
- * Determinant of a 3x3 matrix
- */
-static inline float vmathM3Determinant_V( VmathMatrix3 mat );
-
-/*
- * Conditionally select between two 3x3 matrices
- */
-static inline VmathMatrix3 vmathM3Select_V( VmathMatrix3 mat0, VmathMatrix3 mat1, unsigned int select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3x3 matrix
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathM3Print_V( VmathMatrix3 mat );
-
-/*
- * Print a 3x3 matrix and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathM3Prints_V( VmathMatrix3 mat, const char *name );
-
-#endif
-
-/*
- * Construct a 4x4 matrix containing the specified columns
- */
-static inline VmathMatrix4 vmathM4MakeFromCols_V( VmathVector4 col0, VmathVector4 col1, VmathVector4 col2, VmathVector4 col3 );
-
-/*
- * Construct a 4x4 matrix from a 3x4 transformation matrix
- */
-static inline VmathMatrix4 vmathM4MakeFromT3_V( VmathTransform3 mat );
-
-/*
- * Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector
- */
-static inline VmathMatrix4 vmathM4MakeFromM3V3_V( VmathMatrix3 mat, VmathVector3 translateVec );
-
-/*
- * Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector
- */
-static inline VmathMatrix4 vmathM4MakeFromQV3_V( VmathQuat unitQuat, VmathVector3 translateVec );
-
-/*
- * Set all elements of a 4x4 matrix to the same scalar value
- */
-static inline VmathMatrix4 vmathM4MakeFromScalar_V( float scalar );
-
-/*
- * Set the upper-left 3x3 submatrix
- * NOTE: 
- * This function does not change the bottom row elements.
- */
-static inline void vmathM4SetUpper3x3_V( VmathMatrix4 *result, VmathMatrix3 mat3 );
-
-/*
- * Get the upper-left 3x3 submatrix of a 4x4 matrix
- */
-static inline VmathMatrix3 vmathM4GetUpper3x3_V( VmathMatrix4 mat );
-
-/*
- * Set translation component
- * NOTE: 
- * This function does not change the bottom row elements.
- */
-static inline void vmathM4SetTranslation_V( VmathMatrix4 *result, VmathVector3 translateVec );
-
-/*
- * Get the translation component of a 4x4 matrix
- */
-static inline VmathVector3 vmathM4GetTranslation_V( VmathMatrix4 mat );
-
-/*
- * Set column 0 of a 4x4 matrix
- */
-static inline void vmathM4SetCol0_V( VmathMatrix4 *result, VmathVector4 col0 );
-
-/*
- * Set column 1 of a 4x4 matrix
- */
-static inline void vmathM4SetCol1_V( VmathMatrix4 *result, VmathVector4 col1 );
-
-/*
- * Set column 2 of a 4x4 matrix
- */
-static inline void vmathM4SetCol2_V( VmathMatrix4 *result, VmathVector4 col2 );
-
-/*
- * Set column 3 of a 4x4 matrix
- */
-static inline void vmathM4SetCol3_V( VmathMatrix4 *result, VmathVector4 col3 );
-
-/*
- * Get column 0 of a 4x4 matrix
- */
-static inline VmathVector4 vmathM4GetCol0_V( VmathMatrix4 mat );
-
-/*
- * Get column 1 of a 4x4 matrix
- */
-static inline VmathVector4 vmathM4GetCol1_V( VmathMatrix4 mat );
-
-/*
- * Get column 2 of a 4x4 matrix
- */
-static inline VmathVector4 vmathM4GetCol2_V( VmathMatrix4 mat );
-
-/*
- * Get column 3 of a 4x4 matrix
- */
-static inline VmathVector4 vmathM4GetCol3_V( VmathMatrix4 mat );
-
-/*
- * Set the column of a 4x4 matrix referred to by the specified index
- */
-static inline void vmathM4SetCol_V( VmathMatrix4 *result, int col, VmathVector4 vec );
-
-/*
- * Set the row of a 4x4 matrix referred to by the specified index
- */
-static inline void vmathM4SetRow_V( VmathMatrix4 *result, int row, VmathVector4 vec );
-
-/*
- * Get the column of a 4x4 matrix referred to by the specified index
- */
-static inline VmathVector4 vmathM4GetCol_V( VmathMatrix4 mat, int col );
-
-/*
- * Get the row of a 4x4 matrix referred to by the specified index
- */
-static inline VmathVector4 vmathM4GetRow_V( VmathMatrix4 mat, int row );
-
-/*
- * Set the element of a 4x4 matrix referred to by column and row indices
- */
-static inline void vmathM4SetElem_V( VmathMatrix4 *result, int col, int row, float val );
-
-/*
- * Get the element of a 4x4 matrix referred to by column and row indices
- */
-static inline float vmathM4GetElem_V( VmathMatrix4 mat, int col, int row );
-
-/*
- * Add two 4x4 matrices
- */
-static inline VmathMatrix4 vmathM4Add_V( VmathMatrix4 mat0, VmathMatrix4 mat1 );
-
-/*
- * Subtract a 4x4 matrix from another 4x4 matrix
- */
-static inline VmathMatrix4 vmathM4Sub_V( VmathMatrix4 mat0, VmathMatrix4 mat1 );
-
-/*
- * Negate all elements of a 4x4 matrix
- */
-static inline VmathMatrix4 vmathM4Neg_V( VmathMatrix4 mat );
-
-/*
- * Multiply a 4x4 matrix by a scalar
- */
-static inline VmathMatrix4 vmathM4ScalarMul_V( VmathMatrix4 mat, float scalar );
-
-/*
- * Multiply a 4x4 matrix by a 4-D vector
- */
-static inline VmathVector4 vmathM4MulV4_V( VmathMatrix4 mat, VmathVector4 vec );
-
-/*
- * Multiply a 4x4 matrix by a 3-D vector
- */
-static inline VmathVector4 vmathM4MulV3_V( VmathMatrix4 mat, VmathVector3 vec );
-
-/*
- * Multiply a 4x4 matrix by a 3-D point
- */
-static inline VmathVector4 vmathM4MulP3_V( VmathMatrix4 mat, VmathPoint3 pnt );
-
-/*
- * Multiply two 4x4 matrices
- */
-static inline VmathMatrix4 vmathM4Mul_V( VmathMatrix4 mat0, VmathMatrix4 mat1 );
-
-/*
- * Multiply a 4x4 matrix by a 3x4 transformation matrix
- */
-static inline VmathMatrix4 vmathM4MulT3_V( VmathMatrix4 mat, VmathTransform3 tfrm );
-
-/*
- * Construct an identity 4x4 matrix
- */
-static inline VmathMatrix4 vmathM4MakeIdentity_V( );
-
-/*
- * Construct a 4x4 matrix to rotate around the x axis
- */
-static inline VmathMatrix4 vmathM4MakeRotationX_V( float radians );
-
-/*
- * Construct a 4x4 matrix to rotate around the y axis
- */
-static inline VmathMatrix4 vmathM4MakeRotationY_V( float radians );
-
-/*
- * Construct a 4x4 matrix to rotate around the z axis
- */
-static inline VmathMatrix4 vmathM4MakeRotationZ_V( float radians );
-
-/*
- * Construct a 4x4 matrix to rotate around the x, y, and z axes
- */
-static inline VmathMatrix4 vmathM4MakeRotationZYX_V( VmathVector3 radiansXYZ );
-
-/*
- * Construct a 4x4 matrix to rotate around a unit-length 3-D vector
- */
-static inline VmathMatrix4 vmathM4MakeRotationAxis_V( float radians, VmathVector3 unitVec );
-
-/*
- * Construct a rotation matrix from a unit-length quaternion
- */
-static inline VmathMatrix4 vmathM4MakeRotationQ_V( VmathQuat unitQuat );
-
-/*
- * Construct a 4x4 matrix to perform scaling
- */
-static inline VmathMatrix4 vmathM4MakeScale_V( VmathVector3 scaleVec );
-
-/*
- * Construct a 4x4 matrix to perform translation
- */
-static inline VmathMatrix4 vmathM4MakeTranslation_V( VmathVector3 translateVec );
-
-/*
- * Construct viewing matrix based on eye position, position looked at, and up direction
- */
-static inline VmathMatrix4 vmathM4MakeLookAt_V( VmathPoint3 eyePos, VmathPoint3 lookAtPos, VmathVector3 upVec );
-
-/*
- * Construct a perspective projection matrix
- */
-static inline VmathMatrix4 vmathM4MakePerspective_V( float fovyRadians, float aspect, float zNear, float zFar );
-
-/*
- * Construct a perspective projection matrix based on frustum
- */
-static inline VmathMatrix4 vmathM4MakeFrustum_V( float left, float right, float bottom, float top, float zNear, float zFar );
-
-/*
- * Construct an orthographic projection matrix
- */
-static inline VmathMatrix4 vmathM4MakeOrthographic_V( float left, float right, float bottom, float top, float zNear, float zFar );
-
-/*
- * Append (post-multiply) a scale transformation to a 4x4 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline VmathMatrix4 vmathM4AppendScale_V( VmathMatrix4 mat, VmathVector3 scaleVec );
-
-/*
- * Prepend (pre-multiply) a scale transformation to a 4x4 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline VmathMatrix4 vmathM4PrependScale_V( VmathVector3 scaleVec, VmathMatrix4 mat );
-
-/*
- * Multiply two 4x4 matrices per element
- */
-static inline VmathMatrix4 vmathM4MulPerElem_V( VmathMatrix4 mat0, VmathMatrix4 mat1 );
-
-/*
- * Compute the absolute value of a 4x4 matrix per element
- */
-static inline VmathMatrix4 vmathM4AbsPerElem_V( VmathMatrix4 mat );
-
-/*
- * Transpose of a 4x4 matrix
- */
-static inline VmathMatrix4 vmathM4Transpose_V( VmathMatrix4 mat );
-
-/*
- * Compute the inverse of a 4x4 matrix
- * NOTE: 
- * Result is unpredictable when the determinant of mat is equal to or near 0.
- */
-static inline VmathMatrix4 vmathM4Inverse_V( VmathMatrix4 mat );
-
-/*
- * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix
- * NOTE: 
- * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.  The result is unpredictable when the determinant of mat is equal to or near 0.
- */
-static inline VmathMatrix4 vmathM4AffineInverse_V( VmathMatrix4 mat );
-
-/*
- * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix
- * NOTE: 
- * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.
- */
-static inline VmathMatrix4 vmathM4OrthoInverse_V( VmathMatrix4 mat );
-
-/*
- * Determinant of a 4x4 matrix
- */
-static inline float vmathM4Determinant_V( VmathMatrix4 mat );
-
-/*
- * Conditionally select between two 4x4 matrices
- */
-static inline VmathMatrix4 vmathM4Select_V( VmathMatrix4 mat0, VmathMatrix4 mat1, unsigned int select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 4x4 matrix
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathM4Print_V( VmathMatrix4 mat );
-
-/*
- * Print a 4x4 matrix and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathM4Prints_V( VmathMatrix4 mat, const char *name );
-
-#endif
-
-/*
- * Construct a 3x4 transformation matrix containing the specified columns
- */
-static inline VmathTransform3 vmathT3MakeFromCols_V( VmathVector3 col0, VmathVector3 col1, VmathVector3 col2, VmathVector3 col3 );
-
-/*
- * Construct a 3x4 transformation matrix from a 3x3 matrix and a 3-D vector
- */
-static inline VmathTransform3 vmathT3MakeFromM3V3_V( VmathMatrix3 tfrm, VmathVector3 translateVec );
-
-/*
- * Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector
- */
-static inline VmathTransform3 vmathT3MakeFromQV3_V( VmathQuat unitQuat, VmathVector3 translateVec );
-
-/*
- * Set all elements of a 3x4 transformation matrix to the same scalar value
- */
-static inline VmathTransform3 vmathT3MakeFromScalar_V( float scalar );
-
-/*
- * Set the upper-left 3x3 submatrix
- */
-static inline void vmathT3SetUpper3x3_V( VmathTransform3 *result, VmathMatrix3 mat3 );
-
-/*
- * Get the upper-left 3x3 submatrix of a 3x4 transformation matrix
- */
-static inline VmathMatrix3 vmathT3GetUpper3x3_V( VmathTransform3 tfrm );
-
-/*
- * Set translation component
- */
-static inline void vmathT3SetTranslation_V( VmathTransform3 *result, VmathVector3 translateVec );
-
-/*
- * Get the translation component of a 3x4 transformation matrix
- */
-static inline VmathVector3 vmathT3GetTranslation_V( VmathTransform3 tfrm );
-
-/*
- * Set column 0 of a 3x4 transformation matrix
- */
-static inline void vmathT3SetCol0_V( VmathTransform3 *result, VmathVector3 col0 );
-
-/*
- * Set column 1 of a 3x4 transformation matrix
- */
-static inline void vmathT3SetCol1_V( VmathTransform3 *result, VmathVector3 col1 );
-
-/*
- * Set column 2 of a 3x4 transformation matrix
- */
-static inline void vmathT3SetCol2_V( VmathTransform3 *result, VmathVector3 col2 );
-
-/*
- * Set column 3 of a 3x4 transformation matrix
- */
-static inline void vmathT3SetCol3_V( VmathTransform3 *result, VmathVector3 col3 );
-
-/*
- * Get column 0 of a 3x4 transformation matrix
- */
-static inline VmathVector3 vmathT3GetCol0_V( VmathTransform3 tfrm );
-
-/*
- * Get column 1 of a 3x4 transformation matrix
- */
-static inline VmathVector3 vmathT3GetCol1_V( VmathTransform3 tfrm );
-
-/*
- * Get column 2 of a 3x4 transformation matrix
- */
-static inline VmathVector3 vmathT3GetCol2_V( VmathTransform3 tfrm );
-
-/*
- * Get column 3 of a 3x4 transformation matrix
- */
-static inline VmathVector3 vmathT3GetCol3_V( VmathTransform3 tfrm );
-
-/*
- * Set the column of a 3x4 transformation matrix referred to by the specified index
- */
-static inline void vmathT3SetCol_V( VmathTransform3 *result, int col, VmathVector3 vec );
-
-/*
- * Set the row of a 3x4 transformation matrix referred to by the specified index
- */
-static inline void vmathT3SetRow_V( VmathTransform3 *result, int row, VmathVector4 vec );
-
-/*
- * Get the column of a 3x4 transformation matrix referred to by the specified index
- */
-static inline VmathVector3 vmathT3GetCol_V( VmathTransform3 tfrm, int col );
-
-/*
- * Get the row of a 3x4 transformation matrix referred to by the specified index
- */
-static inline VmathVector4 vmathT3GetRow_V( VmathTransform3 tfrm, int row );
-
-/*
- * Set the element of a 3x4 transformation matrix referred to by column and row indices
- */
-static inline void vmathT3SetElem_V( VmathTransform3 *result, int col, int row, float val );
-
-/*
- * Get the element of a 3x4 transformation matrix referred to by column and row indices
- */
-static inline float vmathT3GetElem_V( VmathTransform3 tfrm, int col, int row );
-
-/*
- * Multiply a 3x4 transformation matrix by a 3-D vector
- */
-static inline VmathVector3 vmathT3MulV3_V( VmathTransform3 tfrm, VmathVector3 vec );
-
-/*
- * Multiply a 3x4 transformation matrix by a 3-D point
- */
-static inline VmathPoint3 vmathT3MulP3_V( VmathTransform3 tfrm, VmathPoint3 pnt );
-
-/*
- * Multiply two 3x4 transformation matrices
- */
-static inline VmathTransform3 vmathT3Mul_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1 );
-
-/*
- * Construct an identity 3x4 transformation matrix
- */
-static inline VmathTransform3 vmathT3MakeIdentity_V( );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the x axis
- */
-static inline VmathTransform3 vmathT3MakeRotationX_V( float radians );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the y axis
- */
-static inline VmathTransform3 vmathT3MakeRotationY_V( float radians );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the z axis
- */
-static inline VmathTransform3 vmathT3MakeRotationZ_V( float radians );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the x, y, and z axes
- */
-static inline VmathTransform3 vmathT3MakeRotationZYX_V( VmathVector3 radiansXYZ );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector
- */
-static inline VmathTransform3 vmathT3MakeRotationAxis_V( float radians, VmathVector3 unitVec );
-
-/*
- * Construct a rotation matrix from a unit-length quaternion
- */
-static inline VmathTransform3 vmathT3MakeRotationQ_V( VmathQuat unitQuat );
-
-/*
- * Construct a 3x4 transformation matrix to perform scaling
- */
-static inline VmathTransform3 vmathT3MakeScale_V( VmathVector3 scaleVec );
-
-/*
- * Construct a 3x4 transformation matrix to perform translation
- */
-static inline VmathTransform3 vmathT3MakeTranslation_V( VmathVector3 translateVec );
-
-/*
- * Append (post-multiply) a scale transformation to a 3x4 transformation matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline VmathTransform3 vmathT3AppendScale_V( VmathTransform3 tfrm, VmathVector3 scaleVec );
-
-/*
- * Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline VmathTransform3 vmathT3PrependScale_V( VmathVector3 scaleVec, VmathTransform3 tfrm );
-
-/*
- * Multiply two 3x4 transformation matrices per element
- */
-static inline VmathTransform3 vmathT3MulPerElem_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1 );
-
-/*
- * Compute the absolute value of a 3x4 transformation matrix per element
- */
-static inline VmathTransform3 vmathT3AbsPerElem_V( VmathTransform3 tfrm );
-
-/*
- * Inverse of a 3x4 transformation matrix
- * NOTE: 
- * Result is unpredictable when the determinant of the left 3x3 submatrix is equal to or near 0.
- */
-static inline VmathTransform3 vmathT3Inverse_V( VmathTransform3 tfrm );
-
-/*
- * Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix
- * NOTE: 
- * This can be used to achieve better performance than a general inverse when the specified 3x4 transformation matrix meets the given restrictions.
- */
-static inline VmathTransform3 vmathT3OrthoInverse_V( VmathTransform3 tfrm );
-
-/*
- * Conditionally select between two 3x4 transformation matrices
- */
-static inline VmathTransform3 vmathT3Select_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1, unsigned int select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3x4 transformation matrix
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathT3Print_V( VmathTransform3 tfrm );
-
-/*
- * Print a 3x4 transformation matrix and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathT3Prints_V( VmathTransform3 tfrm, const char *name );
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#include "vectormath_aos.h"
-#include "vec_aos_v.h"
-#include "quat_aos_v.h"
-#include "mat_aos_v.h"
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_AOS_C_V_SCALAR_H
+#define _VECTORMATH_AOS_C_V_SCALAR_H
+
+#include <math.h>
+
+#ifdef _VECTORMATH_DEBUG
+#include <stdio.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+#ifndef _VECTORMATH_AOS_C_TYPES_H
+#define _VECTORMATH_AOS_C_TYPES_H
+
+/* A 3-D vector in array-of-structures format: three float elements x, y and z.
+ * With __GNUC__ the type is declared 16-byte aligned; otherwise a fourth float member is added. */
+typedef struct _VmathVector3
+{
+    float x;
+    float y;
+    float z;
+#ifndef __GNUC__
+    float d; /* presumably padding only (no accessor for it is declared in this header) - confirm */
+#endif
+}
+#ifdef __GNUC__
+__attribute__ ((aligned(16))) /* GNU-specific request for 16-byte alignment of the type */
+#endif
+VmathVector3;
+
+/* A 4-D vector in array-of-structures format: four float elements x, y, z and w.
+ * Declared 16-byte aligned when __GNUC__ is defined; no alignment is requested otherwise. */
+typedef struct _VmathVector4
+{
+    float x;
+    float y;
+    float z;
+    float w;
+}
+#ifdef __GNUC__
+__attribute__ ((aligned(16))) /* GNU-specific request for 16-byte alignment of the type */
+#endif
+VmathVector4;
+
+/* A 3-D point in array-of-structures format: three float elements x, y and z.
+ * With __GNUC__ the type is declared 16-byte aligned; otherwise a fourth float member is added. */
+typedef struct _VmathPoint3
+{
+    float x;
+    float y;
+    float z;
+#ifndef __GNUC__
+    float d; /* presumably padding only (no accessor for it is declared in this header) - confirm */
+#endif
+}
+#ifdef __GNUC__
+__attribute__ ((aligned(16))) /* GNU-specific request for 16-byte alignment of the type */
+#endif
+VmathPoint3;
+
+/* A quaternion in array-of-structures format: four float elements x, y, z and w.
+ * Declared 16-byte aligned when __GNUC__ is defined; no alignment is requested otherwise. */
+typedef struct _VmathQuat
+{
+    float x;
+    float y;
+    float z;
+    float w;
+}
+#ifdef __GNUC__
+__attribute__ ((aligned(16))) /* GNU-specific request for 16-byte alignment of the type */
+#endif
+VmathQuat;
+
+/* A 3x3 matrix in array-of-structures format.
+ * Stored as three VmathVector3 columns, col0 through col2. */
+typedef struct _VmathMatrix3
+{
+    VmathVector3 col0;
+    VmathVector3 col1;
+    VmathVector3 col2;
+} VmathMatrix3;
+
+/* A 4x4 matrix in array-of-structures format.
+ * Stored as four VmathVector4 columns, col0 through col3. */
+typedef struct _VmathMatrix4
+{
+    VmathVector4 col0;
+    VmathVector4 col1;
+    VmathVector4 col2;
+    VmathVector4 col3;
+} VmathMatrix4;
+
+/* A 3x4 transformation matrix in array-of-structures format.
+ * Stored as four VmathVector3 columns, col0 through col3. */
+typedef struct _VmathTransform3
+{
+    VmathVector3 col0;
+    VmathVector3 col1;
+    VmathVector3 col2;
+    VmathVector3 col3; /* NOTE(review): presumably the translation column - confirm */
+} VmathTransform3;
+
+#endif
+
+/*
+ * Construct a 3-D vector from x, y, and z elements
+ */
+static inline VmathVector3 vmathV3MakeFromElems_V( float x, float y, float z );
+
+/*
+ * Copy elements from a 3-D point into a 3-D vector
+ */
+static inline VmathVector3 vmathV3MakeFromP3_V( VmathPoint3 pnt );
+
+/*
+ * Set all elements of a 3-D vector to the same scalar value
+ */
+static inline VmathVector3 vmathV3MakeFromScalar_V( float scalar );
+
+/*
+ * Set the x element of a 3-D vector
+ */
+static inline void vmathV3SetX_V( VmathVector3 *result, float x );
+
+/*
+ * Set the y element of a 3-D vector
+ */
+static inline void vmathV3SetY_V( VmathVector3 *result, float y );
+
+/*
+ * Set the z element of a 3-D vector
+ */
+static inline void vmathV3SetZ_V( VmathVector3 *result, float z );
+
+/*
+ * Get the x element of a 3-D vector
+ */
+static inline float vmathV3GetX_V( VmathVector3 vec );
+
+/*
+ * Get the y element of a 3-D vector
+ */
+static inline float vmathV3GetY_V( VmathVector3 vec );
+
+/*
+ * Get the z element of a 3-D vector
+ */
+static inline float vmathV3GetZ_V( VmathVector3 vec );
+
+/*
+ * Set an x, y, or z element of a 3-D vector by index
+ */
+static inline void vmathV3SetElem_V( VmathVector3 *result, int idx, float value );
+
+/*
+ * Get an x, y, or z element of a 3-D vector by index
+ */
+static inline float vmathV3GetElem_V( VmathVector3 vec, int idx );
+
+/*
+ * Add two 3-D vectors
+ */
+static inline VmathVector3 vmathV3Add_V( VmathVector3 vec0, VmathVector3 vec1 );
+
+/*
+ * Subtract a 3-D vector from another 3-D vector
+ */
+static inline VmathVector3 vmathV3Sub_V( VmathVector3 vec0, VmathVector3 vec1 );
+
+/*
+ * Add a 3-D vector to a 3-D point
+ */
+static inline VmathPoint3 vmathV3AddP3_V( VmathVector3 vec, VmathPoint3 pnt );
+
+/*
+ * Multiply a 3-D vector by a scalar
+ */
+static inline VmathVector3 vmathV3ScalarMul_V( VmathVector3 vec, float scalar );
+
+/*
+ * Divide a 3-D vector by a scalar
+ */
+static inline VmathVector3 vmathV3ScalarDiv_V( VmathVector3 vec, float scalar );
+
+/*
+ * Negate all elements of a 3-D vector
+ */
+static inline VmathVector3 vmathV3Neg_V( VmathVector3 vec );
+
+/*
+ * Construct x axis as a 3-D vector; takes no arguments (explicit void gives C callers a checked prototype)
+ */
+static inline VmathVector3 vmathV3MakeXAxis_V( void );
+
+/*
+ * Construct y axis as a 3-D vector; takes no arguments (explicit void gives C callers a checked prototype)
+ */
+static inline VmathVector3 vmathV3MakeYAxis_V( void );
+
+/*
+ * Construct z axis as a 3-D vector; takes no arguments (explicit void gives C callers a checked prototype)
+ */
+static inline VmathVector3 vmathV3MakeZAxis_V( void );
+
+/*
+ * Multiply two 3-D vectors per element
+ */
+static inline VmathVector3 vmathV3MulPerElem_V( VmathVector3 vec0, VmathVector3 vec1 );
+
+/*
+ * Divide two 3-D vectors per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function divf4.
+ */
+static inline VmathVector3 vmathV3DivPerElem_V( VmathVector3 vec0, VmathVector3 vec1 );
+
+/*
+ * Compute the reciprocal of a 3-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function recipf4.
+ */
+static inline VmathVector3 vmathV3RecipPerElem_V( VmathVector3 vec );
+
+/*
+ * Compute the square root of a 3-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function sqrtf4.
+ */
+static inline VmathVector3 vmathV3SqrtPerElem_V( VmathVector3 vec );
+
+/*
+ * Compute the reciprocal square root of a 3-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function rsqrtf4.
+ */
+static inline VmathVector3 vmathV3RsqrtPerElem_V( VmathVector3 vec );
+
+/*
+ * Compute the absolute value of a 3-D vector per element
+ */
+static inline VmathVector3 vmathV3AbsPerElem_V( VmathVector3 vec );
+
+/*
+ * Copy sign from one 3-D vector to another, per element
+ */
+static inline VmathVector3 vmathV3CopySignPerElem_V( VmathVector3 vec0, VmathVector3 vec1 );
+
+/*
+ * Maximum of two 3-D vectors per element
+ */
+static inline VmathVector3 vmathV3MaxPerElem_V( VmathVector3 vec0, VmathVector3 vec1 );
+
+/*
+ * Minimum of two 3-D vectors per element
+ */
+static inline VmathVector3 vmathV3MinPerElem_V( VmathVector3 vec0, VmathVector3 vec1 );
+
+/*
+ * Maximum element of a 3-D vector
+ */
+static inline float vmathV3MaxElem_V( VmathVector3 vec );
+
+/*
+ * Minimum element of a 3-D vector
+ */
+static inline float vmathV3MinElem_V( VmathVector3 vec );
+
+/*
+ * Compute the sum of all elements of a 3-D vector
+ */
+static inline float vmathV3Sum_V( VmathVector3 vec );
+
+/*
+ * Compute the dot product of two 3-D vectors
+ */
+static inline float vmathV3Dot_V( VmathVector3 vec0, VmathVector3 vec1 );
+
+/*
+ * Compute the square of the length of a 3-D vector
+ */
+static inline float vmathV3LengthSqr_V( VmathVector3 vec );
+
+/*
+ * Compute the length of a 3-D vector
+ */
+static inline float vmathV3Length_V( VmathVector3 vec );
+
+/*
+ * Normalize a 3-D vector
+ * NOTE: 
+ * The result is unpredictable when all elements of vec are at or near zero.
+ */
+static inline VmathVector3 vmathV3Normalize_V( VmathVector3 vec );
+
+/*
+ * Compute cross product of two 3-D vectors
+ */
+static inline VmathVector3 vmathV3Cross_V( VmathVector3 vec0, VmathVector3 vec1 );
+
+/*
+ * Outer product of two 3-D vectors
+ */
+static inline VmathMatrix3 vmathV3Outer_V( VmathVector3 vec0, VmathVector3 vec1 );
+
+/*
+ * Pre-multiply a row vector by a 3x3 matrix
+ */
+static inline VmathVector3 vmathV3RowMul_V( VmathVector3 vec, VmathMatrix3 mat );
+
+/*
+ * Cross-product matrix of a 3-D vector
+ */
+static inline VmathMatrix3 vmathV3CrossMatrix_V( VmathVector3 vec );
+
+/*
+ * Create cross-product matrix and multiply
+ * NOTE: 
+ * Faster than separately creating a cross-product matrix and multiplying.
+ */
+static inline VmathMatrix3 vmathV3CrossMatrixMul_V( VmathVector3 vec, VmathMatrix3 mat );
+
+/*
+ * Linear interpolation between two 3-D vectors
+ * NOTE: 
+ * Does not clamp t between 0 and 1.
+ */
+static inline VmathVector3 vmathV3Lerp_V( float t, VmathVector3 vec0, VmathVector3 vec1 );
+
+/*
+ * Spherical linear interpolation between two 3-D vectors
+ * NOTE: 
+ * The result is unpredictable if the vectors point in opposite directions.
+ * Does not clamp t between 0 and 1.
+ */
+static inline VmathVector3 vmathV3Slerp_V( float t, VmathVector3 unitVec0, VmathVector3 unitVec1 );
+
+/*
+ * Conditionally select between two 3-D vectors (presumably vec1 when select1 is nonzero, else vec0 - confirm)
+ */
+static inline VmathVector3 vmathV3Select_V( VmathVector3 vec0, VmathVector3 vec1, unsigned int select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3-D vector
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathV3Print_V( VmathVector3 vec );
+
+/*
+ * Print a 3-D vector and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathV3Prints_V( VmathVector3 vec, const char *name );
+
+#endif
+
+/*
+ * Construct a 4-D vector from x, y, z, and w elements
+ */
+static inline VmathVector4 vmathV4MakeFromElems_V( float x, float y, float z, float w );
+
+/*
+ * Construct a 4-D vector from a 3-D vector and a scalar
+ */
+static inline VmathVector4 vmathV4MakeFromV3Scalar_V( VmathVector3 xyz, float w );
+
+/*
+ * Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0
+ */
+static inline VmathVector4 vmathV4MakeFromV3_V( VmathVector3 vec );
+
+/*
+ * Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1
+ */
+static inline VmathVector4 vmathV4MakeFromP3_V( VmathPoint3 pnt );
+
+/*
+ * Copy elements from a quaternion into a 4-D vector
+ */
+static inline VmathVector4 vmathV4MakeFromQ_V( VmathQuat quat );
+
+/*
+ * Set all elements of a 4-D vector to the same scalar value
+ */
+static inline VmathVector4 vmathV4MakeFromScalar_V( float scalar );
+
+/*
+ * Set the x, y, and z elements of a 4-D vector
+ * NOTE: 
+ * This function does not change the w element.
+ */
+static inline void vmathV4SetXYZ_V( VmathVector4 *result, VmathVector3 vec );
+
+/*
+ * Get the x, y, and z elements of a 4-D vector
+ */
+static inline VmathVector3 vmathV4GetXYZ_V( VmathVector4 vec );
+
+/*
+ * Set the x element of a 4-D vector
+ */
+static inline void vmathV4SetX_V( VmathVector4 *result, float x );
+
+/*
+ * Set the y element of a 4-D vector
+ */
+static inline void vmathV4SetY_V( VmathVector4 *result, float y );
+
+/*
+ * Set the z element of a 4-D vector
+ */
+static inline void vmathV4SetZ_V( VmathVector4 *result, float z );
+
+/*
+ * Set the w element of a 4-D vector
+ */
+static inline void vmathV4SetW_V( VmathVector4 *result, float w );
+
+/*
+ * Get the x element of a 4-D vector
+ */
+static inline float vmathV4GetX_V( VmathVector4 vec );
+
+/*
+ * Get the y element of a 4-D vector
+ */
+static inline float vmathV4GetY_V( VmathVector4 vec );
+
+/*
+ * Get the z element of a 4-D vector
+ */
+static inline float vmathV4GetZ_V( VmathVector4 vec );
+
+/*
+ * Get the w element of a 4-D vector
+ */
+static inline float vmathV4GetW_V( VmathVector4 vec );
+
+/*
+ * Set an x, y, z, or w element of a 4-D vector by index
+ */
+static inline void vmathV4SetElem_V( VmathVector4 *result, int idx, float value );
+
+/*
+ * Get an x, y, z, or w element of a 4-D vector by index
+ */
+static inline float vmathV4GetElem_V( VmathVector4 vec, int idx );
+
+/*
+ * Add two 4-D vectors
+ */
+static inline VmathVector4 vmathV4Add_V( VmathVector4 vec0, VmathVector4 vec1 );
+
+/*
+ * Subtract a 4-D vector from another 4-D vector
+ */
+static inline VmathVector4 vmathV4Sub_V( VmathVector4 vec0, VmathVector4 vec1 );
+
+/*
+ * Multiply a 4-D vector by a scalar
+ */
+static inline VmathVector4 vmathV4ScalarMul_V( VmathVector4 vec, float scalar );
+
+/*
+ * Divide a 4-D vector by a scalar
+ */
+static inline VmathVector4 vmathV4ScalarDiv_V( VmathVector4 vec, float scalar );
+
+/*
+ * Negate all elements of a 4-D vector
+ */
+static inline VmathVector4 vmathV4Neg_V( VmathVector4 vec );
+
+/*
+ * Construct x axis as a 4-D vector; takes no arguments (explicit void gives C callers a checked prototype)
+ */
+static inline VmathVector4 vmathV4MakeXAxis_V( void );
+
+/*
+ * Construct y axis as a 4-D vector; takes no arguments (explicit void gives C callers a checked prototype)
+ */
+static inline VmathVector4 vmathV4MakeYAxis_V( void );
+
+/*
+ * Construct z axis as a 4-D vector; takes no arguments (explicit void gives C callers a checked prototype)
+ */
+static inline VmathVector4 vmathV4MakeZAxis_V( void );
+
+/*
+ * Construct w axis as a 4-D vector; takes no arguments (explicit void gives C callers a checked prototype)
+ */
+static inline VmathVector4 vmathV4MakeWAxis_V( void );
+
+/*
+ * Multiply two 4-D vectors per element
+ */
+static inline VmathVector4 vmathV4MulPerElem_V( VmathVector4 vec0, VmathVector4 vec1 );
+
+/*
+ * Divide two 4-D vectors per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function divf4.
+ */
+static inline VmathVector4 vmathV4DivPerElem_V( VmathVector4 vec0, VmathVector4 vec1 );
+
+/*
+ * Compute the reciprocal of a 4-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function recipf4.
+ */
+static inline VmathVector4 vmathV4RecipPerElem_V( VmathVector4 vec );
+
+/*
+ * Compute the square root of a 4-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function sqrtf4.
+ */
+static inline VmathVector4 vmathV4SqrtPerElem_V( VmathVector4 vec );
+
+/*
+ * Compute the reciprocal square root of a 4-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function rsqrtf4.
+ */
+static inline VmathVector4 vmathV4RsqrtPerElem_V( VmathVector4 vec );
+
+/*
+ * Compute the absolute value of a 4-D vector per element
+ */
+static inline VmathVector4 vmathV4AbsPerElem_V( VmathVector4 vec );
+
+/*
+ * Copy sign from one 4-D vector to another, per element
+ */
+static inline VmathVector4 vmathV4CopySignPerElem_V( VmathVector4 vec0, VmathVector4 vec1 );
+
+/*
+ * Maximum of two 4-D vectors per element
+ */
+static inline VmathVector4 vmathV4MaxPerElem_V( VmathVector4 vec0, VmathVector4 vec1 );
+
+/*
+ * Minimum of two 4-D vectors per element
+ */
+static inline VmathVector4 vmathV4MinPerElem_V( VmathVector4 vec0, VmathVector4 vec1 );
+
+/*
+ * Maximum element of a 4-D vector
+ */
+static inline float vmathV4MaxElem_V( VmathVector4 vec );
+
+/*
+ * Minimum element of a 4-D vector
+ */
+static inline float vmathV4MinElem_V( VmathVector4 vec );
+
+/*
+ * Compute the sum of all elements of a 4-D vector
+ */
+static inline float vmathV4Sum_V( VmathVector4 vec );
+
+/*
+ * Compute the dot product of two 4-D vectors
+ */
+static inline float vmathV4Dot_V( VmathVector4 vec0, VmathVector4 vec1 );
+
+/*
+ * Compute the square of the length of a 4-D vector
+ */
+static inline float vmathV4LengthSqr_V( VmathVector4 vec );
+
+/*
+ * Compute the length of a 4-D vector
+ */
+static inline float vmathV4Length_V( VmathVector4 vec );
+
+/*
+ * Normalize a 4-D vector
+ * NOTE: 
+ * The result is unpredictable when all elements of vec are at or near zero.
+ */
+static inline VmathVector4 vmathV4Normalize_V( VmathVector4 vec );
+
+/*
+ * Outer product of two 4-D vectors
+ */
+static inline VmathMatrix4 vmathV4Outer_V( VmathVector4 vec0, VmathVector4 vec1 );
+
+/*
+ * Linear interpolation between two 4-D vectors
+ * NOTE: 
+ * Does not clamp t between 0 and 1.
+ */
+static inline VmathVector4 vmathV4Lerp_V( float t, VmathVector4 vec0, VmathVector4 vec1 );
+
+/*
+ * Spherical linear interpolation between two 4-D vectors
+ * NOTE: 
+ * The result is unpredictable if the vectors point in opposite directions.
+ * Does not clamp t between 0 and 1.
+ */
+static inline VmathVector4 vmathV4Slerp_V( float t, VmathVector4 unitVec0, VmathVector4 unitVec1 );
+
+/*
+ * Conditionally select between two 4-D vectors (presumably vec1 when select1 is nonzero, else vec0 - confirm)
+ */
+static inline VmathVector4 vmathV4Select_V( VmathVector4 vec0, VmathVector4 vec1, unsigned int select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 4-D vector
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathV4Print_V( VmathVector4 vec );
+
+/*
+ * Print a 4-D vector and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathV4Prints_V( VmathVector4 vec, const char *name );
+
+#endif
+
+/*
+ * Construct a 3-D point from x, y, and z elements
+ */
+static inline VmathPoint3 vmathP3MakeFromElems_V( float x, float y, float z );
+
+/*
+ * Copy elements from a 3-D vector into a 3-D point
+ */
+static inline VmathPoint3 vmathP3MakeFromV3_V( VmathVector3 vec );
+
+/*
+ * Set all elements of a 3-D point to the same scalar value
+ */
+static inline VmathPoint3 vmathP3MakeFromScalar_V( float scalar );
+
+/*
+ * Set the x element of a 3-D point
+ */
+static inline void vmathP3SetX_V( VmathPoint3 *result, float x );
+
+/*
+ * Set the y element of a 3-D point
+ */
+static inline void vmathP3SetY_V( VmathPoint3 *result, float y );
+
+/*
+ * Set the z element of a 3-D point
+ */
+static inline void vmathP3SetZ_V( VmathPoint3 *result, float z );
+
+/*
+ * Get the x element of a 3-D point
+ */
+static inline float vmathP3GetX_V( VmathPoint3 pnt );
+
+/*
+ * Get the y element of a 3-D point
+ */
+static inline float vmathP3GetY_V( VmathPoint3 pnt );
+
+/*
+ * Get the z element of a 3-D point
+ */
+static inline float vmathP3GetZ_V( VmathPoint3 pnt );
+
+/*
+ * Set an x, y, or z element of a 3-D point by index
+ */
+static inline void vmathP3SetElem_V( VmathPoint3 *result, int idx, float value );
+
+/*
+ * Get an x, y, or z element of a 3-D point by index
+ */
+static inline float vmathP3GetElem_V( VmathPoint3 pnt, int idx );
+
+/*
+ * Subtract a 3-D point from another 3-D point
+ */
+static inline VmathVector3 vmathP3Sub_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
+
+/*
+ * Add a 3-D point to a 3-D vector
+ */
+static inline VmathPoint3 vmathP3AddV3_V( VmathPoint3 pnt, VmathVector3 vec );
+
+/*
+ * Subtract a 3-D vector from a 3-D point
+ */
+static inline VmathPoint3 vmathP3SubV3_V( VmathPoint3 pnt, VmathVector3 vec );
+
+/*
+ * Multiply two 3-D points per element
+ */
+static inline VmathPoint3 vmathP3MulPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
+
+/*
+ * Divide two 3-D points per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function divf4.
+ */
+static inline VmathPoint3 vmathP3DivPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
+
+/*
+ * Compute the reciprocal of a 3-D point per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function recipf4.
+ */
+static inline VmathPoint3 vmathP3RecipPerElem_V( VmathPoint3 pnt );
+
+/*
+ * Compute the square root of a 3-D point per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function sqrtf4.
+ */
+static inline VmathPoint3 vmathP3SqrtPerElem_V( VmathPoint3 pnt );
+
+/*
+ * Compute the reciprocal square root of a 3-D point per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function rsqrtf4.
+ */
+static inline VmathPoint3 vmathP3RsqrtPerElem_V( VmathPoint3 pnt );
+
+/*
+ * Compute the absolute value of a 3-D point per element
+ */
+static inline VmathPoint3 vmathP3AbsPerElem_V( VmathPoint3 pnt );
+
+/*
+ * Copy sign from one 3-D point to another, per element
+ */
+static inline VmathPoint3 vmathP3CopySignPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
+
+/*
+ * Maximum of two 3-D points per element
+ */
+static inline VmathPoint3 vmathP3MaxPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
+
+/*
+ * Minimum of two 3-D points per element
+ */
+static inline VmathPoint3 vmathP3MinPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
+
+/*
+ * Maximum element of a 3-D point
+ */
+static inline float vmathP3MaxElem_V( VmathPoint3 pnt );
+
+/*
+ * Minimum element of a 3-D point
+ */
+static inline float vmathP3MinElem_V( VmathPoint3 pnt );
+
+/*
+ * Compute the sum of all elements of a 3-D point
+ */
+static inline float vmathP3Sum_V( VmathPoint3 pnt );
+
+/*
+ * Apply uniform scale to a 3-D point
+ */
+static inline VmathPoint3 vmathP3Scale_V( VmathPoint3 pnt, float scaleVal );
+
+/*
+ * Apply non-uniform scale to a 3-D point
+ */
+static inline VmathPoint3 vmathP3NonUniformScale_V( VmathPoint3 pnt, VmathVector3 scaleVec );
+
+/*
+ * Scalar projection of a 3-D point on a unit-length 3-D vector
+ */
+static inline float vmathP3Projection_V( VmathPoint3 pnt, VmathVector3 unitVec );
+
+/*
+ * Compute the square of the distance of a 3-D point from the coordinate-system origin
+ */
+static inline float vmathP3DistSqrFromOrigin_V( VmathPoint3 pnt );
+
+/*
+ * Compute the distance of a 3-D point from the coordinate-system origin
+ */
+static inline float vmathP3DistFromOrigin_V( VmathPoint3 pnt );
+
+/*
+ * Compute the square of the distance between two 3-D points
+ */
+static inline float vmathP3DistSqr_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
+
+/*
+ * Compute the distance between two 3-D points
+ */
+static inline float vmathP3Dist_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
+
+/*
+ * Linear interpolation between two 3-D points
+ * NOTE: 
+ * Does not clamp t between 0 and 1.
+ */
+static inline VmathPoint3 vmathP3Lerp_V( float t, VmathPoint3 pnt0, VmathPoint3 pnt1 );
+
+/*
+ * Conditionally select between two 3-D points (presumably pnt1 when select1 is nonzero, else pnt0 - confirm)
+ */
+static inline VmathPoint3 vmathP3Select_V( VmathPoint3 pnt0, VmathPoint3 pnt1, unsigned int select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3-D point
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathP3Print_V( VmathPoint3 pnt );
+
+/*
+ * Print a 3-D point and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathP3Prints_V( VmathPoint3 pnt, const char *name );
+
+#endif
+
+/*
+ * Construct a quaternion from x, y, z, and w elements
+ */
+static inline VmathQuat vmathQMakeFromElems_V( float x, float y, float z, float w );
+
+/*
+ * Construct a quaternion from a 3-D vector and a scalar
+ */
+static inline VmathQuat vmathQMakeFromV3Scalar_V( VmathVector3 xyz, float w );
+
+/*
+ * Copy elements from a 4-D vector into a quaternion
+ */
+static inline VmathQuat vmathQMakeFromV4_V( VmathVector4 vec );
+
+/*
+ * Convert a rotation matrix to a unit-length quaternion
+ */
+static inline VmathQuat vmathQMakeFromM3_V( VmathMatrix3 rotMat );
+
+/*
+ * Set all elements of a quaternion to the same scalar value
+ */
+static inline VmathQuat vmathQMakeFromScalar_V( float scalar );
+
+/*
+ * Set the x, y, and z elements of a quaternion
+ * NOTE: 
+ * This function does not change the w element.
+ */
+static inline void vmathQSetXYZ_V( VmathQuat *result, VmathVector3 vec );
+
+/*
+ * Get the x, y, and z elements of a quaternion
+ */
+static inline VmathVector3 vmathQGetXYZ_V( VmathQuat quat );
+
+/*
+ * Set the x element of a quaternion
+ */
+static inline void vmathQSetX_V( VmathQuat *result, float x );
+
+/*
+ * Set the y element of a quaternion
+ */
+static inline void vmathQSetY_V( VmathQuat *result, float y );
+
+/*
+ * Set the z element of a quaternion
+ */
+static inline void vmathQSetZ_V( VmathQuat *result, float z );
+
+/*
+ * Set the w element of a quaternion
+ */
+static inline void vmathQSetW_V( VmathQuat *result, float w );
+
+/*
+ * Get the x element of a quaternion
+ */
+static inline float vmathQGetX_V( VmathQuat quat );
+
+/*
+ * Get the y element of a quaternion
+ */
+static inline float vmathQGetY_V( VmathQuat quat );
+
+/*
+ * Get the z element of a quaternion
+ */
+static inline float vmathQGetZ_V( VmathQuat quat );
+
+/*
+ * Get the w element of a quaternion
+ */
+static inline float vmathQGetW_V( VmathQuat quat );
+
+/*
+ * Set an x, y, z, or w element of a quaternion by index
+ */
+static inline void vmathQSetElem_V( VmathQuat *result, int idx, float value );
+
+/*
+ * Get an x, y, z, or w element of a quaternion by index
+ */
+static inline float vmathQGetElem_V( VmathQuat quat, int idx );
+
+/*
+ * Add two quaternions
+ */
+static inline VmathQuat vmathQAdd_V( VmathQuat quat0, VmathQuat quat1 );
+
+/*
+ * Subtract a quaternion from another quaternion
+ */
+static inline VmathQuat vmathQSub_V( VmathQuat quat0, VmathQuat quat1 );
+
+/*
+ * Multiply two quaternions
+ */
+static inline VmathQuat vmathQMul_V( VmathQuat quat0, VmathQuat quat1 );
+
+/*
+ * Multiply a quaternion by a scalar
+ */
+static inline VmathQuat vmathQScalarMul_V( VmathQuat quat, float scalar );
+
+/*
+ * Divide a quaternion by a scalar
+ */
+static inline VmathQuat vmathQScalarDiv_V( VmathQuat quat, float scalar );
+
+/*
+ * Negate all elements of a quaternion
+ */
+static inline VmathQuat vmathQNeg_V( VmathQuat quat );
+
+/*
+ * Construct an identity quaternion; takes no arguments (explicit void gives C callers a checked prototype)
+ */
+static inline VmathQuat vmathQMakeIdentity_V( void );
+
+/*
+ * Construct a quaternion to rotate between two unit-length 3-D vectors
+ * NOTE: 
+ * The result is unpredictable if unitVec0 and unitVec1 point in opposite directions.
+ */
+static inline VmathQuat vmathQMakeRotationArc_V( VmathVector3 unitVec0, VmathVector3 unitVec1 );
+
+/*
+ * Construct a quaternion to rotate around a unit-length 3-D vector
+ */
+static inline VmathQuat vmathQMakeRotationAxis_V( float radians, VmathVector3 unitVec );
+
+/*
+ * Construct a quaternion to rotate around the x axis
+ */
+static inline VmathQuat vmathQMakeRotationX_V( float radians );
+
+/*
+ * Construct a quaternion to rotate around the y axis
+ */
+static inline VmathQuat vmathQMakeRotationY_V( float radians );
+
+/*
+ * Construct a quaternion to rotate around the z axis
+ */
+static inline VmathQuat vmathQMakeRotationZ_V( float radians );
+
+/*
+ * Compute the conjugate of a quaternion
+ */
+static inline VmathQuat vmathQConj_V( VmathQuat quat );
+
+/*
+ * Use a unit-length quaternion to rotate a 3-D vector
+ */
+static inline VmathVector3 vmathQRotate_V( VmathQuat unitQuat, VmathVector3 vec );
+
+/*
+ * Compute the dot product of two quaternions
+ */
+static inline float vmathQDot_V( VmathQuat quat0, VmathQuat quat1 );
+
+/*
+ * Compute the norm of a quaternion (NOTE(review): distinct from vmathQLength_V; presumably the squared length - confirm)
+ */
+static inline float vmathQNorm_V( VmathQuat quat );
+
+/*
+ * Compute the length of a quaternion
+ */
+static inline float vmathQLength_V( VmathQuat quat );
+
+/*
+ * Normalize a quaternion
+ * NOTE: 
+ * The result is unpredictable when all elements of quat are at or near zero.
+ */
+static inline VmathQuat vmathQNormalize_V( VmathQuat quat );
+
+/*
+ * Linear interpolation between two quaternions
+ * NOTE: 
+ * Does not clamp t between 0 and 1.
+ */
+static inline VmathQuat vmathQLerp_V( float t, VmathQuat quat0, VmathQuat quat1 );
+
+/*
+ * Spherical linear interpolation between two quaternions
+ * NOTE: 
+ * Interpolates along the shortest path between orientations.
+ * Does not clamp t between 0 and 1.
+ */
+static inline VmathQuat vmathQSlerp_V( float t, VmathQuat unitQuat0, VmathQuat unitQuat1 );
+
+/*
+ * Spherical quadrangle interpolation
+ */
+static inline VmathQuat vmathQSquad_V( float t, VmathQuat unitQuat0, VmathQuat unitQuat1, VmathQuat unitQuat2, VmathQuat unitQuat3 );
+
+/*
+ * Conditionally select between two quaternions (presumably quat1 when select1 is nonzero, else quat0 - confirm)
+ */
+static inline VmathQuat vmathQSelect_V( VmathQuat quat0, VmathQuat quat1, unsigned int select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a quaternion
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathQPrint_V( VmathQuat quat );
+
+/*
+ * Print a quaternion and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathQPrints_V( VmathQuat quat, const char *name );
+
+#endif
+
+/*
+ * Construct a 3x3 matrix containing the specified columns
+ */
+static inline VmathMatrix3 vmathM3MakeFromCols_V( VmathVector3 col0, VmathVector3 col1, VmathVector3 col2 );
+
+/*
+ * Construct a 3x3 rotation matrix from a unit-length quaternion
+ */
+static inline VmathMatrix3 vmathM3MakeFromQ_V( VmathQuat unitQuat );
+
+/*
+ * Set all elements of a 3x3 matrix to the same scalar value
+ */
+static inline VmathMatrix3 vmathM3MakeFromScalar_V( float scalar );
+
+/*
+ * Set column 0 of a 3x3 matrix
+ */
+static inline void vmathM3SetCol0_V( VmathMatrix3 *result, VmathVector3 col0 );
+
+/*
+ * Set column 1 of a 3x3 matrix
+ */
+static inline void vmathM3SetCol1_V( VmathMatrix3 *result, VmathVector3 col1 );
+
+/*
+ * Set column 2 of a 3x3 matrix
+ */
+static inline void vmathM3SetCol2_V( VmathMatrix3 *result, VmathVector3 col2 );
+
+/*
+ * Get column 0 of a 3x3 matrix
+ */
+static inline VmathVector3 vmathM3GetCol0_V( VmathMatrix3 mat );
+
+/*
+ * Get column 1 of a 3x3 matrix
+ */
+static inline VmathVector3 vmathM3GetCol1_V( VmathMatrix3 mat );
+
+/*
+ * Get column 2 of a 3x3 matrix
+ */
+static inline VmathVector3 vmathM3GetCol2_V( VmathMatrix3 mat );
+
+/*
+ * Set the column of a 3x3 matrix referred to by the specified index
+ */
+static inline void vmathM3SetCol_V( VmathMatrix3 *result, int col, VmathVector3 vec );
+
+/*
+ * Set the row of a 3x3 matrix referred to by the specified index
+ */
+static inline void vmathM3SetRow_V( VmathMatrix3 *result, int row, VmathVector3 vec );
+
+/*
+ * Get the column of a 3x3 matrix referred to by the specified index
+ */
+static inline VmathVector3 vmathM3GetCol_V( VmathMatrix3 mat, int col );
+
+/*
+ * Get the row of a 3x3 matrix referred to by the specified index
+ */
+static inline VmathVector3 vmathM3GetRow_V( VmathMatrix3 mat, int row );
+
+/*
+ * Set the element of a 3x3 matrix referred to by column and row indices
+ */
+static inline void vmathM3SetElem_V( VmathMatrix3 *result, int col, int row, float val );
+
+/*
+ * Get the element of a 3x3 matrix referred to by column and row indices
+ */
+static inline float vmathM3GetElem_V( VmathMatrix3 mat, int col, int row );
+
+/*
+ * Add two 3x3 matrices
+ */
+static inline VmathMatrix3 vmathM3Add_V( VmathMatrix3 mat0, VmathMatrix3 mat1 );
+
+/*
+ * Subtract a 3x3 matrix from another 3x3 matrix
+ */
+static inline VmathMatrix3 vmathM3Sub_V( VmathMatrix3 mat0, VmathMatrix3 mat1 );
+
+/*
+ * Negate all elements of a 3x3 matrix
+ */
+static inline VmathMatrix3 vmathM3Neg_V( VmathMatrix3 mat );
+
+/*
+ * Multiply a 3x3 matrix by a scalar
+ */
+static inline VmathMatrix3 vmathM3ScalarMul_V( VmathMatrix3 mat, float scalar );
+
+/*
+ * Multiply a 3x3 matrix by a 3-D vector
+ */
+static inline VmathVector3 vmathM3MulV3_V( VmathMatrix3 mat, VmathVector3 vec );
+
+/*
+ * Multiply two 3x3 matrices
+ */
+static inline VmathMatrix3 vmathM3Mul_V( VmathMatrix3 mat0, VmathMatrix3 mat1 );
+
+/*
+ * Construct an identity 3x3 matrix (takes no arguments)
+ */
+static inline VmathMatrix3 vmathM3MakeIdentity_V( void );
+
+/*
+ * Construct a 3x3 matrix to rotate around the x axis
+ */
+static inline VmathMatrix3 vmathM3MakeRotationX_V( float radians );
+
+/*
+ * Construct a 3x3 matrix to rotate around the y axis
+ */
+static inline VmathMatrix3 vmathM3MakeRotationY_V( float radians );
+
+/*
+ * Construct a 3x3 matrix to rotate around the z axis
+ */
+static inline VmathMatrix3 vmathM3MakeRotationZ_V( float radians );
+
+/*
+ * Construct a 3x3 matrix to rotate around the x, y, and z axes
+ */
+static inline VmathMatrix3 vmathM3MakeRotationZYX_V( VmathVector3 radiansXYZ );
+
+/*
+ * Construct a 3x3 matrix to rotate around a unit-length 3-D vector
+ */
+static inline VmathMatrix3 vmathM3MakeRotationAxis_V( float radians, VmathVector3 unitVec );
+
+/*
+ * Construct a 3x3 rotation matrix from a unit-length quaternion
+ */
+static inline VmathMatrix3 vmathM3MakeRotationQ_V( VmathQuat unitQuat );
+
+/*
+ * Construct a 3x3 matrix to perform scaling
+ */
+static inline VmathMatrix3 vmathM3MakeScale_V( VmathVector3 scaleVec );
+
+/*
+ * Append (post-multiply) a scale transformation to a 3x3 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline VmathMatrix3 vmathM3AppendScale_V( VmathMatrix3 mat, VmathVector3 scaleVec );
+
+/*
+ * Prepend (pre-multiply) a scale transformation to a 3x3 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline VmathMatrix3 vmathM3PrependScale_V( VmathVector3 scaleVec, VmathMatrix3 mat );
+
+/*
+ * Multiply two 3x3 matrices per element
+ */
+static inline VmathMatrix3 vmathM3MulPerElem_V( VmathMatrix3 mat0, VmathMatrix3 mat1 );
+
+/*
+ * Compute the absolute value of a 3x3 matrix per element
+ */
+static inline VmathMatrix3 vmathM3AbsPerElem_V( VmathMatrix3 mat );
+
+/*
+ * Transpose of a 3x3 matrix
+ */
+static inline VmathMatrix3 vmathM3Transpose_V( VmathMatrix3 mat );
+
+/*
+ * Compute the inverse of a 3x3 matrix
+ * NOTE: 
+ * Result is unpredictable when the determinant of mat is equal to or near 0.
+ */
+static inline VmathMatrix3 vmathM3Inverse_V( VmathMatrix3 mat );
+
+/*
+ * Determinant of a 3x3 matrix
+ */
+static inline float vmathM3Determinant_V( VmathMatrix3 mat );
+
+/*
+ * Conditionally select between two 3x3 matrices
+ */
+static inline VmathMatrix3 vmathM3Select_V( VmathMatrix3 mat0, VmathMatrix3 mat1, unsigned int select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3x3 matrix
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathM3Print_V( VmathMatrix3 mat );
+
+/*
+ * Print a 3x3 matrix and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathM3Prints_V( VmathMatrix3 mat, const char *name );
+
+#endif
+
+/*
+ * Construct a 4x4 matrix containing the specified columns
+ */
+static inline VmathMatrix4 vmathM4MakeFromCols_V( VmathVector4 col0, VmathVector4 col1, VmathVector4 col2, VmathVector4 col3 );
+
+/*
+ * Construct a 4x4 matrix from a 3x4 transformation matrix
+ */
+static inline VmathMatrix4 vmathM4MakeFromT3_V( VmathTransform3 mat );
+
+/*
+ * Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector
+ */
+static inline VmathMatrix4 vmathM4MakeFromM3V3_V( VmathMatrix3 mat, VmathVector3 translateVec );
+
+/*
+ * Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector
+ */
+static inline VmathMatrix4 vmathM4MakeFromQV3_V( VmathQuat unitQuat, VmathVector3 translateVec );
+
+/*
+ * Set all elements of a 4x4 matrix to the same scalar value
+ */
+static inline VmathMatrix4 vmathM4MakeFromScalar_V( float scalar );
+
+/*
+ * Set the upper-left 3x3 submatrix of a 4x4 matrix
+ * NOTE: 
+ * This function does not change the bottom row elements.
+ */
+static inline void vmathM4SetUpper3x3_V( VmathMatrix4 *result, VmathMatrix3 mat3 );
+
+/*
+ * Get the upper-left 3x3 submatrix of a 4x4 matrix
+ */
+static inline VmathMatrix3 vmathM4GetUpper3x3_V( VmathMatrix4 mat );
+
+/*
+ * Set the translation component of a 4x4 matrix
+ * NOTE: 
+ * This function does not change the bottom row elements.
+ */
+static inline void vmathM4SetTranslation_V( VmathMatrix4 *result, VmathVector3 translateVec );
+
+/*
+ * Get the translation component of a 4x4 matrix
+ */
+static inline VmathVector3 vmathM4GetTranslation_V( VmathMatrix4 mat );
+
+/*
+ * Set column 0 of a 4x4 matrix
+ */
+static inline void vmathM4SetCol0_V( VmathMatrix4 *result, VmathVector4 col0 );
+
+/*
+ * Set column 1 of a 4x4 matrix
+ */
+static inline void vmathM4SetCol1_V( VmathMatrix4 *result, VmathVector4 col1 );
+
+/*
+ * Set column 2 of a 4x4 matrix
+ */
+static inline void vmathM4SetCol2_V( VmathMatrix4 *result, VmathVector4 col2 );
+
+/*
+ * Set column 3 of a 4x4 matrix
+ */
+static inline void vmathM4SetCol3_V( VmathMatrix4 *result, VmathVector4 col3 );
+
+/*
+ * Get column 0 of a 4x4 matrix
+ */
+static inline VmathVector4 vmathM4GetCol0_V( VmathMatrix4 mat );
+
+/*
+ * Get column 1 of a 4x4 matrix
+ */
+static inline VmathVector4 vmathM4GetCol1_V( VmathMatrix4 mat );
+
+/*
+ * Get column 2 of a 4x4 matrix
+ */
+static inline VmathVector4 vmathM4GetCol2_V( VmathMatrix4 mat );
+
+/*
+ * Get column 3 of a 4x4 matrix
+ */
+static inline VmathVector4 vmathM4GetCol3_V( VmathMatrix4 mat );
+
+/*
+ * Set the column of a 4x4 matrix referred to by the specified index
+ */
+static inline void vmathM4SetCol_V( VmathMatrix4 *result, int col, VmathVector4 vec );
+
+/*
+ * Set the row of a 4x4 matrix referred to by the specified index
+ */
+static inline void vmathM4SetRow_V( VmathMatrix4 *result, int row, VmathVector4 vec );
+
+/*
+ * Get the column of a 4x4 matrix referred to by the specified index
+ */
+static inline VmathVector4 vmathM4GetCol_V( VmathMatrix4 mat, int col );
+
+/*
+ * Get the row of a 4x4 matrix referred to by the specified index
+ */
+static inline VmathVector4 vmathM4GetRow_V( VmathMatrix4 mat, int row );
+
+/*
+ * Set the element of a 4x4 matrix referred to by column and row indices
+ */
+static inline void vmathM4SetElem_V( VmathMatrix4 *result, int col, int row, float val );
+
+/*
+ * Get the element of a 4x4 matrix referred to by column and row indices
+ */
+static inline float vmathM4GetElem_V( VmathMatrix4 mat, int col, int row );
+
+/*
+ * Add two 4x4 matrices
+ */
+static inline VmathMatrix4 vmathM4Add_V( VmathMatrix4 mat0, VmathMatrix4 mat1 );
+
+/*
+ * Subtract a 4x4 matrix from another 4x4 matrix
+ */
+static inline VmathMatrix4 vmathM4Sub_V( VmathMatrix4 mat0, VmathMatrix4 mat1 );
+
+/*
+ * Negate all elements of a 4x4 matrix
+ */
+static inline VmathMatrix4 vmathM4Neg_V( VmathMatrix4 mat );
+
+/*
+ * Multiply a 4x4 matrix by a scalar
+ */
+static inline VmathMatrix4 vmathM4ScalarMul_V( VmathMatrix4 mat, float scalar );
+
+/*
+ * Multiply a 4x4 matrix by a 4-D vector
+ */
+static inline VmathVector4 vmathM4MulV4_V( VmathMatrix4 mat, VmathVector4 vec );
+
+/*
+ * Multiply a 4x4 matrix by a 3-D vector
+ */
+static inline VmathVector4 vmathM4MulV3_V( VmathMatrix4 mat, VmathVector3 vec );
+
+/*
+ * Multiply a 4x4 matrix by a 3-D point
+ */
+static inline VmathVector4 vmathM4MulP3_V( VmathMatrix4 mat, VmathPoint3 pnt );
+
+/*
+ * Multiply two 4x4 matrices
+ */
+static inline VmathMatrix4 vmathM4Mul_V( VmathMatrix4 mat0, VmathMatrix4 mat1 );
+
+/*
+ * Multiply a 4x4 matrix by a 3x4 transformation matrix
+ */
+static inline VmathMatrix4 vmathM4MulT3_V( VmathMatrix4 mat, VmathTransform3 tfrm );
+
+/*
+ * Construct an identity 4x4 matrix (takes no arguments)
+ */
+static inline VmathMatrix4 vmathM4MakeIdentity_V( void );
+
+/*
+ * Construct a 4x4 matrix to rotate around the x axis
+ */
+static inline VmathMatrix4 vmathM4MakeRotationX_V( float radians );
+
+/*
+ * Construct a 4x4 matrix to rotate around the y axis
+ */
+static inline VmathMatrix4 vmathM4MakeRotationY_V( float radians );
+
+/*
+ * Construct a 4x4 matrix to rotate around the z axis
+ */
+static inline VmathMatrix4 vmathM4MakeRotationZ_V( float radians );
+
+/*
+ * Construct a 4x4 matrix to rotate around the x, y, and z axes
+ */
+static inline VmathMatrix4 vmathM4MakeRotationZYX_V( VmathVector3 radiansXYZ );
+
+/*
+ * Construct a 4x4 matrix to rotate around a unit-length 3-D vector
+ */
+static inline VmathMatrix4 vmathM4MakeRotationAxis_V( float radians, VmathVector3 unitVec );
+
+/*
+ * Construct a 4x4 rotation matrix from a unit-length quaternion
+ */
+static inline VmathMatrix4 vmathM4MakeRotationQ_V( VmathQuat unitQuat );
+
+/*
+ * Construct a 4x4 matrix to perform scaling
+ */
+static inline VmathMatrix4 vmathM4MakeScale_V( VmathVector3 scaleVec );
+
+/*
+ * Construct a 4x4 matrix to perform translation
+ */
+static inline VmathMatrix4 vmathM4MakeTranslation_V( VmathVector3 translateVec );
+
+/*
+ * Construct viewing matrix based on eye position, position looked at, and up direction
+ */
+static inline VmathMatrix4 vmathM4MakeLookAt_V( VmathPoint3 eyePos, VmathPoint3 lookAtPos, VmathVector3 upVec );
+
+/*
+ * Construct a perspective projection matrix
+ */
+static inline VmathMatrix4 vmathM4MakePerspective_V( float fovyRadians, float aspect, float zNear, float zFar );
+
+/*
+ * Construct a perspective projection matrix based on frustum
+ */
+static inline VmathMatrix4 vmathM4MakeFrustum_V( float left, float right, float bottom, float top, float zNear, float zFar );
+
+/*
+ * Construct an orthographic projection matrix
+ */
+static inline VmathMatrix4 vmathM4MakeOrthographic_V( float left, float right, float bottom, float top, float zNear, float zFar );
+
+/*
+ * Append (post-multiply) a scale transformation to a 4x4 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline VmathMatrix4 vmathM4AppendScale_V( VmathMatrix4 mat, VmathVector3 scaleVec );
+
+/*
+ * Prepend (pre-multiply) a scale transformation to a 4x4 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline VmathMatrix4 vmathM4PrependScale_V( VmathVector3 scaleVec, VmathMatrix4 mat );
+
+/*
+ * Multiply two 4x4 matrices per element
+ */
+static inline VmathMatrix4 vmathM4MulPerElem_V( VmathMatrix4 mat0, VmathMatrix4 mat1 );
+
+/*
+ * Compute the absolute value of a 4x4 matrix per element
+ */
+static inline VmathMatrix4 vmathM4AbsPerElem_V( VmathMatrix4 mat );
+
+/*
+ * Transpose of a 4x4 matrix
+ */
+static inline VmathMatrix4 vmathM4Transpose_V( VmathMatrix4 mat );
+
+/*
+ * Compute the inverse of a 4x4 matrix
+ * NOTE: 
+ * Result is unpredictable when the determinant of mat is equal to or near 0.
+ */
+static inline VmathMatrix4 vmathM4Inverse_V( VmathMatrix4 mat );
+
+/*
+ * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix
+ * NOTE: 
+ * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.  The result is unpredictable when the determinant of mat is equal to or near 0.
+ */
+static inline VmathMatrix4 vmathM4AffineInverse_V( VmathMatrix4 mat );
+
+/*
+ * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix
+ * NOTE: 
+ * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.
+ */
+static inline VmathMatrix4 vmathM4OrthoInverse_V( VmathMatrix4 mat );
+
+/*
+ * Determinant of a 4x4 matrix
+ */
+static inline float vmathM4Determinant_V( VmathMatrix4 mat );
+
+/*
+ * Conditionally select between two 4x4 matrices
+ */
+static inline VmathMatrix4 vmathM4Select_V( VmathMatrix4 mat0, VmathMatrix4 mat1, unsigned int select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 4x4 matrix
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathM4Print_V( VmathMatrix4 mat );
+
+/*
+ * Print a 4x4 matrix and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathM4Prints_V( VmathMatrix4 mat, const char *name );
+
+#endif
+
+/*
+ * Construct a 3x4 transformation matrix containing the specified columns
+ */
+static inline VmathTransform3 vmathT3MakeFromCols_V( VmathVector3 col0, VmathVector3 col1, VmathVector3 col2, VmathVector3 col3 );
+
+/*
+ * Construct a 3x4 transformation matrix from a 3x3 matrix and a 3-D vector
+ */
+static inline VmathTransform3 vmathT3MakeFromM3V3_V( VmathMatrix3 tfrm, VmathVector3 translateVec );
+
+/*
+ * Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector
+ */
+static inline VmathTransform3 vmathT3MakeFromQV3_V( VmathQuat unitQuat, VmathVector3 translateVec );
+
+/*
+ * Set all elements of a 3x4 transformation matrix to the same scalar value
+ */
+static inline VmathTransform3 vmathT3MakeFromScalar_V( float scalar );
+
+/*
+ * Set the upper-left 3x3 submatrix of a 3x4 transformation matrix
+ */
+static inline void vmathT3SetUpper3x3_V( VmathTransform3 *result, VmathMatrix3 mat3 );
+
+/*
+ * Get the upper-left 3x3 submatrix of a 3x4 transformation matrix
+ */
+static inline VmathMatrix3 vmathT3GetUpper3x3_V( VmathTransform3 tfrm );
+
+/*
+ * Set the translation component of a 3x4 transformation matrix
+ */
+static inline void vmathT3SetTranslation_V( VmathTransform3 *result, VmathVector3 translateVec );
+
+/*
+ * Get the translation component of a 3x4 transformation matrix
+ */
+static inline VmathVector3 vmathT3GetTranslation_V( VmathTransform3 tfrm );
+
+/*
+ * Set column 0 of a 3x4 transformation matrix
+ */
+static inline void vmathT3SetCol0_V( VmathTransform3 *result, VmathVector3 col0 );
+
+/*
+ * Set column 1 of a 3x4 transformation matrix
+ */
+static inline void vmathT3SetCol1_V( VmathTransform3 *result, VmathVector3 col1 );
+
+/*
+ * Set column 2 of a 3x4 transformation matrix
+ */
+static inline void vmathT3SetCol2_V( VmathTransform3 *result, VmathVector3 col2 );
+
+/*
+ * Set column 3 of a 3x4 transformation matrix
+ */
+static inline void vmathT3SetCol3_V( VmathTransform3 *result, VmathVector3 col3 );
+
+/*
+ * Get column 0 of a 3x4 transformation matrix
+ */
+static inline VmathVector3 vmathT3GetCol0_V( VmathTransform3 tfrm );
+
+/*
+ * Get column 1 of a 3x4 transformation matrix
+ */
+static inline VmathVector3 vmathT3GetCol1_V( VmathTransform3 tfrm );
+
+/*
+ * Get column 2 of a 3x4 transformation matrix
+ */
+static inline VmathVector3 vmathT3GetCol2_V( VmathTransform3 tfrm );
+
+/*
+ * Get column 3 of a 3x4 transformation matrix
+ */
+static inline VmathVector3 vmathT3GetCol3_V( VmathTransform3 tfrm );
+
+/*
+ * Set the column of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline void vmathT3SetCol_V( VmathTransform3 *result, int col, VmathVector3 vec );
+
+/*
+ * Set the row of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline void vmathT3SetRow_V( VmathTransform3 *result, int row, VmathVector4 vec );
+
+/*
+ * Get the column of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline VmathVector3 vmathT3GetCol_V( VmathTransform3 tfrm, int col );
+
+/*
+ * Get the row of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline VmathVector4 vmathT3GetRow_V( VmathTransform3 tfrm, int row );
+
+/*
+ * Set the element of a 3x4 transformation matrix referred to by column and row indices
+ */
+static inline void vmathT3SetElem_V( VmathTransform3 *result, int col, int row, float val );
+
+/*
+ * Get the element of a 3x4 transformation matrix referred to by column and row indices
+ */
+static inline float vmathT3GetElem_V( VmathTransform3 tfrm, int col, int row );
+
+/*
+ * Multiply a 3x4 transformation matrix by a 3-D vector
+ */
+static inline VmathVector3 vmathT3MulV3_V( VmathTransform3 tfrm, VmathVector3 vec );
+
+/*
+ * Multiply a 3x4 transformation matrix by a 3-D point
+ */
+static inline VmathPoint3 vmathT3MulP3_V( VmathTransform3 tfrm, VmathPoint3 pnt );
+
+/*
+ * Multiply two 3x4 transformation matrices
+ */
+static inline VmathTransform3 vmathT3Mul_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1 );
+
+/*
+ * Construct an identity 3x4 transformation matrix (takes no arguments)
+ */
+static inline VmathTransform3 vmathT3MakeIdentity_V( void );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the x axis
+ */
+static inline VmathTransform3 vmathT3MakeRotationX_V( float radians );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the y axis
+ */
+static inline VmathTransform3 vmathT3MakeRotationY_V( float radians );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the z axis
+ */
+static inline VmathTransform3 vmathT3MakeRotationZ_V( float radians );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the x, y, and z axes
+ */
+static inline VmathTransform3 vmathT3MakeRotationZYX_V( VmathVector3 radiansXYZ );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector
+ */
+static inline VmathTransform3 vmathT3MakeRotationAxis_V( float radians, VmathVector3 unitVec );
+
+/*
+ * Construct a 3x4 transformation matrix for the rotation given by a unit-length quaternion
+ */
+static inline VmathTransform3 vmathT3MakeRotationQ_V( VmathQuat unitQuat );
+
+/*
+ * Construct a 3x4 transformation matrix to perform scaling
+ */
+static inline VmathTransform3 vmathT3MakeScale_V( VmathVector3 scaleVec );
+
+/*
+ * Construct a 3x4 transformation matrix to perform translation
+ */
+static inline VmathTransform3 vmathT3MakeTranslation_V( VmathVector3 translateVec );
+
+/*
+ * Append (post-multiply) a scale transformation to a 3x4 transformation matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline VmathTransform3 vmathT3AppendScale_V( VmathTransform3 tfrm, VmathVector3 scaleVec );
+
+/*
+ * Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline VmathTransform3 vmathT3PrependScale_V( VmathVector3 scaleVec, VmathTransform3 tfrm );
+
+/*
+ * Multiply two 3x4 transformation matrices per element
+ */
+static inline VmathTransform3 vmathT3MulPerElem_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1 );
+
+/*
+ * Compute the absolute value of a 3x4 transformation matrix per element
+ */
+static inline VmathTransform3 vmathT3AbsPerElem_V( VmathTransform3 tfrm );
+
+/*
+ * Inverse of a 3x4 transformation matrix
+ * NOTE: 
+ * Result is unpredictable when the determinant of the left 3x3 submatrix is equal to or near 0.
+ */
+static inline VmathTransform3 vmathT3Inverse_V( VmathTransform3 tfrm );
+
+/*
+ * Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix
+ * NOTE: 
+ * This can be used to achieve better performance than a general inverse when the specified 3x4 transformation matrix meets the given restrictions.
+ */
+static inline VmathTransform3 vmathT3OrthoInverse_V( VmathTransform3 tfrm );
+
+/*
+ * Conditionally select between two 3x4 transformation matrices
+ */
+static inline VmathTransform3 vmathT3Select_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1, unsigned int select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3x4 transformation matrix
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathT3Print_V( VmathTransform3 tfrm );
+
+/*
+ * Print a 3x4 transformation matrix and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathT3Prints_V( VmathTransform3 tfrm, const char *name );
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#include "vectormath_aos.h"
+#include "vec_aos_v.h"
+#include "quat_aos_v.h"
+#include "mat_aos_v.h"
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/scalar/cpp/mat_aos.h b/Extras/vectormathlibrary/include/vectormath/scalar/cpp/mat_aos.h
index 5d5d012d9..0f2a85057 100644
--- a/Extras/vectormathlibrary/include/vectormath/scalar/cpp/mat_aos.h
+++ b/Extras/vectormathlibrary/include/vectormath/scalar/cpp/mat_aos.h
@@ -1,1643 +1,1643 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_MAT_AOS_CPP_H
-#define _VECTORMATH_MAT_AOS_CPP_H
-
-namespace Vectormath {
-namespace Aos {
-
-//-----------------------------------------------------------------------------
-// Constants
-
-#define _VECTORMATH_PI_OVER_2 1.570796327f
-
-//-----------------------------------------------------------------------------
-// Definitions
-
-inline Matrix3::Matrix3( const Matrix3 & mat )
-{
-    mCol0 = mat.mCol0;
-    mCol1 = mat.mCol1;
-    mCol2 = mat.mCol2;
-}
-
-inline Matrix3::Matrix3( float scalar )
-{
-    mCol0 = Vector3( scalar );
-    mCol1 = Vector3( scalar );
-    mCol2 = Vector3( scalar );
-}
-
-inline Matrix3::Matrix3( const Quat & unitQuat )
-{
-    float qx, qy, qz, qw, qx2, qy2, qz2, qxqx2, qyqy2, qzqz2, qxqy2, qyqz2, qzqw2, qxqz2, qyqw2, qxqw2;
-    qx = unitQuat.getX();
-    qy = unitQuat.getY();
-    qz = unitQuat.getZ();
-    qw = unitQuat.getW();
-    qx2 = ( qx + qx );
-    qy2 = ( qy + qy );
-    qz2 = ( qz + qz );
-    qxqx2 = ( qx * qx2 );
-    qxqy2 = ( qx * qy2 );
-    qxqz2 = ( qx * qz2 );
-    qxqw2 = ( qw * qx2 );
-    qyqy2 = ( qy * qy2 );
-    qyqz2 = ( qy * qz2 );
-    qyqw2 = ( qw * qy2 );
-    qzqz2 = ( qz * qz2 );
-    qzqw2 = ( qw * qz2 );
-    mCol0 = Vector3( ( ( 1.0f - qyqy2 ) - qzqz2 ), ( qxqy2 + qzqw2 ), ( qxqz2 - qyqw2 ) );
-    mCol1 = Vector3( ( qxqy2 - qzqw2 ), ( ( 1.0f - qxqx2 ) - qzqz2 ), ( qyqz2 + qxqw2 ) );
-    mCol2 = Vector3( ( qxqz2 + qyqw2 ), ( qyqz2 - qxqw2 ), ( ( 1.0f - qxqx2 ) - qyqy2 ) );
-}
-
-inline Matrix3::Matrix3( const Vector3 & _col0, const Vector3 & _col1, const Vector3 & _col2 )
-{
-    mCol0 = _col0;
-    mCol1 = _col1;
-    mCol2 = _col2;
-}
-
-inline Matrix3 & Matrix3::setCol0( const Vector3 & _col0 )
-{
-    mCol0 = _col0;
-    return *this;
-}
-
-inline Matrix3 & Matrix3::setCol1( const Vector3 & _col1 )
-{
-    mCol1 = _col1;
-    return *this;
-}
-
-inline Matrix3 & Matrix3::setCol2( const Vector3 & _col2 )
-{
-    mCol2 = _col2;
-    return *this;
-}
-
-inline Matrix3 & Matrix3::setCol( int col, const Vector3 & vec )
-{
-    *(&mCol0 + col) = vec;
-    return *this;
-}
-
-inline Matrix3 & Matrix3::setRow( int row, const Vector3 & vec )
-{
-    mCol0.setElem( row, vec.getElem( 0 ) );
-    mCol1.setElem( row, vec.getElem( 1 ) );
-    mCol2.setElem( row, vec.getElem( 2 ) );
-    return *this;
-}
-
-inline Matrix3 & Matrix3::setElem( int col, int row, float val )
-{
-    Vector3 tmpV3_0;
-    tmpV3_0 = this->getCol( col );
-    tmpV3_0.setElem( row, val );
-    this->setCol( col, tmpV3_0 );
-    return *this;
-}
-
-inline float Matrix3::getElem( int col, int row ) const
-{
-    return this->getCol( col ).getElem( row );
-}
-
-inline const Vector3 Matrix3::getCol0( ) const
-{
-    return mCol0;
-}
-
-inline const Vector3 Matrix3::getCol1( ) const
-{
-    return mCol1;
-}
-
-inline const Vector3 Matrix3::getCol2( ) const
-{
-    return mCol2;
-}
-
-inline const Vector3 Matrix3::getCol( int col ) const
-{
-    return *(&mCol0 + col);
-}
-
-inline const Vector3 Matrix3::getRow( int row ) const
-{
-    return Vector3( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ) );
-}
-
-inline Vector3 & Matrix3::operator []( int col )
-{
-    return *(&mCol0 + col);
-}
-
-inline const Vector3 Matrix3::operator []( int col ) const
-{
-    return *(&mCol0 + col);
-}
-
-inline Matrix3 & Matrix3::operator =( const Matrix3 & mat )
-{
-    mCol0 = mat.mCol0;
-    mCol1 = mat.mCol1;
-    mCol2 = mat.mCol2;
-    return *this;
-}
-
-inline const Matrix3 transpose( const Matrix3 & mat )
-{
-    return Matrix3(
-        Vector3( mat.getCol0().getX(), mat.getCol1().getX(), mat.getCol2().getX() ),
-        Vector3( mat.getCol0().getY(), mat.getCol1().getY(), mat.getCol2().getY() ),
-        Vector3( mat.getCol0().getZ(), mat.getCol1().getZ(), mat.getCol2().getZ() )
-    );
-}
-
-inline const Matrix3 inverse( const Matrix3 & mat )
-{
-    Vector3 tmp0, tmp1, tmp2;
-    float detinv;
-    tmp0 = cross( mat.getCol1(), mat.getCol2() );
-    tmp1 = cross( mat.getCol2(), mat.getCol0() );
-    tmp2 = cross( mat.getCol0(), mat.getCol1() );
-    detinv = ( 1.0f / dot( mat.getCol2(), tmp2 ) );
-    return Matrix3(
-        Vector3( ( tmp0.getX() * detinv ), ( tmp1.getX() * detinv ), ( tmp2.getX() * detinv ) ),
-        Vector3( ( tmp0.getY() * detinv ), ( tmp1.getY() * detinv ), ( tmp2.getY() * detinv ) ),
-        Vector3( ( tmp0.getZ() * detinv ), ( tmp1.getZ() * detinv ), ( tmp2.getZ() * detinv ) )
-    );
-}
-
-inline float determinant( const Matrix3 & mat )
-{
-    return dot( mat.getCol2(), cross( mat.getCol0(), mat.getCol1() ) );
-}
-
-inline const Matrix3 Matrix3::operator +( const Matrix3 & mat ) const
-{
-    return Matrix3(
-        ( mCol0 + mat.mCol0 ),
-        ( mCol1 + mat.mCol1 ),
-        ( mCol2 + mat.mCol2 )
-    );
-}
-
-inline const Matrix3 Matrix3::operator -( const Matrix3 & mat ) const
-{
-    return Matrix3(
-        ( mCol0 - mat.mCol0 ),
-        ( mCol1 - mat.mCol1 ),
-        ( mCol2 - mat.mCol2 )
-    );
-}
-
-inline Matrix3 & Matrix3::operator +=( const Matrix3 & mat )
-{
-    *this = *this + mat;
-    return *this;
-}
-
-inline Matrix3 & Matrix3::operator -=( const Matrix3 & mat )
-{
-    *this = *this - mat;
-    return *this;
-}
-
-inline const Matrix3 Matrix3::operator -( ) const
-{
-    return Matrix3(
-        ( -mCol0 ),
-        ( -mCol1 ),
-        ( -mCol2 )
-    );
-}
-
-inline const Matrix3 absPerElem( const Matrix3 & mat )
-{
-    return Matrix3(
-        absPerElem( mat.getCol0() ),
-        absPerElem( mat.getCol1() ),
-        absPerElem( mat.getCol2() )
-    );
-}
-
-inline const Matrix3 Matrix3::operator *( float scalar ) const
-{
-    return Matrix3(
-        ( mCol0 * scalar ),
-        ( mCol1 * scalar ),
-        ( mCol2 * scalar )
-    );
-}
-
-inline Matrix3 & Matrix3::operator *=( float scalar )
-{
-    *this = *this * scalar;
-    return *this;
-}
-
-inline const Matrix3 operator *( float scalar, const Matrix3 & mat )
-{
-    return mat * scalar;
-}
-
-inline const Vector3 Matrix3::operator *( const Vector3 & vec ) const
-{
-    return Vector3(
-        ( ( ( mCol0.getX() * vec.getX() ) + ( mCol1.getX() * vec.getY() ) ) + ( mCol2.getX() * vec.getZ() ) ),
-        ( ( ( mCol0.getY() * vec.getX() ) + ( mCol1.getY() * vec.getY() ) ) + ( mCol2.getY() * vec.getZ() ) ),
-        ( ( ( mCol0.getZ() * vec.getX() ) + ( mCol1.getZ() * vec.getY() ) ) + ( mCol2.getZ() * vec.getZ() ) )
-    );
-}
-
-inline const Matrix3 Matrix3::operator *( const Matrix3 & mat ) const
-{
-    return Matrix3(
-        ( *this * mat.mCol0 ),
-        ( *this * mat.mCol1 ),
-        ( *this * mat.mCol2 )
-    );
-}
-
-inline Matrix3 & Matrix3::operator *=( const Matrix3 & mat )
-{
-    *this = *this * mat;
-    return *this;
-}
-
-inline const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 )
-{
-    return Matrix3(
-        mulPerElem( mat0.getCol0(), mat1.getCol0() ),
-        mulPerElem( mat0.getCol1(), mat1.getCol1() ),
-        mulPerElem( mat0.getCol2(), mat1.getCol2() )
-    );
-}
-
-inline const Matrix3 Matrix3::identity( )
-{
-    return Matrix3(
-        Vector3::xAxis( ),
-        Vector3::yAxis( ),
-        Vector3::zAxis( )
-    );
-}
-
-inline const Matrix3 Matrix3::rotationX( float radians )
-{
-    float s, c;
-    s = sinf( radians );
-    c = cosf( radians );
-    return Matrix3(
-        Vector3::xAxis( ),
-        Vector3( 0.0f, c, s ),
-        Vector3( 0.0f, -s, c )
-    );
-}
-
-inline const Matrix3 Matrix3::rotationY( float radians )
-{
-    float s, c;
-    s = sinf( radians );
-    c = cosf( radians );
-    return Matrix3(
-        Vector3( c, 0.0f, -s ),
-        Vector3::yAxis( ),
-        Vector3( s, 0.0f, c )
-    );
-}
-
-inline const Matrix3 Matrix3::rotationZ( float radians )
-{
-    float s, c;
-    s = sinf( radians );
-    c = cosf( radians );
-    return Matrix3(
-        Vector3( c, s, 0.0f ),
-        Vector3( -s, c, 0.0f ),
-        Vector3::zAxis( )
-    );
-}
-
-inline const Matrix3 Matrix3::rotationZYX( const Vector3 & radiansXYZ )
-{
-    float sX, cX, sY, cY, sZ, cZ, tmp0, tmp1;
-    sX = sinf( radiansXYZ.getX() );
-    cX = cosf( radiansXYZ.getX() );
-    sY = sinf( radiansXYZ.getY() );
-    cY = cosf( radiansXYZ.getY() );
-    sZ = sinf( radiansXYZ.getZ() );
-    cZ = cosf( radiansXYZ.getZ() );
-    tmp0 = ( cZ * sY );
-    tmp1 = ( sZ * sY );
-    return Matrix3(
-        Vector3( ( cZ * cY ), ( sZ * cY ), -sY ),
-        Vector3( ( ( tmp0 * sX ) - ( sZ * cX ) ), ( ( tmp1 * sX ) + ( cZ * cX ) ), ( cY * sX ) ),
-        Vector3( ( ( tmp0 * cX ) + ( sZ * sX ) ), ( ( tmp1 * cX ) - ( cZ * sX ) ), ( cY * cX ) )
-    );
-}
-
-inline const Matrix3 Matrix3::rotation( float radians, const Vector3 & unitVec )
-{
-    float x, y, z, s, c, oneMinusC, xy, yz, zx;
-    s = sinf( radians );
-    c = cosf( radians );
-    x = unitVec.getX();
-    y = unitVec.getY();
-    z = unitVec.getZ();
-    xy = ( x * y );
-    yz = ( y * z );
-    zx = ( z * x );
-    oneMinusC = ( 1.0f - c );
-    return Matrix3(
-        Vector3( ( ( ( x * x ) * oneMinusC ) + c ), ( ( xy * oneMinusC ) + ( z * s ) ), ( ( zx * oneMinusC ) - ( y * s ) ) ),
-        Vector3( ( ( xy * oneMinusC ) - ( z * s ) ), ( ( ( y * y ) * oneMinusC ) + c ), ( ( yz * oneMinusC ) + ( x * s ) ) ),
-        Vector3( ( ( zx * oneMinusC ) + ( y * s ) ), ( ( yz * oneMinusC ) - ( x * s ) ), ( ( ( z * z ) * oneMinusC ) + c ) )
-    );
-}
-
-inline const Matrix3 Matrix3::rotation( const Quat & unitQuat )
-{
-    return Matrix3( unitQuat );
-}
-
-inline const Matrix3 Matrix3::scale( const Vector3 & scaleVec )
-{
-    return Matrix3(
-        Vector3( scaleVec.getX(), 0.0f, 0.0f ),
-        Vector3( 0.0f, scaleVec.getY(), 0.0f ),
-        Vector3( 0.0f, 0.0f, scaleVec.getZ() )
-    );
-}
-
-inline const Matrix3 appendScale( const Matrix3 & mat, const Vector3 & scaleVec )
-{
-    return Matrix3(
-        ( mat.getCol0() * scaleVec.getX( ) ),
-        ( mat.getCol1() * scaleVec.getY( ) ),
-        ( mat.getCol2() * scaleVec.getZ( ) )
-    );
-}
-
-inline const Matrix3 prependScale( const Vector3 & scaleVec, const Matrix3 & mat )
-{
-    return Matrix3(
-        mulPerElem( mat.getCol0(), scaleVec ),
-        mulPerElem( mat.getCol1(), scaleVec ),
-        mulPerElem( mat.getCol2(), scaleVec )
-    );
-}
-
-inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, bool select1 )
-{
-    return Matrix3(
-        select( mat0.getCol0(), mat1.getCol0(), select1 ),
-        select( mat0.getCol1(), mat1.getCol1(), select1 ),
-        select( mat0.getCol2(), mat1.getCol2(), select1 )
-    );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( const Matrix3 & mat )
-{
-    print( mat.getRow( 0 ) );
-    print( mat.getRow( 1 ) );
-    print( mat.getRow( 2 ) );
-}
-
-inline void print( const Matrix3 & mat, const char * name )
-{
-    printf("%s:\n", name);
-    print( mat );
-}
-
-#endif
-
-inline Matrix4::Matrix4( const Matrix4 & mat )
-{
-    mCol0 = mat.mCol0;
-    mCol1 = mat.mCol1;
-    mCol2 = mat.mCol2;
-    mCol3 = mat.mCol3;
-}
-
-inline Matrix4::Matrix4( float scalar )
-{
-    mCol0 = Vector4( scalar );
-    mCol1 = Vector4( scalar );
-    mCol2 = Vector4( scalar );
-    mCol3 = Vector4( scalar );
-}
-
-inline Matrix4::Matrix4( const Transform3 & mat )
-{
-    mCol0 = Vector4( mat.getCol0(), 0.0f );
-    mCol1 = Vector4( mat.getCol1(), 0.0f );
-    mCol2 = Vector4( mat.getCol2(), 0.0f );
-    mCol3 = Vector4( mat.getCol3(), 1.0f );
-}
-
-inline Matrix4::Matrix4( const Vector4 & _col0, const Vector4 & _col1, const Vector4 & _col2, const Vector4 & _col3 )
-{
-    mCol0 = _col0;
-    mCol1 = _col1;
-    mCol2 = _col2;
-    mCol3 = _col3;
-}
-
-inline Matrix4::Matrix4( const Matrix3 & mat, const Vector3 & translateVec )
-{
-    mCol0 = Vector4( mat.getCol0(), 0.0f );
-    mCol1 = Vector4( mat.getCol1(), 0.0f );
-    mCol2 = Vector4( mat.getCol2(), 0.0f );
-    mCol3 = Vector4( translateVec, 1.0f );
-}
-
-inline Matrix4::Matrix4( const Quat & unitQuat, const Vector3 & translateVec )
-{
-    Matrix3 mat;
-    mat = Matrix3( unitQuat );
-    mCol0 = Vector4( mat.getCol0(), 0.0f );
-    mCol1 = Vector4( mat.getCol1(), 0.0f );
-    mCol2 = Vector4( mat.getCol2(), 0.0f );
-    mCol3 = Vector4( translateVec, 1.0f );
-}
-
-inline Matrix4 & Matrix4::setCol0( const Vector4 & _col0 )
-{
-    mCol0 = _col0;
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setCol1( const Vector4 & _col1 )
-{
-    mCol1 = _col1;
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setCol2( const Vector4 & _col2 )
-{
-    mCol2 = _col2;
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setCol3( const Vector4 & _col3 )
-{
-    mCol3 = _col3;
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setCol( int col, const Vector4 & vec )
-{
-    *(&mCol0 + col) = vec;
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setRow( int row, const Vector4 & vec )
-{
-    mCol0.setElem( row, vec.getElem( 0 ) );
-    mCol1.setElem( row, vec.getElem( 1 ) );
-    mCol2.setElem( row, vec.getElem( 2 ) );
-    mCol3.setElem( row, vec.getElem( 3 ) );
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setElem( int col, int row, float val )
-{
-    Vector4 tmpV3_0;
-    tmpV3_0 = this->getCol( col );
-    tmpV3_0.setElem( row, val );
-    this->setCol( col, tmpV3_0 );
-    return *this;
-}
-
-inline float Matrix4::getElem( int col, int row ) const
-{
-    return this->getCol( col ).getElem( row );
-}
-
-inline const Vector4 Matrix4::getCol0( ) const
-{
-    return mCol0;
-}
-
-inline const Vector4 Matrix4::getCol1( ) const
-{
-    return mCol1;
-}
-
-inline const Vector4 Matrix4::getCol2( ) const
-{
-    return mCol2;
-}
-
-inline const Vector4 Matrix4::getCol3( ) const
-{
-    return mCol3;
-}
-
-inline const Vector4 Matrix4::getCol( int col ) const
-{
-    return *(&mCol0 + col);
-}
-
-inline const Vector4 Matrix4::getRow( int row ) const
-{
-    return Vector4( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ), mCol3.getElem( row ) );
-}
-
-inline Vector4 & Matrix4::operator []( int col )
-{
-    return *(&mCol0 + col);
-}
-
-inline const Vector4 Matrix4::operator []( int col ) const
-{
-    return *(&mCol0 + col);
-}
-
-inline Matrix4 & Matrix4::operator =( const Matrix4 & mat )
-{
-    mCol0 = mat.mCol0;
-    mCol1 = mat.mCol1;
-    mCol2 = mat.mCol2;
-    mCol3 = mat.mCol3;
-    return *this;
-}
-
-inline const Matrix4 transpose( const Matrix4 & mat )
-{
-    return Matrix4(
-        Vector4( mat.getCol0().getX(), mat.getCol1().getX(), mat.getCol2().getX(), mat.getCol3().getX() ),
-        Vector4( mat.getCol0().getY(), mat.getCol1().getY(), mat.getCol2().getY(), mat.getCol3().getY() ),
-        Vector4( mat.getCol0().getZ(), mat.getCol1().getZ(), mat.getCol2().getZ(), mat.getCol3().getZ() ),
-        Vector4( mat.getCol0().getW(), mat.getCol1().getW(), mat.getCol2().getW(), mat.getCol3().getW() )
-    );
-}
-
-inline const Matrix4 inverse( const Matrix4 & mat )
-{
-    Vector4 res0, res1, res2, res3;
-    float mA, mB, mC, mD, mE, mF, mG, mH, mI, mJ, mK, mL, mM, mN, mO, mP, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, detInv;
-    mA = mat.getCol0().getX();
-    mB = mat.getCol0().getY();
-    mC = mat.getCol0().getZ();
-    mD = mat.getCol0().getW();
-    mE = mat.getCol1().getX();
-    mF = mat.getCol1().getY();
-    mG = mat.getCol1().getZ();
-    mH = mat.getCol1().getW();
-    mI = mat.getCol2().getX();
-    mJ = mat.getCol2().getY();
-    mK = mat.getCol2().getZ();
-    mL = mat.getCol2().getW();
-    mM = mat.getCol3().getX();
-    mN = mat.getCol3().getY();
-    mO = mat.getCol3().getZ();
-    mP = mat.getCol3().getW();
-    tmp0 = ( ( mK * mD ) - ( mC * mL ) );
-    tmp1 = ( ( mO * mH ) - ( mG * mP ) );
-    tmp2 = ( ( mB * mK ) - ( mJ * mC ) );
-    tmp3 = ( ( mF * mO ) - ( mN * mG ) );
-    tmp4 = ( ( mJ * mD ) - ( mB * mL ) );
-    tmp5 = ( ( mN * mH ) - ( mF * mP ) );
-    res0.setX( ( ( ( mJ * tmp1 ) - ( mL * tmp3 ) ) - ( mK * tmp5 ) ) );
-    res0.setY( ( ( ( mN * tmp0 ) - ( mP * tmp2 ) ) - ( mO * tmp4 ) ) );
-    res0.setZ( ( ( ( mD * tmp3 ) + ( mC * tmp5 ) ) - ( mB * tmp1 ) ) );
-    res0.setW( ( ( ( mH * tmp2 ) + ( mG * tmp4 ) ) - ( mF * tmp0 ) ) );
-    detInv = ( 1.0f / ( ( ( ( mA * res0.getX() ) + ( mE * res0.getY() ) ) + ( mI * res0.getZ() ) ) + ( mM * res0.getW() ) ) );
-    res1.setX( ( mI * tmp1 ) );
-    res1.setY( ( mM * tmp0 ) );
-    res1.setZ( ( mA * tmp1 ) );
-    res1.setW( ( mE * tmp0 ) );
-    res3.setX( ( mI * tmp3 ) );
-    res3.setY( ( mM * tmp2 ) );
-    res3.setZ( ( mA * tmp3 ) );
-    res3.setW( ( mE * tmp2 ) );
-    res2.setX( ( mI * tmp5 ) );
-    res2.setY( ( mM * tmp4 ) );
-    res2.setZ( ( mA * tmp5 ) );
-    res2.setW( ( mE * tmp4 ) );
-    tmp0 = ( ( mI * mB ) - ( mA * mJ ) );
-    tmp1 = ( ( mM * mF ) - ( mE * mN ) );
-    tmp2 = ( ( mI * mD ) - ( mA * mL ) );
-    tmp3 = ( ( mM * mH ) - ( mE * mP ) );
-    tmp4 = ( ( mI * mC ) - ( mA * mK ) );
-    tmp5 = ( ( mM * mG ) - ( mE * mO ) );
-    res2.setX( ( ( ( mL * tmp1 ) - ( mJ * tmp3 ) ) + res2.getX() ) );
-    res2.setY( ( ( ( mP * tmp0 ) - ( mN * tmp2 ) ) + res2.getY() ) );
-    res2.setZ( ( ( ( mB * tmp3 ) - ( mD * tmp1 ) ) - res2.getZ() ) );
-    res2.setW( ( ( ( mF * tmp2 ) - ( mH * tmp0 ) ) - res2.getW() ) );
-    res3.setX( ( ( ( mJ * tmp5 ) - ( mK * tmp1 ) ) + res3.getX() ) );
-    res3.setY( ( ( ( mN * tmp4 ) - ( mO * tmp0 ) ) + res3.getY() ) );
-    res3.setZ( ( ( ( mC * tmp1 ) - ( mB * tmp5 ) ) - res3.getZ() ) );
-    res3.setW( ( ( ( mG * tmp0 ) - ( mF * tmp4 ) ) - res3.getW() ) );
-    res1.setX( ( ( ( mK * tmp3 ) - ( mL * tmp5 ) ) - res1.getX() ) );
-    res1.setY( ( ( ( mO * tmp2 ) - ( mP * tmp4 ) ) - res1.getY() ) );
-    res1.setZ( ( ( ( mD * tmp5 ) - ( mC * tmp3 ) ) + res1.getZ() ) );
-    res1.setW( ( ( ( mH * tmp4 ) - ( mG * tmp2 ) ) + res1.getW() ) );
-    return Matrix4(
-        ( res0 * detInv ),
-        ( res1 * detInv ),
-        ( res2 * detInv ),
-        ( res3 * detInv )
-    );
-}
-
-inline const Matrix4 affineInverse( const Matrix4 & mat )
-{
-    Transform3 affineMat;
-    affineMat.setCol0( mat.getCol0().getXYZ( ) );
-    affineMat.setCol1( mat.getCol1().getXYZ( ) );
-    affineMat.setCol2( mat.getCol2().getXYZ( ) );
-    affineMat.setCol3( mat.getCol3().getXYZ( ) );
-    return Matrix4( inverse( affineMat ) );
-}
-
-inline const Matrix4 orthoInverse( const Matrix4 & mat )
-{
-    Transform3 affineMat;
-    affineMat.setCol0( mat.getCol0().getXYZ( ) );
-    affineMat.setCol1( mat.getCol1().getXYZ( ) );
-    affineMat.setCol2( mat.getCol2().getXYZ( ) );
-    affineMat.setCol3( mat.getCol3().getXYZ( ) );
-    return Matrix4( orthoInverse( affineMat ) );
-}
-
-inline float determinant( const Matrix4 & mat )
-{
-    float dx, dy, dz, dw, mA, mB, mC, mD, mE, mF, mG, mH, mI, mJ, mK, mL, mM, mN, mO, mP, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
-    mA = mat.getCol0().getX();
-    mB = mat.getCol0().getY();
-    mC = mat.getCol0().getZ();
-    mD = mat.getCol0().getW();
-    mE = mat.getCol1().getX();
-    mF = mat.getCol1().getY();
-    mG = mat.getCol1().getZ();
-    mH = mat.getCol1().getW();
-    mI = mat.getCol2().getX();
-    mJ = mat.getCol2().getY();
-    mK = mat.getCol2().getZ();
-    mL = mat.getCol2().getW();
-    mM = mat.getCol3().getX();
-    mN = mat.getCol3().getY();
-    mO = mat.getCol3().getZ();
-    mP = mat.getCol3().getW();
-    tmp0 = ( ( mK * mD ) - ( mC * mL ) );
-    tmp1 = ( ( mO * mH ) - ( mG * mP ) );
-    tmp2 = ( ( mB * mK ) - ( mJ * mC ) );
-    tmp3 = ( ( mF * mO ) - ( mN * mG ) );
-    tmp4 = ( ( mJ * mD ) - ( mB * mL ) );
-    tmp5 = ( ( mN * mH ) - ( mF * mP ) );
-    dx = ( ( ( mJ * tmp1 ) - ( mL * tmp3 ) ) - ( mK * tmp5 ) );
-    dy = ( ( ( mN * tmp0 ) - ( mP * tmp2 ) ) - ( mO * tmp4 ) );
-    dz = ( ( ( mD * tmp3 ) + ( mC * tmp5 ) ) - ( mB * tmp1 ) );
-    dw = ( ( ( mH * tmp2 ) + ( mG * tmp4 ) ) - ( mF * tmp0 ) );
-    return ( ( ( ( mA * dx ) + ( mE * dy ) ) + ( mI * dz ) ) + ( mM * dw ) );
-}
-
-inline const Matrix4 Matrix4::operator +( const Matrix4 & mat ) const
-{
-    return Matrix4(
-        ( mCol0 + mat.mCol0 ),
-        ( mCol1 + mat.mCol1 ),
-        ( mCol2 + mat.mCol2 ),
-        ( mCol3 + mat.mCol3 )
-    );
-}
-
-inline const Matrix4 Matrix4::operator -( const Matrix4 & mat ) const
-{
-    return Matrix4(
-        ( mCol0 - mat.mCol0 ),
-        ( mCol1 - mat.mCol1 ),
-        ( mCol2 - mat.mCol2 ),
-        ( mCol3 - mat.mCol3 )
-    );
-}
-
-inline Matrix4 & Matrix4::operator +=( const Matrix4 & mat )
-{
-    *this = *this + mat;
-    return *this;
-}
-
-inline Matrix4 & Matrix4::operator -=( const Matrix4 & mat )
-{
-    *this = *this - mat;
-    return *this;
-}
-
-inline const Matrix4 Matrix4::operator -( ) const
-{
-    return Matrix4(
-        ( -mCol0 ),
-        ( -mCol1 ),
-        ( -mCol2 ),
-        ( -mCol3 )
-    );
-}
-
-inline const Matrix4 absPerElem( const Matrix4 & mat )
-{
-    return Matrix4(
-        absPerElem( mat.getCol0() ),
-        absPerElem( mat.getCol1() ),
-        absPerElem( mat.getCol2() ),
-        absPerElem( mat.getCol3() )
-    );
-}
-
-inline const Matrix4 Matrix4::operator *( float scalar ) const
-{
-    return Matrix4(
-        ( mCol0 * scalar ),
-        ( mCol1 * scalar ),
-        ( mCol2 * scalar ),
-        ( mCol3 * scalar )
-    );
-}
-
-inline Matrix4 & Matrix4::operator *=( float scalar )
-{
-    *this = *this * scalar;
-    return *this;
-}
-
-inline const Matrix4 operator *( float scalar, const Matrix4 & mat )
-{
-    return mat * scalar;
-}
-
-inline const Vector4 Matrix4::operator *( const Vector4 & vec ) const
-{
-    return Vector4(
-        ( ( ( ( mCol0.getX() * vec.getX() ) + ( mCol1.getX() * vec.getY() ) ) + ( mCol2.getX() * vec.getZ() ) ) + ( mCol3.getX() * vec.getW() ) ),
-        ( ( ( ( mCol0.getY() * vec.getX() ) + ( mCol1.getY() * vec.getY() ) ) + ( mCol2.getY() * vec.getZ() ) ) + ( mCol3.getY() * vec.getW() ) ),
-        ( ( ( ( mCol0.getZ() * vec.getX() ) + ( mCol1.getZ() * vec.getY() ) ) + ( mCol2.getZ() * vec.getZ() ) ) + ( mCol3.getZ() * vec.getW() ) ),
-        ( ( ( ( mCol0.getW() * vec.getX() ) + ( mCol1.getW() * vec.getY() ) ) + ( mCol2.getW() * vec.getZ() ) ) + ( mCol3.getW() * vec.getW() ) )
-    );
-}
-
-inline const Vector4 Matrix4::operator *( const Vector3 & vec ) const
-{
-    return Vector4(
-        ( ( ( mCol0.getX() * vec.getX() ) + ( mCol1.getX() * vec.getY() ) ) + ( mCol2.getX() * vec.getZ() ) ),
-        ( ( ( mCol0.getY() * vec.getX() ) + ( mCol1.getY() * vec.getY() ) ) + ( mCol2.getY() * vec.getZ() ) ),
-        ( ( ( mCol0.getZ() * vec.getX() ) + ( mCol1.getZ() * vec.getY() ) ) + ( mCol2.getZ() * vec.getZ() ) ),
-        ( ( ( mCol0.getW() * vec.getX() ) + ( mCol1.getW() * vec.getY() ) ) + ( mCol2.getW() * vec.getZ() ) )
-    );
-}
-
-inline const Vector4 Matrix4::operator *( const Point3 & pnt ) const
-{
-    return Vector4(
-        ( ( ( ( mCol0.getX() * pnt.getX() ) + ( mCol1.getX() * pnt.getY() ) ) + ( mCol2.getX() * pnt.getZ() ) ) + mCol3.getX() ),
-        ( ( ( ( mCol0.getY() * pnt.getX() ) + ( mCol1.getY() * pnt.getY() ) ) + ( mCol2.getY() * pnt.getZ() ) ) + mCol3.getY() ),
-        ( ( ( ( mCol0.getZ() * pnt.getX() ) + ( mCol1.getZ() * pnt.getY() ) ) + ( mCol2.getZ() * pnt.getZ() ) ) + mCol3.getZ() ),
-        ( ( ( ( mCol0.getW() * pnt.getX() ) + ( mCol1.getW() * pnt.getY() ) ) + ( mCol2.getW() * pnt.getZ() ) ) + mCol3.getW() )
-    );
-}
-
-inline const Matrix4 Matrix4::operator *( const Matrix4 & mat ) const
-{
-    return Matrix4(
-        ( *this * mat.mCol0 ),
-        ( *this * mat.mCol1 ),
-        ( *this * mat.mCol2 ),
-        ( *this * mat.mCol3 )
-    );
-}
-
-inline Matrix4 & Matrix4::operator *=( const Matrix4 & mat )
-{
-    *this = *this * mat;
-    return *this;
-}
-
-inline const Matrix4 Matrix4::operator *( const Transform3 & tfrm ) const
-{
-    return Matrix4(
-        ( *this * tfrm.getCol0() ),
-        ( *this * tfrm.getCol1() ),
-        ( *this * tfrm.getCol2() ),
-        ( *this * Point3( tfrm.getCol3() ) )
-    );
-}
-
-inline Matrix4 & Matrix4::operator *=( const Transform3 & tfrm )
-{
-    *this = *this * tfrm;
-    return *this;
-}
-
-inline const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 )
-{
-    return Matrix4(
-        mulPerElem( mat0.getCol0(), mat1.getCol0() ),
-        mulPerElem( mat0.getCol1(), mat1.getCol1() ),
-        mulPerElem( mat0.getCol2(), mat1.getCol2() ),
-        mulPerElem( mat0.getCol3(), mat1.getCol3() )
-    );
-}
-
-inline const Matrix4 Matrix4::identity( )
-{
-    return Matrix4(
-        Vector4::xAxis( ),
-        Vector4::yAxis( ),
-        Vector4::zAxis( ),
-        Vector4::wAxis( )
-    );
-}
-
-inline Matrix4 & Matrix4::setUpper3x3( const Matrix3 & mat3 )
-{
-    mCol0.setXYZ( mat3.getCol0() );
-    mCol1.setXYZ( mat3.getCol1() );
-    mCol2.setXYZ( mat3.getCol2() );
-    return *this;
-}
-
-inline const Matrix3 Matrix4::getUpper3x3( ) const
-{
-    return Matrix3(
-        mCol0.getXYZ( ),
-        mCol1.getXYZ( ),
-        mCol2.getXYZ( )
-    );
-}
-
-inline Matrix4 & Matrix4::setTranslation( const Vector3 & translateVec )
-{
-    mCol3.setXYZ( translateVec );
-    return *this;
-}
-
-inline const Vector3 Matrix4::getTranslation( ) const
-{
-    return mCol3.getXYZ( );
-}
-
-inline const Matrix4 Matrix4::rotationX( float radians )
-{
-    float s, c;
-    s = sinf( radians );
-    c = cosf( radians );
-    return Matrix4(
-        Vector4::xAxis( ),
-        Vector4( 0.0f, c, s, 0.0f ),
-        Vector4( 0.0f, -s, c, 0.0f ),
-        Vector4::wAxis( )
-    );
-}
-
-inline const Matrix4 Matrix4::rotationY( float radians )
-{
-    float s, c;
-    s = sinf( radians );
-    c = cosf( radians );
-    return Matrix4(
-        Vector4( c, 0.0f, -s, 0.0f ),
-        Vector4::yAxis( ),
-        Vector4( s, 0.0f, c, 0.0f ),
-        Vector4::wAxis( )
-    );
-}
-
-inline const Matrix4 Matrix4::rotationZ( float radians )
-{
-    float s, c;
-    s = sinf( radians );
-    c = cosf( radians );
-    return Matrix4(
-        Vector4( c, s, 0.0f, 0.0f ),
-        Vector4( -s, c, 0.0f, 0.0f ),
-        Vector4::zAxis( ),
-        Vector4::wAxis( )
-    );
-}
-
-inline const Matrix4 Matrix4::rotationZYX( const Vector3 & radiansXYZ )
-{
-    float sX, cX, sY, cY, sZ, cZ, tmp0, tmp1;
-    sX = sinf( radiansXYZ.getX() );
-    cX = cosf( radiansXYZ.getX() );
-    sY = sinf( radiansXYZ.getY() );
-    cY = cosf( radiansXYZ.getY() );
-    sZ = sinf( radiansXYZ.getZ() );
-    cZ = cosf( radiansXYZ.getZ() );
-    tmp0 = ( cZ * sY );
-    tmp1 = ( sZ * sY );
-    return Matrix4(
-        Vector4( ( cZ * cY ), ( sZ * cY ), -sY, 0.0f ),
-        Vector4( ( ( tmp0 * sX ) - ( sZ * cX ) ), ( ( tmp1 * sX ) + ( cZ * cX ) ), ( cY * sX ), 0.0f ),
-        Vector4( ( ( tmp0 * cX ) + ( sZ * sX ) ), ( ( tmp1 * cX ) - ( cZ * sX ) ), ( cY * cX ), 0.0f ),
-        Vector4::wAxis( )
-    );
-}
-
-inline const Matrix4 Matrix4::rotation( float radians, const Vector3 & unitVec )
-{
-    float x, y, z, s, c, oneMinusC, xy, yz, zx;
-    s = sinf( radians );
-    c = cosf( radians );
-    x = unitVec.getX();
-    y = unitVec.getY();
-    z = unitVec.getZ();
-    xy = ( x * y );
-    yz = ( y * z );
-    zx = ( z * x );
-    oneMinusC = ( 1.0f - c );
-    return Matrix4(
-        Vector4( ( ( ( x * x ) * oneMinusC ) + c ), ( ( xy * oneMinusC ) + ( z * s ) ), ( ( zx * oneMinusC ) - ( y * s ) ), 0.0f ),
-        Vector4( ( ( xy * oneMinusC ) - ( z * s ) ), ( ( ( y * y ) * oneMinusC ) + c ), ( ( yz * oneMinusC ) + ( x * s ) ), 0.0f ),
-        Vector4( ( ( zx * oneMinusC ) + ( y * s ) ), ( ( yz * oneMinusC ) - ( x * s ) ), ( ( ( z * z ) * oneMinusC ) + c ), 0.0f ),
-        Vector4::wAxis( )
-    );
-}
-
-inline const Matrix4 Matrix4::rotation( const Quat & unitQuat )
-{
-    return Matrix4( Transform3::rotation( unitQuat ) );
-}
-
-inline const Matrix4 Matrix4::scale( const Vector3 & scaleVec )
-{
-    return Matrix4(
-        Vector4( scaleVec.getX(), 0.0f, 0.0f, 0.0f ),
-        Vector4( 0.0f, scaleVec.getY(), 0.0f, 0.0f ),
-        Vector4( 0.0f, 0.0f, scaleVec.getZ(), 0.0f ),
-        Vector4::wAxis( )
-    );
-}
-
-inline const Matrix4 appendScale( const Matrix4 & mat, const Vector3 & scaleVec )
-{
-    return Matrix4(
-        ( mat.getCol0() * scaleVec.getX( ) ),
-        ( mat.getCol1() * scaleVec.getY( ) ),
-        ( mat.getCol2() * scaleVec.getZ( ) ),
-        mat.getCol3()
-    );
-}
-
-inline const Matrix4 prependScale( const Vector3 & scaleVec, const Matrix4 & mat )
-{
-    Vector4 scale4;
-    scale4 = Vector4( scaleVec, 1.0f );
-    return Matrix4(
-        mulPerElem( mat.getCol0(), scale4 ),
-        mulPerElem( mat.getCol1(), scale4 ),
-        mulPerElem( mat.getCol2(), scale4 ),
-        mulPerElem( mat.getCol3(), scale4 )
-    );
-}
-
-inline const Matrix4 Matrix4::translation( const Vector3 & translateVec )
-{
-    return Matrix4(
-        Vector4::xAxis( ),
-        Vector4::yAxis( ),
-        Vector4::zAxis( ),
-        Vector4( translateVec, 1.0f )
-    );
-}
-
-inline const Matrix4 Matrix4::lookAt( const Point3 & eyePos, const Point3 & lookAtPos, const Vector3 & upVec )
-{
-    Matrix4 m4EyeFrame;
-    Vector3 v3X, v3Y, v3Z;
-    v3Y = normalize( upVec );
-    v3Z = normalize( ( eyePos - lookAtPos ) );
-    v3X = normalize( cross( v3Y, v3Z ) );
-    v3Y = cross( v3Z, v3X );
-    m4EyeFrame = Matrix4( Vector4( v3X ), Vector4( v3Y ), Vector4( v3Z ), Vector4( eyePos ) );
-    return orthoInverse( m4EyeFrame );
-}
-
-inline const Matrix4 Matrix4::perspective( float fovyRadians, float aspect, float zNear, float zFar )
-{
-    float f, rangeInv;
-    f = tanf( ( (float)( _VECTORMATH_PI_OVER_2 ) - ( 0.5f * fovyRadians ) ) );
-    rangeInv = ( 1.0f / ( zNear - zFar ) );
-    return Matrix4(
-        Vector4( ( f / aspect ), 0.0f, 0.0f, 0.0f ),
-        Vector4( 0.0f, f, 0.0f, 0.0f ),
-        Vector4( 0.0f, 0.0f, ( ( zNear + zFar ) * rangeInv ), -1.0f ),
-        Vector4( 0.0f, 0.0f, ( ( ( zNear * zFar ) * rangeInv ) * 2.0f ), 0.0f )
-    );
-}
-
-inline const Matrix4 Matrix4::frustum( float left, float right, float bottom, float top, float zNear, float zFar )
-{
-    float sum_rl, sum_tb, sum_nf, inv_rl, inv_tb, inv_nf, n2;
-    sum_rl = ( right + left );
-    sum_tb = ( top + bottom );
-    sum_nf = ( zNear + zFar );
-    inv_rl = ( 1.0f / ( right - left ) );
-    inv_tb = ( 1.0f / ( top - bottom ) );
-    inv_nf = ( 1.0f / ( zNear - zFar ) );
-    n2 = ( zNear + zNear );
-    return Matrix4(
-        Vector4( ( n2 * inv_rl ), 0.0f, 0.0f, 0.0f ),
-        Vector4( 0.0f, ( n2 * inv_tb ), 0.0f, 0.0f ),
-        Vector4( ( sum_rl * inv_rl ), ( sum_tb * inv_tb ), ( sum_nf * inv_nf ), -1.0f ),
-        Vector4( 0.0f, 0.0f, ( ( n2 * inv_nf ) * zFar ), 0.0f )
-    );
-}
-
-inline const Matrix4 Matrix4::orthographic( float left, float right, float bottom, float top, float zNear, float zFar )
-{
-    float sum_rl, sum_tb, sum_nf, inv_rl, inv_tb, inv_nf;
-    sum_rl = ( right + left );
-    sum_tb = ( top + bottom );
-    sum_nf = ( zNear + zFar );
-    inv_rl = ( 1.0f / ( right - left ) );
-    inv_tb = ( 1.0f / ( top - bottom ) );
-    inv_nf = ( 1.0f / ( zNear - zFar ) );
-    return Matrix4(
-        Vector4( ( inv_rl + inv_rl ), 0.0f, 0.0f, 0.0f ),
-        Vector4( 0.0f, ( inv_tb + inv_tb ), 0.0f, 0.0f ),
-        Vector4( 0.0f, 0.0f, ( inv_nf + inv_nf ), 0.0f ),
-        Vector4( ( -sum_rl * inv_rl ), ( -sum_tb * inv_tb ), ( sum_nf * inv_nf ), 1.0f )
-    );
-}
-
-inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, bool select1 )
-{
-    return Matrix4(
-        select( mat0.getCol0(), mat1.getCol0(), select1 ),
-        select( mat0.getCol1(), mat1.getCol1(), select1 ),
-        select( mat0.getCol2(), mat1.getCol2(), select1 ),
-        select( mat0.getCol3(), mat1.getCol3(), select1 )
-    );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( const Matrix4 & mat )
-{
-    print( mat.getRow( 0 ) );
-    print( mat.getRow( 1 ) );
-    print( mat.getRow( 2 ) );
-    print( mat.getRow( 3 ) );
-}
-
-inline void print( const Matrix4 & mat, const char * name )
-{
-    printf("%s:\n", name);
-    print( mat );
-}
-
-#endif
-
-inline Transform3::Transform3( const Transform3 & tfrm )
-{
-    mCol0 = tfrm.mCol0;
-    mCol1 = tfrm.mCol1;
-    mCol2 = tfrm.mCol2;
-    mCol3 = tfrm.mCol3;
-}
-
-inline Transform3::Transform3( float scalar )
-{
-    mCol0 = Vector3( scalar );
-    mCol1 = Vector3( scalar );
-    mCol2 = Vector3( scalar );
-    mCol3 = Vector3( scalar );
-}
-
-inline Transform3::Transform3( const Vector3 & _col0, const Vector3 & _col1, const Vector3 & _col2, const Vector3 & _col3 )
-{
-    mCol0 = _col0;
-    mCol1 = _col1;
-    mCol2 = _col2;
-    mCol3 = _col3;
-}
-
-inline Transform3::Transform3( const Matrix3 & tfrm, const Vector3 & translateVec )
-{
-    this->setUpper3x3( tfrm );
-    this->setTranslation( translateVec );
-}
-
-inline Transform3::Transform3( const Quat & unitQuat, const Vector3 & translateVec )
-{
-    this->setUpper3x3( Matrix3( unitQuat ) );
-    this->setTranslation( translateVec );
-}
-
-inline Transform3 & Transform3::setCol0( const Vector3 & _col0 )
-{
-    mCol0 = _col0;
-    return *this;
-}
-
-inline Transform3 & Transform3::setCol1( const Vector3 & _col1 )
-{
-    mCol1 = _col1;
-    return *this;
-}
-
-inline Transform3 & Transform3::setCol2( const Vector3 & _col2 )
-{
-    mCol2 = _col2;
-    return *this;
-}
-
-inline Transform3 & Transform3::setCol3( const Vector3 & _col3 )
-{
-    mCol3 = _col3;
-    return *this;
-}
-
-inline Transform3 & Transform3::setCol( int col, const Vector3 & vec )
-{
-    *(&mCol0 + col) = vec;
-    return *this;
-}
-
-inline Transform3 & Transform3::setRow( int row, const Vector4 & vec )
-{
-    mCol0.setElem( row, vec.getElem( 0 ) );
-    mCol1.setElem( row, vec.getElem( 1 ) );
-    mCol2.setElem( row, vec.getElem( 2 ) );
-    mCol3.setElem( row, vec.getElem( 3 ) );
-    return *this;
-}
-
-inline Transform3 & Transform3::setElem( int col, int row, float val )
-{
-    Vector3 tmpV3_0;
-    tmpV3_0 = this->getCol( col );
-    tmpV3_0.setElem( row, val );
-    this->setCol( col, tmpV3_0 );
-    return *this;
-}
-
-inline float Transform3::getElem( int col, int row ) const
-{
-    return this->getCol( col ).getElem( row );
-}
-
-inline const Vector3 Transform3::getCol0( ) const
-{
-    return mCol0;
-}
-
-inline const Vector3 Transform3::getCol1( ) const
-{
-    return mCol1;
-}
-
-inline const Vector3 Transform3::getCol2( ) const
-{
-    return mCol2;
-}
-
-inline const Vector3 Transform3::getCol3( ) const
-{
-    return mCol3;
-}
-
-inline const Vector3 Transform3::getCol( int col ) const
-{
-    return *(&mCol0 + col);
-}
-
-inline const Vector4 Transform3::getRow( int row ) const
-{
-    return Vector4( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ), mCol3.getElem( row ) );
-}
-
-inline Vector3 & Transform3::operator []( int col )
-{
-    return *(&mCol0 + col);
-}
-
-inline const Vector3 Transform3::operator []( int col ) const
-{
-    return *(&mCol0 + col);
-}
-
-inline Transform3 & Transform3::operator =( const Transform3 & tfrm )
-{
-    mCol0 = tfrm.mCol0;
-    mCol1 = tfrm.mCol1;
-    mCol2 = tfrm.mCol2;
-    mCol3 = tfrm.mCol3;
-    return *this;
-}
-
-inline const Transform3 inverse( const Transform3 & tfrm )
-{
-    Vector3 tmp0, tmp1, tmp2, inv0, inv1, inv2;
-    float detinv;
-    tmp0 = cross( tfrm.getCol1(), tfrm.getCol2() );
-    tmp1 = cross( tfrm.getCol2(), tfrm.getCol0() );
-    tmp2 = cross( tfrm.getCol0(), tfrm.getCol1() );
-    detinv = ( 1.0f / dot( tfrm.getCol2(), tmp2 ) );
-    inv0 = Vector3( ( tmp0.getX() * detinv ), ( tmp1.getX() * detinv ), ( tmp2.getX() * detinv ) );
-    inv1 = Vector3( ( tmp0.getY() * detinv ), ( tmp1.getY() * detinv ), ( tmp2.getY() * detinv ) );
-    inv2 = Vector3( ( tmp0.getZ() * detinv ), ( tmp1.getZ() * detinv ), ( tmp2.getZ() * detinv ) );
-    return Transform3(
-        inv0,
-        inv1,
-        inv2,
-        Vector3( ( -( ( inv0 * tfrm.getCol3().getX() ) + ( ( inv1 * tfrm.getCol3().getY() ) + ( inv2 * tfrm.getCol3().getZ() ) ) ) ) )
-    );
-}
-
-inline const Transform3 orthoInverse( const Transform3 & tfrm )
-{
-    Vector3 inv0, inv1, inv2;
-    inv0 = Vector3( tfrm.getCol0().getX(), tfrm.getCol1().getX(), tfrm.getCol2().getX() );
-    inv1 = Vector3( tfrm.getCol0().getY(), tfrm.getCol1().getY(), tfrm.getCol2().getY() );
-    inv2 = Vector3( tfrm.getCol0().getZ(), tfrm.getCol1().getZ(), tfrm.getCol2().getZ() );
-    return Transform3(
-        inv0,
-        inv1,
-        inv2,
-        Vector3( ( -( ( inv0 * tfrm.getCol3().getX() ) + ( ( inv1 * tfrm.getCol3().getY() ) + ( inv2 * tfrm.getCol3().getZ() ) ) ) ) )
-    );
-}
-
-inline const Transform3 absPerElem( const Transform3 & tfrm )
-{
-    return Transform3(
-        absPerElem( tfrm.getCol0() ),
-        absPerElem( tfrm.getCol1() ),
-        absPerElem( tfrm.getCol2() ),
-        absPerElem( tfrm.getCol3() )
-    );
-}
-
-inline const Vector3 Transform3::operator *( const Vector3 & vec ) const
-{
-    return Vector3(
-        ( ( ( mCol0.getX() * vec.getX() ) + ( mCol1.getX() * vec.getY() ) ) + ( mCol2.getX() * vec.getZ() ) ),
-        ( ( ( mCol0.getY() * vec.getX() ) + ( mCol1.getY() * vec.getY() ) ) + ( mCol2.getY() * vec.getZ() ) ),
-        ( ( ( mCol0.getZ() * vec.getX() ) + ( mCol1.getZ() * vec.getY() ) ) + ( mCol2.getZ() * vec.getZ() ) )
-    );
-}
-
-inline const Point3 Transform3::operator *( const Point3 & pnt ) const
-{
-    return Point3(
-        ( ( ( ( mCol0.getX() * pnt.getX() ) + ( mCol1.getX() * pnt.getY() ) ) + ( mCol2.getX() * pnt.getZ() ) ) + mCol3.getX() ),
-        ( ( ( ( mCol0.getY() * pnt.getX() ) + ( mCol1.getY() * pnt.getY() ) ) + ( mCol2.getY() * pnt.getZ() ) ) + mCol3.getY() ),
-        ( ( ( ( mCol0.getZ() * pnt.getX() ) + ( mCol1.getZ() * pnt.getY() ) ) + ( mCol2.getZ() * pnt.getZ() ) ) + mCol3.getZ() )
-    );
-}
-
-inline const Transform3 Transform3::operator *( const Transform3 & tfrm ) const
-{
-    return Transform3(
-        ( *this * tfrm.mCol0 ),
-        ( *this * tfrm.mCol1 ),
-        ( *this * tfrm.mCol2 ),
-        Vector3( ( *this * Point3( tfrm.mCol3 ) ) )
-    );
-}
-
-inline Transform3 & Transform3::operator *=( const Transform3 & tfrm )
-{
-    *this = *this * tfrm;
-    return *this;
-}
-
-inline const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 )
-{
-    return Transform3(
-        mulPerElem( tfrm0.getCol0(), tfrm1.getCol0() ),
-        mulPerElem( tfrm0.getCol1(), tfrm1.getCol1() ),
-        mulPerElem( tfrm0.getCol2(), tfrm1.getCol2() ),
-        mulPerElem( tfrm0.getCol3(), tfrm1.getCol3() )
-    );
-}
-
-inline const Transform3 Transform3::identity( )
-{
-    return Transform3(
-        Vector3::xAxis( ),
-        Vector3::yAxis( ),
-        Vector3::zAxis( ),
-        Vector3( 0.0f )
-    );
-}
-
-inline Transform3 & Transform3::setUpper3x3( const Matrix3 & tfrm )
-{
-    mCol0 = tfrm.getCol0();
-    mCol1 = tfrm.getCol1();
-    mCol2 = tfrm.getCol2();
-    return *this;
-}
-
-inline const Matrix3 Transform3::getUpper3x3( ) const
-{
-    return Matrix3( mCol0, mCol1, mCol2 );
-}
-
-inline Transform3 & Transform3::setTranslation( const Vector3 & translateVec )
-{
-    mCol3 = translateVec;
-    return *this;
-}
-
-inline const Vector3 Transform3::getTranslation( ) const
-{
-    return mCol3;
-}
-
-inline const Transform3 Transform3::rotationX( float radians )
-{
-    float s, c;
-    s = sinf( radians );
-    c = cosf( radians );
-    return Transform3(
-        Vector3::xAxis( ),
-        Vector3( 0.0f, c, s ),
-        Vector3( 0.0f, -s, c ),
-        Vector3( 0.0f )
-    );
-}
-
-inline const Transform3 Transform3::rotationY( float radians )
-{
-    float s, c;
-    s = sinf( radians );
-    c = cosf( radians );
-    return Transform3(
-        Vector3( c, 0.0f, -s ),
-        Vector3::yAxis( ),
-        Vector3( s, 0.0f, c ),
-        Vector3( 0.0f )
-    );
-}
-
-inline const Transform3 Transform3::rotationZ( float radians )
-{
-    float s, c;
-    s = sinf( radians );
-    c = cosf( radians );
-    return Transform3(
-        Vector3( c, s, 0.0f ),
-        Vector3( -s, c, 0.0f ),
-        Vector3::zAxis( ),
-        Vector3( 0.0f )
-    );
-}
-
-inline const Transform3 Transform3::rotationZYX( const Vector3 & radiansXYZ )
-{
-    float sX, cX, sY, cY, sZ, cZ, tmp0, tmp1;
-    sX = sinf( radiansXYZ.getX() );
-    cX = cosf( radiansXYZ.getX() );
-    sY = sinf( radiansXYZ.getY() );
-    cY = cosf( radiansXYZ.getY() );
-    sZ = sinf( radiansXYZ.getZ() );
-    cZ = cosf( radiansXYZ.getZ() );
-    tmp0 = ( cZ * sY );
-    tmp1 = ( sZ * sY );
-    return Transform3(
-        Vector3( ( cZ * cY ), ( sZ * cY ), -sY ),
-        Vector3( ( ( tmp0 * sX ) - ( sZ * cX ) ), ( ( tmp1 * sX ) + ( cZ * cX ) ), ( cY * sX ) ),
-        Vector3( ( ( tmp0 * cX ) + ( sZ * sX ) ), ( ( tmp1 * cX ) - ( cZ * sX ) ), ( cY * cX ) ),
-        Vector3( 0.0f )
-    );
-}
-
-inline const Transform3 Transform3::rotation( float radians, const Vector3 & unitVec )
-{
-    return Transform3( Matrix3::rotation( radians, unitVec ), Vector3( 0.0f ) );
-}
-
-inline const Transform3 Transform3::rotation( const Quat & unitQuat )
-{
-    return Transform3( Matrix3( unitQuat ), Vector3( 0.0f ) );
-}
-
-inline const Transform3 Transform3::scale( const Vector3 & scaleVec )
-{
-    return Transform3(
-        Vector3( scaleVec.getX(), 0.0f, 0.0f ),
-        Vector3( 0.0f, scaleVec.getY(), 0.0f ),
-        Vector3( 0.0f, 0.0f, scaleVec.getZ() ),
-        Vector3( 0.0f )
-    );
-}
-
-inline const Transform3 appendScale( const Transform3 & tfrm, const Vector3 & scaleVec )
-{
-    return Transform3(
-        ( tfrm.getCol0() * scaleVec.getX( ) ),
-        ( tfrm.getCol1() * scaleVec.getY( ) ),
-        ( tfrm.getCol2() * scaleVec.getZ( ) ),
-        tfrm.getCol3()
-    );
-}
-
-inline const Transform3 prependScale( const Vector3 & scaleVec, const Transform3 & tfrm )
-{
-    return Transform3(
-        mulPerElem( tfrm.getCol0(), scaleVec ),
-        mulPerElem( tfrm.getCol1(), scaleVec ),
-        mulPerElem( tfrm.getCol2(), scaleVec ),
-        mulPerElem( tfrm.getCol3(), scaleVec )
-    );
-}
-
-inline const Transform3 Transform3::translation( const Vector3 & translateVec )
-{
-    return Transform3(
-        Vector3::xAxis( ),
-        Vector3::yAxis( ),
-        Vector3::zAxis( ),
-        translateVec
-    );
-}
-
-inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, bool select1 )
-{
-    return Transform3(
-        select( tfrm0.getCol0(), tfrm1.getCol0(), select1 ),
-        select( tfrm0.getCol1(), tfrm1.getCol1(), select1 ),
-        select( tfrm0.getCol2(), tfrm1.getCol2(), select1 ),
-        select( tfrm0.getCol3(), tfrm1.getCol3(), select1 )
-    );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( const Transform3 & tfrm )
-{
-    print( tfrm.getRow( 0 ) );
-    print( tfrm.getRow( 1 ) );
-    print( tfrm.getRow( 2 ) );
-}
-
-inline void print( const Transform3 & tfrm, const char * name )
-{
-    printf("%s:\n", name);
-    print( tfrm );
-}
-
-#endif
-
-inline Quat::Quat( const Matrix3 & tfrm )
-{
-    float trace, radicand, scale, xx, yx, zx, xy, yy, zy, xz, yz, zz, tmpx, tmpy, tmpz, tmpw, qx, qy, qz, qw;
-    int negTrace, ZgtX, ZgtY, YgtX;
-    int largestXorY, largestYorZ, largestZorX;
-
-    xx = tfrm.getCol0().getX();
-    yx = tfrm.getCol0().getY();
-    zx = tfrm.getCol0().getZ();
-    xy = tfrm.getCol1().getX();
-    yy = tfrm.getCol1().getY();
-    zy = tfrm.getCol1().getZ();
-    xz = tfrm.getCol2().getX();
-    yz = tfrm.getCol2().getY();
-    zz = tfrm.getCol2().getZ();
-
-    trace = ( ( xx + yy ) + zz );
-
-    negTrace = ( trace < 0.0f );
-    ZgtX = zz > xx;
-    ZgtY = zz > yy;
-    YgtX = yy > xx;
-    largestXorY = ( !ZgtX || !ZgtY ) && negTrace;
-    largestYorZ = ( YgtX || ZgtX ) && negTrace;
-    largestZorX = ( ZgtY || !YgtX ) && negTrace;
-    
-    if ( largestXorY )
-    {
-        zz = -zz;
-        xy = -xy;
-    }
-    if ( largestYorZ )
-    {
-        xx = -xx;
-        yz = -yz;
-    }
-    if ( largestZorX )
-    {
-        yy = -yy;
-        zx = -zx;
-    }
-
-    radicand = ( ( ( xx + yy ) + zz ) + 1.0f );
-    scale = ( 0.5f * ( 1.0f / sqrtf( radicand ) ) );
-
-    tmpx = ( ( zy - yz ) * scale );
-    tmpy = ( ( xz - zx ) * scale );
-    tmpz = ( ( yx - xy ) * scale );
-    tmpw = ( radicand * scale );
-    qx = tmpx;
-    qy = tmpy;
-    qz = tmpz;
-    qw = tmpw;
-
-    if ( largestXorY )
-    {
-        qx = tmpw;
-        qy = tmpz;
-        qz = tmpy;
-        qw = tmpx;
-    }
-    if ( largestYorZ )
-    {
-        tmpx = qx;
-        tmpz = qz;
-        qx = qy;
-        qy = tmpx;
-        qz = qw;
-        qw = tmpz;
-    }
-
-    mX = qx;
-    mY = qy;
-    mZ = qz;
-    mW = qw;
-}
-
-inline const Matrix3 outer( const Vector3 & tfrm0, const Vector3 & tfrm1 )
-{
-    return Matrix3(
-        ( tfrm0 * tfrm1.getX( ) ),
-        ( tfrm0 * tfrm1.getY( ) ),
-        ( tfrm0 * tfrm1.getZ( ) )
-    );
-}
-
-inline const Matrix4 outer( const Vector4 & tfrm0, const Vector4 & tfrm1 )
-{
-    return Matrix4(
-        ( tfrm0 * tfrm1.getX( ) ),
-        ( tfrm0 * tfrm1.getY( ) ),
-        ( tfrm0 * tfrm1.getZ( ) ),
-        ( tfrm0 * tfrm1.getW( ) )
-    );
-}
-
-inline const Vector3 rowMul( const Vector3 & vec, const Matrix3 & mat )
-{
-    return Vector3(
-        ( ( ( vec.getX() * mat.getCol0().getX() ) + ( vec.getY() * mat.getCol0().getY() ) ) + ( vec.getZ() * mat.getCol0().getZ() ) ),
-        ( ( ( vec.getX() * mat.getCol1().getX() ) + ( vec.getY() * mat.getCol1().getY() ) ) + ( vec.getZ() * mat.getCol1().getZ() ) ),
-        ( ( ( vec.getX() * mat.getCol2().getX() ) + ( vec.getY() * mat.getCol2().getY() ) ) + ( vec.getZ() * mat.getCol2().getZ() ) )
-    );
-}
-
-inline const Matrix3 crossMatrix( const Vector3 & vec )
-{
-    return Matrix3(
-        Vector3( 0.0f, vec.getZ(), -vec.getY() ),
-        Vector3( -vec.getZ(), 0.0f, vec.getX() ),
-        Vector3( vec.getY(), -vec.getX(), 0.0f )
-    );
-}
-
-inline const Matrix3 crossMatrixMul( const Vector3 & vec, const Matrix3 & mat )
-{
-    return Matrix3( cross( vec, mat.getCol0() ), cross( vec, mat.getCol1() ), cross( vec, mat.getCol2() ) );
-}
-
-} // namespace Aos
-} // namespace Vectormath
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_MAT_AOS_CPP_H
+#define _VECTORMATH_MAT_AOS_CPP_H
+
+namespace Vectormath {
+namespace Aos {
+
+//-----------------------------------------------------------------------------
+// Constants
+
+#define _VECTORMATH_PI_OVER_2 1.570796327f
+
+//-----------------------------------------------------------------------------
+// Definitions
+
+inline Matrix3::Matrix3( const Matrix3 & mat )
+{
+    mCol0 = mat.mCol0;
+    mCol1 = mat.mCol1;
+    mCol2 = mat.mCol2;
+}
+
+inline Matrix3::Matrix3( float scalar )
+{
+    mCol0 = Vector3( scalar );
+    mCol1 = Vector3( scalar );
+    mCol2 = Vector3( scalar );
+}
+
+inline Matrix3::Matrix3( const Quat & unitQuat )
+{
+    float qx, qy, qz, qw, qx2, qy2, qz2, qxqx2, qyqy2, qzqz2, qxqy2, qyqz2, qzqw2, qxqz2, qyqw2, qxqw2;
+    qx = unitQuat.getX();
+    qy = unitQuat.getY();
+    qz = unitQuat.getZ();
+    qw = unitQuat.getW();
+    qx2 = ( qx + qx );
+    qy2 = ( qy + qy );
+    qz2 = ( qz + qz );
+    qxqx2 = ( qx * qx2 );
+    qxqy2 = ( qx * qy2 );
+    qxqz2 = ( qx * qz2 );
+    qxqw2 = ( qw * qx2 );
+    qyqy2 = ( qy * qy2 );
+    qyqz2 = ( qy * qz2 );
+    qyqw2 = ( qw * qy2 );
+    qzqz2 = ( qz * qz2 );
+    qzqw2 = ( qw * qz2 );
+    mCol0 = Vector3( ( ( 1.0f - qyqy2 ) - qzqz2 ), ( qxqy2 + qzqw2 ), ( qxqz2 - qyqw2 ) );
+    mCol1 = Vector3( ( qxqy2 - qzqw2 ), ( ( 1.0f - qxqx2 ) - qzqz2 ), ( qyqz2 + qxqw2 ) );
+    mCol2 = Vector3( ( qxqz2 + qyqw2 ), ( qyqz2 - qxqw2 ), ( ( 1.0f - qxqx2 ) - qyqy2 ) );
+}
+
+inline Matrix3::Matrix3( const Vector3 & _col0, const Vector3 & _col1, const Vector3 & _col2 )
+{
+    mCol0 = _col0;
+    mCol1 = _col1;
+    mCol2 = _col2;
+}
+
+inline Matrix3 & Matrix3::setCol0( const Vector3 & _col0 )
+{
+    mCol0 = _col0;
+    return *this;
+}
+
+inline Matrix3 & Matrix3::setCol1( const Vector3 & _col1 )
+{
+    mCol1 = _col1;
+    return *this;
+}
+
+inline Matrix3 & Matrix3::setCol2( const Vector3 & _col2 )
+{
+    mCol2 = _col2;
+    return *this;
+}
+
+inline Matrix3 & Matrix3::setCol( int col, const Vector3 & vec )
+{
+    *(&mCol0 + col) = vec;
+    return *this;
+}
+
+inline Matrix3 & Matrix3::setRow( int row, const Vector3 & vec )
+{
+    mCol0.setElem( row, vec.getElem( 0 ) );
+    mCol1.setElem( row, vec.getElem( 1 ) );
+    mCol2.setElem( row, vec.getElem( 2 ) );
+    return *this;
+}
+
+inline Matrix3 & Matrix3::setElem( int col, int row, float val )
+{
+    Vector3 tmpV3_0;
+    tmpV3_0 = this->getCol( col );
+    tmpV3_0.setElem( row, val );
+    this->setCol( col, tmpV3_0 );
+    return *this;
+}
+
+inline float Matrix3::getElem( int col, int row ) const
+{
+    return this->getCol( col ).getElem( row );
+}
+
+inline const Vector3 Matrix3::getCol0( ) const
+{
+    return mCol0;
+}
+
+inline const Vector3 Matrix3::getCol1( ) const
+{
+    return mCol1;
+}
+
+inline const Vector3 Matrix3::getCol2( ) const
+{
+    return mCol2;
+}
+
+inline const Vector3 Matrix3::getCol( int col ) const
+{
+    return *(&mCol0 + col);
+}
+
+inline const Vector3 Matrix3::getRow( int row ) const
+{
+    return Vector3( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ) );
+}
+
+inline Vector3 & Matrix3::operator []( int col )
+{
+    return *(&mCol0 + col);
+}
+
+inline const Vector3 Matrix3::operator []( int col ) const
+{
+    return *(&mCol0 + col);
+}
+
+inline Matrix3 & Matrix3::operator =( const Matrix3 & mat )
+{
+    mCol0 = mat.mCol0;
+    mCol1 = mat.mCol1;
+    mCol2 = mat.mCol2;
+    return *this;
+}
+
+inline const Matrix3 transpose( const Matrix3 & mat )
+{
+    return Matrix3(
+        Vector3( mat.getCol0().getX(), mat.getCol1().getX(), mat.getCol2().getX() ),
+        Vector3( mat.getCol0().getY(), mat.getCol1().getY(), mat.getCol2().getY() ),
+        Vector3( mat.getCol0().getZ(), mat.getCol1().getZ(), mat.getCol2().getZ() )
+    );
+}
+
+inline const Matrix3 inverse( const Matrix3 & mat )
+{
+    Vector3 tmp0, tmp1, tmp2;
+    float detinv;
+    tmp0 = cross( mat.getCol1(), mat.getCol2() );
+    tmp1 = cross( mat.getCol2(), mat.getCol0() );
+    tmp2 = cross( mat.getCol0(), mat.getCol1() );
+    detinv = ( 1.0f / dot( mat.getCol2(), tmp2 ) );
+    return Matrix3(
+        Vector3( ( tmp0.getX() * detinv ), ( tmp1.getX() * detinv ), ( tmp2.getX() * detinv ) ),
+        Vector3( ( tmp0.getY() * detinv ), ( tmp1.getY() * detinv ), ( tmp2.getY() * detinv ) ),
+        Vector3( ( tmp0.getZ() * detinv ), ( tmp1.getZ() * detinv ), ( tmp2.getZ() * detinv ) )
+    );
+}
+
+inline float determinant( const Matrix3 & mat )
+{
+    return dot( mat.getCol2(), cross( mat.getCol0(), mat.getCol1() ) );
+}
+
+inline const Matrix3 Matrix3::operator +( const Matrix3 & mat ) const
+{
+    return Matrix3(
+        ( mCol0 + mat.mCol0 ),
+        ( mCol1 + mat.mCol1 ),
+        ( mCol2 + mat.mCol2 )
+    );
+}
+
+inline const Matrix3 Matrix3::operator -( const Matrix3 & mat ) const
+{
+    return Matrix3(
+        ( mCol0 - mat.mCol0 ),
+        ( mCol1 - mat.mCol1 ),
+        ( mCol2 - mat.mCol2 )
+    );
+}
+
+inline Matrix3 & Matrix3::operator +=( const Matrix3 & mat )
+{
+    *this = *this + mat;
+    return *this;
+}
+
+inline Matrix3 & Matrix3::operator -=( const Matrix3 & mat )
+{
+    *this = *this - mat;
+    return *this;
+}
+
+inline const Matrix3 Matrix3::operator -( ) const
+{
+    return Matrix3(
+        ( -mCol0 ),
+        ( -mCol1 ),
+        ( -mCol2 )
+    );
+}
+
+inline const Matrix3 absPerElem( const Matrix3 & mat )
+{
+    return Matrix3(
+        absPerElem( mat.getCol0() ),
+        absPerElem( mat.getCol1() ),
+        absPerElem( mat.getCol2() )
+    );
+}
+
+inline const Matrix3 Matrix3::operator *( float scalar ) const
+{
+    return Matrix3(
+        ( mCol0 * scalar ),
+        ( mCol1 * scalar ),
+        ( mCol2 * scalar )
+    );
+}
+
+inline Matrix3 & Matrix3::operator *=( float scalar )
+{
+    *this = *this * scalar;
+    return *this;
+}
+
+inline const Matrix3 operator *( float scalar, const Matrix3 & mat )
+{
+    return mat * scalar;
+}
+
+inline const Vector3 Matrix3::operator *( const Vector3 & vec ) const
+{
+    return Vector3(
+        ( ( ( mCol0.getX() * vec.getX() ) + ( mCol1.getX() * vec.getY() ) ) + ( mCol2.getX() * vec.getZ() ) ),
+        ( ( ( mCol0.getY() * vec.getX() ) + ( mCol1.getY() * vec.getY() ) ) + ( mCol2.getY() * vec.getZ() ) ),
+        ( ( ( mCol0.getZ() * vec.getX() ) + ( mCol1.getZ() * vec.getY() ) ) + ( mCol2.getZ() * vec.getZ() ) )
+    );
+}
+
+inline const Matrix3 Matrix3::operator *( const Matrix3 & mat ) const
+{
+    return Matrix3(
+        ( *this * mat.mCol0 ),
+        ( *this * mat.mCol1 ),
+        ( *this * mat.mCol2 )
+    );
+}
+
+inline Matrix3 & Matrix3::operator *=( const Matrix3 & mat )
+{
+    *this = *this * mat;
+    return *this;
+}
+
+inline const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 )
+{
+    return Matrix3(
+        mulPerElem( mat0.getCol0(), mat1.getCol0() ),
+        mulPerElem( mat0.getCol1(), mat1.getCol1() ),
+        mulPerElem( mat0.getCol2(), mat1.getCol2() )
+    );
+}
+
+inline const Matrix3 Matrix3::identity( )
+{
+    return Matrix3(
+        Vector3::xAxis( ),
+        Vector3::yAxis( ),
+        Vector3::zAxis( )
+    );
+}
+
+inline const Matrix3 Matrix3::rotationX( float radians )
+{
+    float s, c;
+    s = sinf( radians );
+    c = cosf( radians );
+    return Matrix3(
+        Vector3::xAxis( ),
+        Vector3( 0.0f, c, s ),
+        Vector3( 0.0f, -s, c )
+    );
+}
+
+inline const Matrix3 Matrix3::rotationY( float radians )
+{
+    float s, c;
+    s = sinf( radians );
+    c = cosf( radians );
+    return Matrix3(
+        Vector3( c, 0.0f, -s ),
+        Vector3::yAxis( ),
+        Vector3( s, 0.0f, c )
+    );
+}
+
+inline const Matrix3 Matrix3::rotationZ( float radians )
+{
+    float s, c;
+    s = sinf( radians );
+    c = cosf( radians );
+    return Matrix3(
+        Vector3( c, s, 0.0f ),
+        Vector3( -s, c, 0.0f ),
+        Vector3::zAxis( )
+    );
+}
+
+inline const Matrix3 Matrix3::rotationZYX( const Vector3 & radiansXYZ )
+{
+    float sX, cX, sY, cY, sZ, cZ, tmp0, tmp1;
+    sX = sinf( radiansXYZ.getX() );
+    cX = cosf( radiansXYZ.getX() );
+    sY = sinf( radiansXYZ.getY() );
+    cY = cosf( radiansXYZ.getY() );
+    sZ = sinf( radiansXYZ.getZ() );
+    cZ = cosf( radiansXYZ.getZ() );
+    tmp0 = ( cZ * sY );
+    tmp1 = ( sZ * sY );
+    return Matrix3(
+        Vector3( ( cZ * cY ), ( sZ * cY ), -sY ),
+        Vector3( ( ( tmp0 * sX ) - ( sZ * cX ) ), ( ( tmp1 * sX ) + ( cZ * cX ) ), ( cY * sX ) ),
+        Vector3( ( ( tmp0 * cX ) + ( sZ * sX ) ), ( ( tmp1 * cX ) - ( cZ * sX ) ), ( cY * cX ) )
+    );
+}
+
+inline const Matrix3 Matrix3::rotation( float radians, const Vector3 & unitVec )
+{
+    float x, y, z, s, c, oneMinusC, xy, yz, zx;
+    s = sinf( radians );
+    c = cosf( radians );
+    x = unitVec.getX();
+    y = unitVec.getY();
+    z = unitVec.getZ();
+    xy = ( x * y );
+    yz = ( y * z );
+    zx = ( z * x );
+    oneMinusC = ( 1.0f - c );
+    return Matrix3(
+        Vector3( ( ( ( x * x ) * oneMinusC ) + c ), ( ( xy * oneMinusC ) + ( z * s ) ), ( ( zx * oneMinusC ) - ( y * s ) ) ),
+        Vector3( ( ( xy * oneMinusC ) - ( z * s ) ), ( ( ( y * y ) * oneMinusC ) + c ), ( ( yz * oneMinusC ) + ( x * s ) ) ),
+        Vector3( ( ( zx * oneMinusC ) + ( y * s ) ), ( ( yz * oneMinusC ) - ( x * s ) ), ( ( ( z * z ) * oneMinusC ) + c ) )
+    );
+}
+
+inline const Matrix3 Matrix3::rotation( const Quat & unitQuat )
+{
+    return Matrix3( unitQuat );
+}
+
+inline const Matrix3 Matrix3::scale( const Vector3 & scaleVec )
+{
+    return Matrix3(
+        Vector3( scaleVec.getX(), 0.0f, 0.0f ),
+        Vector3( 0.0f, scaleVec.getY(), 0.0f ),
+        Vector3( 0.0f, 0.0f, scaleVec.getZ() )
+    );
+}
+
+inline const Matrix3 appendScale( const Matrix3 & mat, const Vector3 & scaleVec )
+{
+    return Matrix3(
+        ( mat.getCol0() * scaleVec.getX( ) ),
+        ( mat.getCol1() * scaleVec.getY( ) ),
+        ( mat.getCol2() * scaleVec.getZ( ) )
+    );
+}
+
+inline const Matrix3 prependScale( const Vector3 & scaleVec, const Matrix3 & mat )
+{
+    return Matrix3(
+        mulPerElem( mat.getCol0(), scaleVec ),
+        mulPerElem( mat.getCol1(), scaleVec ),
+        mulPerElem( mat.getCol2(), scaleVec )
+    );
+}
+
+inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, bool select1 )
+{
+    return Matrix3(
+        select( mat0.getCol0(), mat1.getCol0(), select1 ),
+        select( mat0.getCol1(), mat1.getCol1(), select1 ),
+        select( mat0.getCol2(), mat1.getCol2(), select1 )
+    );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+inline void print( const Matrix3 & mat )
+{
+    print( mat.getRow( 0 ) );
+    print( mat.getRow( 1 ) );
+    print( mat.getRow( 2 ) );
+}
+
+inline void print( const Matrix3 & mat, const char * name )
+{
+    printf("%s:\n", name);
+    print( mat );
+}
+
+#endif
+
+inline Matrix4::Matrix4( const Matrix4 & mat )
+{
+    mCol0 = mat.mCol0;
+    mCol1 = mat.mCol1;
+    mCol2 = mat.mCol2;
+    mCol3 = mat.mCol3;
+}
+
+inline Matrix4::Matrix4( float scalar )
+{
+    mCol0 = Vector4( scalar );
+    mCol1 = Vector4( scalar );
+    mCol2 = Vector4( scalar );
+    mCol3 = Vector4( scalar );
+}
+
+inline Matrix4::Matrix4( const Transform3 & mat )
+{
+    mCol0 = Vector4( mat.getCol0(), 0.0f );
+    mCol1 = Vector4( mat.getCol1(), 0.0f );
+    mCol2 = Vector4( mat.getCol2(), 0.0f );
+    mCol3 = Vector4( mat.getCol3(), 1.0f );
+}
+
+inline Matrix4::Matrix4( const Vector4 & _col0, const Vector4 & _col1, const Vector4 & _col2, const Vector4 & _col3 )
+{
+    mCol0 = _col0;
+    mCol1 = _col1;
+    mCol2 = _col2;
+    mCol3 = _col3;
+}
+
+inline Matrix4::Matrix4( const Matrix3 & mat, const Vector3 & translateVec )
+{
+    mCol0 = Vector4( mat.getCol0(), 0.0f );
+    mCol1 = Vector4( mat.getCol1(), 0.0f );
+    mCol2 = Vector4( mat.getCol2(), 0.0f );
+    mCol3 = Vector4( translateVec, 1.0f );
+}
+
+inline Matrix4::Matrix4( const Quat & unitQuat, const Vector3 & translateVec )
+{
+    Matrix3 mat;
+    mat = Matrix3( unitQuat );
+    mCol0 = Vector4( mat.getCol0(), 0.0f );
+    mCol1 = Vector4( mat.getCol1(), 0.0f );
+    mCol2 = Vector4( mat.getCol2(), 0.0f );
+    mCol3 = Vector4( translateVec, 1.0f );
+}
+
+inline Matrix4 & Matrix4::setCol0( const Vector4 & _col0 )
+{
+    mCol0 = _col0;
+    return *this;
+}
+
+inline Matrix4 & Matrix4::setCol1( const Vector4 & _col1 )
+{
+    mCol1 = _col1;
+    return *this;
+}
+
+inline Matrix4 & Matrix4::setCol2( const Vector4 & _col2 )
+{
+    mCol2 = _col2;
+    return *this;
+}
+
+inline Matrix4 & Matrix4::setCol3( const Vector4 & _col3 )
+{
+    mCol3 = _col3;
+    return *this;
+}
+
+inline Matrix4 & Matrix4::setCol( int col, const Vector4 & vec )
+{
+    *(&mCol0 + col) = vec;
+    return *this;
+}
+
+inline Matrix4 & Matrix4::setRow( int row, const Vector4 & vec )
+{
+    mCol0.setElem( row, vec.getElem( 0 ) );
+    mCol1.setElem( row, vec.getElem( 1 ) );
+    mCol2.setElem( row, vec.getElem( 2 ) );
+    mCol3.setElem( row, vec.getElem( 3 ) );
+    return *this;
+}
+
+inline Matrix4 & Matrix4::setElem( int col, int row, float val )
+{
+    Vector4 tmpV3_0;
+    tmpV3_0 = this->getCol( col );
+    tmpV3_0.setElem( row, val );
+    this->setCol( col, tmpV3_0 );
+    return *this;
+}
+
+inline float Matrix4::getElem( int col, int row ) const
+{
+    return this->getCol( col ).getElem( row );
+}
+
+inline const Vector4 Matrix4::getCol0( ) const
+{
+    return mCol0;
+}
+
+inline const Vector4 Matrix4::getCol1( ) const
+{
+    return mCol1;
+}
+
+inline const Vector4 Matrix4::getCol2( ) const
+{
+    return mCol2;
+}
+
+inline const Vector4 Matrix4::getCol3( ) const
+{
+    return mCol3;
+}
+
+inline const Vector4 Matrix4::getCol( int col ) const
+{
+    return *(&mCol0 + col);
+}
+
+inline const Vector4 Matrix4::getRow( int row ) const
+{
+    return Vector4( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ), mCol3.getElem( row ) );
+}
+
+inline Vector4 & Matrix4::operator []( int col )
+{
+    return *(&mCol0 + col);
+}
+
+inline const Vector4 Matrix4::operator []( int col ) const
+{
+    return *(&mCol0 + col);
+}
+
+inline Matrix4 & Matrix4::operator =( const Matrix4 & mat )
+{
+    mCol0 = mat.mCol0;
+    mCol1 = mat.mCol1;
+    mCol2 = mat.mCol2;
+    mCol3 = mat.mCol3;
+    return *this;
+}
+
+inline const Matrix4 transpose( const Matrix4 & mat )
+{
+    return Matrix4(
+        Vector4( mat.getCol0().getX(), mat.getCol1().getX(), mat.getCol2().getX(), mat.getCol3().getX() ),
+        Vector4( mat.getCol0().getY(), mat.getCol1().getY(), mat.getCol2().getY(), mat.getCol3().getY() ),
+        Vector4( mat.getCol0().getZ(), mat.getCol1().getZ(), mat.getCol2().getZ(), mat.getCol3().getZ() ),
+        Vector4( mat.getCol0().getW(), mat.getCol1().getW(), mat.getCol2().getW(), mat.getCol3().getW() )
+    );
+}
+
+inline const Matrix4 inverse( const Matrix4 & mat )
+{
+    Vector4 res0, res1, res2, res3;
+    float mA, mB, mC, mD, mE, mF, mG, mH, mI, mJ, mK, mL, mM, mN, mO, mP, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, detInv;
+    mA = mat.getCol0().getX();
+    mB = mat.getCol0().getY();
+    mC = mat.getCol0().getZ();
+    mD = mat.getCol0().getW();
+    mE = mat.getCol1().getX();
+    mF = mat.getCol1().getY();
+    mG = mat.getCol1().getZ();
+    mH = mat.getCol1().getW();
+    mI = mat.getCol2().getX();
+    mJ = mat.getCol2().getY();
+    mK = mat.getCol2().getZ();
+    mL = mat.getCol2().getW();
+    mM = mat.getCol3().getX();
+    mN = mat.getCol3().getY();
+    mO = mat.getCol3().getZ();
+    mP = mat.getCol3().getW();
+    tmp0 = ( ( mK * mD ) - ( mC * mL ) );
+    tmp1 = ( ( mO * mH ) - ( mG * mP ) );
+    tmp2 = ( ( mB * mK ) - ( mJ * mC ) );
+    tmp3 = ( ( mF * mO ) - ( mN * mG ) );
+    tmp4 = ( ( mJ * mD ) - ( mB * mL ) );
+    tmp5 = ( ( mN * mH ) - ( mF * mP ) );
+    res0.setX( ( ( ( mJ * tmp1 ) - ( mL * tmp3 ) ) - ( mK * tmp5 ) ) );
+    res0.setY( ( ( ( mN * tmp0 ) - ( mP * tmp2 ) ) - ( mO * tmp4 ) ) );
+    res0.setZ( ( ( ( mD * tmp3 ) + ( mC * tmp5 ) ) - ( mB * tmp1 ) ) );
+    res0.setW( ( ( ( mH * tmp2 ) + ( mG * tmp4 ) ) - ( mF * tmp0 ) ) );
+    detInv = ( 1.0f / ( ( ( ( mA * res0.getX() ) + ( mE * res0.getY() ) ) + ( mI * res0.getZ() ) ) + ( mM * res0.getW() ) ) );
+    res1.setX( ( mI * tmp1 ) );
+    res1.setY( ( mM * tmp0 ) );
+    res1.setZ( ( mA * tmp1 ) );
+    res1.setW( ( mE * tmp0 ) );
+    res3.setX( ( mI * tmp3 ) );
+    res3.setY( ( mM * tmp2 ) );
+    res3.setZ( ( mA * tmp3 ) );
+    res3.setW( ( mE * tmp2 ) );
+    res2.setX( ( mI * tmp5 ) );
+    res2.setY( ( mM * tmp4 ) );
+    res2.setZ( ( mA * tmp5 ) );
+    res2.setW( ( mE * tmp4 ) );
+    tmp0 = ( ( mI * mB ) - ( mA * mJ ) );
+    tmp1 = ( ( mM * mF ) - ( mE * mN ) );
+    tmp2 = ( ( mI * mD ) - ( mA * mL ) );
+    tmp3 = ( ( mM * mH ) - ( mE * mP ) );
+    tmp4 = ( ( mI * mC ) - ( mA * mK ) );
+    tmp5 = ( ( mM * mG ) - ( mE * mO ) );
+    res2.setX( ( ( ( mL * tmp1 ) - ( mJ * tmp3 ) ) + res2.getX() ) );
+    res2.setY( ( ( ( mP * tmp0 ) - ( mN * tmp2 ) ) + res2.getY() ) );
+    res2.setZ( ( ( ( mB * tmp3 ) - ( mD * tmp1 ) ) - res2.getZ() ) );
+    res2.setW( ( ( ( mF * tmp2 ) - ( mH * tmp0 ) ) - res2.getW() ) );
+    res3.setX( ( ( ( mJ * tmp5 ) - ( mK * tmp1 ) ) + res3.getX() ) );
+    res3.setY( ( ( ( mN * tmp4 ) - ( mO * tmp0 ) ) + res3.getY() ) );
+    res3.setZ( ( ( ( mC * tmp1 ) - ( mB * tmp5 ) ) - res3.getZ() ) );
+    res3.setW( ( ( ( mG * tmp0 ) - ( mF * tmp4 ) ) - res3.getW() ) );
+    res1.setX( ( ( ( mK * tmp3 ) - ( mL * tmp5 ) ) - res1.getX() ) );
+    res1.setY( ( ( ( mO * tmp2 ) - ( mP * tmp4 ) ) - res1.getY() ) );
+    res1.setZ( ( ( ( mD * tmp5 ) - ( mC * tmp3 ) ) + res1.getZ() ) );
+    res1.setW( ( ( ( mH * tmp4 ) - ( mG * tmp2 ) ) + res1.getW() ) );
+    return Matrix4(
+        ( res0 * detInv ),
+        ( res1 * detInv ),
+        ( res2 * detInv ),
+        ( res3 * detInv )
+    );
+}
+
+inline const Matrix4 affineInverse( const Matrix4 & mat )
+{
+    Transform3 affineMat;
+    affineMat.setCol0( mat.getCol0().getXYZ( ) );
+    affineMat.setCol1( mat.getCol1().getXYZ( ) );
+    affineMat.setCol2( mat.getCol2().getXYZ( ) );
+    affineMat.setCol3( mat.getCol3().getXYZ( ) );
+    return Matrix4( inverse( affineMat ) );
+}
+
+inline const Matrix4 orthoInverse( const Matrix4 & mat )
+{
+    Transform3 affineMat;
+    affineMat.setCol0( mat.getCol0().getXYZ( ) );
+    affineMat.setCol1( mat.getCol1().getXYZ( ) );
+    affineMat.setCol2( mat.getCol2().getXYZ( ) );
+    affineMat.setCol3( mat.getCol3().getXYZ( ) );
+    return Matrix4( orthoInverse( affineMat ) );
+}
+
+inline float determinant( const Matrix4 & mat )
+{
+    float dx, dy, dz, dw, mA, mB, mC, mD, mE, mF, mG, mH, mI, mJ, mK, mL, mM, mN, mO, mP, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
+    mA = mat.getCol0().getX();
+    mB = mat.getCol0().getY();
+    mC = mat.getCol0().getZ();
+    mD = mat.getCol0().getW();
+    mE = mat.getCol1().getX();
+    mF = mat.getCol1().getY();
+    mG = mat.getCol1().getZ();
+    mH = mat.getCol1().getW();
+    mI = mat.getCol2().getX();
+    mJ = mat.getCol2().getY();
+    mK = mat.getCol2().getZ();
+    mL = mat.getCol2().getW();
+    mM = mat.getCol3().getX();
+    mN = mat.getCol3().getY();
+    mO = mat.getCol3().getZ();
+    mP = mat.getCol3().getW();
+    tmp0 = ( ( mK * mD ) - ( mC * mL ) );
+    tmp1 = ( ( mO * mH ) - ( mG * mP ) );
+    tmp2 = ( ( mB * mK ) - ( mJ * mC ) );
+    tmp3 = ( ( mF * mO ) - ( mN * mG ) );
+    tmp4 = ( ( mJ * mD ) - ( mB * mL ) );
+    tmp5 = ( ( mN * mH ) - ( mF * mP ) );
+    dx = ( ( ( mJ * tmp1 ) - ( mL * tmp3 ) ) - ( mK * tmp5 ) );
+    dy = ( ( ( mN * tmp0 ) - ( mP * tmp2 ) ) - ( mO * tmp4 ) );
+    dz = ( ( ( mD * tmp3 ) + ( mC * tmp5 ) ) - ( mB * tmp1 ) );
+    dw = ( ( ( mH * tmp2 ) + ( mG * tmp4 ) ) - ( mF * tmp0 ) );
+    return ( ( ( ( mA * dx ) + ( mE * dy ) ) + ( mI * dz ) ) + ( mM * dw ) );
+}
+
+inline const Matrix4 Matrix4::operator +( const Matrix4 & mat ) const
+{
+    return Matrix4(
+        ( mCol0 + mat.mCol0 ),
+        ( mCol1 + mat.mCol1 ),
+        ( mCol2 + mat.mCol2 ),
+        ( mCol3 + mat.mCol3 )
+    );
+}
+
+inline const Matrix4 Matrix4::operator -( const Matrix4 & mat ) const
+{
+    return Matrix4(
+        ( mCol0 - mat.mCol0 ),
+        ( mCol1 - mat.mCol1 ),
+        ( mCol2 - mat.mCol2 ),
+        ( mCol3 - mat.mCol3 )
+    );
+}
+
+inline Matrix4 & Matrix4::operator +=( const Matrix4 & mat )
+{
+    *this = *this + mat;
+    return *this;
+}
+
+inline Matrix4 & Matrix4::operator -=( const Matrix4 & mat )
+{
+    *this = *this - mat;
+    return *this;
+}
+
+inline const Matrix4 Matrix4::operator -( ) const
+{
+    return Matrix4(
+        ( -mCol0 ),
+        ( -mCol1 ),
+        ( -mCol2 ),
+        ( -mCol3 )
+    );
+}
+
+inline const Matrix4 absPerElem( const Matrix4 & mat )
+{
+    return Matrix4(
+        absPerElem( mat.getCol0() ),
+        absPerElem( mat.getCol1() ),
+        absPerElem( mat.getCol2() ),
+        absPerElem( mat.getCol3() )
+    );
+}
+
+inline const Matrix4 Matrix4::operator *( float scalar ) const
+{
+    return Matrix4(
+        ( mCol0 * scalar ),
+        ( mCol1 * scalar ),
+        ( mCol2 * scalar ),
+        ( mCol3 * scalar )
+    );
+}
+
+inline Matrix4 & Matrix4::operator *=( float scalar )
+{
+    *this = *this * scalar;
+    return *this;
+}
+
+inline const Matrix4 operator *( float scalar, const Matrix4 & mat )
+{
+    return mat * scalar;
+}
+
+inline const Vector4 Matrix4::operator *( const Vector4 & vec ) const
+{
+    return Vector4(
+        ( ( ( ( mCol0.getX() * vec.getX() ) + ( mCol1.getX() * vec.getY() ) ) + ( mCol2.getX() * vec.getZ() ) ) + ( mCol3.getX() * vec.getW() ) ),
+        ( ( ( ( mCol0.getY() * vec.getX() ) + ( mCol1.getY() * vec.getY() ) ) + ( mCol2.getY() * vec.getZ() ) ) + ( mCol3.getY() * vec.getW() ) ),
+        ( ( ( ( mCol0.getZ() * vec.getX() ) + ( mCol1.getZ() * vec.getY() ) ) + ( mCol2.getZ() * vec.getZ() ) ) + ( mCol3.getZ() * vec.getW() ) ),
+        ( ( ( ( mCol0.getW() * vec.getX() ) + ( mCol1.getW() * vec.getY() ) ) + ( mCol2.getW() * vec.getZ() ) ) + ( mCol3.getW() * vec.getW() ) )
+    );
+}
+
+// Multiplies this 4x4 matrix by a 3-D vector, producing a 4-D result.
+// Only the first three columns take part; the fourth column is not used.
+inline const Vector4 Matrix4::operator *( const Vector3 & vec ) const
+{
+    const float vx = vec.getX();
+    const float vy = vec.getY();
+    const float vz = vec.getZ();
+    const float outX = ( ( mCol0.getX() * vx ) + ( mCol1.getX() * vy ) ) + ( mCol2.getX() * vz );
+    const float outY = ( ( mCol0.getY() * vx ) + ( mCol1.getY() * vy ) ) + ( mCol2.getY() * vz );
+    const float outZ = ( ( mCol0.getZ() * vx ) + ( mCol1.getZ() * vy ) ) + ( mCol2.getZ() * vz );
+    const float outW = ( ( mCol0.getW() * vx ) + ( mCol1.getW() * vy ) ) + ( mCol2.getW() * vz );
+    return Vector4( outX, outY, outZ, outW );
+}
+
+// Multiplies this 4x4 matrix by a 3-D point, producing a 4-D result.
+// The fourth column is added unscaled, i.e. the point is treated as having w = 1.
+inline const Vector4 Matrix4::operator *( const Point3 & pnt ) const
+{
+    const float px = pnt.getX();
+    const float py = pnt.getY();
+    const float pz = pnt.getZ();
+    const float outX = ( ( ( mCol0.getX() * px ) + ( mCol1.getX() * py ) ) + ( mCol2.getX() * pz ) ) + mCol3.getX();
+    const float outY = ( ( ( mCol0.getY() * px ) + ( mCol1.getY() * py ) ) + ( mCol2.getY() * pz ) ) + mCol3.getY();
+    const float outZ = ( ( ( mCol0.getZ() * px ) + ( mCol1.getZ() * py ) ) + ( mCol2.getZ() * pz ) ) + mCol3.getZ();
+    const float outW = ( ( ( mCol0.getW() * px ) + ( mCol1.getW() * py ) ) + ( mCol2.getW() * pz ) ) + mCol3.getW();
+    return Vector4( outX, outY, outZ, outW );
+}
+
+// Matrix product: each column of the result is this matrix applied to the
+// matching column of mat.
+inline const Matrix4 Matrix4::operator *( const Matrix4 & mat ) const
+{
+    const Vector4 out0 = *this * mat.mCol0;
+    const Vector4 out1 = *this * mat.mCol1;
+    const Vector4 out2 = *this * mat.mCol2;
+    const Vector4 out3 = *this * mat.mCol3;
+    return Matrix4( out0, out1, out2, out3 );
+}
+
+// In-place matrix multiply: replaces this matrix with ( *this * mat ).
+inline Matrix4 & Matrix4::operator *=( const Matrix4 & mat )
+{
+    const Matrix4 product = *this * mat;
+    *this = product;
+    return *this;
+}
+
+// Multiplies this 4x4 matrix by a 3x4 transform.
+// The transform's first three columns go through the Vector3 product and its
+// fourth column is converted to a Point3 and goes through the Point3 product.
+inline const Matrix4 Matrix4::operator *( const Transform3 & tfrm ) const
+{
+    const Vector4 out0 = *this * tfrm.getCol0();
+    const Vector4 out1 = *this * tfrm.getCol1();
+    const Vector4 out2 = *this * tfrm.getCol2();
+    const Point3 origin( tfrm.getCol3() );
+    const Vector4 out3 = *this * origin;
+    return Matrix4( out0, out1, out2, out3 );
+}
+
+// In-place multiply by a 3x4 transform: replaces this matrix with ( *this * tfrm ).
+inline Matrix4 & Matrix4::operator *=( const Transform3 & tfrm )
+{
+    const Matrix4 product = *this * tfrm;
+    *this = product;
+    return *this;
+}
+
+// Returns a matrix whose columns are mulPerElem() of the matching columns of
+// mat0 and mat1.
+inline const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 )
+{
+    const Vector4 prod0 = mulPerElem( mat0.getCol0(), mat1.getCol0() );
+    const Vector4 prod1 = mulPerElem( mat0.getCol1(), mat1.getCol1() );
+    const Vector4 prod2 = mulPerElem( mat0.getCol2(), mat1.getCol2() );
+    const Vector4 prod3 = mulPerElem( mat0.getCol3(), mat1.getCol3() );
+    return Matrix4( prod0, prod1, prod2, prod3 );
+}
+
+// Returns the matrix whose columns are Vector4's x, y, z and w axis vectors.
+inline const Matrix4 Matrix4::identity( )
+{
+    const Vector4 unitX = Vector4::xAxis( );
+    const Vector4 unitY = Vector4::yAxis( );
+    const Vector4 unitZ = Vector4::zAxis( );
+    const Vector4 unitW = Vector4::wAxis( );
+    return Matrix4( unitX, unitY, unitZ, unitW );
+}
+
+// Overwrites the x, y, z parts of the first three columns with the columns of
+// mat3.  The fourth column is not touched.  Returns *this.
+inline Matrix4 & Matrix4::setUpper3x3( const Matrix3 & mat3 )
+{
+    const Vector3 upper0 = mat3.getCol0();
+    const Vector3 upper1 = mat3.getCol1();
+    const Vector3 upper2 = mat3.getCol2();
+    mCol0.setXYZ( upper0 );
+    mCol1.setXYZ( upper1 );
+    mCol2.setXYZ( upper2 );
+    return *this;
+}
+
+// Returns the 3x3 matrix made from the x, y, z parts of the first three columns.
+inline const Matrix3 Matrix4::getUpper3x3( ) const
+{
+    const Vector3 upper0 = mCol0.getXYZ( );
+    const Vector3 upper1 = mCol1.getXYZ( );
+    const Vector3 upper2 = mCol2.getXYZ( );
+    return Matrix3( upper0, upper1, upper2 );
+}
+
+// Writes translateVec into the x, y, z parts of the fourth column and returns *this.
+inline Matrix4 & Matrix4::setTranslation( const Vector3 & translateVec )
+{
+    this->mCol3.setXYZ( translateVec );
+    return *this;
+}
+
+// Returns the x, y, z parts of the fourth column.
+inline const Vector3 Matrix4::getTranslation( ) const
+{
+    const Vector3 translateVec = mCol3.getXYZ( );
+    return translateVec;
+}
+
+// Builds a 4x4 rotation about the x axis; the angle is given in radians.
+inline const Matrix4 Matrix4::rotationX( float radians )
+{
+    const float sinA = sinf( radians );
+    const float cosA = cosf( radians );
+    const Vector4 col1( 0.0f, cosA, sinA, 0.0f );
+    const Vector4 col2( 0.0f, -sinA, cosA, 0.0f );
+    return Matrix4( Vector4::xAxis( ), col1, col2, Vector4::wAxis( ) );
+}
+
+// Builds a 4x4 rotation about the y axis; the angle is given in radians.
+inline const Matrix4 Matrix4::rotationY( float radians )
+{
+    const float sinA = sinf( radians );
+    const float cosA = cosf( radians );
+    const Vector4 col0( cosA, 0.0f, -sinA, 0.0f );
+    const Vector4 col2( sinA, 0.0f, cosA, 0.0f );
+    return Matrix4( col0, Vector4::yAxis( ), col2, Vector4::wAxis( ) );
+}
+
+// Builds a 4x4 rotation about the z axis; the angle is given in radians.
+inline const Matrix4 Matrix4::rotationZ( float radians )
+{
+    const float sinA = sinf( radians );
+    const float cosA = cosf( radians );
+    const Vector4 col0( cosA, sinA, 0.0f, 0.0f );
+    const Vector4 col1( -sinA, cosA, 0.0f, 0.0f );
+    return Matrix4( col0, col1, Vector4::zAxis( ), Vector4::wAxis( ) );
+}
+
+// Builds a 4x4 rotation from three angles (radians) stored in radiansXYZ.
+// The columns below are those of Rz * Ry * Rx, i.e. the x rotation is applied
+// first, then y, then z.
+inline const Matrix4 Matrix4::rotationZYX( const Vector3 & radiansXYZ )
+{
+    const float sinX = sinf( radiansXYZ.getX() );
+    const float cosX = cosf( radiansXYZ.getX() );
+    const float sinY = sinf( radiansXYZ.getY() );
+    const float cosY = cosf( radiansXYZ.getY() );
+    const float sinZ = sinf( radiansXYZ.getZ() );
+    const float cosZ = cosf( radiansXYZ.getZ() );
+    // Products shared by the second and third columns.
+    const float cosZsinY = cosZ * sinY;
+    const float sinZsinY = sinZ * sinY;
+    const Vector4 col0( cosZ * cosY, sinZ * cosY, -sinY, 0.0f );
+    const Vector4 col1( ( cosZsinY * sinX ) - ( sinZ * cosX ), ( sinZsinY * sinX ) + ( cosZ * cosX ), cosY * sinX, 0.0f );
+    const Vector4 col2( ( cosZsinY * cosX ) + ( sinZ * sinX ), ( sinZsinY * cosX ) - ( cosZ * sinX ), cosY * cosX, 0.0f );
+    return Matrix4( col0, col1, col2, Vector4::wAxis( ) );
+}
+
+// Builds a 4x4 rotation of the given angle (radians) about the axis unitVec.
+// The axis is used as given; nothing here normalizes it.
+inline const Matrix4 Matrix4::rotation( float radians, const Vector3 & unitVec )
+{
+    const float sinA = sinf( radians );
+    const float cosA = cosf( radians );
+    const float ax = unitVec.getX();
+    const float ay = unitVec.getY();
+    const float az = unitVec.getZ();
+    const float axy = ax * ay;
+    const float ayz = ay * az;
+    const float azx = az * ax;
+    const float oneMinusCos = 1.0f - cosA;
+    const Vector4 col0( ( ( ax * ax ) * oneMinusCos ) + cosA, ( axy * oneMinusCos ) + ( az * sinA ), ( azx * oneMinusCos ) - ( ay * sinA ), 0.0f );
+    const Vector4 col1( ( axy * oneMinusCos ) - ( az * sinA ), ( ( ay * ay ) * oneMinusCos ) + cosA, ( ayz * oneMinusCos ) + ( ax * sinA ), 0.0f );
+    const Vector4 col2( ( azx * oneMinusCos ) + ( ay * sinA ), ( ayz * oneMinusCos ) - ( ax * sinA ), ( ( az * az ) * oneMinusCos ) + cosA, 0.0f );
+    return Matrix4( col0, col1, col2, Vector4::wAxis( ) );
+}
+
+// Builds a 4x4 matrix from a quaternion by first building the Transform3
+// rotation and then converting that to a Matrix4.
+inline const Matrix4 Matrix4::rotation( const Quat & unitQuat )
+{
+    const Transform3 rot = Transform3::rotation( unitQuat );
+    return Matrix4( rot );
+}
+
+// Builds a 4x4 scale matrix: scaleVec's x, y, z on the diagonal, the w axis as
+// the fourth column, zeros elsewhere.
+inline const Matrix4 Matrix4::scale( const Vector3 & scaleVec )
+{
+    const Vector4 col0( scaleVec.getX(), 0.0f, 0.0f, 0.0f );
+    const Vector4 col1( 0.0f, scaleVec.getY(), 0.0f, 0.0f );
+    const Vector4 col2( 0.0f, 0.0f, scaleVec.getZ(), 0.0f );
+    return Matrix4( col0, col1, col2, Vector4::wAxis( ) );
+}
+
+// Returns mat with its first three columns multiplied by scaleVec's x, y and z
+// respectively; the fourth column is copied unchanged.
+inline const Matrix4 appendScale( const Matrix4 & mat, const Vector3 & scaleVec )
+{
+    const Vector4 scaled0 = mat.getCol0() * scaleVec.getX( );
+    const Vector4 scaled1 = mat.getCol1() * scaleVec.getY( );
+    const Vector4 scaled2 = mat.getCol2() * scaleVec.getZ( );
+    return Matrix4( scaled0, scaled1, scaled2, mat.getCol3() );
+}
+
+// Returns mat with every column multiplied per element by ( scaleVec, 1 ),
+// so the w component of each column is left unscaled.
+inline const Matrix4 prependScale( const Vector3 & scaleVec, const Matrix4 & mat )
+{
+    const Vector4 scale4 = Vector4( scaleVec, 1.0f );
+    const Vector4 scaled0 = mulPerElem( mat.getCol0(), scale4 );
+    const Vector4 scaled1 = mulPerElem( mat.getCol1(), scale4 );
+    const Vector4 scaled2 = mulPerElem( mat.getCol2(), scale4 );
+    const Vector4 scaled3 = mulPerElem( mat.getCol3(), scale4 );
+    return Matrix4( scaled0, scaled1, scaled2, scaled3 );
+}
+
+// Builds a 4x4 translation matrix: the x, y, z axis vectors as the first three
+// columns and ( translateVec, 1 ) as the fourth.
+inline const Matrix4 Matrix4::translation( const Vector3 & translateVec )
+{
+    const Vector4 col3 = Vector4( translateVec, 1.0f );
+    return Matrix4( Vector4::xAxis( ), Vector4::yAxis( ), Vector4::zAxis( ), col3 );
+}
+
+// Builds a viewing matrix for an eye at eyePos looking toward lookAtPos.
+// An eye frame is assembled from three axes (z runs from lookAtPos toward
+// eyePos, x is up cross z, y is z cross x) plus eyePos, and the result is the
+// orthoInverse() of that frame.
+inline const Matrix4 Matrix4::lookAt( const Point3 & eyePos, const Point3 & lookAtPos, const Vector3 & upVec )
+{
+    const Vector3 upDir = normalize( upVec );
+    const Vector3 axisZ = normalize( ( eyePos - lookAtPos ) );
+    const Vector3 axisX = normalize( cross( upDir, axisZ ) );
+    // Recompute y from z and x rather than using the normalized up vector directly.
+    const Vector3 axisY = cross( axisZ, axisX );
+    const Matrix4 eyeFrame = Matrix4( Vector4( axisX ), Vector4( axisY ), Vector4( axisZ ), Vector4( eyePos ) );
+    return orthoInverse( eyeFrame );
+}
+
+// Builds a perspective projection matrix from a vertical field of view
+// (radians), an aspect ratio and the near/far distances.
+// No check is made for zNear == zFar or aspect == 0.
+inline const Matrix4 Matrix4::perspective( float fovyRadians, float aspect, float zNear, float zFar )
+{
+    // tan( PI_OVER_2 - fovy/2 ); presumably the cotangent of half the field of
+    // view, assuming _VECTORMATH_PI_OVER_2 is pi/2.
+    const float focal = tanf( ( (float)( _VECTORMATH_PI_OVER_2 ) - ( 0.5f * fovyRadians ) ) );
+    const float invDepth = 1.0f / ( zNear - zFar );
+    const Vector4 col0( focal / aspect, 0.0f, 0.0f, 0.0f );
+    const Vector4 col1( 0.0f, focal, 0.0f, 0.0f );
+    const Vector4 col2( 0.0f, 0.0f, ( zNear + zFar ) * invDepth, -1.0f );
+    const Vector4 col3( 0.0f, 0.0f, ( ( zNear * zFar ) * invDepth ) * 2.0f, 0.0f );
+    return Matrix4( col0, col1, col2, col3 );
+}
+
+// Builds a perspective projection matrix from the six frustum plane values.
+// No check is made for right == left, top == bottom or zNear == zFar.
+inline const Matrix4 Matrix4::frustum( float left, float right, float bottom, float top, float zNear, float zFar )
+{
+    const float sumRL = right + left;
+    const float sumTB = top + bottom;
+    const float sumNF = zNear + zFar;
+    const float invRL = 1.0f / ( right - left );
+    const float invTB = 1.0f / ( top - bottom );
+    const float invNF = 1.0f / ( zNear - zFar );
+    const float twoNear = zNear + zNear;
+    const Vector4 col0( twoNear * invRL, 0.0f, 0.0f, 0.0f );
+    const Vector4 col1( 0.0f, twoNear * invTB, 0.0f, 0.0f );
+    const Vector4 col2( sumRL * invRL, sumTB * invTB, sumNF * invNF, -1.0f );
+    const Vector4 col3( 0.0f, 0.0f, ( twoNear * invNF ) * zFar, 0.0f );
+    return Matrix4( col0, col1, col2, col3 );
+}
+
+// Builds an orthographic projection matrix from the six clipping plane values.
+// No check is made for right == left, top == bottom or zNear == zFar.
+inline const Matrix4 Matrix4::orthographic( float left, float right, float bottom, float top, float zNear, float zFar )
+{
+    const float sumRL = right + left;
+    const float sumTB = top + bottom;
+    const float sumNF = zNear + zFar;
+    const float invRL = 1.0f / ( right - left );
+    const float invTB = 1.0f / ( top - bottom );
+    const float invNF = 1.0f / ( zNear - zFar );
+    // Diagonal entries are twice the reciprocal extents, formed by addition.
+    const Vector4 col0( invRL + invRL, 0.0f, 0.0f, 0.0f );
+    const Vector4 col1( 0.0f, invTB + invTB, 0.0f, 0.0f );
+    const Vector4 col2( 0.0f, 0.0f, invNF + invNF, 0.0f );
+    const Vector4 col3( -sumRL * invRL, -sumTB * invTB, sumNF * invNF, 1.0f );
+    return Matrix4( col0, col1, col2, col3 );
+}
+
+// Column-wise select: applies the Vector4 select() to each pair of matching
+// columns with the same select1 flag (presumably picking mat1 when it is true;
+// see the Vector4 overload to confirm).
+inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, bool select1 )
+{
+    const Vector4 picked0 = select( mat0.getCol0(), mat1.getCol0(), select1 );
+    const Vector4 picked1 = select( mat0.getCol1(), mat1.getCol1(), select1 );
+    const Vector4 picked2 = select( mat0.getCol2(), mat1.getCol2(), select1 );
+    const Vector4 picked3 = select( mat0.getCol3(), mat1.getCol3(), select1 );
+    return Matrix4( picked0, picked1, picked2, picked3 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+// Debug helper: prints the four rows of mat in order, one print() call per row.
+inline void print( const Matrix4 & mat )
+{
+    for ( int row = 0; row < 4; ++row )
+    {
+        print( mat.getRow( row ) );
+    }
+}
+
+// Debug helper: prints "name:" on its own line, then the matrix rows.
+inline void print( const Matrix4 & mat, const char * name )
+{
+    printf("%s:\n", name);
+    print( mat );
+}
+
+#endif
+
+// Copy constructor: copies the four columns of tfrm.
+inline Transform3::Transform3( const Transform3 & tfrm )
+    : mCol0( tfrm.mCol0 ),
+      mCol1( tfrm.mCol1 ),
+      mCol2( tfrm.mCol2 ),
+      mCol3( tfrm.mCol3 )
+{
+}
+
+// Sets all four columns to Vector3( scalar ).
+inline Transform3::Transform3( float scalar )
+{
+    const Vector3 fill = Vector3( scalar );
+    mCol0 = fill;
+    mCol1 = fill;
+    mCol2 = fill;
+    mCol3 = fill;
+}
+
+// Builds a transform from its four columns.
+inline Transform3::Transform3( const Vector3 & _col0, const Vector3 & _col1, const Vector3 & _col2, const Vector3 & _col3 )
+    : mCol0( _col0 ),
+      mCol1( _col1 ),
+      mCol2( _col2 ),
+      mCol3( _col3 )
+{
+}
+
+// Builds a transform whose first three columns come from the 3x3 matrix tfrm
+// and whose fourth column is translateVec.
+inline Transform3::Transform3( const Matrix3 & tfrm, const Vector3 & translateVec )
+{
+    setUpper3x3( tfrm ).setTranslation( translateVec );
+}
+
+// Builds a transform whose first three columns come from Matrix3( unitQuat )
+// and whose fourth column is translateVec.
+inline Transform3::Transform3( const Quat & unitQuat, const Vector3 & translateVec )
+{
+    const Matrix3 rot = Matrix3( unitQuat );
+    setUpper3x3( rot ).setTranslation( translateVec );
+}
+
+// Replaces column 0 and returns *this so calls can be chained.
+inline Transform3 & Transform3::setCol0( const Vector3 & _col0 )
+{
+    this->mCol0 = _col0;
+    return *this;
+}
+
+// Replaces column 1 and returns *this so calls can be chained.
+inline Transform3 & Transform3::setCol1( const Vector3 & _col1 )
+{
+    this->mCol1 = _col1;
+    return *this;
+}
+
+// Replaces column 2 and returns *this so calls can be chained.
+inline Transform3 & Transform3::setCol2( const Vector3 & _col2 )
+{
+    this->mCol2 = _col2;
+    return *this;
+}
+
+// Replaces column 3 and returns *this so calls can be chained.
+inline Transform3 & Transform3::setCol3( const Vector3 & _col3 )
+{
+    this->mCol3 = _col3;
+    return *this;
+}
+
+// Replaces the column selected by index and returns *this.
+// The column is addressed by offsetting from &mCol0, so this relies on the
+// four column members being laid out contiguously; col is not range-checked.
+inline Transform3 & Transform3::setCol( int col, const Vector3 & vec )
+{
+    Vector3 * const columns = &mCol0;
+    columns[ col ] = vec;
+    return *this;
+}
+
+// Writes one row of the 3x4 transform: element i of vec goes into the given
+// row of column i.  Returns *this.
+inline Transform3 & Transform3::setRow( int row, const Vector4 & vec )
+{
+    const float elem0 = vec.getElem( 0 );
+    const float elem1 = vec.getElem( 1 );
+    const float elem2 = vec.getElem( 2 );
+    const float elem3 = vec.getElem( 3 );
+    mCol0.setElem( row, elem0 );
+    mCol1.setElem( row, elem1 );
+    mCol2.setElem( row, elem2 );
+    mCol3.setElem( row, elem3 );
+    return *this;
+}
+
+// Sets a single element: copies the column out, changes one element of the
+// copy, and writes the column back.  Returns *this.
+inline Transform3 & Transform3::setElem( int col, int row, float val )
+{
+    Vector3 column = getCol( col );
+    column.setElem( row, val );
+    return setCol( col, column );
+}
+
+// Returns the element at the given column and row.
+inline float Transform3::getElem( int col, int row ) const
+{
+    const Vector3 column = getCol( col );
+    return column.getElem( row );
+}
+
+// Returns a copy of column 0.
+inline const Vector3 Transform3::getCol0( ) const
+{
+    return this->mCol0;
+}
+
+// Returns a copy of column 1.
+inline const Vector3 Transform3::getCol1( ) const
+{
+    return this->mCol1;
+}
+
+// Returns a copy of column 2.
+inline const Vector3 Transform3::getCol2( ) const
+{
+    return this->mCol2;
+}
+
+// Returns a copy of column 3.
+inline const Vector3 Transform3::getCol3( ) const
+{
+    return this->mCol3;
+}
+
+// Returns a copy of the column selected by index.
+// The column is addressed by offsetting from &mCol0; col is not range-checked.
+inline const Vector3 Transform3::getCol( int col ) const
+{
+    const Vector3 * const columns = &mCol0;
+    return columns[ col ];
+}
+
+// Returns the given row as a Vector4: one element from each of the four columns.
+inline const Vector4 Transform3::getRow( int row ) const
+{
+    const float elem0 = mCol0.getElem( row );
+    const float elem1 = mCol1.getElem( row );
+    const float elem2 = mCol2.getElem( row );
+    const float elem3 = mCol3.getElem( row );
+    return Vector4( elem0, elem1, elem2, elem3 );
+}
+
+// Returns a mutable reference to the column selected by index.
+// The column is addressed by offsetting from &mCol0; col is not range-checked.
+inline Vector3 & Transform3::operator []( int col )
+{
+    Vector3 * const columns = &mCol0;
+    return columns[ col ];
+}
+
+// Returns a copy of the column selected by index (const overload).
+// The column is addressed by offsetting from &mCol0; col is not range-checked.
+inline const Vector3 Transform3::operator []( int col ) const
+{
+    const Vector3 * const columns = &mCol0;
+    return columns[ col ];
+}
+
+// Copy assignment: copies the four columns of tfrm and returns *this.
+inline Transform3 & Transform3::operator =( const Transform3 & tfrm )
+{
+    this->mCol0 = tfrm.mCol0;
+    this->mCol1 = tfrm.mCol1;
+    this->mCol2 = tfrm.mCol2;
+    this->mCol3 = tfrm.mCol3;
+    return *this;
+}
+
+// Inverts a 3x4 transform.
+// The first three result columns are formed from cross products of the input
+// columns scaled by the reciprocal of dot( col2, cross( col0, col1 ) ).  The
+// fourth is the negated combination of those columns weighted by the input
+// translation.  No check is made for a zero determinant.
+inline const Transform3 inverse( const Transform3 & tfrm )
+{
+    const Vector3 cross12 = cross( tfrm.getCol1(), tfrm.getCol2() );
+    const Vector3 cross20 = cross( tfrm.getCol2(), tfrm.getCol0() );
+    const Vector3 cross01 = cross( tfrm.getCol0(), tfrm.getCol1() );
+    const float invDet = 1.0f / dot( tfrm.getCol2(), cross01 );
+    const Vector3 inv0 = Vector3( ( cross12.getX() * invDet ), ( cross20.getX() * invDet ), ( cross01.getX() * invDet ) );
+    const Vector3 inv1 = Vector3( ( cross12.getY() * invDet ), ( cross20.getY() * invDet ), ( cross01.getY() * invDet ) );
+    const Vector3 inv2 = Vector3( ( cross12.getZ() * invDet ), ( cross20.getZ() * invDet ), ( cross01.getZ() * invDet ) );
+    const Vector3 translate = tfrm.getCol3();
+    const Vector3 invTranslate = -( ( inv0 * translate.getX() ) + ( ( inv1 * translate.getY() ) + ( inv2 * translate.getZ() ) ) );
+    return Transform3( inv0, inv1, inv2, invTranslate );
+}
+
+// Inverse that transposes the upper 3x3 instead of inverting it; the new
+// translation is the negated combination of the transposed columns weighted by
+// the input translation.  Presumably intended for transforms whose upper 3x3
+// is orthonormal; this is not checked here.
+inline const Transform3 orthoInverse( const Transform3 & tfrm )
+{
+    const Vector3 col0 = tfrm.getCol0();
+    const Vector3 col1 = tfrm.getCol1();
+    const Vector3 col2 = tfrm.getCol2();
+    const Vector3 translate = tfrm.getCol3();
+    const Vector3 inv0 = Vector3( col0.getX(), col1.getX(), col2.getX() );
+    const Vector3 inv1 = Vector3( col0.getY(), col1.getY(), col2.getY() );
+    const Vector3 inv2 = Vector3( col0.getZ(), col1.getZ(), col2.getZ() );
+    const Vector3 invTranslate = -( ( inv0 * translate.getX() ) + ( ( inv1 * translate.getY() ) + ( inv2 * translate.getZ() ) ) );
+    return Transform3( inv0, inv1, inv2, invTranslate );
+}
+
+// Returns a transform built by applying absPerElem() to each column of tfrm.
+inline const Transform3 absPerElem( const Transform3 & tfrm )
+{
+    const Vector3 abs0 = absPerElem( tfrm.getCol0() );
+    const Vector3 abs1 = absPerElem( tfrm.getCol1() );
+    const Vector3 abs2 = absPerElem( tfrm.getCol2() );
+    const Vector3 abs3 = absPerElem( tfrm.getCol3() );
+    return Transform3( abs0, abs1, abs2, abs3 );
+}
+
+// Applies the first three columns of this transform to a 3-D vector.
+// The fourth (translation) column is not used.
+inline const Vector3 Transform3::operator *( const Vector3 & vec ) const
+{
+    const float vx = vec.getX();
+    const float vy = vec.getY();
+    const float vz = vec.getZ();
+    const float outX = ( ( mCol0.getX() * vx ) + ( mCol1.getX() * vy ) ) + ( mCol2.getX() * vz );
+    const float outY = ( ( mCol0.getY() * vx ) + ( mCol1.getY() * vy ) ) + ( mCol2.getY() * vz );
+    const float outZ = ( ( mCol0.getZ() * vx ) + ( mCol1.getZ() * vy ) ) + ( mCol2.getZ() * vz );
+    return Vector3( outX, outY, outZ );
+}
+
+// Applies this transform to a 3-D point: the first three columns are weighted
+// by the point's x, y, z and the fourth column is then added.
+inline const Point3 Transform3::operator *( const Point3 & pnt ) const
+{
+    const float px = pnt.getX();
+    const float py = pnt.getY();
+    const float pz = pnt.getZ();
+    const float outX = ( ( ( mCol0.getX() * px ) + ( mCol1.getX() * py ) ) + ( mCol2.getX() * pz ) ) + mCol3.getX();
+    const float outY = ( ( ( mCol0.getY() * px ) + ( mCol1.getY() * py ) ) + ( mCol2.getY() * pz ) ) + mCol3.getY();
+    const float outZ = ( ( ( mCol0.getZ() * px ) + ( mCol1.getZ() * py ) ) + ( mCol2.getZ() * pz ) ) + mCol3.getZ();
+    return Point3( outX, outY, outZ );
+}
+
+// Composes two transforms.  The first three columns of tfrm go through the
+// Vector3 product; its fourth column is transformed as a Point3 (so this
+// transform's translation is added) and converted back to a Vector3.
+inline const Transform3 Transform3::operator *( const Transform3 & tfrm ) const
+{
+    const Vector3 out0 = *this * tfrm.mCol0;
+    const Vector3 out1 = *this * tfrm.mCol1;
+    const Vector3 out2 = *this * tfrm.mCol2;
+    const Point3 movedOrigin = *this * Point3( tfrm.mCol3 );
+    return Transform3( out0, out1, out2, Vector3( movedOrigin ) );
+}
+
+// In-place composition: replaces this transform with ( *this * tfrm ).
+inline Transform3 & Transform3::operator *=( const Transform3 & tfrm )
+{
+    const Transform3 product = *this * tfrm;
+    *this = product;
+    return *this;
+}
+
+// Returns a transform whose columns are mulPerElem() of the matching columns
+// of tfrm0 and tfrm1.
+inline const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 )
+{
+    const Vector3 prod0 = mulPerElem( tfrm0.getCol0(), tfrm1.getCol0() );
+    const Vector3 prod1 = mulPerElem( tfrm0.getCol1(), tfrm1.getCol1() );
+    const Vector3 prod2 = mulPerElem( tfrm0.getCol2(), tfrm1.getCol2() );
+    const Vector3 prod3 = mulPerElem( tfrm0.getCol3(), tfrm1.getCol3() );
+    return Transform3( prod0, prod1, prod2, prod3 );
+}
+
+// Returns the transform whose first three columns are the x, y, z axis
+// vectors and whose fourth column is Vector3( 0.0f ).
+inline const Transform3 Transform3::identity( )
+{
+    const Vector3 noTranslation = Vector3( 0.0f );
+    return Transform3( Vector3::xAxis( ), Vector3::yAxis( ), Vector3::zAxis( ), noTranslation );
+}
+
+// Replaces the first three columns with the columns of the 3x3 matrix tfrm.
+// The fourth column is not touched.  Returns *this.
+inline Transform3 & Transform3::setUpper3x3( const Matrix3 & tfrm )
+{
+    this->mCol0 = tfrm.getCol0();
+    this->mCol1 = tfrm.getCol1();
+    this->mCol2 = tfrm.getCol2();
+    return *this;
+}
+
+// Returns the 3x3 matrix made from the first three columns.
+inline const Matrix3 Transform3::getUpper3x3( ) const
+{
+    const Matrix3 upper = Matrix3( mCol0, mCol1, mCol2 );
+    return upper;
+}
+
+// Replaces the fourth column with translateVec and returns *this.
+inline Transform3 & Transform3::setTranslation( const Vector3 & translateVec )
+{
+    this->mCol3 = translateVec;
+    return *this;
+}
+
+// Returns a copy of the fourth column.
+inline const Vector3 Transform3::getTranslation( ) const
+{
+    return this->mCol3;
+}
+
+// Builds a rotation about the x axis (angle in radians) with a Vector3( 0.0f )
+// fourth column.
+inline const Transform3 Transform3::rotationX( float radians )
+{
+    const float sinA = sinf( radians );
+    const float cosA = cosf( radians );
+    const Vector3 col1( 0.0f, cosA, sinA );
+    const Vector3 col2( 0.0f, -sinA, cosA );
+    return Transform3( Vector3::xAxis( ), col1, col2, Vector3( 0.0f ) );
+}
+
+// Builds a rotation about the y axis (angle in radians) with a Vector3( 0.0f )
+// fourth column.
+inline const Transform3 Transform3::rotationY( float radians )
+{
+    const float sinA = sinf( radians );
+    const float cosA = cosf( radians );
+    const Vector3 col0( cosA, 0.0f, -sinA );
+    const Vector3 col2( sinA, 0.0f, cosA );
+    return Transform3( col0, Vector3::yAxis( ), col2, Vector3( 0.0f ) );
+}
+
+// Builds a rotation about the z axis (angle in radians) with a Vector3( 0.0f )
+// fourth column.
+inline const Transform3 Transform3::rotationZ( float radians )
+{
+    const float sinA = sinf( radians );
+    const float cosA = cosf( radians );
+    const Vector3 col0( cosA, sinA, 0.0f );
+    const Vector3 col1( -sinA, cosA, 0.0f );
+    return Transform3( col0, col1, Vector3::zAxis( ), Vector3( 0.0f ) );
+}
+
+// Builds a rotation from three angles (radians) stored in radiansXYZ, with a
+// Vector3( 0.0f ) fourth column.  The columns below are those of Rz * Ry * Rx,
+// i.e. the x rotation is applied first, then y, then z.
+inline const Transform3 Transform3::rotationZYX( const Vector3 & radiansXYZ )
+{
+    const float sinX = sinf( radiansXYZ.getX() );
+    const float cosX = cosf( radiansXYZ.getX() );
+    const float sinY = sinf( radiansXYZ.getY() );
+    const float cosY = cosf( radiansXYZ.getY() );
+    const float sinZ = sinf( radiansXYZ.getZ() );
+    const float cosZ = cosf( radiansXYZ.getZ() );
+    // Products shared by the second and third columns.
+    const float cosZsinY = cosZ * sinY;
+    const float sinZsinY = sinZ * sinY;
+    const Vector3 col0( cosZ * cosY, sinZ * cosY, -sinY );
+    const Vector3 col1( ( cosZsinY * sinX ) - ( sinZ * cosX ), ( sinZsinY * sinX ) + ( cosZ * cosX ), cosY * sinX );
+    const Vector3 col2( ( cosZsinY * cosX ) + ( sinZ * sinX ), ( sinZsinY * cosX ) - ( cosZ * sinX ), cosY * cosX );
+    return Transform3( col0, col1, col2, Vector3( 0.0f ) );
+}
+
+// Builds an axis-angle rotation by wrapping Matrix3::rotation() with a
+// Vector3( 0.0f ) fourth column.
+inline const Transform3 Transform3::rotation( float radians, const Vector3 & unitVec )
+{
+    const Matrix3 rot = Matrix3::rotation( radians, unitVec );
+    const Vector3 noTranslation = Vector3( 0.0f );
+    return Transform3( rot, noTranslation );
+}
+
+// Builds a rotation from a quaternion by wrapping Matrix3( unitQuat ) with a
+// Vector3( 0.0f ) fourth column.
+inline const Transform3 Transform3::rotation( const Quat & unitQuat )
+{
+    const Matrix3 rot = Matrix3( unitQuat );
+    const Vector3 noTranslation = Vector3( 0.0f );
+    return Transform3( rot, noTranslation );
+}
+
+// Builds a scale transform: scaleVec's x, y, z on the diagonal of the first
+// three columns and a Vector3( 0.0f ) fourth column.
+inline const Transform3 Transform3::scale( const Vector3 & scaleVec )
+{
+    const Vector3 col0( scaleVec.getX(), 0.0f, 0.0f );
+    const Vector3 col1( 0.0f, scaleVec.getY(), 0.0f );
+    const Vector3 col2( 0.0f, 0.0f, scaleVec.getZ() );
+    return Transform3( col0, col1, col2, Vector3( 0.0f ) );
+}
+
+// Returns tfrm with its first three columns multiplied by scaleVec's x, y and
+// z respectively; the fourth column is copied unchanged.
+inline const Transform3 appendScale( const Transform3 & tfrm, const Vector3 & scaleVec )
+{
+    const Vector3 scaled0 = tfrm.getCol0() * scaleVec.getX( );
+    const Vector3 scaled1 = tfrm.getCol1() * scaleVec.getY( );
+    const Vector3 scaled2 = tfrm.getCol2() * scaleVec.getZ( );
+    return Transform3( scaled0, scaled1, scaled2, tfrm.getCol3() );
+}
+
+// Returns tfrm with every column, including the fourth, multiplied per element
+// by scaleVec.
+inline const Transform3 prependScale( const Vector3 & scaleVec, const Transform3 & tfrm )
+{
+    const Vector3 scaled0 = mulPerElem( tfrm.getCol0(), scaleVec );
+    const Vector3 scaled1 = mulPerElem( tfrm.getCol1(), scaleVec );
+    const Vector3 scaled2 = mulPerElem( tfrm.getCol2(), scaleVec );
+    const Vector3 scaled3 = mulPerElem( tfrm.getCol3(), scaleVec );
+    return Transform3( scaled0, scaled1, scaled2, scaled3 );
+}
+
+// Builds a translation transform: the x, y, z axis vectors as the first three
+// columns and translateVec as the fourth.
+inline const Transform3 Transform3::translation( const Vector3 & translateVec )
+{
+    const Vector3 unitX = Vector3::xAxis( );
+    const Vector3 unitY = Vector3::yAxis( );
+    const Vector3 unitZ = Vector3::zAxis( );
+    return Transform3( unitX, unitY, unitZ, translateVec );
+}
+
+// Column-wise select: applies the Vector3 select() to each pair of matching
+// columns with the same select1 flag (presumably picking tfrm1 when it is
+// true; see the Vector3 overload to confirm).
+inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, bool select1 )
+{
+    const Vector3 picked0 = select( tfrm0.getCol0(), tfrm1.getCol0(), select1 );
+    const Vector3 picked1 = select( tfrm0.getCol1(), tfrm1.getCol1(), select1 );
+    const Vector3 picked2 = select( tfrm0.getCol2(), tfrm1.getCol2(), select1 );
+    const Vector3 picked3 = select( tfrm0.getCol3(), tfrm1.getCol3(), select1 );
+    return Transform3( picked0, picked1, picked2, picked3 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+// Debug helper: prints the three rows of tfrm in order, one print() call per row.
+inline void print( const Transform3 & tfrm )
+{
+    for ( int row = 0; row < 3; ++row )
+    {
+        print( tfrm.getRow( row ) );
+    }
+}
+
+// Debug helper: prints "name:" on its own line, then the transform rows.
+inline void print( const Transform3 & tfrm, const char * name )
+{
+    printf("%s:\n", name);
+    print( tfrm );
+}
+
+#endif
+
+inline Quat::Quat( const Matrix3 & tfrm ) // Builds a quaternion from the nine elements of a 3x3 matrix (presumably a rotation matrix; not checked here).
+{
+    float trace, radicand, scale, xx, yx, zx, xy, yy, zy, xz, yz, zz, tmpx, tmpy, tmpz, tmpw, qx, qy, qz, qw;
+    int negTrace, ZgtX, ZgtY, YgtX;
+    int largestXorY, largestYorZ, largestZorX;
+
+    xx = tfrm.getCol0().getX(); // naming: first letter is the component read, second is the column (x = col0, y = col1, z = col2)
+    yx = tfrm.getCol0().getY();
+    zx = tfrm.getCol0().getZ();
+    xy = tfrm.getCol1().getX();
+    yy = tfrm.getCol1().getY();
+    zy = tfrm.getCol1().getZ();
+    xz = tfrm.getCol2().getX();
+    yz = tfrm.getCol2().getY();
+    zz = tfrm.getCol2().getZ();
+
+    trace = ( ( xx + yy ) + zz ); // sum of the diagonal
+
+    negTrace = ( trace < 0.0f ); // all three "largest" flags below stay 0 unless the trace is negative
+    ZgtX = zz > xx;
+    ZgtY = zz > yy;
+    YgtX = yy > xx;
+    largestXorY = ( !ZgtX || !ZgtY ) && negTrace; // zz is not greater than both xx and yy
+    largestYorZ = ( YgtX || ZgtX ) && negTrace; // xx is smaller than yy or zz
+    largestZorX = ( ZgtY || !YgtX ) && negTrace; // yy is smaller than zz or not greater than xx
+    
+    if ( largestXorY ) // each set flag negates one diagonal and one off-diagonal element before the radicand is formed
+    {
+        zz = -zz;
+        xy = -xy;
+    }
+    if ( largestYorZ )
+    {
+        xx = -xx;
+        yz = -yz;
+    }
+    if ( largestZorX )
+    {
+        yy = -yy;
+        zx = -zx;
+    }
+
+    radicand = ( ( ( xx + yy ) + zz ) + 1.0f ); // uses the sign-adjusted diagonal; presumably kept positive by the flips above - TODO confirm
+    scale = ( 0.5f * ( 1.0f / sqrtf( radicand ) ) );
+
+    tmpx = ( ( zy - yz ) * scale ); // differences of mirrored off-diagonal elements (after any sign flips)
+    tmpy = ( ( xz - zx ) * scale );
+    tmpz = ( ( yx - xy ) * scale );
+    tmpw = ( radicand * scale ); // equals 0.5 * sqrt( radicand )
+    qx = tmpx; // with no flag set the temporaries map straight to x, y, z, w
+    qy = tmpy;
+    qz = tmpz;
+    qw = tmpw;
+
+    if ( largestXorY ) // reverse the component order: (x, y, z, w) <- (w, z, y, x)
+    {
+        qx = tmpw;
+        qy = tmpz;
+        qz = tmpy;
+        qw = tmpx;
+    }
+    if ( largestYorZ ) // swap x with y and z with w, reusing tmpx/tmpz as scratch
+    {
+        tmpx = qx;
+        tmpz = qz;
+        qx = qy;
+        qy = tmpx;
+        qz = qw;
+        qw = tmpz;
+    }
+
+    mX = qx; // largestZorX only affects the sign flips above; it triggers no permutation
+    mY = qy;
+    mZ = qz;
+    mW = qw;
+}
+
+// Outer product: column i of the result is tfrm0 multiplied by component i of tfrm1.
+inline const Matrix3 outer( const Vector3 & tfrm0, const Vector3 & tfrm1 )
+{
+    const Vector3 col0 = tfrm0 * tfrm1.getX( );
+    const Vector3 col1 = tfrm0 * tfrm1.getY( );
+    const Vector3 col2 = tfrm0 * tfrm1.getZ( );
+    return Matrix3( col0, col1, col2 );
+}
+
+// Outer product: column i of the result is tfrm0 multiplied by component i of tfrm1.
+inline const Matrix4 outer( const Vector4 & tfrm0, const Vector4 & tfrm1 )
+{
+    const Vector4 col0 = tfrm0 * tfrm1.getX( );
+    const Vector4 col1 = tfrm0 * tfrm1.getY( );
+    const Vector4 col2 = tfrm0 * tfrm1.getZ( );
+    const Vector4 col3 = tfrm0 * tfrm1.getW( );
+    return Matrix4( col0, col1, col2, col3 );
+}
+
+// Row-vector times matrix: component i of the result is the sum of vec's
+// x, y, z multiplied by the x, y, z of column i of mat.
+inline const Vector3 rowMul( const Vector3 & vec, const Matrix3 & mat )
+{
+    const float vx = vec.getX();
+    const float vy = vec.getY();
+    const float vz = vec.getZ();
+    const Vector3 col0 = mat.getCol0();
+    const Vector3 col1 = mat.getCol1();
+    const Vector3 col2 = mat.getCol2();
+    const float outX = ( ( vx * col0.getX() ) + ( vy * col0.getY() ) ) + ( vz * col0.getZ() );
+    const float outY = ( ( vx * col1.getX() ) + ( vy * col1.getY() ) ) + ( vz * col1.getZ() );
+    const float outZ = ( ( vx * col2.getX() ) + ( vy * col2.getY() ) ) + ( vz * col2.getZ() );
+    return Vector3( outX, outY, outZ );
+}
+
+// Builds the skew-symmetric 3x3 matrix of vec; with these columns, the matrix
+// times a vector w works out to cross( vec, w ).
+inline const Matrix3 crossMatrix( const Vector3 & vec )
+{
+    const float vx = vec.getX();
+    const float vy = vec.getY();
+    const float vz = vec.getZ();
+    const Vector3 col0( 0.0f, vz, -vy );
+    const Vector3 col1( -vz, 0.0f, vx );
+    const Vector3 col2( vy, -vx, 0.0f );
+    return Matrix3( col0, col1, col2 );
+}
+
+// Returns the matrix whose columns are cross( vec, column ) for each column of mat.
+inline const Matrix3 crossMatrixMul( const Vector3 & vec, const Matrix3 & mat )
+{
+    const Vector3 crossed0 = cross( vec, mat.getCol0() );
+    const Vector3 crossed1 = cross( vec, mat.getCol1() );
+    const Vector3 crossed2 = cross( vec, mat.getCol2() );
+    return Matrix3( crossed0, crossed1, crossed2 );
+}
+
+} // namespace Aos
+} // namespace Vectormath
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/scalar/cpp/quat_aos.h b/Extras/vectormathlibrary/include/vectormath/scalar/cpp/quat_aos.h
index 7f1e8822b..603f522d9 100644
--- a/Extras/vectormathlibrary/include/vectormath/scalar/cpp/quat_aos.h
+++ b/Extras/vectormathlibrary/include/vectormath/scalar/cpp/quat_aos.h
@@ -1,432 +1,432 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_QUAT_AOS_CPP_H
-#define _VECTORMATH_QUAT_AOS_CPP_H
-//-----------------------------------------------------------------------------
-// Definitions
-
-#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
-#define _VECTORMATH_INTERNAL_FUNCTIONS
-
-#endif
-
-namespace Vectormath {
-namespace Aos {
-
-inline Quat::Quat( const Quat & quat )
-{
-    mX = quat.mX;
-    mY = quat.mY;
-    mZ = quat.mZ;
-    mW = quat.mW;
-}
-
-inline Quat::Quat( float _x, float _y, float _z, float _w )
-{
-    mX = _x;
-    mY = _y;
-    mZ = _z;
-    mW = _w;
-}
-
-inline Quat::Quat( const Vector3 & xyz, float _w )
-{
-    this->setXYZ( xyz );
-    this->setW( _w );
-}
-
-inline Quat::Quat( const Vector4 & vec )
-{
-    mX = vec.getX();
-    mY = vec.getY();
-    mZ = vec.getZ();
-    mW = vec.getW();
-}
-
-inline Quat::Quat( float scalar )
-{
-    mX = scalar;
-    mY = scalar;
-    mZ = scalar;
-    mW = scalar;
-}
-
-inline const Quat Quat::identity( )
-{
-    return Quat( 0.0f, 0.0f, 0.0f, 1.0f );
-}
-
-inline const Quat lerp( float t, const Quat & quat0, const Quat & quat1 )
-{
-    return ( quat0 + ( ( quat1 - quat0 ) * t ) );
-}
-
-inline const Quat slerp( float t, const Quat & unitQuat0, const Quat & unitQuat1 )
-{
-    Quat start;
-    float recipSinAngle, scale0, scale1, cosAngle, angle;
-    cosAngle = dot( unitQuat0, unitQuat1 );
-    if ( cosAngle < 0.0f ) {
-        cosAngle = -cosAngle;
-        start = ( -unitQuat0 );
-    } else {
-        start = unitQuat0;
-    }
-    if ( cosAngle < _VECTORMATH_SLERP_TOL ) {
-        angle = acosf( cosAngle );
-        recipSinAngle = ( 1.0f / sinf( angle ) );
-        scale0 = ( sinf( ( ( 1.0f - t ) * angle ) ) * recipSinAngle );
-        scale1 = ( sinf( ( t * angle ) ) * recipSinAngle );
-    } else {
-        scale0 = ( 1.0f - t );
-        scale1 = t;
-    }
-    return ( ( start * scale0 ) + ( unitQuat1 * scale1 ) );
-}
-
-inline const Quat squad( float t, const Quat & unitQuat0, const Quat & unitQuat1, const Quat & unitQuat2, const Quat & unitQuat3 )
-{
-    Quat tmp0, tmp1;
-    tmp0 = slerp( t, unitQuat0, unitQuat3 );
-    tmp1 = slerp( t, unitQuat1, unitQuat2 );
-    return slerp( ( ( 2.0f * t ) * ( 1.0f - t ) ), tmp0, tmp1 );
-}
-
-inline Quat & Quat::operator =( const Quat & quat )
-{
-    mX = quat.mX;
-    mY = quat.mY;
-    mZ = quat.mZ;
-    mW = quat.mW;
-    return *this;
-}
-
-inline Quat & Quat::setXYZ( const Vector3 & vec )
-{
-    mX = vec.getX();
-    mY = vec.getY();
-    mZ = vec.getZ();
-    return *this;
-}
-
-inline const Vector3 Quat::getXYZ( ) const
-{
-    return Vector3( mX, mY, mZ );
-}
-
-inline Quat & Quat::setX( float _x )
-{
-    mX = _x;
-    return *this;
-}
-
-inline float Quat::getX( ) const
-{
-    return mX;
-}
-
-inline Quat & Quat::setY( float _y )
-{
-    mY = _y;
-    return *this;
-}
-
-inline float Quat::getY( ) const
-{
-    return mY;
-}
-
-inline Quat & Quat::setZ( float _z )
-{
-    mZ = _z;
-    return *this;
-}
-
-inline float Quat::getZ( ) const
-{
-    return mZ;
-}
-
-inline Quat & Quat::setW( float _w )
-{
-    mW = _w;
-    return *this;
-}
-
-inline float Quat::getW( ) const
-{
-    return mW;
-}
-
-inline Quat & Quat::setElem( int idx, float value )
-{
-    *(&mX + idx) = value;
-    return *this;
-}
-
-inline float Quat::getElem( int idx ) const
-{
-    return *(&mX + idx);
-}
-
-inline float & Quat::operator []( int idx )
-{
-    return *(&mX + idx);
-}
-
-inline float Quat::operator []( int idx ) const
-{
-    return *(&mX + idx);
-}
-
-inline const Quat Quat::operator +( const Quat & quat ) const
-{
-    return Quat(
-        ( mX + quat.mX ),
-        ( mY + quat.mY ),
-        ( mZ + quat.mZ ),
-        ( mW + quat.mW )
-    );
-}
-
-inline const Quat Quat::operator -( const Quat & quat ) const
-{
-    return Quat(
-        ( mX - quat.mX ),
-        ( mY - quat.mY ),
-        ( mZ - quat.mZ ),
-        ( mW - quat.mW )
-    );
-}
-
-inline const Quat Quat::operator *( float scalar ) const
-{
-    return Quat(
-        ( mX * scalar ),
-        ( mY * scalar ),
-        ( mZ * scalar ),
-        ( mW * scalar )
-    );
-}
-
-inline Quat & Quat::operator +=( const Quat & quat )
-{
-    *this = *this + quat;
-    return *this;
-}
-
-inline Quat & Quat::operator -=( const Quat & quat )
-{
-    *this = *this - quat;
-    return *this;
-}
-
-inline Quat & Quat::operator *=( float scalar )
-{
-    *this = *this * scalar;
-    return *this;
-}
-
-inline const Quat Quat::operator /( float scalar ) const
-{
-    return Quat(
-        ( mX / scalar ),
-        ( mY / scalar ),
-        ( mZ / scalar ),
-        ( mW / scalar )
-    );
-}
-
-inline Quat & Quat::operator /=( float scalar )
-{
-    *this = *this / scalar;
-    return *this;
-}
-
-inline const Quat Quat::operator -( ) const
-{
-    return Quat(
-        -mX,
-        -mY,
-        -mZ,
-        -mW
-    );
-}
-
-inline const Quat operator *( float scalar, const Quat & quat )
-{
-    return quat * scalar;
-}
-
-inline float dot( const Quat & quat0, const Quat & quat1 )
-{
-    float result;
-    result = ( quat0.getX() * quat1.getX() );
-    result = ( result + ( quat0.getY() * quat1.getY() ) );
-    result = ( result + ( quat0.getZ() * quat1.getZ() ) );
-    result = ( result + ( quat0.getW() * quat1.getW() ) );
-    return result;
-}
-
-inline float norm( const Quat & quat )
-{
-    float result;
-    result = ( quat.getX() * quat.getX() );
-    result = ( result + ( quat.getY() * quat.getY() ) );
-    result = ( result + ( quat.getZ() * quat.getZ() ) );
-    result = ( result + ( quat.getW() * quat.getW() ) );
-    return result;
-}
-
-inline float length( const Quat & quat )
-{
-    return sqrtf( norm( quat ) );
-}
-
-inline const Quat normalize( const Quat & quat )
-{
-    float lenSqr, lenInv;
-    lenSqr = norm( quat );
-    lenInv = ( 1.0f / sqrtf( lenSqr ) );
-    return Quat(
-        ( quat.getX() * lenInv ),
-        ( quat.getY() * lenInv ),
-        ( quat.getZ() * lenInv ),
-        ( quat.getW() * lenInv )
-    );
-}
-
-inline const Quat Quat::rotation( const Vector3 & unitVec0, const Vector3 & unitVec1 )
-{
-    float cosHalfAngleX2, recipCosHalfAngleX2;
-    cosHalfAngleX2 = sqrtf( ( 2.0f * ( 1.0f + dot( unitVec0, unitVec1 ) ) ) );
-    recipCosHalfAngleX2 = ( 1.0f / cosHalfAngleX2 );
-    return Quat( ( cross( unitVec0, unitVec1 ) * recipCosHalfAngleX2 ), ( cosHalfAngleX2 * 0.5f ) );
-}
-
-inline const Quat Quat::rotation( float radians, const Vector3 & unitVec )
-{
-    float s, c, angle;
-    angle = ( radians * 0.5f );
-    s = sinf( angle );
-    c = cosf( angle );
-    return Quat( ( unitVec * s ), c );
-}
-
-inline const Quat Quat::rotationX( float radians )
-{
-    float s, c, angle;
-    angle = ( radians * 0.5f );
-    s = sinf( angle );
-    c = cosf( angle );
-    return Quat( s, 0.0f, 0.0f, c );
-}
-
-inline const Quat Quat::rotationY( float radians )
-{
-    float s, c, angle;
-    angle = ( radians * 0.5f );
-    s = sinf( angle );
-    c = cosf( angle );
-    return Quat( 0.0f, s, 0.0f, c );
-}
-
-inline const Quat Quat::rotationZ( float radians )
-{
-    float s, c, angle;
-    angle = ( radians * 0.5f );
-    s = sinf( angle );
-    c = cosf( angle );
-    return Quat( 0.0f, 0.0f, s, c );
-}
-
-inline const Quat Quat::operator *( const Quat & quat ) const
-{
-    return Quat(
-        ( ( ( ( mW * quat.mX ) + ( mX * quat.mW ) ) + ( mY * quat.mZ ) ) - ( mZ * quat.mY ) ),
-        ( ( ( ( mW * quat.mY ) + ( mY * quat.mW ) ) + ( mZ * quat.mX ) ) - ( mX * quat.mZ ) ),
-        ( ( ( ( mW * quat.mZ ) + ( mZ * quat.mW ) ) + ( mX * quat.mY ) ) - ( mY * quat.mX ) ),
-        ( ( ( ( mW * quat.mW ) - ( mX * quat.mX ) ) - ( mY * quat.mY ) ) - ( mZ * quat.mZ ) )
-    );
-}
-
-inline Quat & Quat::operator *=( const Quat & quat )
-{
-    *this = *this * quat;
-    return *this;
-}
-
-inline const Vector3 rotate( const Quat & quat, const Vector3 & vec )
-{
-    float tmpX, tmpY, tmpZ, tmpW;
-    tmpX = ( ( ( quat.getW() * vec.getX() ) + ( quat.getY() * vec.getZ() ) ) - ( quat.getZ() * vec.getY() ) );
-    tmpY = ( ( ( quat.getW() * vec.getY() ) + ( quat.getZ() * vec.getX() ) ) - ( quat.getX() * vec.getZ() ) );
-    tmpZ = ( ( ( quat.getW() * vec.getZ() ) + ( quat.getX() * vec.getY() ) ) - ( quat.getY() * vec.getX() ) );
-    tmpW = ( ( ( quat.getX() * vec.getX() ) + ( quat.getY() * vec.getY() ) ) + ( quat.getZ() * vec.getZ() ) );
-    return Vector3(
-        ( ( ( ( tmpW * quat.getX() ) + ( tmpX * quat.getW() ) ) - ( tmpY * quat.getZ() ) ) + ( tmpZ * quat.getY() ) ),
-        ( ( ( ( tmpW * quat.getY() ) + ( tmpY * quat.getW() ) ) - ( tmpZ * quat.getX() ) ) + ( tmpX * quat.getZ() ) ),
-        ( ( ( ( tmpW * quat.getZ() ) + ( tmpZ * quat.getW() ) ) - ( tmpX * quat.getY() ) ) + ( tmpY * quat.getX() ) )
-    );
-}
-
-inline const Quat conj( const Quat & quat )
-{
-    return Quat( -quat.getX(), -quat.getY(), -quat.getZ(), quat.getW() );
-}
-
-inline const Quat select( const Quat & quat0, const Quat & quat1, bool select1 )
-{
-    return Quat(
-        ( select1 )? quat1.getX() : quat0.getX(),
-        ( select1 )? quat1.getY() : quat0.getY(),
-        ( select1 )? quat1.getZ() : quat0.getZ(),
-        ( select1 )? quat1.getW() : quat0.getW()
-    );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( const Quat & quat )
-{
-    printf( "( %f %f %f %f )\n", quat.getX(), quat.getY(), quat.getZ(), quat.getW() );
-}
-
-inline void print( const Quat & quat, const char * name )
-{
-    printf( "%s: ( %f %f %f %f )\n", name, quat.getX(), quat.getY(), quat.getZ(), quat.getW() );
-}
-
-#endif
-
-} // namespace Aos
-} // namespace Vectormath
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_QUAT_AOS_CPP_H
+#define _VECTORMATH_QUAT_AOS_CPP_H
+//-----------------------------------------------------------------------------
+// Definitions
+
+#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
+#define _VECTORMATH_INTERNAL_FUNCTIONS
+
+#endif
+
+namespace Vectormath {
+namespace Aos {
+
+inline Quat::Quat( const Quat & quat )
+{
+    // Copy constructor: the four floats are taken over unchanged.
+    // operator= performs exactly the member-wise copy needed here,
+    // so it is reused instead of spelling the assignments out again.
+    *this = quat;
+}
+
+inline Quat::Quat( float _x, float _y, float _z, float _w )
+{
+    // Element-wise constructor: every argument lands in the member of
+    // the same name; the chained setters do nothing but store.
+    setX( _x ).setY( _y );
+    setZ( _z ).setW( _w );
+}
+
+inline Quat::Quat( const Vector3 & xyz, float _w )
+{
+    // x, y, z come from the vector; w is supplied separately
+    setXYZ( xyz ).setW( _w );
+}
+
+inline Quat::Quat( const Vector4 & vec )
+{
+    // Take a 4-D vector as a quaternion: components map one-to-one
+    // (x->x, y->y, z->z, w->w); no normalization is performed.
+    setX( vec.getX() ).setY( vec.getY() );
+    setZ( vec.getZ() ).setW( vec.getW() );
+}
+
+inline Quat::Quat( float scalar )
+{
+    // Broadcast constructor: all four components get the same value.
+    // Assignments associate right-to-left, so scalar is stored to
+    // mW first and then propagated unchanged through mZ, mY and mX.
+    mX = mY = mZ = mW = scalar;
+}
+
+inline const Quat Quat::identity( )
+{
+    return Quat( Vector3( 0.0f ), 1.0f );  // x = y = z = 0, w = 1
+}
+
+inline const Quat lerp( float t, const Quat & quat0, const Quat & quat1 )
+{
+    return quat0 + ( quat1 - quat0 ) * t;  // t is not clamped, result is not renormalized
+}
+
+inline const Quat slerp( float t, const Quat & unitQuat0, const Quat & unitQuat1 )
+{
+    // Spherical interpolation: the weights reduce to (1, 0) at t = 0 and
+    // (0, 1) at t = 1. With a negative dot product the first input is
+    // negated and the cosine made positive before the weights are computed.
+    float cosTheta = dot( unitQuat0, unitQuat1 );
+    const bool flip = ( cosTheta < 0.0f );
+    const Quat from = flip ? ( -unitQuat0 ) : unitQuat0;
+    if ( flip ) {
+        cosTheta = -cosTheta;
+    }
+    // Linear weights are the default; they stay in effect at or above the tolerance.
+    float w0 = ( 1.0f - t );
+    float w1 = t;
+    if ( cosTheta < _VECTORMATH_SLERP_TOL ) {
+        const float theta = acosf( cosTheta );
+        const float invSin = 1.0f / sinf( theta );
+        w0 = sinf( ( 1.0f - t ) * theta ) * invSin;
+        w1 = sinf( t * theta ) * invSin;
+    }
+    return ( from * w0 ) + ( unitQuat1 * w1 );
+}
+
+inline const Quat squad( float t, const Quat & unitQuat0, const Quat & unitQuat1, const Quat & unitQuat2, const Quat & unitQuat3 )
+{
+    // Blend slerp(0,3) and slerp(1,2), both taken at t, with weight 2t(1 - t).
+    const Quat outer = slerp( t, unitQuat0, unitQuat3 );
+    const Quat inner = slerp( t, unitQuat1, unitQuat2 );
+    return slerp( ( ( 2.0f * t ) * ( 1.0f - t ) ), outer, inner );
+}
+
+inline Quat & Quat::operator =( const Quat & quat )
+{
+    // Plain member-wise copy; self-assignment needs no special
+    // handling because each float is simply overwritten by itself.
+    this->mX = quat.mX;  this->mY = quat.mY;
+    this->mZ = quat.mZ;  this->mW = quat.mW;
+    return *this;
+}
+
+inline Quat & Quat::setXYZ( const Vector3 & vec )
+{
+    // Replace x, y and z only; w keeps its current value.
+    // Returns this quaternion so that calls can be chained.
+    setX( vec.getX() ).setY( vec.getY() );
+    return setZ( vec.getZ() );
+}
+
+inline const Vector3 Quat::getXYZ( ) const
+{
+    return Vector3( getX(), getY(), getZ() );  // w is left out
+}
+
+inline Quat & Quat::setX( float _x )
+{
+    this->mX = _x;   // y, z and w are not touched
+    return *this;    // reference to this object, usable for chaining
+}
+
+inline float Quat::getX( ) const
+{
+    return this->mX;  // x component, returned by value
+}
+
+inline Quat & Quat::setY( float _y )
+{
+    this->mY = _y;   // x, z and w are not touched
+    return *this;    // reference to this object, usable for chaining
+}
+
+inline float Quat::getY( ) const
+{
+    return this->mY;  // y component, returned by value
+}
+
+inline Quat & Quat::setZ( float _z )
+{
+    this->mZ = _z;   // x, y and w are not touched
+    return *this;    // reference to this object, usable for chaining
+}
+
+inline float Quat::getZ( ) const
+{
+    return this->mZ;  // z component, returned by value
+}
+
+inline Quat & Quat::setW( float _w )
+{
+    this->mW = _w;   // x, y and z are not touched
+    return *this;    // reference to this object, usable for chaining
+}
+
+inline float Quat::getW( ) const
+{
+    return this->mW;  // w component, returned by value
+}
+
+inline Quat & Quat::setElem( int idx, float value )
+{
+    ( &mX )[ idx ] = value;  // unchecked offset from mX; assumes x, y, z, w are stored contiguously
+    return *this;
+}
+
+inline float Quat::getElem( int idx ) const
+{
+    return ( &mX )[ idx ];  // unchecked; idx counts floats starting at mX
+}
+
+inline float & Quat::operator []( int idx )
+{
+    return ( &mX )[ idx ];  // writable reference; unchecked offset from mX
+}
+
+inline float Quat::operator []( int idx ) const
+{
+    return ( &mX )[ idx ];  // read-only copy; unchecked offset from mX
+}
+
+inline const Quat Quat::operator +( const Quat & quat ) const
+{
+    // Component-wise sum; neither operand is modified.
+    const float sumX = mX + quat.mX;
+    const float sumY = mY + quat.mY;
+    const float sumZ = mZ + quat.mZ;
+    const float sumW = mW + quat.mW;
+    return Quat( sumX, sumY, sumZ, sumW );
+}
+
+inline const Quat Quat::operator -( const Quat & quat ) const
+{
+    // Component-wise difference (this minus quat); operands are unchanged.
+    const float diffX = mX - quat.mX;
+    const float diffY = mY - quat.mY;
+    const float diffZ = mZ - quat.mZ;
+    const float diffW = mW - quat.mW;
+    return Quat( diffX, diffY, diffZ, diffW );
+}
+
+inline const Quat Quat::operator *( float scalar ) const
+{
+    // Uniform scale: every component, w included, is multiplied by scalar.
+    const float scaledX = mX * scalar;
+    const float scaledY = mY * scalar;
+    const float scaledZ = mZ * scalar;
+    const float scaledW = mW * scalar;
+    return Quat( scaledX, scaledY, scaledZ, scaledW );
+}
+
+inline Quat & Quat::operator +=( const Quat & quat )
+{
+    // binary operator+ does the arithmetic; the sum is stored in place
+    return ( *this = ( *this + quat ) );
+}
+
+inline Quat & Quat::operator -=( const Quat & quat )
+{
+    // binary operator- does the arithmetic; the difference is stored in place
+    return ( *this = ( *this - quat ) );
+}
+
+inline Quat & Quat::operator *=( float scalar )
+{
+    // scale via operator*( float ) and keep the result in this object
+    return ( *this = ( *this * scalar ) );
+}
+
+inline const Quat Quat::operator /( float scalar ) const
+{
+    // One true division per component; scalar is not checked against zero.
+    const float quotX = mX / scalar;
+    const float quotY = mY / scalar;
+    const float quotZ = mZ / scalar;
+    const float quotW = mW / scalar;
+    return Quat( quotX, quotY, quotZ, quotW );
+}
+
+inline Quat & Quat::operator /=( float scalar )
+{
+    // divide via operator/( float ) and keep the result in this object
+    return ( *this = ( *this / scalar ) );
+}
+
+inline const Quat Quat::operator -( ) const
+{
+    // Negate every component, w included (conj() is the one that keeps w).
+    const float negX = -mX;
+    const float negY = -mY;
+    const float negZ = -mZ;
+    const float negW = -mW;
+    return Quat( negX, negY, negZ, negW );
+}
+
+inline const Quat operator *( float scalar, const Quat & quat )
+{
+    return quat.operator *( scalar );  // scalar-on-the-left spelling of quat * scalar
+}
+
+inline float dot( const Quat & quat0, const Quat & quat1 )
+{
+    // Four-component dot product, accumulated in x, y, z, w order.
+    float acc = quat0.getX() * quat1.getX();
+    acc += quat0.getY() * quat1.getY();
+    acc += quat0.getZ() * quat1.getZ();
+    acc += quat0.getW() * quat1.getW();
+    return acc;
+}
+
+inline float norm( const Quat & quat )
+{
+    // Sum of squared components (x*x + y*y + z*z + w*w); no square root here.
+    float sumSq = quat.getX() * quat.getX();
+    sumSq += quat.getY() * quat.getY();
+    sumSq += quat.getZ() * quat.getZ();
+    sumSq += quat.getW() * quat.getW();
+    return sumSq;
+}
+
+inline float length( const Quat & quat )
+{
+    return sqrtf( dot( quat, quat ) );  // dot( q, q ) is the same sum of squares that norm( q ) computes
+}
+
+inline const Quat normalize( const Quat & quat )
+{
+    // Scale quat to unit length by multiplying each component with
+    // 1 / sqrt( norm ). There is no guard for a zero quaternion: the
+    // reciprocal is then a division by zero.
+    const float invLen = 1.0f / sqrtf( norm( quat ) );
+    const float unitX = quat.getX() * invLen;
+    const float unitY = quat.getY() * invLen;
+    const float unitZ = quat.getZ() * invLen;
+    const float unitW = quat.getW() * invLen;
+    return Quat( unitX, unitY, unitZ, unitW );
+}
+
+inline const Quat Quat::rotation( const Vector3 & unitVec0, const Vector3 & unitVec1 )
+{
+    // For unit inputs sqrt( 2 (1 + dot) ) equals 2 cos( angle / 2 ). It is 0 when
+    // the inputs point in opposite directions, and the reciprocal then divides by zero.
+    const float twoCosHalf = sqrtf( 2.0f * ( 1.0f + dot( unitVec0, unitVec1 ) ) );
+    return Quat( cross( unitVec0, unitVec1 ) * ( 1.0f / twoCosHalf ), twoCosHalf * 0.5f );
+}
+
+inline const Quat Quat::rotation( float radians, const Vector3 & unitVec )
+{
+    // Axis-angle form: xyz = unitVec * sin( radians / 2 ), w = cos( radians / 2 ).
+    // unitVec is used as given; it is not normalized here.
+    const float halfAngle = radians * 0.5f;
+    const float sinHalf = sinf( halfAngle );
+    return Quat( unitVec * sinHalf, cosf( halfAngle ) );
+}
+
+inline const Quat Quat::rotationX( float radians )
+{
+    // Result is ( sin( radians / 2 ), 0, 0, cos( radians / 2 ) ).
+    const float halfAngle = radians * 0.5f;
+    const float sinHalf = sinf( halfAngle );
+    const float cosHalf = cosf( halfAngle );
+    return Quat( sinHalf, 0.0f, 0.0f, cosHalf );
+}
+
+inline const Quat Quat::rotationY( float radians )
+{
+    // Result is ( 0, sin( radians / 2 ), 0, cos( radians / 2 ) ).
+    const float halfAngle = radians * 0.5f;
+    const float sinHalf = sinf( halfAngle );
+    const float cosHalf = cosf( halfAngle );
+    return Quat( 0.0f, sinHalf, 0.0f, cosHalf );
+}
+
+inline const Quat Quat::rotationZ( float radians )
+{
+    // Result is ( 0, 0, sin( radians / 2 ), cos( radians / 2 ) ).
+    const float halfAngle = radians * 0.5f;
+    const float sinHalf = sinf( halfAngle );
+    const float cosHalf = cosf( halfAngle );
+    return Quat( 0.0f, 0.0f, sinHalf, cosHalf );
+}
+
+inline const Quat Quat::operator *( const Quat & quat ) const
+{
+    // Quaternion (Hamilton) product with this on the left and quat on the right.
+    const float prodX = ( mW * quat.mX ) + ( mX * quat.mW ) + ( mY * quat.mZ ) - ( mZ * quat.mY );
+    const float prodY = ( mW * quat.mY ) + ( mY * quat.mW ) + ( mZ * quat.mX ) - ( mX * quat.mZ );
+    const float prodZ = ( mW * quat.mZ ) + ( mZ * quat.mW ) + ( mX * quat.mY ) - ( mY * quat.mX );
+    const float prodW = ( mW * quat.mW ) - ( mX * quat.mX ) - ( mY * quat.mY ) - ( mZ * quat.mZ );
+    return Quat( prodX, prodY, prodZ, prodW );
+}
+
+inline Quat & Quat::operator *=( const Quat & quat )
+{
+    // quat is applied on the right-hand side: this becomes ( this * quat )
+    return ( *this = ( *this * quat ) );
+}
+
+inline const Vector3 rotate( const Quat & quat, const Vector3 & vec )
+{
+    const float qx = quat.getX(), qy = quat.getY(), qz = quat.getZ(), qw = quat.getW();
+    const float vx = vec.getX(), vy = vec.getY(), vz = vec.getZ();
+    const float tx = qw * vx + qy * vz - qz * vy;
+    const float ty = qw * vy + qz * vx - qx * vz;
+    const float tz = qw * vz + qx * vy - qy * vx;
+    const float tw = qx * vx + qy * vy + qz * vz;   // dot of quat's xyz with vec
+    const float outX = tw * qx + tx * qw - ty * qz + tz * qy;
+    const float outY = tw * qy + ty * qw - tz * qx + tx * qz;
+    const float outZ = tw * qz + tz * qw - tx * qy + ty * qx;
+    return Vector3( outX, outY, outZ );  // quat * vec * conj( quat ), expanded by hand
+}
+
+inline const Quat conj( const Quat & quat )
+{
+    return Quat( -quat.getXYZ(), quat.getW() );  // x, y, z change sign; w is kept
+}
+
+inline const Quat select( const Quat & quat0, const Quat & quat1, bool select1 )
+{
+    // All four components come from the same source:
+    // quat1 when select1 is true, quat0 otherwise.
+    if ( select1 ) {
+        return quat1;
+    }
+    return quat0;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+inline void print( const Quat & quat )  // compiled only with _VECTORMATH_DEBUG; writes one "( x y z w )" line to stdout
+{
+    printf( "( %f %f %f %f )\n", quat.getX(), quat.getY(), quat.getZ(), quat.getW() );
+}
+
+inline void print( const Quat & quat, const char * name )  // compiled only with _VECTORMATH_DEBUG; same output, prefixed with "name: "
+{
+    printf( "%s: ( %f %f %f %f )\n", name, quat.getX(), quat.getY(), quat.getZ(), quat.getW() );
+}
+
+#endif
+
+} // namespace Aos
+} // namespace Vectormath
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/scalar/cpp/vec_aos.h b/Extras/vectormathlibrary/include/vectormath/scalar/cpp/vec_aos.h
index a1a75333a..6b1b8cee3 100644
--- a/Extras/vectormathlibrary/include/vectormath/scalar/cpp/vec_aos.h
+++ b/Extras/vectormathlibrary/include/vectormath/scalar/cpp/vec_aos.h
@@ -1,1173 +1,1173 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_VEC_AOS_CPP_H
-#define _VECTORMATH_VEC_AOS_CPP_H
-//-----------------------------------------------------------------------------
-// Constants
-
-#define _VECTORMATH_SLERP_TOL 0.999f
-
-//-----------------------------------------------------------------------------
-// Definitions
-
-#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
-#define _VECTORMATH_INTERNAL_FUNCTIONS
-
-#endif
-
-namespace Vectormath {
-namespace Aos {
-
-inline Vector3::Vector3( const Vector3 & vec )
-{
-    mX = vec.mX;
-    mY = vec.mY;
-    mZ = vec.mZ;
-}
-
-inline Vector3::Vector3( float _x, float _y, float _z )
-{
-    mX = _x;
-    mY = _y;
-    mZ = _z;
-}
-
-inline Vector3::Vector3( const Point3 & pnt )
-{
-    mX = pnt.getX();
-    mY = pnt.getY();
-    mZ = pnt.getZ();
-}
-
-inline Vector3::Vector3( float scalar )
-{
-    mX = scalar;
-    mY = scalar;
-    mZ = scalar;
-}
-
-inline const Vector3 Vector3::xAxis( )
-{
-    return Vector3( 1.0f, 0.0f, 0.0f );
-}
-
-inline const Vector3 Vector3::yAxis( )
-{
-    return Vector3( 0.0f, 1.0f, 0.0f );
-}
-
-inline const Vector3 Vector3::zAxis( )
-{
-    return Vector3( 0.0f, 0.0f, 1.0f );
-}
-
-inline const Vector3 lerp( float t, const Vector3 & vec0, const Vector3 & vec1 )
-{
-    return ( vec0 + ( ( vec1 - vec0 ) * t ) );
-}
-
-inline const Vector3 slerp( float t, const Vector3 & unitVec0, const Vector3 & unitVec1 )
-{
-    float recipSinAngle, scale0, scale1, cosAngle, angle;
-    cosAngle = dot( unitVec0, unitVec1 );
-    if ( cosAngle < _VECTORMATH_SLERP_TOL ) {
-        angle = acosf( cosAngle );
-        recipSinAngle = ( 1.0f / sinf( angle ) );
-        scale0 = ( sinf( ( ( 1.0f - t ) * angle ) ) * recipSinAngle );
-        scale1 = ( sinf( ( t * angle ) ) * recipSinAngle );
-    } else {
-        scale0 = ( 1.0f - t );
-        scale1 = t;
-    }
-    return ( ( unitVec0 * scale0 ) + ( unitVec1 * scale1 ) );
-}
-
-inline Vector3 & Vector3::operator =( const Vector3 & vec )
-{
-    mX = vec.mX;
-    mY = vec.mY;
-    mZ = vec.mZ;
-    return *this;
-}
-
-inline Vector3 & Vector3::setX( float _x )
-{
-    mX = _x;
-    return *this;
-}
-
-inline float Vector3::getX( ) const
-{
-    return mX;
-}
-
-inline Vector3 & Vector3::setY( float _y )
-{
-    mY = _y;
-    return *this;
-}
-
-inline float Vector3::getY( ) const
-{
-    return mY;
-}
-
-inline Vector3 & Vector3::setZ( float _z )
-{
-    mZ = _z;
-    return *this;
-}
-
-inline float Vector3::getZ( ) const
-{
-    return mZ;
-}
-
-inline Vector3 & Vector3::setElem( int idx, float value )
-{
-    *(&mX + idx) = value;
-    return *this;
-}
-
-inline float Vector3::getElem( int idx ) const
-{
-    return *(&mX + idx);
-}
-
-inline float & Vector3::operator []( int idx )
-{
-    return *(&mX + idx);
-}
-
-inline float Vector3::operator []( int idx ) const
-{
-    return *(&mX + idx);
-}
-
-inline const Vector3 Vector3::operator +( const Vector3 & vec ) const
-{
-    return Vector3(
-        ( mX + vec.mX ),
-        ( mY + vec.mY ),
-        ( mZ + vec.mZ )
-    );
-}
-
-inline const Vector3 Vector3::operator -( const Vector3 & vec ) const
-{
-    return Vector3(
-        ( mX - vec.mX ),
-        ( mY - vec.mY ),
-        ( mZ - vec.mZ )
-    );
-}
-
-inline const Point3 Vector3::operator +( const Point3 & pnt ) const
-{
-    return Point3(
-        ( mX + pnt.getX() ),
-        ( mY + pnt.getY() ),
-        ( mZ + pnt.getZ() )
-    );
-}
-
-inline const Vector3 Vector3::operator *( float scalar ) const
-{
-    return Vector3(
-        ( mX * scalar ),
-        ( mY * scalar ),
-        ( mZ * scalar )
-    );
-}
-
-inline Vector3 & Vector3::operator +=( const Vector3 & vec )
-{
-    *this = *this + vec;
-    return *this;
-}
-
-inline Vector3 & Vector3::operator -=( const Vector3 & vec )
-{
-    *this = *this - vec;
-    return *this;
-}
-
-inline Vector3 & Vector3::operator *=( float scalar )
-{
-    *this = *this * scalar;
-    return *this;
-}
-
-inline const Vector3 Vector3::operator /( float scalar ) const
-{
-    return Vector3(
-        ( mX / scalar ),
-        ( mY / scalar ),
-        ( mZ / scalar )
-    );
-}
-
-inline Vector3 & Vector3::operator /=( float scalar )
-{
-    *this = *this / scalar;
-    return *this;
-}
-
-inline const Vector3 Vector3::operator -( ) const
-{
-    return Vector3(
-        -mX,
-        -mY,
-        -mZ
-    );
-}
-
-inline const Vector3 operator *( float scalar, const Vector3 & vec )
-{
-    return vec * scalar;
-}
-
-inline const Vector3 mulPerElem( const Vector3 & vec0, const Vector3 & vec1 )
-{
-    return Vector3(
-        ( vec0.getX() * vec1.getX() ),
-        ( vec0.getY() * vec1.getY() ),
-        ( vec0.getZ() * vec1.getZ() )
-    );
-}
-
-inline const Vector3 divPerElem( const Vector3 & vec0, const Vector3 & vec1 )
-{
-    return Vector3(
-        ( vec0.getX() / vec1.getX() ),
-        ( vec0.getY() / vec1.getY() ),
-        ( vec0.getZ() / vec1.getZ() )
-    );
-}
-
-inline const Vector3 recipPerElem( const Vector3 & vec )
-{
-    return Vector3(
-        ( 1.0f / vec.getX() ),
-        ( 1.0f / vec.getY() ),
-        ( 1.0f / vec.getZ() )
-    );
-}
-
-inline const Vector3 sqrtPerElem( const Vector3 & vec )
-{
-    return Vector3(
-        sqrtf( vec.getX() ),
-        sqrtf( vec.getY() ),
-        sqrtf( vec.getZ() )
-    );
-}
-
-inline const Vector3 rsqrtPerElem( const Vector3 & vec )
-{
-    return Vector3(
-        ( 1.0f / sqrtf( vec.getX() ) ),
-        ( 1.0f / sqrtf( vec.getY() ) ),
-        ( 1.0f / sqrtf( vec.getZ() ) )
-    );
-}
-
-inline const Vector3 absPerElem( const Vector3 & vec )
-{
-    return Vector3(
-        fabsf( vec.getX() ),
-        fabsf( vec.getY() ),
-        fabsf( vec.getZ() )
-    );
-}
-
-inline const Vector3 copySignPerElem( const Vector3 & vec0, const Vector3 & vec1 )
-{
-    return Vector3(
-        ( vec1.getX() < 0.0f )? -fabsf( vec0.getX() ) : fabsf( vec0.getX() ),
-        ( vec1.getY() < 0.0f )? -fabsf( vec0.getY() ) : fabsf( vec0.getY() ),
-        ( vec1.getZ() < 0.0f )? -fabsf( vec0.getZ() ) : fabsf( vec0.getZ() )
-    );
-}
-
-inline const Vector3 maxPerElem( const Vector3 & vec0, const Vector3 & vec1 )
-{
-    return Vector3(
-        (vec0.getX() > vec1.getX())? vec0.getX() : vec1.getX(),
-        (vec0.getY() > vec1.getY())? vec0.getY() : vec1.getY(),
-        (vec0.getZ() > vec1.getZ())? vec0.getZ() : vec1.getZ()
-    );
-}
-
-inline float maxElem( const Vector3 & vec )
-{
-    float result;
-    result = (vec.getX() > vec.getY())? vec.getX() : vec.getY();
-    result = (vec.getZ() > result)? vec.getZ() : result;
-    return result;
-}
-
-inline const Vector3 minPerElem( const Vector3 & vec0, const Vector3 & vec1 )
-{
-    return Vector3(
-        (vec0.getX() < vec1.getX())? vec0.getX() : vec1.getX(),
-        (vec0.getY() < vec1.getY())? vec0.getY() : vec1.getY(),
-        (vec0.getZ() < vec1.getZ())? vec0.getZ() : vec1.getZ()
-    );
-}
-
-inline float minElem( const Vector3 & vec )
-{
-    float result;
-    result = (vec.getX() < vec.getY())? vec.getX() : vec.getY();
-    result = (vec.getZ() < result)? vec.getZ() : result;
-    return result;
-}
-
-inline float sum( const Vector3 & vec )
-{
-    float result;
-    result = ( vec.getX() + vec.getY() );
-    result = ( result + vec.getZ() );
-    return result;
-}
-
-inline float dot( const Vector3 & vec0, const Vector3 & vec1 )
-{
-    float result;
-    result = ( vec0.getX() * vec1.getX() );
-    result = ( result + ( vec0.getY() * vec1.getY() ) );
-    result = ( result + ( vec0.getZ() * vec1.getZ() ) );
-    return result;
-}
-
-inline float lengthSqr( const Vector3 & vec )
-{
-    float result;
-    result = ( vec.getX() * vec.getX() );
-    result = ( result + ( vec.getY() * vec.getY() ) );
-    result = ( result + ( vec.getZ() * vec.getZ() ) );
-    return result;
-}
-
-inline float length( const Vector3 & vec )
-{
-    return sqrtf( lengthSqr( vec ) );
-}
-
-inline const Vector3 normalize( const Vector3 & vec )
-{
-    float lenSqr, lenInv;
-    lenSqr = lengthSqr( vec );
-    lenInv = ( 1.0f / sqrtf( lenSqr ) );
-    return Vector3(
-        ( vec.getX() * lenInv ),
-        ( vec.getY() * lenInv ),
-        ( vec.getZ() * lenInv )
-    );
-}
-
-inline const Vector3 cross( const Vector3 & vec0, const Vector3 & vec1 )
-{
-    return Vector3(
-        ( ( vec0.getY() * vec1.getZ() ) - ( vec0.getZ() * vec1.getY() ) ),
-        ( ( vec0.getZ() * vec1.getX() ) - ( vec0.getX() * vec1.getZ() ) ),
-        ( ( vec0.getX() * vec1.getY() ) - ( vec0.getY() * vec1.getX() ) )
-    );
-}
-
-inline const Vector3 select( const Vector3 & vec0, const Vector3 & vec1, bool select1 )
-{
-    return Vector3(
-        ( select1 )? vec1.getX() : vec0.getX(),
-        ( select1 )? vec1.getY() : vec0.getY(),
-        ( select1 )? vec1.getZ() : vec0.getZ()
-    );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( const Vector3 & vec )
-{
-    printf( "( %f %f %f )\n", vec.getX(), vec.getY(), vec.getZ() );
-}
-
-inline void print( const Vector3 & vec, const char * name )
-{
-    printf( "%s: ( %f %f %f )\n", name, vec.getX(), vec.getY(), vec.getZ() );
-}
-
-#endif
-
-inline Vector4::Vector4( const Vector4 & vec )
-{
-    mX = vec.mX;
-    mY = vec.mY;
-    mZ = vec.mZ;
-    mW = vec.mW;
-}
-
-inline Vector4::Vector4( float _x, float _y, float _z, float _w )
-{
-    mX = _x;
-    mY = _y;
-    mZ = _z;
-    mW = _w;
-}
-
-inline Vector4::Vector4( const Vector3 & xyz, float _w )
-{
-    this->setXYZ( xyz );
-    this->setW( _w );
-}
-
-inline Vector4::Vector4( const Vector3 & vec )
-{
-    mX = vec.getX();
-    mY = vec.getY();
-    mZ = vec.getZ();
-    mW = 0.0f;
-}
-
-inline Vector4::Vector4( const Point3 & pnt )
-{
-    mX = pnt.getX();
-    mY = pnt.getY();
-    mZ = pnt.getZ();
-    mW = 1.0f;
-}
-
-inline Vector4::Vector4( const Quat & quat )
-{
-    mX = quat.getX();
-    mY = quat.getY();
-    mZ = quat.getZ();
-    mW = quat.getW();
-}
-
-inline Vector4::Vector4( float scalar )
-{
-    mX = scalar;
-    mY = scalar;
-    mZ = scalar;
-    mW = scalar;
-}
-
-inline const Vector4 Vector4::xAxis( )
-{
-    return Vector4( 1.0f, 0.0f, 0.0f, 0.0f );
-}
-
-inline const Vector4 Vector4::yAxis( )
-{
-    return Vector4( 0.0f, 1.0f, 0.0f, 0.0f );
-}
-
-inline const Vector4 Vector4::zAxis( )
-{
-    return Vector4( 0.0f, 0.0f, 1.0f, 0.0f );
-}
-
-inline const Vector4 Vector4::wAxis( )
-{
-    return Vector4( 0.0f, 0.0f, 0.0f, 1.0f );
-}
-
-inline const Vector4 lerp( float t, const Vector4 & vec0, const Vector4 & vec1 )
-{
-    return ( vec0 + ( ( vec1 - vec0 ) * t ) );
-}
-
-inline const Vector4 slerp( float t, const Vector4 & unitVec0, const Vector4 & unitVec1 )
-{
-    float recipSinAngle, scale0, scale1, cosAngle, angle;
-    cosAngle = dot( unitVec0, unitVec1 );
-    if ( cosAngle < _VECTORMATH_SLERP_TOL ) {
-        angle = acosf( cosAngle );
-        recipSinAngle = ( 1.0f / sinf( angle ) );
-        scale0 = ( sinf( ( ( 1.0f - t ) * angle ) ) * recipSinAngle );
-        scale1 = ( sinf( ( t * angle ) ) * recipSinAngle );
-    } else {
-        scale0 = ( 1.0f - t );
-        scale1 = t;
-    }
-    return ( ( unitVec0 * scale0 ) + ( unitVec1 * scale1 ) );
-}
-
-inline Vector4 & Vector4::operator =( const Vector4 & vec )
-{
-    mX = vec.mX;
-    mY = vec.mY;
-    mZ = vec.mZ;
-    mW = vec.mW;
-    return *this;
-}
-
-inline Vector4 & Vector4::setXYZ( const Vector3 & vec )
-{
-    mX = vec.getX();
-    mY = vec.getY();
-    mZ = vec.getZ();
-    return *this;
-}
-
-inline const Vector3 Vector4::getXYZ( ) const
-{
-    return Vector3( mX, mY, mZ );
-}
-
-inline Vector4 & Vector4::setX( float _x )
-{
-    mX = _x;
-    return *this;
-}
-
-inline float Vector4::getX( ) const
-{
-    return mX;
-}
-
-inline Vector4 & Vector4::setY( float _y )
-{
-    mY = _y;
-    return *this;
-}
-
-inline float Vector4::getY( ) const
-{
-    return mY;
-}
-
-inline Vector4 & Vector4::setZ( float _z )
-{
-    mZ = _z;
-    return *this;
-}
-
-inline float Vector4::getZ( ) const
-{
-    return mZ;
-}
-
-inline Vector4 & Vector4::setW( float _w )
-{
-    mW = _w;
-    return *this;
-}
-
-inline float Vector4::getW( ) const
-{
-    return mW;
-}
-
-inline Vector4 & Vector4::setElem( int idx, float value )
-{
-    *(&mX + idx) = value;
-    return *this;
-}
-
-inline float Vector4::getElem( int idx ) const
-{
-    return *(&mX + idx);
-}
-
-inline float & Vector4::operator []( int idx )
-{
-    return *(&mX + idx);
-}
-
-inline float Vector4::operator []( int idx ) const
-{
-    return *(&mX + idx);
-}
-
-inline const Vector4 Vector4::operator +( const Vector4 & vec ) const
-{
-    return Vector4(
-        ( mX + vec.mX ),
-        ( mY + vec.mY ),
-        ( mZ + vec.mZ ),
-        ( mW + vec.mW )
-    );
-}
-
-inline const Vector4 Vector4::operator -( const Vector4 & vec ) const
-{
-    return Vector4(
-        ( mX - vec.mX ),
-        ( mY - vec.mY ),
-        ( mZ - vec.mZ ),
-        ( mW - vec.mW )
-    );
-}
-
-inline const Vector4 Vector4::operator *( float scalar ) const
-{
-    return Vector4(
-        ( mX * scalar ),
-        ( mY * scalar ),
-        ( mZ * scalar ),
-        ( mW * scalar )
-    );
-}
-
-inline Vector4 & Vector4::operator +=( const Vector4 & vec )
-{
-    *this = *this + vec;
-    return *this;
-}
-
-inline Vector4 & Vector4::operator -=( const Vector4 & vec )
-{
-    *this = *this - vec;
-    return *this;
-}
-
-inline Vector4 & Vector4::operator *=( float scalar )
-{
-    *this = *this * scalar;
-    return *this;
-}
-
-inline const Vector4 Vector4::operator /( float scalar ) const
-{
-    return Vector4(
-        ( mX / scalar ),
-        ( mY / scalar ),
-        ( mZ / scalar ),
-        ( mW / scalar )
-    );
-}
-
-inline Vector4 & Vector4::operator /=( float scalar )
-{
-    *this = *this / scalar;
-    return *this;
-}
-
-inline const Vector4 Vector4::operator -( ) const
-{
-    return Vector4(
-        -mX,
-        -mY,
-        -mZ,
-        -mW
-    );
-}
-
-inline const Vector4 operator *( float scalar, const Vector4 & vec )
-{
-    return vec * scalar;
-}
-
-inline const Vector4 mulPerElem( const Vector4 & vec0, const Vector4 & vec1 )
-{
-    return Vector4(
-        ( vec0.getX() * vec1.getX() ),
-        ( vec0.getY() * vec1.getY() ),
-        ( vec0.getZ() * vec1.getZ() ),
-        ( vec0.getW() * vec1.getW() )
-    );
-}
-
-inline const Vector4 divPerElem( const Vector4 & vec0, const Vector4 & vec1 )
-{
-    return Vector4(
-        ( vec0.getX() / vec1.getX() ),
-        ( vec0.getY() / vec1.getY() ),
-        ( vec0.getZ() / vec1.getZ() ),
-        ( vec0.getW() / vec1.getW() )
-    );
-}
-
-inline const Vector4 recipPerElem( const Vector4 & vec )
-{
-    return Vector4(
-        ( 1.0f / vec.getX() ),
-        ( 1.0f / vec.getY() ),
-        ( 1.0f / vec.getZ() ),
-        ( 1.0f / vec.getW() )
-    );
-}
-
-inline const Vector4 sqrtPerElem( const Vector4 & vec )
-{
-    return Vector4(
-        sqrtf( vec.getX() ),
-        sqrtf( vec.getY() ),
-        sqrtf( vec.getZ() ),
-        sqrtf( vec.getW() )
-    );
-}
-
-inline const Vector4 rsqrtPerElem( const Vector4 & vec )
-{
-    return Vector4(
-        ( 1.0f / sqrtf( vec.getX() ) ),
-        ( 1.0f / sqrtf( vec.getY() ) ),
-        ( 1.0f / sqrtf( vec.getZ() ) ),
-        ( 1.0f / sqrtf( vec.getW() ) )
-    );
-}
-
-inline const Vector4 absPerElem( const Vector4 & vec )
-{
-    return Vector4(
-        fabsf( vec.getX() ),
-        fabsf( vec.getY() ),
-        fabsf( vec.getZ() ),
-        fabsf( vec.getW() )
-    );
-}
-
-inline const Vector4 copySignPerElem( const Vector4 & vec0, const Vector4 & vec1 )
-{
-    return Vector4(
-        ( vec1.getX() < 0.0f )? -fabsf( vec0.getX() ) : fabsf( vec0.getX() ),
-        ( vec1.getY() < 0.0f )? -fabsf( vec0.getY() ) : fabsf( vec0.getY() ),
-        ( vec1.getZ() < 0.0f )? -fabsf( vec0.getZ() ) : fabsf( vec0.getZ() ),
-        ( vec1.getW() < 0.0f )? -fabsf( vec0.getW() ) : fabsf( vec0.getW() )
-    );
-}
-
-inline const Vector4 maxPerElem( const Vector4 & vec0, const Vector4 & vec1 )
-{
-    return Vector4(
-        (vec0.getX() > vec1.getX())? vec0.getX() : vec1.getX(),
-        (vec0.getY() > vec1.getY())? vec0.getY() : vec1.getY(),
-        (vec0.getZ() > vec1.getZ())? vec0.getZ() : vec1.getZ(),
-        (vec0.getW() > vec1.getW())? vec0.getW() : vec1.getW()
-    );
-}
-
-inline float maxElem( const Vector4 & vec )
-{
-    float result;
-    result = (vec.getX() > vec.getY())? vec.getX() : vec.getY();
-    result = (vec.getZ() > result)? vec.getZ() : result;
-    result = (vec.getW() > result)? vec.getW() : result;
-    return result;
-}
-
-inline const Vector4 minPerElem( const Vector4 & vec0, const Vector4 & vec1 )
-{
-    return Vector4(
-        (vec0.getX() < vec1.getX())? vec0.getX() : vec1.getX(),
-        (vec0.getY() < vec1.getY())? vec0.getY() : vec1.getY(),
-        (vec0.getZ() < vec1.getZ())? vec0.getZ() : vec1.getZ(),
-        (vec0.getW() < vec1.getW())? vec0.getW() : vec1.getW()
-    );
-}
-
-inline float minElem( const Vector4 & vec )
-{
-    float result;
-    result = (vec.getX() < vec.getY())? vec.getX() : vec.getY();
-    result = (vec.getZ() < result)? vec.getZ() : result;
-    result = (vec.getW() < result)? vec.getW() : result;
-    return result;
-}
-
-inline float sum( const Vector4 & vec )
-{
-    float result;
-    result = ( vec.getX() + vec.getY() );
-    result = ( result + vec.getZ() );
-    result = ( result + vec.getW() );
-    return result;
-}
-
-inline float dot( const Vector4 & vec0, const Vector4 & vec1 )
-{
-    float result;
-    result = ( vec0.getX() * vec1.getX() );
-    result = ( result + ( vec0.getY() * vec1.getY() ) );
-    result = ( result + ( vec0.getZ() * vec1.getZ() ) );
-    result = ( result + ( vec0.getW() * vec1.getW() ) );
-    return result;
-}
-
-inline float lengthSqr( const Vector4 & vec )
-{
-    float result;
-    result = ( vec.getX() * vec.getX() );
-    result = ( result + ( vec.getY() * vec.getY() ) );
-    result = ( result + ( vec.getZ() * vec.getZ() ) );
-    result = ( result + ( vec.getW() * vec.getW() ) );
-    return result;
-}
-
-inline float length( const Vector4 & vec )
-{
-    return sqrtf( lengthSqr( vec ) );
-}
-
-inline const Vector4 normalize( const Vector4 & vec )
-{
-    float lenSqr, lenInv;
-    lenSqr = lengthSqr( vec );
-    lenInv = ( 1.0f / sqrtf( lenSqr ) );
-    return Vector4(
-        ( vec.getX() * lenInv ),
-        ( vec.getY() * lenInv ),
-        ( vec.getZ() * lenInv ),
-        ( vec.getW() * lenInv )
-    );
-}
-
-inline const Vector4 select( const Vector4 & vec0, const Vector4 & vec1, bool select1 )
-{
-    return Vector4(
-        ( select1 )? vec1.getX() : vec0.getX(),
-        ( select1 )? vec1.getY() : vec0.getY(),
-        ( select1 )? vec1.getZ() : vec0.getZ(),
-        ( select1 )? vec1.getW() : vec0.getW()
-    );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( const Vector4 & vec )
-{
-    printf( "( %f %f %f %f )\n", vec.getX(), vec.getY(), vec.getZ(), vec.getW() );
-}
-
-inline void print( const Vector4 & vec, const char * name )
-{
-    printf( "%s: ( %f %f %f %f )\n", name, vec.getX(), vec.getY(), vec.getZ(), vec.getW() );
-}
-
-#endif
-
-inline Point3::Point3( const Point3 & pnt )
-{
-    mX = pnt.mX;
-    mY = pnt.mY;
-    mZ = pnt.mZ;
-}
-
-inline Point3::Point3( float _x, float _y, float _z )
-{
-    mX = _x;
-    mY = _y;
-    mZ = _z;
-}
-
-inline Point3::Point3( const Vector3 & vec )
-{
-    mX = vec.getX();
-    mY = vec.getY();
-    mZ = vec.getZ();
-}
-
-inline Point3::Point3( float scalar )
-{
-    mX = scalar;
-    mY = scalar;
-    mZ = scalar;
-}
-
-inline const Point3 lerp( float t, const Point3 & pnt0, const Point3 & pnt1 )
-{
-    return ( pnt0 + ( ( pnt1 - pnt0 ) * t ) );
-}
-
-inline Point3 & Point3::operator =( const Point3 & pnt )
-{
-    mX = pnt.mX;
-    mY = pnt.mY;
-    mZ = pnt.mZ;
-    return *this;
-}
-
-inline Point3 & Point3::setX( float _x )
-{
-    mX = _x;
-    return *this;
-}
-
-inline float Point3::getX( ) const
-{
-    return mX;
-}
-
-inline Point3 & Point3::setY( float _y )
-{
-    mY = _y;
-    return *this;
-}
-
-inline float Point3::getY( ) const
-{
-    return mY;
-}
-
-inline Point3 & Point3::setZ( float _z )
-{
-    mZ = _z;
-    return *this;
-}
-
-inline float Point3::getZ( ) const
-{
-    return mZ;
-}
-
-inline Point3 & Point3::setElem( int idx, float value )
-{
-    *(&mX + idx) = value;
-    return *this;
-}
-
-inline float Point3::getElem( int idx ) const
-{
-    return *(&mX + idx);
-}
-
-inline float & Point3::operator []( int idx )
-{
-    return *(&mX + idx);
-}
-
-inline float Point3::operator []( int idx ) const
-{
-    return *(&mX + idx);
-}
-
-inline const Vector3 Point3::operator -( const Point3 & pnt ) const
-{
-    return Vector3(
-        ( mX - pnt.mX ),
-        ( mY - pnt.mY ),
-        ( mZ - pnt.mZ )
-    );
-}
-
-inline const Point3 Point3::operator +( const Vector3 & vec ) const
-{
-    return Point3(
-        ( mX + vec.getX() ),
-        ( mY + vec.getY() ),
-        ( mZ + vec.getZ() )
-    );
-}
-
-inline const Point3 Point3::operator -( const Vector3 & vec ) const
-{
-    return Point3(
-        ( mX - vec.getX() ),
-        ( mY - vec.getY() ),
-        ( mZ - vec.getZ() )
-    );
-}
-
-inline Point3 & Point3::operator +=( const Vector3 & vec )
-{
-    *this = *this + vec;
-    return *this;
-}
-
-inline Point3 & Point3::operator -=( const Vector3 & vec )
-{
-    *this = *this - vec;
-    return *this;
-}
-
-inline const Point3 mulPerElem( const Point3 & pnt0, const Point3 & pnt1 )
-{
-    return Point3(
-        ( pnt0.getX() * pnt1.getX() ),
-        ( pnt0.getY() * pnt1.getY() ),
-        ( pnt0.getZ() * pnt1.getZ() )
-    );
-}
-
-inline const Point3 divPerElem( const Point3 & pnt0, const Point3 & pnt1 )
-{
-    return Point3(
-        ( pnt0.getX() / pnt1.getX() ),
-        ( pnt0.getY() / pnt1.getY() ),
-        ( pnt0.getZ() / pnt1.getZ() )
-    );
-}
-
-inline const Point3 recipPerElem( const Point3 & pnt )
-{
-    return Point3(
-        ( 1.0f / pnt.getX() ),
-        ( 1.0f / pnt.getY() ),
-        ( 1.0f / pnt.getZ() )
-    );
-}
-
-inline const Point3 sqrtPerElem( const Point3 & pnt )
-{
-    return Point3(
-        sqrtf( pnt.getX() ),
-        sqrtf( pnt.getY() ),
-        sqrtf( pnt.getZ() )
-    );
-}
-
-inline const Point3 rsqrtPerElem( const Point3 & pnt )
-{
-    return Point3(
-        ( 1.0f / sqrtf( pnt.getX() ) ),
-        ( 1.0f / sqrtf( pnt.getY() ) ),
-        ( 1.0f / sqrtf( pnt.getZ() ) )
-    );
-}
-
-inline const Point3 absPerElem( const Point3 & pnt )
-{
-    return Point3(
-        fabsf( pnt.getX() ),
-        fabsf( pnt.getY() ),
-        fabsf( pnt.getZ() )
-    );
-}
-
-inline const Point3 copySignPerElem( const Point3 & pnt0, const Point3 & pnt1 )
-{
-    return Point3(
-        ( pnt1.getX() < 0.0f )? -fabsf( pnt0.getX() ) : fabsf( pnt0.getX() ),
-        ( pnt1.getY() < 0.0f )? -fabsf( pnt0.getY() ) : fabsf( pnt0.getY() ),
-        ( pnt1.getZ() < 0.0f )? -fabsf( pnt0.getZ() ) : fabsf( pnt0.getZ() )
-    );
-}
-
-inline const Point3 maxPerElem( const Point3 & pnt0, const Point3 & pnt1 )
-{
-    return Point3(
-        (pnt0.getX() > pnt1.getX())? pnt0.getX() : pnt1.getX(),
-        (pnt0.getY() > pnt1.getY())? pnt0.getY() : pnt1.getY(),
-        (pnt0.getZ() > pnt1.getZ())? pnt0.getZ() : pnt1.getZ()
-    );
-}
-
-inline float maxElem( const Point3 & pnt )
-{
-    float result;
-    result = (pnt.getX() > pnt.getY())? pnt.getX() : pnt.getY();
-    result = (pnt.getZ() > result)? pnt.getZ() : result;
-    return result;
-}
-
-inline const Point3 minPerElem( const Point3 & pnt0, const Point3 & pnt1 )
-{
-    return Point3(
-        (pnt0.getX() < pnt1.getX())? pnt0.getX() : pnt1.getX(),
-        (pnt0.getY() < pnt1.getY())? pnt0.getY() : pnt1.getY(),
-        (pnt0.getZ() < pnt1.getZ())? pnt0.getZ() : pnt1.getZ()
-    );
-}
-
-inline float minElem( const Point3 & pnt )
-{
-    float result;
-    result = (pnt.getX() < pnt.getY())? pnt.getX() : pnt.getY();
-    result = (pnt.getZ() < result)? pnt.getZ() : result;
-    return result;
-}
-
-inline float sum( const Point3 & pnt )
-{
-    float result;
-    result = ( pnt.getX() + pnt.getY() );
-    result = ( result + pnt.getZ() );
-    return result;
-}
-
-inline const Point3 scale( const Point3 & pnt, float scaleVal )
-{
-    return mulPerElem( pnt, Point3( scaleVal ) );
-}
-
-inline const Point3 scale( const Point3 & pnt, const Vector3 & scaleVec )
-{
-    return mulPerElem( pnt, Point3( scaleVec ) );
-}
-
-inline float projection( const Point3 & pnt, const Vector3 & unitVec )
-{
-    float result;
-    result = ( pnt.getX() * unitVec.getX() );
-    result = ( result + ( pnt.getY() * unitVec.getY() ) );
-    result = ( result + ( pnt.getZ() * unitVec.getZ() ) );
-    return result;
-}
-
-inline float distSqrFromOrigin( const Point3 & pnt )
-{
-    return lengthSqr( Vector3( pnt ) );
-}
-
-inline float distFromOrigin( const Point3 & pnt )
-{
-    return length( Vector3( pnt ) );
-}
-
-inline float distSqr( const Point3 & pnt0, const Point3 & pnt1 )
-{
-    return lengthSqr( ( pnt1 - pnt0 ) );
-}
-
-inline float dist( const Point3 & pnt0, const Point3 & pnt1 )
-{
-    return length( ( pnt1 - pnt0 ) );
-}
-
-inline const Point3 select( const Point3 & pnt0, const Point3 & pnt1, bool select1 )
-{
-    return Point3(
-        ( select1 )? pnt1.getX() : pnt0.getX(),
-        ( select1 )? pnt1.getY() : pnt0.getY(),
-        ( select1 )? pnt1.getZ() : pnt0.getZ()
-    );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( const Point3 & pnt )
-{
-    printf( "( %f %f %f )\n", pnt.getX(), pnt.getY(), pnt.getZ() );
-}
-
-inline void print( const Point3 & pnt, const char * name )
-{
-    printf( "%s: ( %f %f %f )\n", name, pnt.getX(), pnt.getY(), pnt.getZ() );
-}
-
-#endif
-
-} // namespace Aos
-} // namespace Vectormath
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_VEC_AOS_CPP_H
+#define _VECTORMATH_VEC_AOS_CPP_H
+//-----------------------------------------------------------------------------
+// Constants
+
+#define _VECTORMATH_SLERP_TOL 0.999f  // slerp() uses plain (1-t, t) weights when the inputs' dot product is not below this
+
+//-----------------------------------------------------------------------------
+// Definitions
+
+#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
+#define _VECTORMATH_INTERNAL_FUNCTIONS
+
+#endif
+
+namespace Vectormath {
+namespace Aos {
+
+inline Vector3::Vector3( const Vector3 & vec )
+{   // Copy constructor: duplicates the x, y, z members of vec.
+    mX = vec.mX;
+    mY = vec.mY;
+    mZ = vec.mZ;
+}
+
+inline Vector3::Vector3( float _x, float _y, float _z )
+{   // Builds a vector from three explicit component values.
+    mX = _x;
+    mY = _y;
+    mZ = _z;
+}
+
+inline Vector3::Vector3( const Point3 & pnt )
+{   // Converts a point to a vector by copying its x, y, z values.
+    mX = pnt.getX();
+    mY = pnt.getY();
+    mZ = pnt.getZ();
+}
+
+inline Vector3::Vector3( float scalar )
+{   // Broadcast constructor: every component is set to scalar.
+    mX = scalar;
+    mY = scalar;
+    mZ = scalar;
+}
+
+inline const Vector3 Vector3::xAxis( )
+{   // Returns the constant vector (1, 0, 0).
+    return Vector3( 1.0f, 0.0f, 0.0f );
+}
+
+inline const Vector3 Vector3::yAxis( )
+{   // Returns the constant vector (0, 1, 0).
+    return Vector3( 0.0f, 1.0f, 0.0f );
+}
+
+inline const Vector3 Vector3::zAxis( )
+{   // Returns the constant vector (0, 0, 1).
+    return Vector3( 0.0f, 0.0f, 1.0f );
+}
+
+inline const Vector3 lerp( float t, const Vector3 & vec0, const Vector3 & vec1 )
+{   // Linear interpolation: vec0 + (vec1 - vec0) * t; t is not clamped.
+    return ( vec0 + ( ( vec1 - vec0 ) * t ) );
+}
+
+inline const Vector3 slerp( float t, const Vector3 & unitVec0, const Vector3 & unitVec1 )
+{   // Spherical interpolation of two unit vectors; t = 0 yields unitVec0 and t = 1 yields unitVec1.
+    float recipSinAngle, scale0, scale1, cosAngle, angle;
+    cosAngle = dot( unitVec0, unitVec1 );
+    if ( cosAngle < _VECTORMATH_SLERP_TOL ) {
+        angle = acosf( ( cosAngle < -1.0f )? -1.0f : cosAngle );  // clamp: rounding can push the dot below -1, where acosf yields NaN
+        recipSinAngle = ( 1.0f / sinf( angle ) );  // NOTE: still ill-conditioned when the inputs are (nearly) opposite
+        scale0 = ( sinf( ( ( 1.0f - t ) * angle ) ) * recipSinAngle );
+        scale1 = ( sinf( ( t * angle ) ) * recipSinAngle );
+    } else {  // inputs nearly parallel: fall back to linear weights to avoid dividing by a tiny sine
+        scale0 = ( 1.0f - t );
+        scale1 = t;
+    }
+    return ( ( unitVec0 * scale0 ) + ( unitVec1 * scale1 ) );
+}
+
+inline Vector3 & Vector3::operator =( const Vector3 & vec )
+{   // Assignment: copies x, y, z from vec and returns *this for chaining.
+    mX = vec.mX;
+    mY = vec.mY;
+    mZ = vec.mZ;
+    return *this;
+}
+
+inline Vector3 & Vector3::setX( float _x )
+{   // Sets the x component; returns *this so setters can be chained.
+    mX = _x;
+    return *this;
+}
+
+inline float Vector3::getX( ) const
+{   // Returns the x component.
+    return mX;
+}
+
+inline Vector3 & Vector3::setY( float _y )
+{   // Sets the y component; returns *this so setters can be chained.
+    mY = _y;
+    return *this;
+}
+
+inline float Vector3::getY( ) const
+{   // Returns the y component.
+    return mY;
+}
+
+inline Vector3 & Vector3::setZ( float _z )
+{   // Sets the z component; returns *this so setters can be chained.
+    mZ = _z;
+    return *this;
+}
+
+inline float Vector3::getZ( ) const
+{   // Returns the z component.
+    return mZ;
+}
+
+inline Vector3 & Vector3::setElem( int idx, float value )
+{   // Stores value at offset idx from mX; idx is not range-checked.
+    *(&mX + idx) = value;  // presumably 0 = x, 1 = y, 2 = z; depends on the member layout declared elsewhere
+    return *this;
+}
+
+inline float Vector3::getElem( int idx ) const
+{   // Reads the float at offset idx from mX; idx is not range-checked.
+    return *(&mX + idx);
+}
+
+inline float & Vector3::operator []( int idx )
+{   // Mutable element access by offset from mX; idx is not range-checked.
+    return *(&mX + idx);
+}
+
+inline float Vector3::operator []( int idx ) const
+{   // Read-only element access (returns by value); idx is not range-checked.
+    return *(&mX + idx);
+}
+
+inline const Vector3 Vector3::operator +( const Vector3 & vec ) const
+{   // Component-wise sum of two vectors.
+    return Vector3(
+        ( mX + vec.mX ),
+        ( mY + vec.mY ),
+        ( mZ + vec.mZ )
+    );
+}
+
+inline const Vector3 Vector3::operator -( const Vector3 & vec ) const
+{   // Component-wise difference (this minus vec).
+    return Vector3(
+        ( mX - vec.mX ),
+        ( mY - vec.mY ),
+        ( mZ - vec.mZ )
+    );
+}
+
+inline const Point3 Vector3::operator +( const Point3 & pnt ) const
+{   // Vector plus point: adds components and returns the result as a Point3.
+    return Point3(
+        ( mX + pnt.getX() ),
+        ( mY + pnt.getY() ),
+        ( mZ + pnt.getZ() )
+    );
+}
+
+inline const Vector3 Vector3::operator *( float scalar ) const
+{   // Multiplies every component by scalar.
+    return Vector3(
+        ( mX * scalar ),
+        ( mY * scalar ),
+        ( mZ * scalar )
+    );
+}
+
+inline Vector3 & Vector3::operator +=( const Vector3 & vec )
+{   // In-place addition, implemented through operator + and assignment.
+    *this = *this + vec;
+    return *this;
+}
+
+inline Vector3 & Vector3::operator -=( const Vector3 & vec )
+{   // In-place subtraction, implemented through operator - and assignment.
+    *this = *this - vec;
+    return *this;
+}
+
+inline Vector3 & Vector3::operator *=( float scalar )
+{   // In-place scalar multiply, implemented through operator * and assignment.
+    *this = *this * scalar;
+    return *this;
+}
+
+inline const Vector3 Vector3::operator /( float scalar ) const
+{   // Divides every component by scalar; a zero scalar is not guarded against.
+    return Vector3(
+        ( mX / scalar ),
+        ( mY / scalar ),
+        ( mZ / scalar )
+    );
+}
+
+inline Vector3 & Vector3::operator /=( float scalar )
+{   // In-place scalar divide, implemented through operator / and assignment.
+    *this = *this / scalar;
+    return *this;
+}
+
+inline const Vector3 Vector3::operator -( ) const
+{   // Unary minus: negates each component.
+    return Vector3(
+        -mX,
+        -mY,
+        -mZ
+    );
+}
+
+inline const Vector3 operator *( float scalar, const Vector3 & vec )
+{   // Scalar-on-the-left multiply; forwards to Vector3::operator *( float ).
+    return vec * scalar;
+}
+
+inline const Vector3 mulPerElem( const Vector3 & vec0, const Vector3 & vec1 )
+{   // Component-wise product of vec0 and vec1.
+    return Vector3(
+        ( vec0.getX() * vec1.getX() ),
+        ( vec0.getY() * vec1.getY() ),
+        ( vec0.getZ() * vec1.getZ() )
+    );
+}
+
+inline const Vector3 divPerElem( const Vector3 & vec0, const Vector3 & vec1 )
+{   // Component-wise quotient vec0 / vec1; zero divisors are not guarded against.
+    return Vector3(
+        ( vec0.getX() / vec1.getX() ),
+        ( vec0.getY() / vec1.getY() ),
+        ( vec0.getZ() / vec1.getZ() )
+    );
+}
+
+inline const Vector3 recipPerElem( const Vector3 & vec )
+{   // Component-wise reciprocal (1 / component); zero components are not guarded against.
+    return Vector3(
+        ( 1.0f / vec.getX() ),
+        ( 1.0f / vec.getY() ),
+        ( 1.0f / vec.getZ() )
+    );
+}
+
+inline const Vector3 sqrtPerElem( const Vector3 & vec )
+{   // Component-wise square root via sqrtf.
+    return Vector3(
+        sqrtf( vec.getX() ),
+        sqrtf( vec.getY() ),
+        sqrtf( vec.getZ() )
+    );
+}
+
+inline const Vector3 rsqrtPerElem( const Vector3 & vec )
+{   // Component-wise reciprocal square root, computed as 1 / sqrtf( component ).
+    return Vector3(
+        ( 1.0f / sqrtf( vec.getX() ) ),
+        ( 1.0f / sqrtf( vec.getY() ) ),
+        ( 1.0f / sqrtf( vec.getZ() ) )
+    );
+}
+
+inline const Vector3 absPerElem( const Vector3 & vec )
+{   // Component-wise absolute value via fabsf.
+    return Vector3(
+        fabsf( vec.getX() ),
+        fabsf( vec.getY() ),
+        fabsf( vec.getZ() )
+    );
+}
+
+inline const Vector3 copySignPerElem( const Vector3 & vec0, const Vector3 & vec1 )
+{   // Per component: magnitude of vec0, negative when the vec1 component is < 0.
+    return Vector3(  // NOTE: a -0.0f or NaN in vec1 fails the "< 0.0f" test and so gives a positive result
+        ( vec1.getX() < 0.0f )? -fabsf( vec0.getX() ) : fabsf( vec0.getX() ),
+        ( vec1.getY() < 0.0f )? -fabsf( vec0.getY() ) : fabsf( vec0.getY() ),
+        ( vec1.getZ() < 0.0f )? -fabsf( vec0.getZ() ) : fabsf( vec0.getZ() )
+    );
+}
+
+inline const Vector3 maxPerElem( const Vector3 & vec0, const Vector3 & vec1 )
+{   // Component-wise maximum; vec1's value is taken whenever "vec0 > vec1" is false.
+    return Vector3(
+        (vec0.getX() > vec1.getX())? vec0.getX() : vec1.getX(),
+        (vec0.getY() > vec1.getY())? vec0.getY() : vec1.getY(),
+        (vec0.getZ() > vec1.getZ())? vec0.getZ() : vec1.getZ()
+    );
+}
+
+inline float maxElem( const Vector3 & vec )
+{   // Returns the largest of the three components.
+    float largest;  // running maximum
+    largest = (vec.getX() > vec.getY())? vec.getX() : vec.getY();
+    if ( vec.getZ() > largest ) largest = vec.getZ();
+    return largest;
+}
+
+inline const Vector3 minPerElem( const Vector3 & vec0, const Vector3 & vec1 )
+{   // Component-wise minimum; vec1's value is taken whenever "vec0 < vec1" is false.
+    return Vector3(
+        (vec0.getX() < vec1.getX())? vec0.getX() : vec1.getX(),
+        (vec0.getY() < vec1.getY())? vec0.getY() : vec1.getY(),
+        (vec0.getZ() < vec1.getZ())? vec0.getZ() : vec1.getZ()
+    );
+}
+
+inline float minElem( const Vector3 & vec )
+{   // Returns the smallest of the three components.
+    float smallest;  // running minimum
+    smallest = (vec.getX() < vec.getY())? vec.getX() : vec.getY();
+    if ( vec.getZ() < smallest ) smallest = vec.getZ();
+    return smallest;
+}
+
+inline float sum( const Vector3 & vec )
+{   // Adds the components in x, y, z order.
+    float total;
+    total  = ( vec.getX() + vec.getY() );
+    total += vec.getZ();
+    return total;
+}
+
+inline float dot( const Vector3 & vec0, const Vector3 & vec1 )
+{   // Dot product, accumulated in x, y, z order.
+    float acc;
+    acc  = ( vec0.getX() * vec1.getX() );
+    acc += ( vec0.getY() * vec1.getY() );
+    acc += ( vec0.getZ() * vec1.getZ() );
+    return acc;
+}
+
+inline float lengthSqr( const Vector3 & vec )
+{   // Squared Euclidean length: x*x + y*y + z*z, accumulated in that order.
+    float sq;
+    sq  = ( vec.getX() * vec.getX() );
+    sq += ( vec.getY() * vec.getY() );
+    sq += ( vec.getZ() * vec.getZ() );
+    return sq;
+}
+
+inline float length( const Vector3 & vec )
+{   // Euclidean length: square root of lengthSqr( vec ).
+    return sqrtf( lengthSqr( vec ) );
+}
+
+inline const Vector3 normalize( const Vector3 & vec )
+{
+    // Scales vec by the reciprocal of its length; a zero-length input is not guarded against.
+    float invLen;
+    invLen = ( 1.0f / sqrtf( lengthSqr( vec ) ) );
+    return Vector3(
+        ( vec.getX() * invLen ),
+        ( vec.getY() * invLen ),
+        ( vec.getZ() * invLen )
+    );
+}
+
+inline const Vector3 cross( const Vector3 & vec0, const Vector3 & vec1 )
+{   // Cross product vec0 x vec1.
+    return Vector3(
+        ( ( vec0.getY() * vec1.getZ() ) - ( vec0.getZ() * vec1.getY() ) ),
+        ( ( vec0.getZ() * vec1.getX() ) - ( vec0.getX() * vec1.getZ() ) ),
+        ( ( vec0.getX() * vec1.getY() ) - ( vec0.getY() * vec1.getX() ) )
+    );
+}
+
+inline const Vector3 select( const Vector3 & vec0, const Vector3 & vec1, bool select1 )
+{
+    // Returns a copy of vec1 when select1 is true, otherwise a copy of vec0.
+    const Vector3 & src = ( select1 )? vec1 : vec0;
+    return Vector3(
+        src.getX(), src.getY(), src.getZ()
+    );
+}
+
+#ifdef _VECTORMATH_DEBUG  // print helpers are compiled only when this macro is defined
+
+inline void print( const Vector3 & vec )
+{   // Writes the three components to stdout as "( x y z )" plus a newline.
+    printf( "( %f %f %f )\n", vec.getX(), vec.getY(), vec.getZ() );
+}
+
+inline void print( const Vector3 & vec, const char * name )
+{   // Same output, prefixed with "name: ".
+    printf( "%s: ( %f %f %f )\n", name, vec.getX(), vec.getY(), vec.getZ() );
+}
+
+#endif
+
+inline Vector4::Vector4( const Vector4 & vec )
+{   // Copy constructor: duplicates the x, y, z, w members of vec.
+    mX = vec.mX;
+    mY = vec.mY;
+    mZ = vec.mZ;
+    mW = vec.mW;
+}
+
+inline Vector4::Vector4( float _x, float _y, float _z, float _w )
+{   // Builds a vector from four explicit component values.
+    mX = _x;
+    mY = _y;
+    mZ = _z;
+    mW = _w;
+}
+
+inline Vector4::Vector4( const Vector3 & xyz, float _w )
+{   // Takes x, y, z from a Vector3 and w from a separate float.
+    this->setXYZ( xyz );
+    this->setW( _w );
+}
+
+inline Vector4::Vector4( const Vector3 & vec )
+{   // Promotes a Vector3 to four components with w = 0.
+    mX = vec.getX();
+    mY = vec.getY();
+    mZ = vec.getZ();
+    mW = 0.0f;
+}
+
+inline Vector4::Vector4( const Point3 & pnt )
+{   // Promotes a Point3 to four components with w = 1.
+    mX = pnt.getX();
+    mY = pnt.getY();
+    mZ = pnt.getZ();
+    mW = 1.0f;
+}
+
+inline Vector4::Vector4( const Quat & quat )
+{   // Copies the four components of a quaternion unchanged.
+    mX = quat.getX();
+    mY = quat.getY();
+    mZ = quat.getZ();
+    mW = quat.getW();
+}
+
+inline Vector4::Vector4( float scalar )
+{   // Broadcast constructor: every component is set to scalar.
+    mX = scalar;
+    mY = scalar;
+    mZ = scalar;
+    mW = scalar;
+}
+
+inline const Vector4 Vector4::xAxis( )
+{   // Returns the constant vector (1, 0, 0, 0).
+    return Vector4( 1.0f, 0.0f, 0.0f, 0.0f );
+}
+
+inline const Vector4 Vector4::yAxis( )
+{   // Returns the constant vector (0, 1, 0, 0).
+    return Vector4( 0.0f, 1.0f, 0.0f, 0.0f );
+}
+
+inline const Vector4 Vector4::zAxis( )
+{   // Returns the constant vector (0, 0, 1, 0).
+    return Vector4( 0.0f, 0.0f, 1.0f, 0.0f );
+}
+
+inline const Vector4 Vector4::wAxis( )
+{   // Returns the constant vector (0, 0, 0, 1).
+    return Vector4( 0.0f, 0.0f, 0.0f, 1.0f );
+}
+
+inline const Vector4 lerp( float t, const Vector4 & vec0, const Vector4 & vec1 )
+{   // Linear interpolation: vec0 + (vec1 - vec0) * t; t is not clamped.
+    return ( vec0 + ( ( vec1 - vec0 ) * t ) );
+}
+
+inline const Vector4 slerp( float t, const Vector4 & unitVec0, const Vector4 & unitVec1 )
+{   // Spherical interpolation of two unit vectors; t = 0 yields unitVec0 and t = 1 yields unitVec1.
+    float recipSinAngle, scale0, scale1, cosAngle, angle;
+    cosAngle = dot( unitVec0, unitVec1 );
+    if ( cosAngle < _VECTORMATH_SLERP_TOL ) {
+        angle = acosf( ( cosAngle < -1.0f )? -1.0f : cosAngle );  // clamp: rounding can push the dot below -1, where acosf yields NaN
+        recipSinAngle = ( 1.0f / sinf( angle ) );  // NOTE: still ill-conditioned when the inputs are (nearly) opposite
+        scale0 = ( sinf( ( ( 1.0f - t ) * angle ) ) * recipSinAngle );
+        scale1 = ( sinf( ( t * angle ) ) * recipSinAngle );
+    } else {  // inputs nearly parallel: fall back to linear weights to avoid dividing by a tiny sine
+        scale0 = ( 1.0f - t );
+        scale1 = t;
+    }
+    return ( ( unitVec0 * scale0 ) + ( unitVec1 * scale1 ) );
+}
+
+inline Vector4 & Vector4::operator =( const Vector4 & vec )
+{   // Assignment: copies x, y, z, w from vec and returns *this for chaining.
+    mX = vec.mX;
+    mY = vec.mY;
+    mZ = vec.mZ;
+    mW = vec.mW;
+    return *this;
+}
+
+inline Vector4 & Vector4::setXYZ( const Vector3 & vec )
+{   // Overwrites x, y, z from vec; w is left untouched.
+    mX = vec.getX();
+    mY = vec.getY();
+    mZ = vec.getZ();
+    return *this;
+}
+
+inline const Vector3 Vector4::getXYZ( ) const
+{   // Returns the first three components as a Vector3 (w is dropped).
+    return Vector3( mX, mY, mZ );
+}
+
+inline Vector4 & Vector4::setX( float _x )
+{   // Sets the x component; returns *this so setters can be chained.
+    mX = _x;
+    return *this;
+}
+
+inline float Vector4::getX( ) const
+{   // Returns the x component.
+    return mX;
+}
+
+inline Vector4 & Vector4::setY( float _y )
+{   // Sets the y component; returns *this so setters can be chained.
+    mY = _y;
+    return *this;
+}
+
+inline float Vector4::getY( ) const
+{   // Returns the y component.
+    return mY;
+}
+
+inline Vector4 & Vector4::setZ( float _z )
+{   // Sets the z component; returns *this so setters can be chained.
+    mZ = _z;
+    return *this;
+}
+
+inline float Vector4::getZ( ) const
+{   // Returns the z component.
+    return mZ;
+}
+
+inline Vector4 & Vector4::setW( float _w )
+{   // Sets the w component; returns *this so setters can be chained.
+    mW = _w;
+    return *this;
+}
+
+inline float Vector4::getW( ) const
+{   // Returns the w component.
+    return mW;
+}
+
+inline Vector4 & Vector4::setElem( int idx, float value )
+{   // Stores value at offset idx from mX; idx is not range-checked.
+    *(&mX + idx) = value;  // presumably 0 = x ... 3 = w; depends on the member layout declared elsewhere
+    return *this;
+}
+
+inline float Vector4::getElem( int idx ) const
+{   // Reads the float at offset idx from mX; idx is not range-checked.
+    return *(&mX + idx);
+}
+
+inline float & Vector4::operator []( int idx )
+{   // Mutable element access by offset from mX; idx is not range-checked.
+    return *(&mX + idx);
+}
+
+inline float Vector4::operator []( int idx ) const
+{   // Read-only element access (returns by value); idx is not range-checked.
+    return *(&mX + idx);
+}
+
+inline const Vector4 Vector4::operator +( const Vector4 & vec ) const
+{   // Component-wise sum of two vectors.
+    return Vector4(
+        ( mX + vec.mX ),
+        ( mY + vec.mY ),
+        ( mZ + vec.mZ ),
+        ( mW + vec.mW )
+    );
+}
+
+inline const Vector4 Vector4::operator -( const Vector4 & vec ) const
+{   // Component-wise difference (this minus vec).
+    return Vector4(
+        ( mX - vec.mX ),
+        ( mY - vec.mY ),
+        ( mZ - vec.mZ ),
+        ( mW - vec.mW )
+    );
+}
+
+inline const Vector4 Vector4::operator *( float scalar ) const
+{   // Multiplies every component by scalar.
+    return Vector4(
+        ( mX * scalar ),
+        ( mY * scalar ),
+        ( mZ * scalar ),
+        ( mW * scalar )
+    );
+}
+
+inline Vector4 & Vector4::operator +=( const Vector4 & vec )
+{   // In-place addition, implemented through operator + and assignment.
+    *this = *this + vec;
+    return *this;
+}
+
+inline Vector4 & Vector4::operator -=( const Vector4 & vec )
+{   // In-place subtraction, implemented through operator - and assignment.
+    *this = *this - vec;
+    return *this;
+}
+
+inline Vector4 & Vector4::operator *=( float scalar )
+{   // In-place scalar multiply, implemented through operator * and assignment.
+    *this = *this * scalar;
+    return *this;
+}
+
+inline const Vector4 Vector4::operator /( float scalar ) const
+{   // Divides every component by scalar; a zero scalar is not guarded against.
+    return Vector4(
+        ( mX / scalar ),
+        ( mY / scalar ),
+        ( mZ / scalar ),
+        ( mW / scalar )
+    );
+}
+
+inline Vector4 & Vector4::operator /=( float scalar )
+{   // In-place scalar divide, implemented through operator / and assignment.
+    *this = *this / scalar;
+    return *this;
+}
+
+inline const Vector4 Vector4::operator -( ) const
+{   // Unary minus: negates each component.
+    return Vector4(
+        -mX,
+        -mY,
+        -mZ,
+        -mW
+    );
+}
+
+inline const Vector4 operator *( float scalar, const Vector4 & vec )
+{   // Scalar-on-the-left multiply; forwards to Vector4::operator *( float ).
+    return vec * scalar;
+}
+
+inline const Vector4 mulPerElem( const Vector4 & vec0, const Vector4 & vec1 )
+{   // Component-wise product of vec0 and vec1.
+    return Vector4(
+        ( vec0.getX() * vec1.getX() ),
+        ( vec0.getY() * vec1.getY() ),
+        ( vec0.getZ() * vec1.getZ() ),
+        ( vec0.getW() * vec1.getW() )
+    );
+}
+
+inline const Vector4 divPerElem( const Vector4 & vec0, const Vector4 & vec1 )
+{   // Component-wise quotient vec0 / vec1; zero divisors are not guarded against.
+    return Vector4(
+        ( vec0.getX() / vec1.getX() ),
+        ( vec0.getY() / vec1.getY() ),
+        ( vec0.getZ() / vec1.getZ() ),
+        ( vec0.getW() / vec1.getW() )
+    );
+}
+
+inline const Vector4 recipPerElem( const Vector4 & vec )
+{   // Component-wise reciprocal (1 / component); zero components are not guarded against.
+    return Vector4(
+        ( 1.0f / vec.getX() ),
+        ( 1.0f / vec.getY() ),
+        ( 1.0f / vec.getZ() ),
+        ( 1.0f / vec.getW() )
+    );
+}
+
+inline const Vector4 sqrtPerElem( const Vector4 & vec )
+{   // Component-wise square root via sqrtf.
+    return Vector4(
+        sqrtf( vec.getX() ),
+        sqrtf( vec.getY() ),
+        sqrtf( vec.getZ() ),
+        sqrtf( vec.getW() )
+    );
+}
+
+inline const Vector4 rsqrtPerElem( const Vector4 & vec )
+{   // Component-wise reciprocal square root, computed as 1 / sqrtf( component ).
+    return Vector4(
+        ( 1.0f / sqrtf( vec.getX() ) ),
+        ( 1.0f / sqrtf( vec.getY() ) ),
+        ( 1.0f / sqrtf( vec.getZ() ) ),
+        ( 1.0f / sqrtf( vec.getW() ) )
+    );
+}
+
+inline const Vector4 absPerElem( const Vector4 & vec )
+{   // Component-wise absolute value via fabsf.
+    return Vector4(
+        fabsf( vec.getX() ),
+        fabsf( vec.getY() ),
+        fabsf( vec.getZ() ),
+        fabsf( vec.getW() )
+    );
+}
+
+inline const Vector4 copySignPerElem( const Vector4 & vec0, const Vector4 & vec1 )
+{   // Per component: magnitude of vec0, negative when the vec1 component is < 0.
+    return Vector4(  // NOTE: a -0.0f or NaN in vec1 fails the "< 0.0f" test and so gives a positive result
+        ( vec1.getX() < 0.0f )? -fabsf( vec0.getX() ) : fabsf( vec0.getX() ),
+        ( vec1.getY() < 0.0f )? -fabsf( vec0.getY() ) : fabsf( vec0.getY() ),
+        ( vec1.getZ() < 0.0f )? -fabsf( vec0.getZ() ) : fabsf( vec0.getZ() ),
+        ( vec1.getW() < 0.0f )? -fabsf( vec0.getW() ) : fabsf( vec0.getW() )
+    );
+}
+
+inline const Vector4 maxPerElem( const Vector4 & vec0, const Vector4 & vec1 )
+{   // Component-wise maximum; vec1's value is taken whenever "vec0 > vec1" is false.
+    return Vector4(
+        (vec0.getX() > vec1.getX())? vec0.getX() : vec1.getX(),
+        (vec0.getY() > vec1.getY())? vec0.getY() : vec1.getY(),
+        (vec0.getZ() > vec1.getZ())? vec0.getZ() : vec1.getZ(),
+        (vec0.getW() > vec1.getW())? vec0.getW() : vec1.getW()
+    );
+}
+
+inline float maxElem( const Vector4 & vec )
+{   // Returns the largest of the four components.
+    float largest;  // running maximum
+    largest = (vec.getX() > vec.getY())? vec.getX() : vec.getY();
+    if ( vec.getZ() > largest ) largest = vec.getZ();
+    if ( vec.getW() > largest ) largest = vec.getW();
+    return largest;
+}
+
+inline const Vector4 minPerElem( const Vector4 & vec0, const Vector4 & vec1 )
+{   // Component-wise minimum; vec1's value is taken whenever "vec0 < vec1" is false.
+    return Vector4(
+        (vec0.getX() < vec1.getX())? vec0.getX() : vec1.getX(),
+        (vec0.getY() < vec1.getY())? vec0.getY() : vec1.getY(),
+        (vec0.getZ() < vec1.getZ())? vec0.getZ() : vec1.getZ(),
+        (vec0.getW() < vec1.getW())? vec0.getW() : vec1.getW()
+    );
+}
+
+inline float minElem( const Vector4 & vec )
+{   // Returns the smallest of the four components.
+    float smallest;  // running minimum
+    smallest = (vec.getX() < vec.getY())? vec.getX() : vec.getY();
+    if ( vec.getZ() < smallest ) smallest = vec.getZ();
+    if ( vec.getW() < smallest ) smallest = vec.getW();
+    return smallest;
+}
+
+inline float sum( const Vector4 & vec )
+{   // Adds the components in x, y, z, w order.
+    float total;
+    total  = ( vec.getX() + vec.getY() );
+    total += vec.getZ();
+    total += vec.getW();
+    return total;
+}
+
+inline float dot( const Vector4 & vec0, const Vector4 & vec1 )
+{   // Dot product, accumulated in x, y, z, w order.
+    float acc;
+    acc  = ( vec0.getX() * vec1.getX() );
+    acc += ( vec0.getY() * vec1.getY() );
+    acc += ( vec0.getZ() * vec1.getZ() );
+    acc += ( vec0.getW() * vec1.getW() );
+    return acc;
+}
+
+inline float lengthSqr( const Vector4 & vec )
+{   // Squared Euclidean length: x*x + y*y + z*z + w*w, accumulated in that order.
+    float sq;
+    sq  = ( vec.getX() * vec.getX() );
+    sq += ( vec.getY() * vec.getY() );
+    sq += ( vec.getZ() * vec.getZ() );
+    sq += ( vec.getW() * vec.getW() );
+    return sq;
+}
+
+inline float length( const Vector4 & vec )
+{   // Euclidean length: square root of lengthSqr( vec ).
+    return sqrtf( lengthSqr( vec ) );
+}
+
+inline const Vector4 normalize( const Vector4 & vec )
+{
+    // Scales vec by the reciprocal of its length; a zero-length input is not guarded against.
+    float invLen;
+    invLen = ( 1.0f / sqrtf( lengthSqr( vec ) ) );
+    return Vector4(
+        ( vec.getX() * invLen ),
+        ( vec.getY() * invLen ),
+        ( vec.getZ() * invLen ),
+        ( vec.getW() * invLen )
+    );
+}
+
+inline const Vector4 select( const Vector4 & vec0, const Vector4 & vec1, bool select1 )
+{
+    // The flag picks one whole operand; every component of the result comes from it.
+    const Vector4 & chosen = select1 ? vec1 : vec0;
+    return Vector4(
+        chosen.getX(), chosen.getY(),
+        chosen.getZ(), chosen.getW()
+    );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+inline void print( const Vector4 & vec )
+{
+    printf( "( %f %f %f %f )\n", vec.getX(), vec.getY(), vec.getZ(), vec.getW() ); // x y z w on one stdout line; built only under _VECTORMATH_DEBUG
+}
+
+inline void print( const Vector4 & vec, const char * name )
+{
+    printf( "%s: ( %f %f %f %f )\n", name, vec.getX(), vec.getY(), vec.getZ(), vec.getW() ); // same line as the unnamed overload, prefixed with "name: "
+}
+
+#endif
+
+inline Point3::Point3( const Point3 & pnt )
+    : mX( pnt.mX ),   // member-wise copy of the three coordinates
+      mY( pnt.mY ),
+      mZ( pnt.mZ )
+{
+}
+
+inline Point3::Point3( float _x, float _y, float _z )
+    : mX( _x ),   // each argument goes straight into the matching coordinate
+      mY( _y ),
+      mZ( _z )
+{
+}
+
+inline Point3::Point3( const Vector3 & vec )
+    : mX( vec.getX() ),   // coordinates are read through the vector's getters
+      mY( vec.getY() ),
+      mZ( vec.getZ() )
+{
+}
+
+inline Point3::Point3( float scalar )
+    : mX( scalar ),   // the same value is written to all three coordinates
+      mY( scalar ),
+      mZ( scalar )
+{
+}
+
+inline const Point3 lerp( float t, const Point3 & pnt0, const Point3 & pnt1 )
+{
+    return ( pnt0 + ( ( pnt1 - pnt0 ) * t ) ); // pnt0 plus t times the offset to pnt1; t is used as given, never clamped
+}
+
+inline Point3 & Point3::operator =( const Point3 & pnt )
+{
+    mX = pnt.mX; // member-wise copy of the three coordinates
+    mY = pnt.mY;
+    mZ = pnt.mZ;
+    return *this; // reference to the assigned-to point, so assignments can be chained
+}
+
+inline Point3 & Point3::setX( float _x )
+{
+    mX = _x; // overwrite only the x coordinate
+    return *this; // returned by reference so setter calls can be chained
+}
+
+inline float Point3::getX( ) const
+{
+    return mX; // x coordinate, returned by value
+}
+
+inline Point3 & Point3::setY( float _y )
+{
+    mY = _y; // overwrite only the y coordinate
+    return *this; // returned by reference so setter calls can be chained
+}
+
+inline float Point3::getY( ) const
+{
+    return mY; // y coordinate, returned by value
+}
+
+inline Point3 & Point3::setZ( float _z )
+{
+    mZ = _z; // overwrite only the z coordinate
+    return *this; // returned by reference so setter calls can be chained
+}
+
+inline float Point3::getZ( ) const
+{
+    return mZ; // z coordinate, returned by value
+}
+
+inline Point3 & Point3::setElem( int idx, float value )
+{
+    *(&mX + idx) = value; // writes idx floats past mX; idx is not range-checked. NOTE(review): assumes mX, mY, mZ are contiguous - confirm against the class layout
+    return *this; // returned by reference so calls can be chained
+}
+
+inline float Point3::getElem( int idx ) const
+{
+    return *(&mX + idx); // reads idx floats past mX; idx is not range-checked. NOTE(review): assumes mX, mY, mZ are contiguous - confirm against the class layout
+}
+
+inline float & Point3::operator []( int idx )
+{
+    return *(&mX + idx); // writable reference to the float idx slots past mX; idx is not range-checked. NOTE(review): assumes contiguous members - confirm
+}
+
+inline float Point3::operator []( int idx ) const
+{
+    return *(&mX + idx); // copy of the float idx slots past mX; idx is not range-checked. NOTE(review): assumes contiguous members - confirm
+}
+
+inline const Vector3 Point3::operator -( const Point3 & pnt ) const
+{
+    // Point minus point gives the displacement vector from pnt to this point.
+    const float deltaX = mX - pnt.mX;
+    const float deltaY = mY - pnt.mY;
+    const float deltaZ = mZ - pnt.mZ;
+    return Vector3( deltaX, deltaY, deltaZ );
+}
+
+inline const Point3 Point3::operator +( const Vector3 & vec ) const
+{
+    // Point plus vector gives a new point; this object is left unchanged.
+    const float movedX = mX + vec.getX();
+    const float movedY = mY + vec.getY();
+    const float movedZ = mZ + vec.getZ();
+    return Point3( movedX, movedY, movedZ );
+}
+
+inline const Point3 Point3::operator -( const Vector3 & vec ) const
+{
+    // Point minus vector gives a new point; this object is left unchanged.
+    const float movedX = mX - vec.getX();
+    const float movedY = mY - vec.getY();
+    const float movedZ = mZ - vec.getZ();
+    return Point3( movedX, movedY, movedZ );
+}
+
+inline Point3 & Point3::operator +=( const Vector3 & vec )
+{
+    // Build the shifted point, assign it over this one, and return this object.
+    return ( *this = *this + vec );
+}
+
+inline Point3 & Point3::operator -=( const Vector3 & vec )
+{
+    // Build the shifted point, assign it over this one, and return this object.
+    return ( *this = *this - vec );
+}
+
+inline const Point3 mulPerElem( const Point3 & pnt0, const Point3 & pnt1 )
+{
+    // Component-wise product: x with x, y with y, z with z.
+    const float prodX = pnt0.getX() * pnt1.getX();
+    const float prodY = pnt0.getY() * pnt1.getY();
+    const float prodZ = pnt0.getZ() * pnt1.getZ();
+    return Point3( prodX, prodY, prodZ );
+}
+
+inline const Point3 divPerElem( const Point3 & pnt0, const Point3 & pnt1 )
+{
+    // Component-wise quotient pnt0 / pnt1; divisors are not checked for zero.
+    const float quotX = pnt0.getX() / pnt1.getX();
+    const float quotY = pnt0.getY() / pnt1.getY();
+    const float quotZ = pnt0.getZ() / pnt1.getZ();
+    return Point3( quotX, quotY, quotZ );
+}
+
+inline const Point3 recipPerElem( const Point3 & pnt )
+{
+    // Component-wise 1 / value; components are not checked for zero.
+    const float invX = 1.0f / pnt.getX();
+    const float invY = 1.0f / pnt.getY();
+    const float invZ = 1.0f / pnt.getZ();
+    return Point3( invX, invY, invZ );
+}
+
+inline const Point3 sqrtPerElem( const Point3 & pnt )
+{
+    // Component-wise sqrtf; components are passed through without a sign check.
+    const float rootX = sqrtf( pnt.getX() );
+    const float rootY = sqrtf( pnt.getY() );
+    const float rootZ = sqrtf( pnt.getZ() );
+    return Point3( rootX, rootY, rootZ );
+}
+
+inline const Point3 rsqrtPerElem( const Point3 & pnt )
+{
+    // Component-wise 1 / sqrtf( value ), computed as an exact divide rather than an estimate.
+    const float invRootX = 1.0f / sqrtf( pnt.getX() );
+    const float invRootY = 1.0f / sqrtf( pnt.getY() );
+    const float invRootZ = 1.0f / sqrtf( pnt.getZ() );
+    return Point3( invRootX, invRootY, invRootZ );
+}
+
+inline const Point3 absPerElem( const Point3 & pnt )
+{
+    // Component-wise absolute value via fabsf.
+    const float absX = fabsf( pnt.getX() );
+    const float absY = fabsf( pnt.getY() );
+    const float absZ = fabsf( pnt.getZ() );
+    return Point3( absX, absY, absZ );
+}
+
+inline const Point3 copySignPerElem( const Point3 & pnt0, const Point3 & pnt1 )
+{
+    const float magX = fabsf( pnt0.getX() ), magY = fabsf( pnt0.getY() ), magZ = fabsf( pnt0.getZ() );
+    const float outX = ( pnt1.getX() < 0.0f )? -magX : magX;
+    const float outY = ( pnt1.getY() < 0.0f )? -magY : magY;
+    const float outZ = ( pnt1.getZ() < 0.0f )? -magZ : magZ;
+    return Point3( outX, outY, outZ ); // magnitudes from pnt0, signs from pnt1; only "< 0.0f" counts as negative
+}
+
+inline const Point3 maxPerElem( const Point3 & pnt0, const Point3 & pnt1 )
+{
+    // Strict '>' per component: on a tie or a NaN operand the pnt1 component is kept.
+    const float highX = ( pnt0.getX() > pnt1.getX() )? pnt0.getX() : pnt1.getX();
+    const float highY = ( pnt0.getY() > pnt1.getY() )? pnt0.getY() : pnt1.getY();
+    const float highZ = ( pnt0.getZ() > pnt1.getZ() )? pnt0.getZ() : pnt1.getZ();
+    return Point3( highX, highY, highZ );
+}
+
+inline float maxElem( const Point3 & pnt )
+{
+    // Running maximum: larger of x and y first, then z replaces it only if strictly greater.
+    float largest = ( pnt.getX() > pnt.getY() )? pnt.getX() : pnt.getY();
+    if ( pnt.getZ() > largest ) largest = pnt.getZ();
+    return largest;
+}
+
+inline const Point3 minPerElem( const Point3 & pnt0, const Point3 & pnt1 )
+{
+    // Strict '<' per component: on a tie or a NaN operand the pnt1 component is kept.
+    const float lowX = ( pnt0.getX() < pnt1.getX() )? pnt0.getX() : pnt1.getX();
+    const float lowY = ( pnt0.getY() < pnt1.getY() )? pnt0.getY() : pnt1.getY();
+    const float lowZ = ( pnt0.getZ() < pnt1.getZ() )? pnt0.getZ() : pnt1.getZ();
+    return Point3( lowX, lowY, lowZ );
+}
+
+inline float minElem( const Point3 & pnt )
+{
+    // Running minimum: smaller of x and y first, then z replaces it only if strictly smaller.
+    float smallest = ( pnt.getX() < pnt.getY() )? pnt.getX() : pnt.getY();
+    if ( pnt.getZ() < smallest ) smallest = pnt.getZ();
+    return smallest;
+}
+
+inline float sum( const Point3 & pnt )
+{
+    // Adds the coordinates in x, y, z order, one addition per statement.
+    float total = pnt.getX() + pnt.getY();
+    total += pnt.getZ();
+    return total;
+}
+
+inline const Point3 scale( const Point3 & pnt, float scaleVal )
+{
+    return mulPerElem( pnt, Point3( scaleVal ) ); // uniform scale: scaleVal is splatted into a Point3, then multiplied per component
+}
+
+inline const Point3 scale( const Point3 & pnt, const Vector3 & scaleVec )
+{
+    return mulPerElem( pnt, Point3( scaleVec ) ); // per-axis scale: scaleVec is converted to a Point3, then multiplied per component
+}
+
+inline float projection( const Point3 & pnt, const Vector3 & unitVec )
+{
+    // Dot product of the point's coordinates with unitVec; unitVec is not normalized here.
+    float acc = pnt.getX() * unitVec.getX();
+    acc += pnt.getY() * unitVec.getY();
+    acc += pnt.getZ() * unitVec.getZ();
+    return acc;
+}
+
+inline float distSqrFromOrigin( const Point3 & pnt )
+{
+    return lengthSqr( Vector3( pnt ) ); // converts the point to a Vector3 and passes it to lengthSqr()
+}
+
+inline float distFromOrigin( const Point3 & pnt )
+{
+    return length( Vector3( pnt ) ); // converts the point to a Vector3 and passes it to length()
+}
+
+inline float distSqr( const Point3 & pnt0, const Point3 & pnt1 )
+{
+    return lengthSqr( ( pnt1 - pnt0 ) ); // passes the difference vector pnt1 - pnt0 to lengthSqr()
+}
+
+inline float dist( const Point3 & pnt0, const Point3 & pnt1 )
+{
+    return length( ( pnt1 - pnt0 ) ); // passes the difference vector pnt1 - pnt0 to length()
+}
+
+inline const Point3 select( const Point3 & pnt0, const Point3 & pnt1, bool select1 )
+{
+    // The flag picks one whole operand; all three coordinates of the result come from it.
+    const Point3 & chosen = select1 ? pnt1 : pnt0;
+    return Point3(
+        chosen.getX(), chosen.getY(), chosen.getZ()
+    );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+inline void print( const Point3 & pnt )
+{
+    printf( "( %f %f %f )\n", pnt.getX(), pnt.getY(), pnt.getZ() ); // x y z on one stdout line; built only under _VECTORMATH_DEBUG
+}
+
+inline void print( const Point3 & pnt, const char * name )
+{
+    printf( "%s: ( %f %f %f )\n", name, pnt.getX(), pnt.getY(), pnt.getZ() ); // same line as the unnamed overload, prefixed with "name: "
+}
+
+#endif
+
+} // namespace Aos
+} // namespace Vectormath
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/scalar/cpp/vectormath_aos.h b/Extras/vectormathlibrary/include/vectormath/scalar/cpp/vectormath_aos.h
index a99f56372..a41d22ad2 100644
--- a/Extras/vectormathlibrary/include/vectormath/scalar/cpp/vectormath_aos.h
+++ b/Extras/vectormathlibrary/include/vectormath/scalar/cpp/vectormath_aos.h
@@ -1,1809 +1,1809 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_AOS_CPP_H
-#define _VECTORMATH_AOS_CPP_H
-
-#include <math.h>
-
-#ifdef _VECTORMATH_DEBUG
-#include <stdio.h>
-#endif
-
-namespace Vectormath {
-
-namespace Aos {
-
-//-----------------------------------------------------------------------------
-// Forward Declarations
-//
-
-class Vector3;
-class Vector4;
-class Point3;
-class Quat;
-class Matrix3;
-class Matrix4;
-class Transform3;
-
-// A 3-D vector in array-of-structures format
-//
-class Vector3
-{
-    float mX;
-    float mY;
-    float mZ;
-#ifndef __GNUC__
-    float d;
-#endif
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Vector3( ) { };
-
-    // Copy a 3-D vector
-    // 
-    inline Vector3( const Vector3 & vec );
-
-    // Construct a 3-D vector from x, y, and z elements
-    // 
-    inline Vector3( float x, float y, float z );
-
-    // Copy elements from a 3-D point into a 3-D vector
-    // 
-    explicit inline Vector3( const Point3 & pnt );
-
-    // Set all elements of a 3-D vector to the same scalar value
-    // 
-    explicit inline Vector3( float scalar );
-
-    // Assign one 3-D vector to another
-    // 
-    inline Vector3 & operator =( const Vector3 & vec );
-
-    // Set the x element of a 3-D vector
-    // 
-    inline Vector3 & setX( float x );
-
-    // Set the y element of a 3-D vector
-    // 
-    inline Vector3 & setY( float y );
-
-    // Set the z element of a 3-D vector
-    // 
-    inline Vector3 & setZ( float z );
-
-    // Get the x element of a 3-D vector
-    // 
-    inline float getX( ) const;
-
-    // Get the y element of a 3-D vector
-    // 
-    inline float getY( ) const;
-
-    // Get the z element of a 3-D vector
-    // 
-    inline float getZ( ) const;
-
-    // Set an x, y, or z element of a 3-D vector by index
-    // 
-    inline Vector3 & setElem( int idx, float value );
-
-    // Get an x, y, or z element of a 3-D vector by index
-    // 
-    inline float getElem( int idx ) const;
-
-    // Subscripting operator to set or get an element
-    // 
-    inline float & operator []( int idx );
-
-    // Subscripting operator to get an element
-    // 
-    inline float operator []( int idx ) const;
-
-    // Add two 3-D vectors
-    // 
-    inline const Vector3 operator +( const Vector3 & vec ) const;
-
-    // Subtract a 3-D vector from another 3-D vector
-    // 
-    inline const Vector3 operator -( const Vector3 & vec ) const;
-
-    // Add a 3-D vector to a 3-D point
-    // 
-    inline const Point3 operator +( const Point3 & pnt ) const;
-
-    // Multiply a 3-D vector by a scalar
-    // 
-    inline const Vector3 operator *( float scalar ) const;
-
-    // Divide a 3-D vector by a scalar
-    // 
-    inline const Vector3 operator /( float scalar ) const;
-
-    // Perform compound assignment and addition with a 3-D vector
-    // 
-    inline Vector3 & operator +=( const Vector3 & vec );
-
-    // Perform compound assignment and subtraction by a 3-D vector
-    // 
-    inline Vector3 & operator -=( const Vector3 & vec );
-
-    // Perform compound assignment and multiplication by a scalar
-    // 
-    inline Vector3 & operator *=( float scalar );
-
-    // Perform compound assignment and division by a scalar
-    // 
-    inline Vector3 & operator /=( float scalar );
-
-    // Negate all elements of a 3-D vector
-    // 
-    inline const Vector3 operator -( ) const;
-
-    // Construct x axis
-    // 
-    static inline const Vector3 xAxis( );
-
-    // Construct y axis
-    // 
-    static inline const Vector3 yAxis( );
-
-    // Construct z axis
-    // 
-    static inline const Vector3 zAxis( );
-
-}
-#ifdef __GNUC__
-__attribute__ ((aligned(16)))
-#endif
-;
-
-// Multiply a 3-D vector by a scalar
-// 
-inline const Vector3 operator *( float scalar, const Vector3 & vec );
-
-// Multiply two 3-D vectors per element
-// 
-inline const Vector3 mulPerElem( const Vector3 & vec0, const Vector3 & vec1 );
-
-// Divide two 3-D vectors per element
-// NOTE: 
-// Floating-point behavior matches standard library function divf4.
-// 
-inline const Vector3 divPerElem( const Vector3 & vec0, const Vector3 & vec1 );
-
-// Compute the reciprocal of a 3-D vector per element
-// NOTE: 
-// Floating-point behavior matches standard library function recipf4.
-// 
-inline const Vector3 recipPerElem( const Vector3 & vec );
-
-// Compute the square root of a 3-D vector per element
-// NOTE: 
-// Floating-point behavior matches standard library function sqrtf4.
-// 
-inline const Vector3 sqrtPerElem( const Vector3 & vec );
-
-// Compute the reciprocal square root of a 3-D vector per element
-// NOTE: 
-// Floating-point behavior matches standard library function rsqrtf4.
-// 
-inline const Vector3 rsqrtPerElem( const Vector3 & vec );
-
-// Compute the absolute value of a 3-D vector per element
-// 
-inline const Vector3 absPerElem( const Vector3 & vec );
-
-// Copy sign from one 3-D vector to another, per element
-// 
-inline const Vector3 copySignPerElem( const Vector3 & vec0, const Vector3 & vec1 );
-
-// Maximum of two 3-D vectors per element
-// 
-inline const Vector3 maxPerElem( const Vector3 & vec0, const Vector3 & vec1 );
-
-// Minimum of two 3-D vectors per element
-// 
-inline const Vector3 minPerElem( const Vector3 & vec0, const Vector3 & vec1 );
-
-// Maximum element of a 3-D vector
-// 
-inline float maxElem( const Vector3 & vec );
-
-// Minimum element of a 3-D vector
-// 
-inline float minElem( const Vector3 & vec );
-
-// Compute the sum of all elements of a 3-D vector
-// 
-inline float sum( const Vector3 & vec );
-
-// Compute the dot product of two 3-D vectors
-// 
-inline float dot( const Vector3 & vec0, const Vector3 & vec1 );
-
-// Compute the square of the length of a 3-D vector
-// 
-inline float lengthSqr( const Vector3 & vec );
-
-// Compute the length of a 3-D vector
-// 
-inline float length( const Vector3 & vec );
-
-// Normalize a 3-D vector
-// NOTE: 
-// The result is unpredictable when all elements of vec are at or near zero.
-// 
-inline const Vector3 normalize( const Vector3 & vec );
-
-// Compute cross product of two 3-D vectors
-// 
-inline const Vector3 cross( const Vector3 & vec0, const Vector3 & vec1 );
-
-// Outer product of two 3-D vectors
-// 
-inline const Matrix3 outer( const Vector3 & vec0, const Vector3 & vec1 );
-
-// Pre-multiply a row vector by a 3x3 matrix
-// 
-inline const Vector3 rowMul( const Vector3 & vec, const Matrix3 & mat );
-
-// Cross-product matrix of a 3-D vector
-// 
-inline const Matrix3 crossMatrix( const Vector3 & vec );
-
-// Create cross-product matrix and multiply
-// NOTE: 
-// Faster than separately creating a cross-product matrix and multiplying.
-// 
-inline const Matrix3 crossMatrixMul( const Vector3 & vec, const Matrix3 & mat );
-
-// Linear interpolation between two 3-D vectors
-// NOTE: 
-// Does not clamp t between 0 and 1.
-// 
-inline const Vector3 lerp( float t, const Vector3 & vec0, const Vector3 & vec1 );
-
-// Spherical linear interpolation between two 3-D vectors
-// NOTE: 
-// The result is unpredictable if the vectors point in opposite directions.
-// Does not clamp t between 0 and 1.
-// 
-inline const Vector3 slerp( float t, const Vector3 & unitVec0, const Vector3 & unitVec1 );
-
-// Conditionally select between two 3-D vectors
-// 
-inline const Vector3 select( const Vector3 & vec0, const Vector3 & vec1, bool select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a 3-D vector
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Vector3 & vec );
-
-// Print a 3-D vector and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Vector3 & vec, const char * name );
-
-#endif
-
-// A 4-D vector in array-of-structures format
-//
-class Vector4
-{
-    float mX;
-    float mY;
-    float mZ;
-    float mW;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Vector4( ) { };
-
-    // Copy a 4-D vector
-    // 
-    inline Vector4( const Vector4 & vec );
-
-    // Construct a 4-D vector from x, y, z, and w elements
-    // 
-    inline Vector4( float x, float y, float z, float w );
-
-    // Construct a 4-D vector from a 3-D vector and a scalar
-    // 
-    inline Vector4( const Vector3 & xyz, float w );
-
-    // Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0
-    // 
-    explicit inline Vector4( const Vector3 & vec );
-
-    // Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1
-    // 
-    explicit inline Vector4( const Point3 & pnt );
-
-    // Copy elements from a quaternion into a 4-D vector
-    // 
-    explicit inline Vector4( const Quat & quat );
-
-    // Set all elements of a 4-D vector to the same scalar value
-    // 
-    explicit inline Vector4( float scalar );
-
-    // Assign one 4-D vector to another
-    // 
-    inline Vector4 & operator =( const Vector4 & vec );
-
-    // Set the x, y, and z elements of a 4-D vector
-    // NOTE: 
-    // This function does not change the w element.
-    // 
-    inline Vector4 & setXYZ( const Vector3 & vec );
-
-    // Get the x, y, and z elements of a 4-D vector
-    // 
-    inline const Vector3 getXYZ( ) const;
-
-    // Set the x element of a 4-D vector
-    // 
-    inline Vector4 & setX( float x );
-
-    // Set the y element of a 4-D vector
-    // 
-    inline Vector4 & setY( float y );
-
-    // Set the z element of a 4-D vector
-    // 
-    inline Vector4 & setZ( float z );
-
-    // Set the w element of a 4-D vector
-    // 
-    inline Vector4 & setW( float w );
-
-    // Get the x element of a 4-D vector
-    // 
-    inline float getX( ) const;
-
-    // Get the y element of a 4-D vector
-    // 
-    inline float getY( ) const;
-
-    // Get the z element of a 4-D vector
-    // 
-    inline float getZ( ) const;
-
-    // Get the w element of a 4-D vector
-    // 
-    inline float getW( ) const;
-
-    // Set an x, y, z, or w element of a 4-D vector by index
-    // 
-    inline Vector4 & setElem( int idx, float value );
-
-    // Get an x, y, z, or w element of a 4-D vector by index
-    // 
-    inline float getElem( int idx ) const;
-
-    // Subscripting operator to set or get an element
-    // 
-    inline float & operator []( int idx );
-
-    // Subscripting operator to get an element
-    // 
-    inline float operator []( int idx ) const;
-
-    // Add two 4-D vectors
-    // 
-    inline const Vector4 operator +( const Vector4 & vec ) const;
-
-    // Subtract a 4-D vector from another 4-D vector
-    // 
-    inline const Vector4 operator -( const Vector4 & vec ) const;
-
-    // Multiply a 4-D vector by a scalar
-    // 
-    inline const Vector4 operator *( float scalar ) const;
-
-    // Divide a 4-D vector by a scalar
-    // 
-    inline const Vector4 operator /( float scalar ) const;
-
-    // Perform compound assignment and addition with a 4-D vector
-    // 
-    inline Vector4 & operator +=( const Vector4 & vec );
-
-    // Perform compound assignment and subtraction by a 4-D vector
-    // 
-    inline Vector4 & operator -=( const Vector4 & vec );
-
-    // Perform compound assignment and multiplication by a scalar
-    // 
-    inline Vector4 & operator *=( float scalar );
-
-    // Perform compound assignment and division by a scalar
-    // 
-    inline Vector4 & operator /=( float scalar );
-
-    // Negate all elements of a 4-D vector
-    // 
-    inline const Vector4 operator -( ) const;
-
-    // Construct x axis
-    // 
-    static inline const Vector4 xAxis( );
-
-    // Construct y axis
-    // 
-    static inline const Vector4 yAxis( );
-
-    // Construct z axis
-    // 
-    static inline const Vector4 zAxis( );
-
-    // Construct w axis
-    // 
-    static inline const Vector4 wAxis( );
-
-}
-#ifdef __GNUC__
-__attribute__ ((aligned(16)))
-#endif
-;
-
-// Multiply a 4-D vector by a scalar
-// 
-inline const Vector4 operator *( float scalar, const Vector4 & vec );
-
-// Multiply two 4-D vectors per element
-// 
-inline const Vector4 mulPerElem( const Vector4 & vec0, const Vector4 & vec1 );
-
-// Divide two 4-D vectors per element
-// NOTE: 
-// Floating-point behavior matches standard library function divf4.
-// 
-inline const Vector4 divPerElem( const Vector4 & vec0, const Vector4 & vec1 );
-
-// Compute the reciprocal of a 4-D vector per element
-// NOTE: 
-// Floating-point behavior matches standard library function recipf4.
-// 
-inline const Vector4 recipPerElem( const Vector4 & vec );
-
-// Compute the square root of a 4-D vector per element
-// NOTE: 
-// Floating-point behavior matches standard library function sqrtf4.
-// 
-inline const Vector4 sqrtPerElem( const Vector4 & vec );
-
-// Compute the reciprocal square root of a 4-D vector per element
-// NOTE: 
-// Floating-point behavior matches standard library function rsqrtf4.
-// 
-inline const Vector4 rsqrtPerElem( const Vector4 & vec );
-
-// Compute the absolute value of a 4-D vector per element
-// 
-inline const Vector4 absPerElem( const Vector4 & vec );
-
-// Copy sign from one 4-D vector to another, per element
-// 
-inline const Vector4 copySignPerElem( const Vector4 & vec0, const Vector4 & vec1 );
-
-// Maximum of two 4-D vectors per element
-// 
-inline const Vector4 maxPerElem( const Vector4 & vec0, const Vector4 & vec1 );
-
-// Minimum of two 4-D vectors per element
-// 
-inline const Vector4 minPerElem( const Vector4 & vec0, const Vector4 & vec1 );
-
-// Maximum element of a 4-D vector
-// 
-inline float maxElem( const Vector4 & vec );
-
-// Minimum element of a 4-D vector
-// 
-inline float minElem( const Vector4 & vec );
-
-// Compute the sum of all elements of a 4-D vector
-// 
-inline float sum( const Vector4 & vec );
-
-// Compute the dot product of two 4-D vectors
-// 
-inline float dot( const Vector4 & vec0, const Vector4 & vec1 );
-
-// Compute the square of the length of a 4-D vector
-// 
-inline float lengthSqr( const Vector4 & vec );
-
-// Compute the length of a 4-D vector
-// 
-inline float length( const Vector4 & vec );
-
-// Normalize a 4-D vector
-// NOTE: 
-// The result is unpredictable when all elements of vec are at or near zero.
-// 
-inline const Vector4 normalize( const Vector4 & vec );
-
-// Outer product of two 4-D vectors
-// 
-inline const Matrix4 outer( const Vector4 & vec0, const Vector4 & vec1 );
-
-// Linear interpolation between two 4-D vectors
-// NOTE: 
-// Does not clamp t between 0 and 1.
-// 
-inline const Vector4 lerp( float t, const Vector4 & vec0, const Vector4 & vec1 );
-
-// Spherical linear interpolation between two 4-D vectors
-// NOTE: 
-// The result is unpredictable if the vectors point in opposite directions.
-// Does not clamp t between 0 and 1.
-// 
-inline const Vector4 slerp( float t, const Vector4 & unitVec0, const Vector4 & unitVec1 );
-
-// Conditionally select between two 4-D vectors
-// 
-inline const Vector4 select( const Vector4 & vec0, const Vector4 & vec1, bool select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a 4-D vector
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Vector4 & vec );
-
-// Print a 4-D vector and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Vector4 & vec, const char * name );
-
-#endif
-
-// A 3-D point in array-of-structures format
-//
-class Point3
-{
-    float mX;
-    float mY;
-    float mZ;
-#ifndef __GNUC__
-    float d;
-#endif
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Point3( ) { };
-
-    // Copy a 3-D point
-    // 
-    inline Point3( const Point3 & pnt );
-
-    // Construct a 3-D point from x, y, and z elements
-    // 
-    inline Point3( float x, float y, float z );
-
-    // Copy elements from a 3-D vector into a 3-D point
-    // 
-    explicit inline Point3( const Vector3 & vec );
-
-    // Set all elements of a 3-D point to the same scalar value
-    // 
-    explicit inline Point3( float scalar );
-
-    // Assign one 3-D point to another
-    // 
-    inline Point3 & operator =( const Point3 & pnt );
-
-    // Set the x element of a 3-D point
-    // 
-    inline Point3 & setX( float x );
-
-    // Set the y element of a 3-D point
-    // 
-    inline Point3 & setY( float y );
-
-    // Set the z element of a 3-D point
-    // 
-    inline Point3 & setZ( float z );
-
-    // Get the x element of a 3-D point
-    // 
-    inline float getX( ) const;
-
-    // Get the y element of a 3-D point
-    // 
-    inline float getY( ) const;
-
-    // Get the z element of a 3-D point
-    // 
-    inline float getZ( ) const;
-
-    // Set an x, y, or z element of a 3-D point by index
-    // 
-    inline Point3 & setElem( int idx, float value );
-
-    // Get an x, y, or z element of a 3-D point by index
-    // 
-    inline float getElem( int idx ) const;
-
-    // Subscripting operator to set or get an element
-    // 
-    inline float & operator []( int idx );
-
-    // Subscripting operator to get an element
-    // 
-    inline float operator []( int idx ) const;
-
-    // Subtract a 3-D point from another 3-D point
-    // 
-    inline const Vector3 operator -( const Point3 & pnt ) const;
-
-    // Add a 3-D point to a 3-D vector
-    // 
-    inline const Point3 operator +( const Vector3 & vec ) const;
-
-    // Subtract a 3-D vector from a 3-D point
-    // 
-    inline const Point3 operator -( const Vector3 & vec ) const;
-
-    // Perform compound assignment and addition with a 3-D vector
-    // 
-    inline Point3 & operator +=( const Vector3 & vec );
-
-    // Perform compound assignment and subtraction by a 3-D vector
-    // 
-    inline Point3 & operator -=( const Vector3 & vec );
-
-}
-#ifdef __GNUC__
-__attribute__ ((aligned(16)))
-#endif
-;
-
-// Multiply two 3-D points per element
-// 
-inline const Point3 mulPerElem( const Point3 & pnt0, const Point3 & pnt1 );
-
-// Divide two 3-D points per element
-// NOTE: 
-// Floating-point behavior matches standard library function divf4.
-// 
-inline const Point3 divPerElem( const Point3 & pnt0, const Point3 & pnt1 );
-
-// Compute the reciprocal of a 3-D point per element
-// NOTE: 
-// Floating-point behavior matches standard library function recipf4.
-// 
-inline const Point3 recipPerElem( const Point3 & pnt );
-
-// Compute the square root of a 3-D point per element
-// NOTE: 
-// Floating-point behavior matches standard library function sqrtf4.
-// 
-inline const Point3 sqrtPerElem( const Point3 & pnt );
-
-// Compute the reciprocal square root of a 3-D point per element
-// NOTE: 
-// Floating-point behavior matches standard library function rsqrtf4.
-// 
-inline const Point3 rsqrtPerElem( const Point3 & pnt );
-
-// Compute the absolute value of a 3-D point per element
-// 
-inline const Point3 absPerElem( const Point3 & pnt );
-
-// Copy sign from one 3-D point to another, per element
-// 
-inline const Point3 copySignPerElem( const Point3 & pnt0, const Point3 & pnt1 );
-
-// Maximum of two 3-D points per element
-// 
-inline const Point3 maxPerElem( const Point3 & pnt0, const Point3 & pnt1 );
-
-// Minimum of two 3-D points per element
-// 
-inline const Point3 minPerElem( const Point3 & pnt0, const Point3 & pnt1 );
-
-// Maximum element of a 3-D point
-// 
-inline float maxElem( const Point3 & pnt );
-
-// Minimum element of a 3-D point
-// 
-inline float minElem( const Point3 & pnt );
-
-// Compute the sum of all elements of a 3-D point
-// 
-inline float sum( const Point3 & pnt );
-
-// Apply uniform scale to a 3-D point
-// 
-inline const Point3 scale( const Point3 & pnt, float scaleVal );
-
-// Apply non-uniform scale to a 3-D point
-// 
-inline const Point3 scale( const Point3 & pnt, const Vector3 & scaleVec );
-
-// Scalar projection of a 3-D point on a unit-length 3-D vector
-// 
-inline float projection( const Point3 & pnt, const Vector3 & unitVec );
-
-// Compute the square of the distance of a 3-D point from the coordinate-system origin
-// 
-inline float distSqrFromOrigin( const Point3 & pnt );
-
-// Compute the distance of a 3-D point from the coordinate-system origin
-// 
-inline float distFromOrigin( const Point3 & pnt );
-
-// Compute the square of the distance between two 3-D points
-// 
-inline float distSqr( const Point3 & pnt0, const Point3 & pnt1 );
-
-// Compute the distance between two 3-D points
-// 
-inline float dist( const Point3 & pnt0, const Point3 & pnt1 );
-
-// Linear interpolation between two 3-D points
-// NOTE: 
-// Does not clamp t between 0 and 1.
-// 
-inline const Point3 lerp( float t, const Point3 & pnt0, const Point3 & pnt1 );
-
-// Conditionally select between two 3-D points
-// 
-inline const Point3 select( const Point3 & pnt0, const Point3 & pnt1, bool select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a 3-D point
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Point3 & pnt );
-
-// Print a 3-D point and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Point3 & pnt, const char * name );
-
-#endif
-
-// A quaternion in array-of-structures format
-//
-class Quat
-{
-    float mX;
-    float mY;
-    float mZ;
-    float mW;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Quat( ) { };
-
-    // Copy a quaternion
-    // 
-    inline Quat( const Quat & quat );
-
-    // Construct a quaternion from x, y, z, and w elements
-    // 
-    inline Quat( float x, float y, float z, float w );
-
-    // Construct a quaternion from a 3-D vector and a scalar
-    // 
-    inline Quat( const Vector3 & xyz, float w );
-
-    // Copy elements from a 4-D vector into a quaternion
-    // 
-    explicit inline Quat( const Vector4 & vec );
-
-    // Convert a rotation matrix to a unit-length quaternion
-    // 
-    explicit inline Quat( const Matrix3 & rotMat );
-
-    // Set all elements of a quaternion to the same scalar value
-    // 
-    explicit inline Quat( float scalar );
-
-    // Assign one quaternion to another
-    // 
-    inline Quat & operator =( const Quat & quat );
-
-    // Set the x, y, and z elements of a quaternion
-    // NOTE: 
-    // This function does not change the w element.
-    // 
-    inline Quat & setXYZ( const Vector3 & vec );
-
-    // Get the x, y, and z elements of a quaternion
-    // 
-    inline const Vector3 getXYZ( ) const;
-
-    // Set the x element of a quaternion
-    // 
-    inline Quat & setX( float x );
-
-    // Set the y element of a quaternion
-    // 
-    inline Quat & setY( float y );
-
-    // Set the z element of a quaternion
-    // 
-    inline Quat & setZ( float z );
-
-    // Set the w element of a quaternion
-    // 
-    inline Quat & setW( float w );
-
-    // Get the x element of a quaternion
-    // 
-    inline float getX( ) const;
-
-    // Get the y element of a quaternion
-    // 
-    inline float getY( ) const;
-
-    // Get the z element of a quaternion
-    // 
-    inline float getZ( ) const;
-
-    // Get the w element of a quaternion
-    // 
-    inline float getW( ) const;
-
-    // Set an x, y, z, or w element of a quaternion by index
-    // 
-    inline Quat & setElem( int idx, float value );
-
-    // Get an x, y, z, or w element of a quaternion by index
-    // 
-    inline float getElem( int idx ) const;
-
-    // Subscripting operator to set or get an element
-    // 
-    inline float & operator []( int idx );
-
-    // Subscripting operator to get an element
-    // 
-    inline float operator []( int idx ) const;
-
-    // Add two quaternions
-    // 
-    inline const Quat operator +( const Quat & quat ) const;
-
-    // Subtract a quaternion from another quaternion
-    // 
-    inline const Quat operator -( const Quat & quat ) const;
-
-    // Multiply two quaternions
-    // 
-    inline const Quat operator *( const Quat & quat ) const;
-
-    // Multiply a quaternion by a scalar
-    // 
-    inline const Quat operator *( float scalar ) const;
-
-    // Divide a quaternion by a scalar
-    // 
-    inline const Quat operator /( float scalar ) const;
-
-    // Perform compound assignment and addition with a quaternion
-    // 
-    inline Quat & operator +=( const Quat & quat );
-
-    // Perform compound assignment and subtraction by a quaternion
-    // 
-    inline Quat & operator -=( const Quat & quat );
-
-    // Perform compound assignment and multiplication by a quaternion
-    // 
-    inline Quat & operator *=( const Quat & quat );
-
-    // Perform compound assignment and multiplication by a scalar
-    // 
-    inline Quat & operator *=( float scalar );
-
-    // Perform compound assignment and division by a scalar
-    // 
-    inline Quat & operator /=( float scalar );
-
-    // Negate all elements of a quaternion
-    // 
-    inline const Quat operator -( ) const;
-
-    // Construct an identity quaternion
-    // 
-    static inline const Quat identity( );
-
-    // Construct a quaternion to rotate between two unit-length 3-D vectors
-    // NOTE: 
-    // The result is unpredictable if unitVec0 and unitVec1 point in opposite directions.
-    // 
-    static inline const Quat rotation( const Vector3 & unitVec0, const Vector3 & unitVec1 );
-
-    // Construct a quaternion to rotate around a unit-length 3-D vector
-    // 
-    static inline const Quat rotation( float radians, const Vector3 & unitVec );
-
-    // Construct a quaternion to rotate around the x axis
-    // 
-    static inline const Quat rotationX( float radians );
-
-    // Construct a quaternion to rotate around the y axis
-    // 
-    static inline const Quat rotationY( float radians );
-
-    // Construct a quaternion to rotate around the z axis
-    // 
-    static inline const Quat rotationZ( float radians );
-
-}
-#ifdef __GNUC__
-__attribute__ ((aligned(16)))
-#endif
-;
-
-// Multiply a quaternion by a scalar
-// 
-inline const Quat operator *( float scalar, const Quat & quat );
-
-// Compute the conjugate of a quaternion
-// 
-inline const Quat conj( const Quat & quat );
-
-// Use a unit-length quaternion to rotate a 3-D vector
-// 
-inline const Vector3 rotate( const Quat & unitQuat, const Vector3 & vec );
-
-// Compute the dot product of two quaternions
-// 
-inline float dot( const Quat & quat0, const Quat & quat1 );
-
-// Compute the norm of a quaternion
-// 
-inline float norm( const Quat & quat );
-
-// Compute the length of a quaternion
-// 
-inline float length( const Quat & quat );
-
-// Normalize a quaternion
-// NOTE: 
-// The result is unpredictable when all elements of quat are at or near zero.
-// 
-inline const Quat normalize( const Quat & quat );
-
-// Linear interpolation between two quaternions
-// NOTE: 
-// Does not clamp t between 0 and 1.
-// 
-inline const Quat lerp( float t, const Quat & quat0, const Quat & quat1 );
-
-// Spherical linear interpolation between two quaternions
-// NOTE: 
-// Interpolates along the shortest path between orientations.
-// Does not clamp t between 0 and 1.
-// 
-inline const Quat slerp( float t, const Quat & unitQuat0, const Quat & unitQuat1 );
-
-// Spherical quadrangle interpolation
-// 
-inline const Quat squad( float t, const Quat & unitQuat0, const Quat & unitQuat1, const Quat & unitQuat2, const Quat & unitQuat3 );
-
-// Conditionally select between two quaternions
-// 
-inline const Quat select( const Quat & quat0, const Quat & quat1, bool select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a quaternion
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Quat & quat );
-
-// Print a quaternion and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Quat & quat, const char * name );
-
-#endif
-
-// A 3x3 matrix in array-of-structures format
-//
-class Matrix3
-{
-    Vector3 mCol0;
-    Vector3 mCol1;
-    Vector3 mCol2;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Matrix3( ) { };
-
-    // Copy a 3x3 matrix
-    // 
-    inline Matrix3( const Matrix3 & mat );
-
-    // Construct a 3x3 matrix containing the specified columns
-    // 
-    inline Matrix3( const Vector3 & col0, const Vector3 & col1, const Vector3 & col2 );
-
-    // Construct a 3x3 rotation matrix from a unit-length quaternion
-    // 
-    explicit inline Matrix3( const Quat & unitQuat );
-
-    // Set all elements of a 3x3 matrix to the same scalar value
-    // 
-    explicit inline Matrix3( float scalar );
-
-    // Assign one 3x3 matrix to another
-    // 
-    inline Matrix3 & operator =( const Matrix3 & mat );
-
-    // Set column 0 of a 3x3 matrix
-    // 
-    inline Matrix3 & setCol0( const Vector3 & col0 );
-
-    // Set column 1 of a 3x3 matrix
-    // 
-    inline Matrix3 & setCol1( const Vector3 & col1 );
-
-    // Set column 2 of a 3x3 matrix
-    // 
-    inline Matrix3 & setCol2( const Vector3 & col2 );
-
-    // Get column 0 of a 3x3 matrix
-    // 
-    inline const Vector3 getCol0( ) const;
-
-    // Get column 1 of a 3x3 matrix
-    // 
-    inline const Vector3 getCol1( ) const;
-
-    // Get column 2 of a 3x3 matrix
-    // 
-    inline const Vector3 getCol2( ) const;
-
-    // Set the column of a 3x3 matrix referred to by the specified index
-    // 
-    inline Matrix3 & setCol( int col, const Vector3 & vec );
-
-    // Set the row of a 3x3 matrix referred to by the specified index
-    // 
-    inline Matrix3 & setRow( int row, const Vector3 & vec );
-
-    // Get the column of a 3x3 matrix referred to by the specified index
-    // 
-    inline const Vector3 getCol( int col ) const;
-
-    // Get the row of a 3x3 matrix referred to by the specified index
-    // 
-    inline const Vector3 getRow( int row ) const;
-
-    // Subscripting operator to set or get a column
-    // 
-    inline Vector3 & operator []( int col );
-
-    // Subscripting operator to get a column
-    // 
-    inline const Vector3 operator []( int col ) const;
-
-    // Set the element of a 3x3 matrix referred to by column and row indices
-    // 
-    inline Matrix3 & setElem( int col, int row, float val );
-
-    // Get the element of a 3x3 matrix referred to by column and row indices
-    // 
-    inline float getElem( int col, int row ) const;
-
-    // Add two 3x3 matrices
-    // 
-    inline const Matrix3 operator +( const Matrix3 & mat ) const;
-
-    // Subtract a 3x3 matrix from another 3x3 matrix
-    // 
-    inline const Matrix3 operator -( const Matrix3 & mat ) const;
-
-    // Negate all elements of a 3x3 matrix
-    // 
-    inline const Matrix3 operator -( ) const;
-
-    // Multiply a 3x3 matrix by a scalar
-    // 
-    inline const Matrix3 operator *( float scalar ) const;
-
-    // Multiply a 3x3 matrix by a 3-D vector
-    // 
-    inline const Vector3 operator *( const Vector3 & vec ) const;
-
-    // Multiply two 3x3 matrices
-    // 
-    inline const Matrix3 operator *( const Matrix3 & mat ) const;
-
-    // Perform compound assignment and addition with a 3x3 matrix
-    // 
-    inline Matrix3 & operator +=( const Matrix3 & mat );
-
-    // Perform compound assignment and subtraction by a 3x3 matrix
-    // 
-    inline Matrix3 & operator -=( const Matrix3 & mat );
-
-    // Perform compound assignment and multiplication by a scalar
-    // 
-    inline Matrix3 & operator *=( float scalar );
-
-    // Perform compound assignment and multiplication by a 3x3 matrix
-    // 
-    inline Matrix3 & operator *=( const Matrix3 & mat );
-
-    // Construct an identity 3x3 matrix
-    // 
-    static inline const Matrix3 identity( );
-
-    // Construct a 3x3 matrix to rotate around the x axis
-    // 
-    static inline const Matrix3 rotationX( float radians );
-
-    // Construct a 3x3 matrix to rotate around the y axis
-    // 
-    static inline const Matrix3 rotationY( float radians );
-
-    // Construct a 3x3 matrix to rotate around the z axis
-    // 
-    static inline const Matrix3 rotationZ( float radians );
-
-    // Construct a 3x3 matrix to rotate around the x, y, and z axes
-    // 
-    static inline const Matrix3 rotationZYX( const Vector3 & radiansXYZ );
-
-    // Construct a 3x3 matrix to rotate around a unit-length 3-D vector
-    // 
-    static inline const Matrix3 rotation( float radians, const Vector3 & unitVec );
-
-    // Construct a rotation matrix from a unit-length quaternion
-    // 
-    static inline const Matrix3 rotation( const Quat & unitQuat );
-
-    // Construct a 3x3 matrix to perform scaling
-    // 
-    static inline const Matrix3 scale( const Vector3 & scaleVec );
-
-};
-// Multiply a 3x3 matrix by a scalar
-// 
-inline const Matrix3 operator *( float scalar, const Matrix3 & mat );
-
-// Append (post-multiply) a scale transformation to a 3x3 matrix
-// NOTE: 
-// Faster than creating and multiplying a scale transformation matrix.
-// 
-inline const Matrix3 appendScale( const Matrix3 & mat, const Vector3 & scaleVec );
-
-// Prepend (pre-multiply) a scale transformation to a 3x3 matrix
-// NOTE: 
-// Faster than creating and multiplying a scale transformation matrix.
-// 
-inline const Matrix3 prependScale( const Vector3 & scaleVec, const Matrix3 & mat );
-
-// Multiply two 3x3 matrices per element
-// 
-inline const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 );
-
-// Compute the absolute value of a 3x3 matrix per element
-// 
-inline const Matrix3 absPerElem( const Matrix3 & mat );
-
-// Transpose of a 3x3 matrix
-// 
-inline const Matrix3 transpose( const Matrix3 & mat );
-
-// Compute the inverse of a 3x3 matrix
-// NOTE: 
-// Result is unpredictable when the determinant of mat is equal to or near 0.
-// 
-inline const Matrix3 inverse( const Matrix3 & mat );
-
-// Determinant of a 3x3 matrix
-// 
-inline float determinant( const Matrix3 & mat );
-
-// Conditionally select between two 3x3 matrices
-// 
-inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, bool select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a 3x3 matrix
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Matrix3 & mat );
-
-// Print a 3x3 matrix and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Matrix3 & mat, const char * name );
-
-#endif
-
-// A 4x4 matrix in array-of-structures format
-//
-class Matrix4
-{
-    Vector4 mCol0;
-    Vector4 mCol1;
-    Vector4 mCol2;
-    Vector4 mCol3;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Matrix4( ) { };
-
-    // Copy a 4x4 matrix
-    // 
-    inline Matrix4( const Matrix4 & mat );
-
-    // Construct a 4x4 matrix containing the specified columns
-    // 
-    inline Matrix4( const Vector4 & col0, const Vector4 & col1, const Vector4 & col2, const Vector4 & col3 );
-
-    // Construct a 4x4 matrix from a 3x4 transformation matrix
-    // 
-    explicit inline Matrix4( const Transform3 & mat );
-
-    // Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector
-    // 
-    inline Matrix4( const Matrix3 & mat, const Vector3 & translateVec );
-
-    // Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector
-    // 
-    inline Matrix4( const Quat & unitQuat, const Vector3 & translateVec );
-
-    // Set all elements of a 4x4 matrix to the same scalar value
-    // 
-    explicit inline Matrix4( float scalar );
-
-    // Assign one 4x4 matrix to another
-    // 
-    inline Matrix4 & operator =( const Matrix4 & mat );
-
-    // Set the upper-left 3x3 submatrix
-    // NOTE: 
-    // This function does not change the bottom row elements.
-    // 
-    inline Matrix4 & setUpper3x3( const Matrix3 & mat3 );
-
-    // Get the upper-left 3x3 submatrix of a 4x4 matrix
-    // 
-    inline const Matrix3 getUpper3x3( ) const;
-
-    // Set translation component
-    // NOTE: 
-    // This function does not change the bottom row elements.
-    // 
-    inline Matrix4 & setTranslation( const Vector3 & translateVec );
-
-    // Get the translation component of a 4x4 matrix
-    // 
-    inline const Vector3 getTranslation( ) const;
-
-    // Set column 0 of a 4x4 matrix
-    // 
-    inline Matrix4 & setCol0( const Vector4 & col0 );
-
-    // Set column 1 of a 4x4 matrix
-    // 
-    inline Matrix4 & setCol1( const Vector4 & col1 );
-
-    // Set column 2 of a 4x4 matrix
-    // 
-    inline Matrix4 & setCol2( const Vector4 & col2 );
-
-    // Set column 3 of a 4x4 matrix
-    // 
-    inline Matrix4 & setCol3( const Vector4 & col3 );
-
-    // Get column 0 of a 4x4 matrix
-    // 
-    inline const Vector4 getCol0( ) const;
-
-    // Get column 1 of a 4x4 matrix
-    // 
-    inline const Vector4 getCol1( ) const;
-
-    // Get column 2 of a 4x4 matrix
-    // 
-    inline const Vector4 getCol2( ) const;
-
-    // Get column 3 of a 4x4 matrix
-    // 
-    inline const Vector4 getCol3( ) const;
-
-    // Set the column of a 4x4 matrix referred to by the specified index
-    // 
-    inline Matrix4 & setCol( int col, const Vector4 & vec );
-
-    // Set the row of a 4x4 matrix referred to by the specified index
-    // 
-    inline Matrix4 & setRow( int row, const Vector4 & vec );
-
-    // Get the column of a 4x4 matrix referred to by the specified index
-    // 
-    inline const Vector4 getCol( int col ) const;
-
-    // Get the row of a 4x4 matrix referred to by the specified index
-    // 
-    inline const Vector4 getRow( int row ) const;
-
-    // Subscripting operator to set or get a column
-    // 
-    inline Vector4 & operator []( int col );
-
-    // Subscripting operator to get a column
-    // 
-    inline const Vector4 operator []( int col ) const;
-
-    // Set the element of a 4x4 matrix referred to by column and row indices
-    // 
-    inline Matrix4 & setElem( int col, int row, float val );
-
-    // Get the element of a 4x4 matrix referred to by column and row indices
-    // 
-    inline float getElem( int col, int row ) const;
-
-    // Add two 4x4 matrices
-    // 
-    inline const Matrix4 operator +( const Matrix4 & mat ) const;
-
-    // Subtract a 4x4 matrix from another 4x4 matrix
-    // 
-    inline const Matrix4 operator -( const Matrix4 & mat ) const;
-
-    // Negate all elements of a 4x4 matrix
-    // 
-    inline const Matrix4 operator -( ) const;
-
-    // Multiply a 4x4 matrix by a scalar
-    // 
-    inline const Matrix4 operator *( float scalar ) const;
-
-    // Multiply a 4x4 matrix by a 4-D vector
-    // 
-    inline const Vector4 operator *( const Vector4 & vec ) const;
-
-    // Multiply a 4x4 matrix by a 3-D vector
-    // 
-    inline const Vector4 operator *( const Vector3 & vec ) const;
-
-    // Multiply a 4x4 matrix by a 3-D point
-    // 
-    inline const Vector4 operator *( const Point3 & pnt ) const;
-
-    // Multiply two 4x4 matrices
-    // 
-    inline const Matrix4 operator *( const Matrix4 & mat ) const;
-
-    // Multiply a 4x4 matrix by a 3x4 transformation matrix
-    // 
-    inline const Matrix4 operator *( const Transform3 & tfrm ) const;
-
-    // Perform compound assignment and addition with a 4x4 matrix
-    // 
-    inline Matrix4 & operator +=( const Matrix4 & mat );
-
-    // Perform compound assignment and subtraction by a 4x4 matrix
-    // 
-    inline Matrix4 & operator -=( const Matrix4 & mat );
-
-    // Perform compound assignment and multiplication by a scalar
-    // 
-    inline Matrix4 & operator *=( float scalar );
-
-    // Perform compound assignment and multiplication by a 4x4 matrix
-    // 
-    inline Matrix4 & operator *=( const Matrix4 & mat );
-
-    // Perform compound assignment and multiplication by a 3x4 transformation matrix
-    // 
-    inline Matrix4 & operator *=( const Transform3 & tfrm );
-
-    // Construct an identity 4x4 matrix
-    // 
-    static inline const Matrix4 identity( );
-
-    // Construct a 4x4 matrix to rotate around the x axis
-    // 
-    static inline const Matrix4 rotationX( float radians );
-
-    // Construct a 4x4 matrix to rotate around the y axis
-    // 
-    static inline const Matrix4 rotationY( float radians );
-
-    // Construct a 4x4 matrix to rotate around the z axis
-    // 
-    static inline const Matrix4 rotationZ( float radians );
-
-    // Construct a 4x4 matrix to rotate around the x, y, and z axes
-    // 
-    static inline const Matrix4 rotationZYX( const Vector3 & radiansXYZ );
-
-    // Construct a 4x4 matrix to rotate around a unit-length 3-D vector
-    // 
-    static inline const Matrix4 rotation( float radians, const Vector3 & unitVec );
-
-    // Construct a rotation matrix from a unit-length quaternion
-    // 
-    static inline const Matrix4 rotation( const Quat & unitQuat );
-
-    // Construct a 4x4 matrix to perform scaling
-    // 
-    static inline const Matrix4 scale( const Vector3 & scaleVec );
-
-    // Construct a 4x4 matrix to perform translation
-    // 
-    static inline const Matrix4 translation( const Vector3 & translateVec );
-
-    // Construct viewing matrix based on eye position, position looked at, and up direction
-    // 
-    static inline const Matrix4 lookAt( const Point3 & eyePos, const Point3 & lookAtPos, const Vector3 & upVec );
-
-    // Construct a perspective projection matrix
-    // 
-    static inline const Matrix4 perspective( float fovyRadians, float aspect, float zNear, float zFar );
-
-    // Construct a perspective projection matrix based on frustum
-    // 
-    static inline const Matrix4 frustum( float left, float right, float bottom, float top, float zNear, float zFar );
-
-    // Construct an orthographic projection matrix
-    // 
-    static inline const Matrix4 orthographic( float left, float right, float bottom, float top, float zNear, float zFar );
-
-};
-// Multiply a 4x4 matrix by a scalar
-// 
-inline const Matrix4 operator *( float scalar, const Matrix4 & mat );
-
-// Append (post-multiply) a scale transformation to a 4x4 matrix
-// NOTE: 
-// Faster than creating and multiplying a scale transformation matrix.
-// 
-inline const Matrix4 appendScale( const Matrix4 & mat, const Vector3 & scaleVec );
-
-// Prepend (pre-multiply) a scale transformation to a 4x4 matrix
-// NOTE: 
-// Faster than creating and multiplying a scale transformation matrix.
-// 
-inline const Matrix4 prependScale( const Vector3 & scaleVec, const Matrix4 & mat );
-
-// Multiply two 4x4 matrices per element
-// 
-inline const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 );
-
-// Compute the absolute value of a 4x4 matrix per element
-// 
-inline const Matrix4 absPerElem( const Matrix4 & mat );
-
-// Transpose of a 4x4 matrix
-// 
-inline const Matrix4 transpose( const Matrix4 & mat );
-
-// Compute the inverse of a 4x4 matrix
-// NOTE: 
-// Result is unpredictable when the determinant of mat is equal to or near 0.
-// 
-inline const Matrix4 inverse( const Matrix4 & mat );
-
-// Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix
-// NOTE: 
-// This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.  The result is unpredictable when the determinant of mat is equal to or near 0.
-// 
-inline const Matrix4 affineInverse( const Matrix4 & mat );
-
-// Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix
-// NOTE: 
-// This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.
-// 
-inline const Matrix4 orthoInverse( const Matrix4 & mat );
-
-// Determinant of a 4x4 matrix
-// 
-inline float determinant( const Matrix4 & mat );
-
-// Conditionally select between two 4x4 matrices
-// 
-inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, bool select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a 4x4 matrix
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Matrix4 & mat );
-
-// Print a 4x4 matrix and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Matrix4 & mat, const char * name );
-
-#endif
-
-// A 3x4 transformation matrix in array-of-structures format
-//
-class Transform3
-{
-    Vector3 mCol0;
-    Vector3 mCol1;
-    Vector3 mCol2;
-    Vector3 mCol3;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Transform3( ) { };
-
-    // Copy a 3x4 transformation matrix
-    // 
-    inline Transform3( const Transform3 & tfrm );
-
-    // Construct a 3x4 transformation matrix containing the specified columns
-    // 
-    inline Transform3( const Vector3 & col0, const Vector3 & col1, const Vector3 & col2, const Vector3 & col3 );
-
-    // Construct a 3x4 transformation matrix from a 3x3 matrix and a 3-D vector
-    // 
-    inline Transform3( const Matrix3 & tfrm, const Vector3 & translateVec );
-
-    // Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector
-    // 
-    inline Transform3( const Quat & unitQuat, const Vector3 & translateVec );
-
-    // Set all elements of a 3x4 transformation matrix to the same scalar value
-    // 
-    explicit inline Transform3( float scalar );
-
-    // Assign one 3x4 transformation matrix to another
-    // 
-    inline Transform3 & operator =( const Transform3 & tfrm );
-
-    // Set the upper-left 3x3 submatrix
-    // 
-    inline Transform3 & setUpper3x3( const Matrix3 & mat3 );
-
-    // Get the upper-left 3x3 submatrix of a 3x4 transformation matrix
-    // 
-    inline const Matrix3 getUpper3x3( ) const;
-
-    // Set translation component
-    // 
-    inline Transform3 & setTranslation( const Vector3 & translateVec );
-
-    // Get the translation component of a 3x4 transformation matrix
-    // 
-    inline const Vector3 getTranslation( ) const;
-
-    // Set column 0 of a 3x4 transformation matrix
-    // 
-    inline Transform3 & setCol0( const Vector3 & col0 );
-
-    // Set column 1 of a 3x4 transformation matrix
-    // 
-    inline Transform3 & setCol1( const Vector3 & col1 );
-
-    // Set column 2 of a 3x4 transformation matrix
-    // 
-    inline Transform3 & setCol2( const Vector3 & col2 );
-
-    // Set column 3 of a 3x4 transformation matrix
-    // 
-    inline Transform3 & setCol3( const Vector3 & col3 );
-
-    // Get column 0 of a 3x4 transformation matrix
-    // 
-    inline const Vector3 getCol0( ) const;
-
-    // Get column 1 of a 3x4 transformation matrix
-    // 
-    inline const Vector3 getCol1( ) const;
-
-    // Get column 2 of a 3x4 transformation matrix
-    // 
-    inline const Vector3 getCol2( ) const;
-
-    // Get column 3 of a 3x4 transformation matrix
-    // 
-    inline const Vector3 getCol3( ) const;
-
-    // Set the column of a 3x4 transformation matrix referred to by the specified index
-    // 
-    inline Transform3 & setCol( int col, const Vector3 & vec );
-
-    // Set the row of a 3x4 transformation matrix referred to by the specified index
-    // 
-    inline Transform3 & setRow( int row, const Vector4 & vec );
-
-    // Get the column of a 3x4 transformation matrix referred to by the specified index
-    // 
-    inline const Vector3 getCol( int col ) const;
-
-    // Get the row of a 3x4 transformation matrix referred to by the specified index
-    // 
-    inline const Vector4 getRow( int row ) const;
-
-    // Subscripting operator to set or get a column
-    // 
-    inline Vector3 & operator []( int col );
-
-    // Subscripting operator to get a column
-    // 
-    inline const Vector3 operator []( int col ) const;
-
-    // Set the element of a 3x4 transformation matrix referred to by column and row indices
-    // 
-    inline Transform3 & setElem( int col, int row, float val );
-
-    // Get the element of a 3x4 transformation matrix referred to by column and row indices
-    // 
-    inline float getElem( int col, int row ) const;
-
-    // Multiply a 3x4 transformation matrix by a 3-D vector
-    // 
-    inline const Vector3 operator *( const Vector3 & vec ) const;
-
-    // Multiply a 3x4 transformation matrix by a 3-D point
-    // 
-    inline const Point3 operator *( const Point3 & pnt ) const;
-
-    // Multiply two 3x4 transformation matrices
-    // 
-    inline const Transform3 operator *( const Transform3 & tfrm ) const;
-
-    // Perform compound assignment and multiplication by a 3x4 transformation matrix
-    // 
-    inline Transform3 & operator *=( const Transform3 & tfrm );
-
-    // Construct an identity 3x4 transformation matrix
-    // 
-    static inline const Transform3 identity( );
-
-    // Construct a 3x4 transformation matrix to rotate around the x axis
-    // 
-    static inline const Transform3 rotationX( float radians );
-
-    // Construct a 3x4 transformation matrix to rotate around the y axis
-    // 
-    static inline const Transform3 rotationY( float radians );
-
-    // Construct a 3x4 transformation matrix to rotate around the z axis
-    // 
-    static inline const Transform3 rotationZ( float radians );
-
-    // Construct a 3x4 transformation matrix to rotate around the x, y, and z axes
-    // 
-    static inline const Transform3 rotationZYX( const Vector3 & radiansXYZ );
-
-    // Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector
-    // 
-    static inline const Transform3 rotation( float radians, const Vector3 & unitVec );
-
-    // Construct a rotation matrix from a unit-length quaternion
-    // 
-    static inline const Transform3 rotation( const Quat & unitQuat );
-
-    // Construct a 3x4 transformation matrix to perform scaling
-    // 
-    static inline const Transform3 scale( const Vector3 & scaleVec );
-
-    // Construct a 3x4 transformation matrix to perform translation
-    // 
-    static inline const Transform3 translation( const Vector3 & translateVec );
-
-};
-// Append (post-multiply) a scale transformation to a 3x4 transformation matrix
-// NOTE: 
-// Faster than creating and multiplying a scale transformation matrix.
-// 
-inline const Transform3 appendScale( const Transform3 & tfrm, const Vector3 & scaleVec );
-
-// Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix
-// NOTE: 
-// Faster than creating and multiplying a scale transformation matrix.
-// 
-inline const Transform3 prependScale( const Vector3 & scaleVec, const Transform3 & tfrm );
-
-// Multiply two 3x4 transformation matrices per element
-// 
-inline const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 );
-
-// Compute the absolute value of a 3x4 transformation matrix per element
-// 
-inline const Transform3 absPerElem( const Transform3 & tfrm );
-
-// Inverse of a 3x4 transformation matrix
-// NOTE: 
-// Result is unpredictable when the determinant of the left 3x3 submatrix is equal to or near 0.
-// 
-inline const Transform3 inverse( const Transform3 & tfrm );
-
-// Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix
-// NOTE: 
-// This can be used to achieve better performance than a general inverse when the specified 3x4 transformation matrix meets the given restrictions.
-// 
-inline const Transform3 orthoInverse( const Transform3 & tfrm );
-
-// Conditionally select between two 3x4 transformation matrices
-// 
-inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, bool select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a 3x4 transformation matrix
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Transform3 & tfrm );
-
-// Print a 3x4 transformation matrix and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Transform3 & tfrm, const char * name );
-
-#endif
-
-} // namespace Aos
-} // namespace Vectormath
-
-#include "vec_aos.h"
-#include "quat_aos.h"
-#include "mat_aos.h"
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_AOS_CPP_SCALAR_H
+#define _VECTORMATH_AOS_CPP_SCALAR_H
+
+#include <math.h>
+
+#ifdef _VECTORMATH_DEBUG
+#include <stdio.h>
+#endif
+
+namespace Vectormath {
+
+namespace Aos {
+
+//-----------------------------------------------------------------------------
+// Forward Declarations
+//
+
+class Vector3;
+class Vector4;
+class Point3;
+class Quat;
+class Matrix3;
+class Matrix4;
+class Transform3;
+
+// A 3-D vector in array-of-structures format, stored as three floats (mX, mY, mZ).
+// Setters and compound assignments return Vector3 &; binary operators return by const value.
+class Vector3
+{
+    float mX;
+    float mY;
+    float mZ;
+#ifndef __GNUC__
+    float d;    // fourth float, non-GCC builds only; presumably pads the object to 16 bytes -- TODO confirm
+#endif
+
+public:
+    // Default constructor; does no initialization
+    // (the body is empty, so the float members are left unset).
+    inline Vector3( ) { };
+
+    // Copy a 3-D vector
+    // 
+    inline Vector3( const Vector3 & vec );
+
+    // Construct a 3-D vector from x, y, and z elements
+    // 
+    inline Vector3( float x, float y, float z );
+
+    // Copy elements from a 3-D point into a 3-D vector
+    // (explicit: a Point3 is never converted implicitly).
+    explicit inline Vector3( const Point3 & pnt );
+
+    // Set all elements of a 3-D vector to the same scalar value
+    // (explicit: a float is never converted implicitly).
+    explicit inline Vector3( float scalar );
+
+    // Assign one 3-D vector to another
+    // 
+    inline Vector3 & operator =( const Vector3 & vec );
+
+    // Set the x element of a 3-D vector
+    // 
+    inline Vector3 & setX( float x );
+
+    // Set the y element of a 3-D vector
+    // 
+    inline Vector3 & setY( float y );
+
+    // Set the z element of a 3-D vector
+    // 
+    inline Vector3 & setZ( float z );
+
+    // Get the x element of a 3-D vector
+    // 
+    inline float getX( ) const;
+
+    // Get the y element of a 3-D vector
+    // 
+    inline float getY( ) const;
+
+    // Get the z element of a 3-D vector
+    // 
+    inline float getZ( ) const;
+
+    // Set an x, y, or z element of a 3-D vector by index
+    // NOTE(review): no range check on idx is visible in this declaration -- confirm in the definition.
+    inline Vector3 & setElem( int idx, float value );
+
+    // Get an x, y, or z element of a 3-D vector by index
+    // 
+    inline float getElem( int idx ) const;
+
+    // Subscripting operator to set or get an element
+    // (returns a float reference, so the result can be assigned to).
+    inline float & operator []( int idx );
+
+    // Subscripting operator to get an element
+    // (const overload; returns the float by value).
+    inline float operator []( int idx ) const;
+
+    // Add two 3-D vectors
+    // 
+    inline const Vector3 operator +( const Vector3 & vec ) const;
+
+    // Subtract a 3-D vector from another 3-D vector
+    // 
+    inline const Vector3 operator -( const Vector3 & vec ) const;
+
+    // Add a 3-D vector to a 3-D point
+    // (the result is a Point3).
+    inline const Point3 operator +( const Point3 & pnt ) const;
+
+    // Multiply a 3-D vector by a scalar
+    // 
+    inline const Vector3 operator *( float scalar ) const;
+
+    // Divide a 3-D vector by a scalar
+    // 
+    inline const Vector3 operator /( float scalar ) const;
+
+    // Perform compound assignment and addition with a 3-D vector
+    // 
+    inline Vector3 & operator +=( const Vector3 & vec );
+
+    // Perform compound assignment and subtraction by a 3-D vector
+    // 
+    inline Vector3 & operator -=( const Vector3 & vec );
+
+    // Perform compound assignment and multiplication by a scalar
+    // 
+    inline Vector3 & operator *=( float scalar );
+
+    // Perform compound assignment and division by a scalar
+    // 
+    inline Vector3 & operator /=( float scalar );
+
+    // Negate all elements of a 3-D vector
+    // (unary minus; returns a new vector by const value).
+    inline const Vector3 operator -( ) const;
+
+    // Construct x axis
+    // (static; called as Vector3::xAxis( )).
+    static inline const Vector3 xAxis( );
+
+    // Construct y axis
+    // (static; called as Vector3::yAxis( )).
+    static inline const Vector3 yAxis( );
+
+    // Construct z axis
+    // (static; called as Vector3::zAxis( )).
+    static inline const Vector3 zAxis( );
+
+}
+#ifdef __GNUC__
+__attribute__ ((aligned(16)))    // GCC builds: request 16-byte alignment for Vector3
+#endif
+;
+
+// Multiply a 3-D vector by a scalar
+// 
+inline const Vector3 operator *( float scalar, const Vector3 & vec );
+
+// Multiply two 3-D vectors per element
+// 
+inline const Vector3 mulPerElem( const Vector3 & vec0, const Vector3 & vec1 );
+
+// Divide two 3-D vectors per element
+// NOTE: 
+// Floating-point behavior matches standard library function divf4.
+// 
+inline const Vector3 divPerElem( const Vector3 & vec0, const Vector3 & vec1 );
+
+// Compute the reciprocal of a 3-D vector per element
+// NOTE: 
+// Floating-point behavior matches standard library function recipf4.
+// 
+inline const Vector3 recipPerElem( const Vector3 & vec );
+
+// Compute the square root of a 3-D vector per element
+// NOTE: 
+// Floating-point behavior matches standard library function sqrtf4.
+// 
+inline const Vector3 sqrtPerElem( const Vector3 & vec );
+
+// Compute the reciprocal square root of a 3-D vector per element
+// NOTE: 
+// Floating-point behavior matches standard library function rsqrtf4.
+// 
+inline const Vector3 rsqrtPerElem( const Vector3 & vec );
+
+// Compute the absolute value of a 3-D vector per element
+// 
+inline const Vector3 absPerElem( const Vector3 & vec );
+
+// Copy sign from one 3-D vector to another, per element
+// 
+inline const Vector3 copySignPerElem( const Vector3 & vec0, const Vector3 & vec1 );
+
+// Maximum of two 3-D vectors per element
+// 
+inline const Vector3 maxPerElem( const Vector3 & vec0, const Vector3 & vec1 );
+
+// Minimum of two 3-D vectors per element
+// 
+inline const Vector3 minPerElem( const Vector3 & vec0, const Vector3 & vec1 );
+
+// Maximum element of a 3-D vector
+// 
+inline float maxElem( const Vector3 & vec );
+
+// Minimum element of a 3-D vector
+// 
+inline float minElem( const Vector3 & vec );
+
+// Compute the sum of all elements of a 3-D vector
+// 
+inline float sum( const Vector3 & vec );
+
+// Compute the dot product of two 3-D vectors
+// 
+inline float dot( const Vector3 & vec0, const Vector3 & vec1 );
+
+// Compute the square of the length of a 3-D vector
+// 
+inline float lengthSqr( const Vector3 & vec );
+
+// Compute the length of a 3-D vector
+// 
+inline float length( const Vector3 & vec );
+
+// Normalize a 3-D vector
+// NOTE: 
+// The result is unpredictable when all elements of vec are at or near zero.
+// 
+inline const Vector3 normalize( const Vector3 & vec );
+
+// Compute cross product of two 3-D vectors
+// 
+inline const Vector3 cross( const Vector3 & vec0, const Vector3 & vec1 );
+
+// Outer product of two 3-D vectors
+// 
+inline const Matrix3 outer( const Vector3 & vec0, const Vector3 & vec1 );
+
+// Pre-multiply a row vector by a 3x3 matrix
+// 
+inline const Vector3 rowMul( const Vector3 & vec, const Matrix3 & mat );
+
+// Cross-product matrix of a 3-D vector
+// 
+inline const Matrix3 crossMatrix( const Vector3 & vec );
+
+// Create cross-product matrix and multiply
+// NOTE: 
+// Faster than separately creating a cross-product matrix and multiplying.
+// 
+inline const Matrix3 crossMatrixMul( const Vector3 & vec, const Matrix3 & mat );
+
+// Linear interpolation between two 3-D vectors
+// NOTE: 
+// Does not clamp t between 0 and 1.
+// 
+inline const Vector3 lerp( float t, const Vector3 & vec0, const Vector3 & vec1 );
+
+// Spherical linear interpolation between two 3-D vectors
+// NOTE: 
+// The result is unpredictable if the vectors point in opposite directions.
+// Does not clamp t between 0 and 1.
+// 
+inline const Vector3 slerp( float t, const Vector3 & unitVec0, const Vector3 & unitVec1 );
+
+// Conditionally select between two 3-D vectors
+// 
+inline const Vector3 select( const Vector3 & vec0, const Vector3 & vec1, bool select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 3-D vector
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Vector3 & vec );
+
+// Print a 3-D vector and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Vector3 & vec, const char * name );
+
+#endif
+
+// A 4-D vector in array-of-structures format, stored as four floats (mX, mY, mZ, mW).
+// Setters and compound assignments return Vector4 &; binary operators return by const value.
+class Vector4
+{
+    float mX;
+    float mY;
+    float mZ;
+    float mW;
+
+public:
+    // Default constructor; does no initialization
+    // (the body is empty, so mX, mY, mZ, and mW are left unset).
+    inline Vector4( ) { };
+
+    // Copy a 4-D vector
+    // 
+    inline Vector4( const Vector4 & vec );
+
+    // Construct a 4-D vector from x, y, z, and w elements
+    // 
+    inline Vector4( float x, float y, float z, float w );
+
+    // Construct a 4-D vector from a 3-D vector and a scalar
+    // (xyz supplies the first three elements, w the fourth).
+    inline Vector4( const Vector3 & xyz, float w );
+
+    // Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0
+    // (explicit: a Vector3 is never converted implicitly).
+    explicit inline Vector4( const Vector3 & vec );
+
+    // Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1
+    // (explicit: a Point3 is never converted implicitly).
+    explicit inline Vector4( const Point3 & pnt );
+
+    // Copy elements from a quaternion into a 4-D vector
+    // (explicit: a Quat is never converted implicitly).
+    explicit inline Vector4( const Quat & quat );
+
+    // Set all elements of a 4-D vector to the same scalar value
+    // (explicit: a float is never converted implicitly).
+    explicit inline Vector4( float scalar );
+
+    // Assign one 4-D vector to another
+    // 
+    inline Vector4 & operator =( const Vector4 & vec );
+
+    // Set the x, y, and z elements of a 4-D vector
+    // NOTE: 
+    // This function does not change the w element.
+    // 
+    inline Vector4 & setXYZ( const Vector3 & vec );
+
+    // Get the x, y, and z elements of a 4-D vector
+    // (returned as a new Vector3 by const value).
+    inline const Vector3 getXYZ( ) const;
+
+    // Set the x element of a 4-D vector
+    // 
+    inline Vector4 & setX( float x );
+
+    // Set the y element of a 4-D vector
+    // 
+    inline Vector4 & setY( float y );
+
+    // Set the z element of a 4-D vector
+    // 
+    inline Vector4 & setZ( float z );
+
+    // Set the w element of a 4-D vector
+    // 
+    inline Vector4 & setW( float w );
+
+    // Get the x element of a 4-D vector
+    // 
+    inline float getX( ) const;
+
+    // Get the y element of a 4-D vector
+    // 
+    inline float getY( ) const;
+
+    // Get the z element of a 4-D vector
+    // 
+    inline float getZ( ) const;
+
+    // Get the w element of a 4-D vector
+    // 
+    inline float getW( ) const;
+
+    // Set an x, y, z, or w element of a 4-D vector by index
+    // NOTE(review): no range check on idx is visible in this declaration -- confirm in the definition.
+    inline Vector4 & setElem( int idx, float value );
+
+    // Get an x, y, z, or w element of a 4-D vector by index
+    // 
+    inline float getElem( int idx ) const;
+
+    // Subscripting operator to set or get an element
+    // (returns a float reference, so the result can be assigned to).
+    inline float & operator []( int idx );
+
+    // Subscripting operator to get an element
+    // (const overload; returns the float by value).
+    inline float operator []( int idx ) const;
+
+    // Add two 4-D vectors
+    // 
+    inline const Vector4 operator +( const Vector4 & vec ) const;
+
+    // Subtract a 4-D vector from another 4-D vector
+    // 
+    inline const Vector4 operator -( const Vector4 & vec ) const;
+
+    // Multiply a 4-D vector by a scalar
+    // 
+    inline const Vector4 operator *( float scalar ) const;
+
+    // Divide a 4-D vector by a scalar
+    // 
+    inline const Vector4 operator /( float scalar ) const;
+
+    // Perform compound assignment and addition with a 4-D vector
+    // 
+    inline Vector4 & operator +=( const Vector4 & vec );
+
+    // Perform compound assignment and subtraction by a 4-D vector
+    // 
+    inline Vector4 & operator -=( const Vector4 & vec );
+
+    // Perform compound assignment and multiplication by a scalar
+    // 
+    inline Vector4 & operator *=( float scalar );
+
+    // Perform compound assignment and division by a scalar
+    // 
+    inline Vector4 & operator /=( float scalar );
+
+    // Negate all elements of a 4-D vector
+    // (unary minus; returns a new vector by const value).
+    inline const Vector4 operator -( ) const;
+
+    // Construct x axis
+    // (static; called as Vector4::xAxis( )).
+    static inline const Vector4 xAxis( );
+
+    // Construct y axis
+    // (static; called as Vector4::yAxis( )).
+    static inline const Vector4 yAxis( );
+
+    // Construct z axis
+    // (static; called as Vector4::zAxis( )).
+    static inline const Vector4 zAxis( );
+
+    // Construct w axis
+    // (static; called as Vector4::wAxis( )).
+    static inline const Vector4 wAxis( );
+
+}
+#ifdef __GNUC__
+__attribute__ ((aligned(16)))    // GCC builds: request 16-byte alignment for Vector4
+#endif
+;
+
+// Multiply a 4-D vector by a scalar
+// 
+inline const Vector4 operator *( float scalar, const Vector4 & vec );
+
+// Multiply two 4-D vectors per element
+// 
+inline const Vector4 mulPerElem( const Vector4 & vec0, const Vector4 & vec1 );
+
+// Divide two 4-D vectors per element
+// NOTE: 
+// Floating-point behavior matches standard library function divf4.
+// 
+inline const Vector4 divPerElem( const Vector4 & vec0, const Vector4 & vec1 );
+
+// Compute the reciprocal of a 4-D vector per element
+// NOTE: 
+// Floating-point behavior matches standard library function recipf4.
+// 
+inline const Vector4 recipPerElem( const Vector4 & vec );
+
+// Compute the square root of a 4-D vector per element
+// NOTE: 
+// Floating-point behavior matches standard library function sqrtf4.
+// 
+inline const Vector4 sqrtPerElem( const Vector4 & vec );
+
+// Compute the reciprocal square root of a 4-D vector per element
+// NOTE: 
+// Floating-point behavior matches standard library function rsqrtf4.
+// 
+inline const Vector4 rsqrtPerElem( const Vector4 & vec );
+
+// Compute the absolute value of a 4-D vector per element
+// 
+inline const Vector4 absPerElem( const Vector4 & vec );
+
+// Copy sign from one 4-D vector to another, per element
+// 
+inline const Vector4 copySignPerElem( const Vector4 & vec0, const Vector4 & vec1 );
+
+// Maximum of two 4-D vectors per element
+// 
+inline const Vector4 maxPerElem( const Vector4 & vec0, const Vector4 & vec1 );
+
+// Minimum of two 4-D vectors per element
+// 
+inline const Vector4 minPerElem( const Vector4 & vec0, const Vector4 & vec1 );
+
+// Maximum element of a 4-D vector
+// 
+inline float maxElem( const Vector4 & vec );
+
+// Minimum element of a 4-D vector
+// 
+inline float minElem( const Vector4 & vec );
+
+// Compute the sum of all elements of a 4-D vector
+// 
+inline float sum( const Vector4 & vec );
+
+// Compute the dot product of two 4-D vectors
+// 
+inline float dot( const Vector4 & vec0, const Vector4 & vec1 );
+
+// Compute the square of the length of a 4-D vector
+// 
+inline float lengthSqr( const Vector4 & vec );
+
+// Compute the length of a 4-D vector
+// 
+inline float length( const Vector4 & vec );
+
+// Normalize a 4-D vector
+// NOTE: 
+// The result is unpredictable when all elements of vec are at or near zero.
+// 
+inline const Vector4 normalize( const Vector4 & vec );
+
+// Outer product of two 4-D vectors
+// 
+inline const Matrix4 outer( const Vector4 & vec0, const Vector4 & vec1 );
+
+// Linear interpolation between two 4-D vectors
+// NOTE: 
+// Does not clamp t between 0 and 1.
+// 
+inline const Vector4 lerp( float t, const Vector4 & vec0, const Vector4 & vec1 );
+
+// Spherical linear interpolation between two 4-D vectors
+// NOTE: 
+// The result is unpredictable if the vectors point in opposite directions.
+// Does not clamp t between 0 and 1.
+// 
+inline const Vector4 slerp( float t, const Vector4 & unitVec0, const Vector4 & unitVec1 );
+
+// Conditionally select between two 4-D vectors
+// 
+inline const Vector4 select( const Vector4 & vec0, const Vector4 & vec1, bool select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 4-D vector
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Vector4 & vec );
+
+// Print a 4-D vector and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Vector4 & vec, const char * name );
+
+#endif
+
+// A 3-D point in array-of-structures format, stored as three floats (mX, mY, mZ).
+// Point - point yields a Vector3; point +/- vector yields a Point3.
+class Point3
+{
+    float mX;
+    float mY;
+    float mZ;
+#ifndef __GNUC__
+    float d;    // fourth float, non-GCC builds only; presumably pads the object to 16 bytes -- TODO confirm
+#endif
+
+public:
+    // Default constructor; does no initialization
+    // (the body is empty, so the float members are left unset).
+    inline Point3( ) { };
+
+    // Copy a 3-D point
+    // 
+    inline Point3( const Point3 & pnt );
+
+    // Construct a 3-D point from x, y, and z elements
+    // 
+    inline Point3( float x, float y, float z );
+
+    // Copy elements from a 3-D vector into a 3-D point
+    // (explicit: a Vector3 is never converted implicitly).
+    explicit inline Point3( const Vector3 & vec );
+
+    // Set all elements of a 3-D point to the same scalar value
+    // (explicit: a float is never converted implicitly).
+    explicit inline Point3( float scalar );
+
+    // Assign one 3-D point to another
+    // 
+    inline Point3 & operator =( const Point3 & pnt );
+
+    // Set the x element of a 3-D point
+    // 
+    inline Point3 & setX( float x );
+
+    // Set the y element of a 3-D point
+    // 
+    inline Point3 & setY( float y );
+
+    // Set the z element of a 3-D point
+    // 
+    inline Point3 & setZ( float z );
+
+    // Get the x element of a 3-D point
+    // 
+    inline float getX( ) const;
+
+    // Get the y element of a 3-D point
+    // 
+    inline float getY( ) const;
+
+    // Get the z element of a 3-D point
+    // 
+    inline float getZ( ) const;
+
+    // Set an x, y, or z element of a 3-D point by index
+    // NOTE(review): no range check on idx is visible in this declaration -- confirm in the definition.
+    inline Point3 & setElem( int idx, float value );
+
+    // Get an x, y, or z element of a 3-D point by index
+    // 
+    inline float getElem( int idx ) const;
+
+    // Subscripting operator to set or get an element
+    // (returns a float reference, so the result can be assigned to).
+    inline float & operator []( int idx );
+
+    // Subscripting operator to get an element
+    // (const overload; returns the float by value).
+    inline float operator []( int idx ) const;
+
+    // Subtract a 3-D point from another 3-D point
+    // (the result is a Vector3).
+    inline const Vector3 operator -( const Point3 & pnt ) const;
+
+    // Add a 3-D vector to a 3-D point
+    // (the result is a Point3).
+    inline const Point3 operator +( const Vector3 & vec ) const;
+
+    // Subtract a 3-D vector from a 3-D point
+    // (the result is a Point3).
+    inline const Point3 operator -( const Vector3 & vec ) const;
+
+    // Perform compound assignment and addition with a 3-D vector
+    // 
+    inline Point3 & operator +=( const Vector3 & vec );
+
+    // Perform compound assignment and subtraction by a 3-D vector
+    // 
+    inline Point3 & operator -=( const Vector3 & vec );
+
+}
+#ifdef __GNUC__
+__attribute__ ((aligned(16)))    // GCC builds: request 16-byte alignment for Point3
+#endif
+;
+
+// Multiply two 3-D points per element
+// 
+inline const Point3 mulPerElem( const Point3 & pnt0, const Point3 & pnt1 );
+
+// Divide two 3-D points per element
+// NOTE: 
+// Floating-point behavior matches standard library function divf4.
+// 
+inline const Point3 divPerElem( const Point3 & pnt0, const Point3 & pnt1 );
+
+// Compute the reciprocal of a 3-D point per element
+// NOTE: 
+// Floating-point behavior matches standard library function recipf4.
+// 
+inline const Point3 recipPerElem( const Point3 & pnt );
+
+// Compute the square root of a 3-D point per element
+// NOTE: 
+// Floating-point behavior matches standard library function sqrtf4.
+// 
+inline const Point3 sqrtPerElem( const Point3 & pnt );
+
+// Compute the reciprocal square root of a 3-D point per element
+// NOTE: 
+// Floating-point behavior matches standard library function rsqrtf4.
+// 
+inline const Point3 rsqrtPerElem( const Point3 & pnt );
+
+// Compute the absolute value of a 3-D point per element
+// 
+inline const Point3 absPerElem( const Point3 & pnt );
+
+// Copy sign from one 3-D point to another, per element
+// 
+inline const Point3 copySignPerElem( const Point3 & pnt0, const Point3 & pnt1 );
+
+// Maximum of two 3-D points per element
+// 
+inline const Point3 maxPerElem( const Point3 & pnt0, const Point3 & pnt1 );
+
+// Minimum of two 3-D points per element
+// 
+inline const Point3 minPerElem( const Point3 & pnt0, const Point3 & pnt1 );
+
+// Maximum element of a 3-D point
+// 
+inline float maxElem( const Point3 & pnt );
+
+// Minimum element of a 3-D point
+// 
+inline float minElem( const Point3 & pnt );
+
+// Compute the sum of all elements of a 3-D point
+// 
+inline float sum( const Point3 & pnt );
+
+// Apply uniform scale to a 3-D point
+// 
+inline const Point3 scale( const Point3 & pnt, float scaleVal );
+
+// Apply non-uniform scale to a 3-D point
+// 
+inline const Point3 scale( const Point3 & pnt, const Vector3 & scaleVec );
+
+// Scalar projection of a 3-D point on a unit-length 3-D vector
+// 
+inline float projection( const Point3 & pnt, const Vector3 & unitVec );
+
+// Compute the square of the distance of a 3-D point from the coordinate-system origin
+// 
+inline float distSqrFromOrigin( const Point3 & pnt );
+
+// Compute the distance of a 3-D point from the coordinate-system origin
+// 
+inline float distFromOrigin( const Point3 & pnt );
+
+// Compute the square of the distance between two 3-D points
+// 
+inline float distSqr( const Point3 & pnt0, const Point3 & pnt1 );
+
+// Compute the distance between two 3-D points
+// 
+inline float dist( const Point3 & pnt0, const Point3 & pnt1 );
+
+// Linear interpolation between two 3-D points
+// NOTE: 
+// Does not clamp t between 0 and 1.
+// 
+inline const Point3 lerp( float t, const Point3 & pnt0, const Point3 & pnt1 );
+
+// Conditionally select between two 3-D points
+// 
+inline const Point3 select( const Point3 & pnt0, const Point3 & pnt1, bool select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 3-D point
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Point3 & pnt );
+
+// Print a 3-D point and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Point3 & pnt, const char * name );
+
+#endif
+
+// A quaternion in array-of-structures format, stored as four floats (mX, mY, mZ, mW).
+// Setters and compound assignments return Quat &; binary operators return by const value.
+class Quat
+{
+    float mX;
+    float mY;
+    float mZ;
+    float mW;
+
+public:
+    // Default constructor; does no initialization
+    // (the body is empty, so mX, mY, mZ, and mW are left unset).
+    inline Quat( ) { };
+
+    // Copy a quaternion
+    // 
+    inline Quat( const Quat & quat );
+
+    // Construct a quaternion from x, y, z, and w elements
+    // 
+    inline Quat( float x, float y, float z, float w );
+
+    // Construct a quaternion from a 3-D vector and a scalar
+    // (xyz supplies the first three elements, w the fourth).
+    inline Quat( const Vector3 & xyz, float w );
+
+    // Copy elements from a 4-D vector into a quaternion
+    // (explicit: a Vector4 is never converted implicitly).
+    explicit inline Quat( const Vector4 & vec );
+
+    // Convert a rotation matrix to a unit-length quaternion
+    // (explicit: a Matrix3 is never converted implicitly).
+    explicit inline Quat( const Matrix3 & rotMat );
+
+    // Set all elements of a quaternion to the same scalar value
+    // (explicit: a float is never converted implicitly).
+    explicit inline Quat( float scalar );
+
+    // Assign one quaternion to another
+    // 
+    inline Quat & operator =( const Quat & quat );
+
+    // Set the x, y, and z elements of a quaternion
+    // NOTE: 
+    // This function does not change the w element.
+    // 
+    inline Quat & setXYZ( const Vector3 & vec );
+
+    // Get the x, y, and z elements of a quaternion
+    // (returned as a new Vector3 by const value).
+    inline const Vector3 getXYZ( ) const;
+
+    // Set the x element of a quaternion
+    // 
+    inline Quat & setX( float x );
+
+    // Set the y element of a quaternion
+    // 
+    inline Quat & setY( float y );
+
+    // Set the z element of a quaternion
+    // 
+    inline Quat & setZ( float z );
+
+    // Set the w element of a quaternion
+    // 
+    inline Quat & setW( float w );
+
+    // Get the x element of a quaternion
+    // 
+    inline float getX( ) const;
+
+    // Get the y element of a quaternion
+    // 
+    inline float getY( ) const;
+
+    // Get the z element of a quaternion
+    // 
+    inline float getZ( ) const;
+
+    // Get the w element of a quaternion
+    // 
+    inline float getW( ) const;
+
+    // Set an x, y, z, or w element of a quaternion by index
+    // NOTE(review): no range check on idx is visible in this declaration -- confirm in the definition.
+    inline Quat & setElem( int idx, float value );
+
+    // Get an x, y, z, or w element of a quaternion by index
+    // 
+    inline float getElem( int idx ) const;
+
+    // Subscripting operator to set or get an element
+    // (returns a float reference, so the result can be assigned to).
+    inline float & operator []( int idx );
+
+    // Subscripting operator to get an element
+    // (const overload; returns the float by value).
+    inline float operator []( int idx ) const;
+
+    // Add two quaternions
+    // 
+    inline const Quat operator +( const Quat & quat ) const;
+
+    // Subtract a quaternion from another quaternion
+    // 
+    inline const Quat operator -( const Quat & quat ) const;
+
+    // Multiply two quaternions
+    // (this object is the left operand, quat the right).
+    inline const Quat operator *( const Quat & quat ) const;
+
+    // Multiply a quaternion by a scalar
+    // 
+    inline const Quat operator *( float scalar ) const;
+
+    // Divide a quaternion by a scalar
+    // 
+    inline const Quat operator /( float scalar ) const;
+
+    // Perform compound assignment and addition with a quaternion
+    // 
+    inline Quat & operator +=( const Quat & quat );
+
+    // Perform compound assignment and subtraction by a quaternion
+    // 
+    inline Quat & operator -=( const Quat & quat );
+
+    // Perform compound assignment and multiplication by a quaternion
+    // 
+    inline Quat & operator *=( const Quat & quat );
+
+    // Perform compound assignment and multiplication by a scalar
+    // 
+    inline Quat & operator *=( float scalar );
+
+    // Perform compound assignment and division by a scalar
+    // 
+    inline Quat & operator /=( float scalar );
+
+    // Negate all elements of a quaternion
+    // (unary minus; returns a new quaternion by const value).
+    inline const Quat operator -( ) const;
+
+    // Construct an identity quaternion
+    // (static; called as Quat::identity( )).
+    static inline const Quat identity( );
+
+    // Construct a quaternion to rotate between two unit-length 3-D vectors
+    // NOTE: 
+    // The result is unpredictable if unitVec0 and unitVec1 point in opposite directions.
+    // 
+    static inline const Quat rotation( const Vector3 & unitVec0, const Vector3 & unitVec1 );
+
+    // Construct a quaternion to rotate around a unit-length 3-D vector
+    // (overload taking an angle in radians and the rotation axis).
+    static inline const Quat rotation( float radians, const Vector3 & unitVec );
+
+    // Construct a quaternion to rotate around the x axis
+    // 
+    static inline const Quat rotationX( float radians );
+
+    // Construct a quaternion to rotate around the y axis
+    // 
+    static inline const Quat rotationY( float radians );
+
+    // Construct a quaternion to rotate around the z axis
+    // 
+    static inline const Quat rotationZ( float radians );
+
+}
+#ifdef __GNUC__
+__attribute__ ((aligned(16)))    // GCC builds: request 16-byte alignment for Quat
+#endif
+;
+
+// Multiply a quaternion by a scalar
+// 
+inline const Quat operator *( float scalar, const Quat & quat );
+
+// Compute the conjugate of a quaternion
+// 
+inline const Quat conj( const Quat & quat );
+
+// Use a unit-length quaternion to rotate a 3-D vector
+// 
+inline const Vector3 rotate( const Quat & unitQuat, const Vector3 & vec );
+
+// Compute the dot product of two quaternions
+// 
+inline float dot( const Quat & quat0, const Quat & quat1 );
+
+// Compute the norm of a quaternion
+// NOTE(review): length( ) is declared separately below; confirm whether norm is the squared length.
+inline float norm( const Quat & quat );
+
+// Compute the length of a quaternion
+// 
+inline float length( const Quat & quat );
+
+// Normalize a quaternion
+// NOTE: 
+// The result is unpredictable when all elements of quat are at or near zero.
+// 
+inline const Quat normalize( const Quat & quat );
+
+// Linear interpolation between two quaternions
+// NOTE: 
+// Does not clamp t between 0 and 1.
+// 
+inline const Quat lerp( float t, const Quat & quat0, const Quat & quat1 );
+
+// Spherical linear interpolation between two quaternions
+// NOTE: 
+// Interpolates along the shortest path between orientations.
+// Does not clamp t between 0 and 1.
+// 
+inline const Quat slerp( float t, const Quat & unitQuat0, const Quat & unitQuat1 );
+
+// Spherical quadrangle interpolation
+// 
+inline const Quat squad( float t, const Quat & unitQuat0, const Quat & unitQuat1, const Quat & unitQuat2, const Quat & unitQuat3 );
+
+// Conditionally select between two quaternions
+// 
+inline const Quat select( const Quat & quat0, const Quat & quat1, bool select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a quaternion
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Quat & quat );
+
+// Print a quaternion and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Quat & quat, const char * name );
+
+#endif
+
+// A 3x3 matrix in array-of-structures format, stored as three Vector3 columns (mCol0, mCol1, mCol2).
+// Setters and compound assignments return Matrix3 &; binary operators return by const value.
+class Matrix3
+{
+    Vector3 mCol0;
+    Vector3 mCol1;
+    Vector3 mCol2;
+
+public:
+    // Default constructor; does no initialization
+    // (the body is empty; the columns are default-constructed Vector3 objects).
+    inline Matrix3( ) { };
+
+    // Copy a 3x3 matrix
+    // 
+    inline Matrix3( const Matrix3 & mat );
+
+    // Construct a 3x3 matrix containing the specified columns
+    // 
+    inline Matrix3( const Vector3 & col0, const Vector3 & col1, const Vector3 & col2 );
+
+    // Construct a 3x3 rotation matrix from a unit-length quaternion
+    // (explicit: a Quat is never converted implicitly).
+    explicit inline Matrix3( const Quat & unitQuat );
+
+    // Set all elements of a 3x3 matrix to the same scalar value
+    // (explicit: a float is never converted implicitly).
+    explicit inline Matrix3( float scalar );
+
+    // Assign one 3x3 matrix to another
+    // 
+    inline Matrix3 & operator =( const Matrix3 & mat );
+
+    // Set column 0 of a 3x3 matrix
+    // 
+    inline Matrix3 & setCol0( const Vector3 & col0 );
+
+    // Set column 1 of a 3x3 matrix
+    // 
+    inline Matrix3 & setCol1( const Vector3 & col1 );
+
+    // Set column 2 of a 3x3 matrix
+    // 
+    inline Matrix3 & setCol2( const Vector3 & col2 );
+
+    // Get column 0 of a 3x3 matrix
+    // 
+    inline const Vector3 getCol0( ) const;
+
+    // Get column 1 of a 3x3 matrix
+    // 
+    inline const Vector3 getCol1( ) const;
+
+    // Get column 2 of a 3x3 matrix
+    // 
+    inline const Vector3 getCol2( ) const;
+
+    // Set the column of a 3x3 matrix referred to by the specified index
+    // NOTE(review): no range check on col is visible in this declaration -- confirm in the definition.
+    inline Matrix3 & setCol( int col, const Vector3 & vec );
+
+    // Set the row of a 3x3 matrix referred to by the specified index
+    // NOTE(review): no range check on row is visible in this declaration -- confirm in the definition.
+    inline Matrix3 & setRow( int row, const Vector3 & vec );
+
+    // Get the column of a 3x3 matrix referred to by the specified index
+    // 
+    inline const Vector3 getCol( int col ) const;
+
+    // Get the row of a 3x3 matrix referred to by the specified index
+    // 
+    inline const Vector3 getRow( int row ) const;
+
+    // Subscripting operator to set or get a column
+    // (returns a Vector3 reference, so the result can be assigned to).
+    inline Vector3 & operator []( int col );
+
+    // Subscripting operator to get a column
+    // (const overload; returns the column by const value).
+    inline const Vector3 operator []( int col ) const;
+
+    // Set the element of a 3x3 matrix referred to by column and row indices
+    // (argument order is column index first, then row index).
+    inline Matrix3 & setElem( int col, int row, float val );
+
+    // Get the element of a 3x3 matrix referred to by column and row indices
+    // (argument order is column index first, then row index).
+    inline float getElem( int col, int row ) const;
+
+    // Add two 3x3 matrices
+    // 
+    inline const Matrix3 operator +( const Matrix3 & mat ) const;
+
+    // Subtract a 3x3 matrix from another 3x3 matrix
+    // 
+    inline const Matrix3 operator -( const Matrix3 & mat ) const;
+
+    // Negate all elements of a 3x3 matrix
+    // (unary minus; returns a new matrix by const value).
+    inline const Matrix3 operator -( ) const;
+
+    // Multiply a 3x3 matrix by a scalar
+    // 
+    inline const Matrix3 operator *( float scalar ) const;
+
+    // Multiply a 3x3 matrix by a 3-D vector
+    // (the matrix is the left operand; the result is a Vector3).
+    inline const Vector3 operator *( const Vector3 & vec ) const;
+
+    // Multiply two 3x3 matrices
+    // (this object is the left operand, mat the right).
+    inline const Matrix3 operator *( const Matrix3 & mat ) const;
+
+    // Perform compound assignment and addition with a 3x3 matrix
+    // 
+    inline Matrix3 & operator +=( const Matrix3 & mat );
+
+    // Perform compound assignment and subtraction by a 3x3 matrix
+    // 
+    inline Matrix3 & operator -=( const Matrix3 & mat );
+
+    // Perform compound assignment and multiplication by a scalar
+    // 
+    inline Matrix3 & operator *=( float scalar );
+
+    // Perform compound assignment and multiplication by a 3x3 matrix
+    // 
+    inline Matrix3 & operator *=( const Matrix3 & mat );
+
+    // Construct an identity 3x3 matrix
+    // (static; called as Matrix3::identity( )).
+    static inline const Matrix3 identity( );
+
+    // Construct a 3x3 matrix to rotate around the x axis
+    // 
+    static inline const Matrix3 rotationX( float radians );
+
+    // Construct a 3x3 matrix to rotate around the y axis
+    // 
+    static inline const Matrix3 rotationY( float radians );
+
+    // Construct a 3x3 matrix to rotate around the z axis
+    // 
+    static inline const Matrix3 rotationZ( float radians );
+
+    // Construct a 3x3 matrix to rotate around the x, y, and z axes
+    // NOTE(review): the ZYX suffix suggests a fixed order of application -- confirm in the definition.
+    static inline const Matrix3 rotationZYX( const Vector3 & radiansXYZ );
+
+    // Construct a 3x3 matrix to rotate around a unit-length 3-D vector
+    // (overload taking an angle in radians and the rotation axis).
+    static inline const Matrix3 rotation( float radians, const Vector3 & unitVec );
+
+    // Construct a rotation matrix from a unit-length quaternion
+    // (static counterpart of the explicit Quat constructor declared in this class).
+    static inline const Matrix3 rotation( const Quat & unitQuat );
+
+    // Construct a 3x3 matrix to perform scaling
+    // (scaleVec supplies the scale factors as a Vector3).
+    static inline const Matrix3 scale( const Vector3 & scaleVec );
+
+};
+// Multiply a 3x3 matrix by a scalar
+// Non-member overload: the scalar is the left-hand operand (scalar * mat).
+inline const Matrix3 operator *( float scalar, const Matrix3 & mat );
+
+// Append (post-multiply) a scale transformation to a 3x3 matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// 
+inline const Matrix3 appendScale( const Matrix3 & mat, const Vector3 & scaleVec );
+
+// Prepend (pre-multiply) a scale transformation to a 3x3 matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// 
+inline const Matrix3 prependScale( const Vector3 & scaleVec, const Matrix3 & mat );
+
+// Multiply two 3x3 matrices per element
+// 
+inline const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 );
+
+// Compute the absolute value of a 3x3 matrix per element
+// 
+inline const Matrix3 absPerElem( const Matrix3 & mat );
+
+// Compute the transpose of a 3x3 matrix
+// 
+inline const Matrix3 transpose( const Matrix3 & mat );
+
+// Compute the inverse of a 3x3 matrix
+// NOTE: 
+// Result is unpredictable when the determinant of mat is equal to or near 0.
+// 
+inline const Matrix3 inverse( const Matrix3 & mat );
+
+// Compute the determinant of a 3x3 matrix
+// 
+inline float determinant( const Matrix3 & mat );
+
+// Conditionally select between two 3x3 matrices
+// NOTE(review): presumably yields mat1 when select1 is true, else mat0 -- confirm in mat_aos.h
+inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, bool select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 3x3 matrix
+// NOTE: 
+// Function is only declared when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Matrix3 & mat );
+
+// Print a 3x3 matrix and an associated string identifier
+// NOTE: 
+// Function is only declared when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Matrix3 & mat, const char * name );
+
+#endif
+
+// A 4x4 matrix in array-of-structures format
+// Stored as four Vector4 columns (mCol0 through mCol3), which are private members.
+class Matrix4
+{
+    Vector4 mCol0;
+    Vector4 mCol1;
+    Vector4 mCol2;
+    Vector4 mCol3;
+
+public:
+    // Default constructor; does no initialization
+    // The body is empty, so the columns hold whatever Vector4's default constructor leaves.
+    inline Matrix4( ) { };
+
+    // Copy a 4x4 matrix
+    // 
+    inline Matrix4( const Matrix4 & mat );
+
+    // Construct a 4x4 matrix containing the specified columns
+    // 
+    inline Matrix4( const Vector4 & col0, const Vector4 & col1, const Vector4 & col2, const Vector4 & col3 );
+
+    // Construct a 4x4 matrix from a 3x4 transformation matrix
+    // Declared explicit: a Transform3 is never converted to a Matrix4 implicitly.
+    explicit inline Matrix4( const Transform3 & mat );
+
+    // Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector
+    // 
+    inline Matrix4( const Matrix3 & mat, const Vector3 & translateVec );
+
+    // Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector
+    // 
+    inline Matrix4( const Quat & unitQuat, const Vector3 & translateVec );
+
+    // Set all elements of a 4x4 matrix to the same scalar value
+    // Declared explicit: a float is never converted to a Matrix4 implicitly.
+    explicit inline Matrix4( float scalar );
+
+    // Assign one 4x4 matrix to another
+    // 
+    inline Matrix4 & operator =( const Matrix4 & mat );
+
+    // Set the upper-left 3x3 submatrix
+    // NOTE: 
+    // This function does not change the bottom row elements.
+    // 
+    inline Matrix4 & setUpper3x3( const Matrix3 & mat3 );
+
+    // Get the upper-left 3x3 submatrix of a 4x4 matrix
+    // 
+    inline const Matrix3 getUpper3x3( ) const;
+
+    // Set translation component
+    // NOTE: 
+    // This function does not change the bottom row elements.
+    // 
+    inline Matrix4 & setTranslation( const Vector3 & translateVec );
+
+    // Get the translation component of a 4x4 matrix
+    // 
+    inline const Vector3 getTranslation( ) const;
+
+    // Set column 0 of a 4x4 matrix
+    // 
+    inline Matrix4 & setCol0( const Vector4 & col0 );
+
+    // Set column 1 of a 4x4 matrix
+    // 
+    inline Matrix4 & setCol1( const Vector4 & col1 );
+
+    // Set column 2 of a 4x4 matrix
+    // 
+    inline Matrix4 & setCol2( const Vector4 & col2 );
+
+    // Set column 3 of a 4x4 matrix
+    // 
+    inline Matrix4 & setCol3( const Vector4 & col3 );
+
+    // Get column 0 of a 4x4 matrix
+    // 
+    inline const Vector4 getCol0( ) const;
+
+    // Get column 1 of a 4x4 matrix
+    // 
+    inline const Vector4 getCol1( ) const;
+
+    // Get column 2 of a 4x4 matrix
+    // 
+    inline const Vector4 getCol2( ) const;
+
+    // Get column 3 of a 4x4 matrix
+    // 
+    inline const Vector4 getCol3( ) const;
+
+    // Set the column of a 4x4 matrix referred to by the specified index
+    // NOTE(review): valid col values are presumably 0..3 -- confirm in mat_aos.h
+    inline Matrix4 & setCol( int col, const Vector4 & vec );
+
+    // Set the row of a 4x4 matrix referred to by the specified index
+    // NOTE(review): valid row values are presumably 0..3 -- confirm in mat_aos.h
+    inline Matrix4 & setRow( int row, const Vector4 & vec );
+
+    // Get the column of a 4x4 matrix referred to by the specified index
+    // 
+    inline const Vector4 getCol( int col ) const;
+
+    // Get the row of a 4x4 matrix referred to by the specified index
+    // 
+    inline const Vector4 getRow( int row ) const;
+
+    // Subscripting operator to set or get a column
+    // Returns a non-const reference, so the column can be modified through it.
+    inline Vector4 & operator []( int col );
+
+    // Subscripting operator to get a column
+    // Const overload: returns the column by value, not by reference.
+    inline const Vector4 operator []( int col ) const;
+
+    // Set the element of a 4x4 matrix referred to by column and row indices
+    // Argument order is column index first, then row index.
+    inline Matrix4 & setElem( int col, int row, float val );
+
+    // Get the element of a 4x4 matrix referred to by column and row indices
+    // Argument order is column index first, then row index.
+    inline float getElem( int col, int row ) const;
+
+    // Add two 4x4 matrices
+    // 
+    inline const Matrix4 operator +( const Matrix4 & mat ) const;
+
+    // Subtract a 4x4 matrix from another 4x4 matrix
+    // 
+    inline const Matrix4 operator -( const Matrix4 & mat ) const;
+
+    // Negate all elements of a 4x4 matrix
+    // 
+    inline const Matrix4 operator -( ) const;
+
+    // Multiply a 4x4 matrix by a scalar
+    // 
+    inline const Matrix4 operator *( float scalar ) const;
+
+    // Multiply a 4x4 matrix by a 4-D vector
+    // 
+    inline const Vector4 operator *( const Vector4 & vec ) const;
+
+    // Multiply a 4x4 matrix by a 3-D vector
+    // The result is a Vector4 even though the operand is 3-D.
+    inline const Vector4 operator *( const Vector3 & vec ) const;
+
+    // Multiply a 4x4 matrix by a 3-D point
+    // The result is a Vector4 even though the operand is 3-D.
+    inline const Vector4 operator *( const Point3 & pnt ) const;
+
+    // Multiply two 4x4 matrices
+    // 
+    inline const Matrix4 operator *( const Matrix4 & mat ) const;
+
+    // Multiply a 4x4 matrix by a 3x4 transformation matrix
+    // 
+    inline const Matrix4 operator *( const Transform3 & tfrm ) const;
+
+    // Perform compound assignment and addition with a 4x4 matrix
+    // 
+    inline Matrix4 & operator +=( const Matrix4 & mat );
+
+    // Perform compound assignment and subtraction by a 4x4 matrix
+    // 
+    inline Matrix4 & operator -=( const Matrix4 & mat );
+
+    // Perform compound assignment and multiplication by a scalar
+    // 
+    inline Matrix4 & operator *=( float scalar );
+
+    // Perform compound assignment and multiplication by a 4x4 matrix
+    // 
+    inline Matrix4 & operator *=( const Matrix4 & mat );
+
+    // Perform compound assignment and multiplication by a 3x4 transformation matrix
+    // 
+    inline Matrix4 & operator *=( const Transform3 & tfrm );
+
+    // Construct an identity 4x4 matrix
+    // Static member: called as Matrix4::identity( ), no instance required.
+    static inline const Matrix4 identity( );
+
+    // Construct a 4x4 matrix to rotate around the x axis
+    // 
+    static inline const Matrix4 rotationX( float radians );
+
+    // Construct a 4x4 matrix to rotate around the y axis
+    // 
+    static inline const Matrix4 rotationY( float radians );
+
+    // Construct a 4x4 matrix to rotate around the z axis
+    // 
+    static inline const Matrix4 rotationZ( float radians );
+
+    // Construct a 4x4 matrix to rotate around the x, y, and z axes
+    // NOTE(review): radiansXYZ presumably holds the x, y, z angles; order of application not visible here -- confirm in mat_aos.h
+    static inline const Matrix4 rotationZYX( const Vector3 & radiansXYZ );
+
+    // Construct a 4x4 matrix to rotate around a unit-length 3-D vector
+    // 
+    static inline const Matrix4 rotation( float radians, const Vector3 & unitVec );
+
+    // Construct a rotation matrix from a unit-length quaternion
+    // 
+    static inline const Matrix4 rotation( const Quat & unitQuat );
+
+    // Construct a 4x4 matrix to perform scaling
+    // 
+    static inline const Matrix4 scale( const Vector3 & scaleVec );
+
+    // Construct a 4x4 matrix to perform translation
+    // 
+    static inline const Matrix4 translation( const Vector3 & translateVec );
+
+    // Construct viewing matrix based on eye position, position looked at, and up direction
+    // 
+    static inline const Matrix4 lookAt( const Point3 & eyePos, const Point3 & lookAtPos, const Vector3 & upVec );
+
+    // Construct a perspective projection matrix
+    // NOTE(review): fovyRadians is presumably the vertical field of view, in radians -- confirm in mat_aos.h
+    static inline const Matrix4 perspective( float fovyRadians, float aspect, float zNear, float zFar );
+
+    // Construct a perspective projection matrix based on frustum
+    // 
+    static inline const Matrix4 frustum( float left, float right, float bottom, float top, float zNear, float zFar );
+
+    // Construct an orthographic projection matrix
+    // 
+    static inline const Matrix4 orthographic( float left, float right, float bottom, float top, float zNear, float zFar );
+
+};
+// Multiply a 4x4 matrix by a scalar
+// Non-member overload: the scalar is the left-hand operand (scalar * mat).
+inline const Matrix4 operator *( float scalar, const Matrix4 & mat );
+
+// Append (post-multiply) a scale transformation to a 4x4 matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// 
+inline const Matrix4 appendScale( const Matrix4 & mat, const Vector3 & scaleVec );
+
+// Prepend (pre-multiply) a scale transformation to a 4x4 matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// 
+inline const Matrix4 prependScale( const Vector3 & scaleVec, const Matrix4 & mat );
+
+// Multiply two 4x4 matrices per element
+// 
+inline const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 );
+
+// Compute the absolute value of a 4x4 matrix per element
+// 
+inline const Matrix4 absPerElem( const Matrix4 & mat );
+
+// Compute the transpose of a 4x4 matrix
+// 
+inline const Matrix4 transpose( const Matrix4 & mat );
+
+// Compute the inverse of a 4x4 matrix
+// NOTE: 
+// Result is unpredictable when the determinant of mat is equal to or near 0.
+// 
+inline const Matrix4 inverse( const Matrix4 & mat );
+
+// Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix
+// NOTE: Can achieve better performance than the general inverse when mat meets that restriction.
+// Result is unpredictable when the determinant of mat is equal to or near 0.
+// 
+inline const Matrix4 affineInverse( const Matrix4 & mat );
+
+// Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix
+// with an orthogonal upper-left 3x3 submatrix
+// NOTE: Can achieve better performance than the general inverse when mat meets those restrictions.
+// 
+inline const Matrix4 orthoInverse( const Matrix4 & mat );
+
+// Compute the determinant of a 4x4 matrix
+// 
+inline float determinant( const Matrix4 & mat );
+
+// Conditionally select between two 4x4 matrices
+// NOTE(review): presumably yields mat1 when select1 is true, else mat0 -- confirm in mat_aos.h
+inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, bool select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 4x4 matrix
+// NOTE: 
+// Function is only declared when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Matrix4 & mat );
+
+// Print a 4x4 matrix and an associated string identifier
+// NOTE: 
+// Function is only declared when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Matrix4 & mat, const char * name );
+
+#endif
+
+// A 3x4 transformation matrix in array-of-structures format
+// Stored as four Vector3 columns (mCol0 through mCol3), which are private members.
+class Transform3
+{
+    Vector3 mCol0;
+    Vector3 mCol1;
+    Vector3 mCol2;
+    Vector3 mCol3;
+
+public:
+    // Default constructor; does no initialization
+    // The body is empty, so the columns hold whatever Vector3's default constructor leaves.
+    inline Transform3( ) { };
+
+    // Copy a 3x4 transformation matrix
+    // 
+    inline Transform3( const Transform3 & tfrm );
+
+    // Construct a 3x4 transformation matrix containing the specified columns
+    // 
+    inline Transform3( const Vector3 & col0, const Vector3 & col1, const Vector3 & col2, const Vector3 & col3 );
+
+    // Construct a 3x4 transformation matrix from a 3x3 matrix and a 3-D vector
+    // Note that the first parameter, although named tfrm, is a Matrix3.
+    inline Transform3( const Matrix3 & tfrm, const Vector3 & translateVec );
+
+    // Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector
+    // 
+    inline Transform3( const Quat & unitQuat, const Vector3 & translateVec );
+
+    // Set all elements of a 3x4 transformation matrix to the same scalar value
+    // Declared explicit: a float is never converted to a Transform3 implicitly.
+    explicit inline Transform3( float scalar );
+
+    // Assign one 3x4 transformation matrix to another
+    // 
+    inline Transform3 & operator =( const Transform3 & tfrm );
+
+    // Set the upper-left 3x3 submatrix
+    // 
+    inline Transform3 & setUpper3x3( const Matrix3 & mat3 );
+
+    // Get the upper-left 3x3 submatrix of a 3x4 transformation matrix
+    // 
+    inline const Matrix3 getUpper3x3( ) const;
+
+    // Set translation component
+    // 
+    inline Transform3 & setTranslation( const Vector3 & translateVec );
+
+    // Get the translation component of a 3x4 transformation matrix
+    // 
+    inline const Vector3 getTranslation( ) const;
+
+    // Set column 0 of a 3x4 transformation matrix
+    // 
+    inline Transform3 & setCol0( const Vector3 & col0 );
+
+    // Set column 1 of a 3x4 transformation matrix
+    // 
+    inline Transform3 & setCol1( const Vector3 & col1 );
+
+    // Set column 2 of a 3x4 transformation matrix
+    // 
+    inline Transform3 & setCol2( const Vector3 & col2 );
+
+    // Set column 3 of a 3x4 transformation matrix
+    // 
+    inline Transform3 & setCol3( const Vector3 & col3 );
+
+    // Get column 0 of a 3x4 transformation matrix
+    // 
+    inline const Vector3 getCol0( ) const;
+
+    // Get column 1 of a 3x4 transformation matrix
+    // 
+    inline const Vector3 getCol1( ) const;
+
+    // Get column 2 of a 3x4 transformation matrix
+    // 
+    inline const Vector3 getCol2( ) const;
+
+    // Get column 3 of a 3x4 transformation matrix
+    // 
+    inline const Vector3 getCol3( ) const;
+
+    // Set the column of a 3x4 transformation matrix referred to by the specified index
+    // NOTE(review): valid col values are presumably 0..3 -- confirm in mat_aos.h
+    inline Transform3 & setCol( int col, const Vector3 & vec );
+
+    // Set the row of a 3x4 transformation matrix referred to by the specified index
+    // A row spans all four columns, so it is passed as a Vector4 (columns are Vector3).
+    inline Transform3 & setRow( int row, const Vector4 & vec );
+
+    // Get the column of a 3x4 transformation matrix referred to by the specified index
+    // 
+    inline const Vector3 getCol( int col ) const;
+
+    // Get the row of a 3x4 transformation matrix referred to by the specified index
+    // A row spans all four columns, so it is returned as a Vector4 (columns are Vector3).
+    inline const Vector4 getRow( int row ) const;
+
+    // Subscripting operator to set or get a column
+    // Returns a non-const reference, so the column can be modified through it.
+    inline Vector3 & operator []( int col );
+
+    // Subscripting operator to get a column
+    // Const overload: returns the column by value, not by reference.
+    inline const Vector3 operator []( int col ) const;
+
+    // Set the element of a 3x4 transformation matrix referred to by column and row indices
+    // Argument order is column index first, then row index.
+    inline Transform3 & setElem( int col, int row, float val );
+
+    // Get the element of a 3x4 transformation matrix referred to by column and row indices
+    // Argument order is column index first, then row index.
+    inline float getElem( int col, int row ) const;
+
+    // Multiply a 3x4 transformation matrix by a 3-D vector
+    // NOTE(review): presumably applies only the 3x3 part (no translation) to a vector -- confirm in mat_aos.h
+    inline const Vector3 operator *( const Vector3 & vec ) const;
+
+    // Multiply a 3x4 transformation matrix by a 3-D point
+    // NOTE(review): presumably also applies the translation column to a point -- confirm in mat_aos.h
+    inline const Point3 operator *( const Point3 & pnt ) const;
+
+    // Multiply two 3x4 transformation matrices
+    // 
+    inline const Transform3 operator *( const Transform3 & tfrm ) const;
+
+    // Perform compound assignment and multiplication by a 3x4 transformation matrix
+    // 
+    inline Transform3 & operator *=( const Transform3 & tfrm );
+
+    // Construct an identity 3x4 transformation matrix
+    // Static member: called as Transform3::identity( ), no instance required.
+    static inline const Transform3 identity( );
+
+    // Construct a 3x4 transformation matrix to rotate around the x axis
+    // 
+    static inline const Transform3 rotationX( float radians );
+
+    // Construct a 3x4 transformation matrix to rotate around the y axis
+    // 
+    static inline const Transform3 rotationY( float radians );
+
+    // Construct a 3x4 transformation matrix to rotate around the z axis
+    // 
+    static inline const Transform3 rotationZ( float radians );
+
+    // Construct a 3x4 transformation matrix to rotate around the x, y, and z axes
+    // NOTE(review): radiansXYZ presumably holds the x, y, z angles; order of application not visible here -- confirm in mat_aos.h
+    static inline const Transform3 rotationZYX( const Vector3 & radiansXYZ );
+
+    // Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector
+    // 
+    static inline const Transform3 rotation( float radians, const Vector3 & unitVec );
+
+    // Construct a rotation matrix from a unit-length quaternion
+    // 
+    static inline const Transform3 rotation( const Quat & unitQuat );
+
+    // Construct a 3x4 transformation matrix to perform scaling
+    // 
+    static inline const Transform3 scale( const Vector3 & scaleVec );
+
+    // Construct a 3x4 transformation matrix to perform translation
+    // 
+    static inline const Transform3 translation( const Vector3 & translateVec );
+
+};
+// Append (post-multiply) a scale transformation to a 3x4 transformation matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// 
+inline const Transform3 appendScale( const Transform3 & tfrm, const Vector3 & scaleVec );
+
+// Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// 
+inline const Transform3 prependScale( const Vector3 & scaleVec, const Transform3 & tfrm );
+
+// Multiply two 3x4 transformation matrices per element
+// 
+inline const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 );
+
+// Compute the absolute value of a 3x4 transformation matrix per element
+// 
+inline const Transform3 absPerElem( const Transform3 & tfrm );
+
+// Compute the inverse of a 3x4 transformation matrix
+// NOTE: 
+// Result is unpredictable when the determinant of the upper-left 3x3 submatrix is equal to or near 0.
+// 
+inline const Transform3 inverse( const Transform3 & tfrm );
+
+// Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix
+// NOTE: 
+// Can achieve better performance than the general inverse when tfrm meets that restriction.
+// 
+inline const Transform3 orthoInverse( const Transform3 & tfrm );
+
+// Conditionally select between two 3x4 transformation matrices
+// NOTE(review): presumably yields tfrm1 when select1 is true, else tfrm0 -- confirm in mat_aos.h
+inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, bool select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 3x4 transformation matrix
+// NOTE: 
+// Function is only declared when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Transform3 & tfrm );
+
+// Print a 3x4 transformation matrix and an associated string identifier
+// NOTE: 
+// Function is only declared when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Transform3 & tfrm, const char * name );
+
+#endif
+
+} // namespace Aos
+} // namespace Vectormath
+
+#include "vec_aos.h"
+#include "quat_aos.h"
+#include "mat_aos.h"
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/spu/c/mat_aos.h b/Extras/vectormathlibrary/include/vectormath/spu/c/mat_aos.h
index d6b4cb0ba..f738e880f 100644
--- a/Extras/vectormathlibrary/include/vectormath/spu/c/mat_aos.h
+++ b/Extras/vectormathlibrary/include/vectormath/spu/c/mat_aos.h
@@ -1,1833 +1,1833 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_MAT_AOS_C_H
-#define _VECTORMATH_MAT_AOS_C_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/*-----------------------------------------------------------------------------
- * Constants
- * for shuffles, words are labeled [x,y,z,w] [a,b,c,d]
- */
-#define _VECTORMATH_SHUF_XAYB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_B })
-#define _VECTORMATH_SHUF_ZCWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_D })
-#define _VECTORMATH_SHUF_ZBW0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_0 })
-#define _VECTORMATH_SHUF_XCY0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_0 })
-#define _VECTORMATH_SHUF_XYAB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B })
-#define _VECTORMATH_SHUF_ZWCD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_D })
-#define _VECTORMATH_SHUF_0ZB0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_0 })     
-#define _VECTORMATH_SHUF_C0X0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_0 })
-#define _VECTORMATH_SHUF_YA00 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_0 })
-#define _VECTORMATH_SHUF_XAZC ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_C })
-#define _VECTORMATH_SHUF_YXWZ ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_Z })
-#define _VECTORMATH_SHUF_YBWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_D })
-#define _VECTORMATH_SHUF_XYCX ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_X })
-#define _VECTORMATH_SHUF_YCXY ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y })
-#define _VECTORMATH_SHUF_CXYC ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_C })
-#define _VECTORMATH_SHUF_ZAY0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_0 })
-#define _VECTORMATH_SHUF_BZX0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_0 })
-#define _VECTORMATH_SHUF_0ZYA ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_A })
-#define _VECTORMATH_SHUF_Z0XB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_B })
-#define _VECTORMATH_SHUF_YX0C ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_C })
-#define _VECTORMATH_SHUF_CZD0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_D, _VECTORMATH_SHUF_0 })
-#define _VECTORMATH_SHUF_BBY0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_0 })
-#define _VECTORMATH_PI_OVER_2 1.570796327f
-
-/*-----------------------------------------------------------------------------
- * Definitions
- */
-static inline void vmathM3Copy( VmathMatrix3 *result, const VmathMatrix3 *mat )
-{
-    vmathV3Copy( &result->col0, &mat->col0 );
-    vmathV3Copy( &result->col1, &mat->col1 );
-    vmathV3Copy( &result->col2, &mat->col2 );
-}
-
-static inline void vmathM3MakeFromScalar( VmathMatrix3 *result, float scalar )
-{
-    vmathV3MakeFromScalar( &result->col0, scalar );
-    vmathV3MakeFromScalar( &result->col1, scalar );
-    vmathV3MakeFromScalar( &result->col2, scalar );
-}
-
-static inline void vmathM3MakeFromQ( VmathMatrix3 *result, const VmathQuat *unitQuat )
-{
-    vec_float4 xyzw_2, wwww, yzxw, zxyw, yzxw_2, zxyw_2;
-    vec_float4 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
-    vec_uchar16 shuffle_wwww = (vec_uchar16)spu_splats((int)0x0c0d0e0f);
-    vec_uint4 select_x = (vec_uint4)spu_maskb(0xf000);
-    vec_uint4 select_z = (vec_uint4)spu_maskb(0x00f0);
-    xyzw_2 = spu_add( unitQuat->vec128, unitQuat->vec128 );
-    wwww = spu_shuffle( unitQuat->vec128, unitQuat->vec128, shuffle_wwww );
-    yzxw = spu_shuffle( unitQuat->vec128, unitQuat->vec128, _VECTORMATH_SHUF_YZXW );
-    zxyw = spu_shuffle( unitQuat->vec128, unitQuat->vec128, _VECTORMATH_SHUF_ZXYW );
-    yzxw_2 = spu_shuffle( xyzw_2, xyzw_2, _VECTORMATH_SHUF_YZXW );
-    zxyw_2 = spu_shuffle( xyzw_2, xyzw_2, _VECTORMATH_SHUF_ZXYW );
-    tmp0 = spu_mul( yzxw_2, wwww );
-    tmp1 = spu_nmsub( yzxw, yzxw_2, spu_splats(1.0f) );
-    tmp2 = spu_mul( yzxw, xyzw_2 );
-    tmp0 = spu_madd( zxyw, xyzw_2, tmp0 );
-    tmp1 = spu_nmsub( zxyw, zxyw_2, tmp1 );
-    tmp2 = spu_nmsub( zxyw_2, wwww, tmp2 );
-    tmp3 = spu_sel( tmp0, tmp1, select_x );
-    tmp4 = spu_sel( tmp1, tmp2, select_x );
-    tmp5 = spu_sel( tmp2, tmp0, select_x );
-    result->col0.vec128 = spu_sel( tmp3, tmp2, select_z );
-    result->col1.vec128 = spu_sel( tmp4, tmp0, select_z );
-    result->col2.vec128 = spu_sel( tmp5, tmp1, select_z );
-}
-
-static inline void vmathM3MakeFromCols( VmathMatrix3 *result, const VmathVector3 *_col0, const VmathVector3 *_col1, const VmathVector3 *_col2 )
-{
-    vmathV3Copy( &result->col0, _col0 );
-    vmathV3Copy( &result->col1, _col1 );
-    vmathV3Copy( &result->col2, _col2 );
-}
-
-static inline void vmathM3SetCol0( VmathMatrix3 *result, const VmathVector3 *_col0 )
-{
-    vmathV3Copy( &result->col0, _col0 );
-}
-
-static inline void vmathM3SetCol1( VmathMatrix3 *result, const VmathVector3 *_col1 )
-{
-    vmathV3Copy( &result->col1, _col1 );
-}
-
-static inline void vmathM3SetCol2( VmathMatrix3 *result, const VmathVector3 *_col2 )
-{
-    vmathV3Copy( &result->col2, _col2 );
-}
-
-static inline void vmathM3SetCol( VmathMatrix3 *result, int col, const VmathVector3 *vec )
-{
-    vmathV3Copy( (&result->col0 + col), vec );
-}
-
-static inline void vmathM3SetRow( VmathMatrix3 *result, int row, const VmathVector3 *vec )
-{
-    vmathV3SetElem( &result->col0, row, vmathV3GetElem( vec, 0 ) );
-    vmathV3SetElem( &result->col1, row, vmathV3GetElem( vec, 1 ) );
-    vmathV3SetElem( &result->col2, row, vmathV3GetElem( vec, 2 ) );
-}
-
-static inline void vmathM3SetElem( VmathMatrix3 *result, int col, int row, float val )
-{
-    VmathVector3 tmpV3_0;
-    vmathM3GetCol( &tmpV3_0, result, col );
-    vmathV3SetElem( &tmpV3_0, row, val );
-    vmathM3SetCol( result, col, &tmpV3_0 );
-}
-
-static inline float vmathM3GetElem( const VmathMatrix3 *mat, int col, int row )
-{
-    VmathVector3 tmpV3_0;
-    vmathM3GetCol( &tmpV3_0, mat, col );
-    return vmathV3GetElem( &tmpV3_0, row );
-}
-
-static inline void vmathM3GetCol0( VmathVector3 *result, const VmathMatrix3 *mat )
-{
-    vmathV3Copy( result, &mat->col0 );
-}
-
-static inline void vmathM3GetCol1( VmathVector3 *result, const VmathMatrix3 *mat )
-{
-    vmathV3Copy( result, &mat->col1 );
-}
-
-static inline void vmathM3GetCol2( VmathVector3 *result, const VmathMatrix3 *mat )
-{
-    vmathV3Copy( result, &mat->col2 );
-}
-
-static inline void vmathM3GetCol( VmathVector3 *result, const VmathMatrix3 *mat, int col )
-{
-    vmathV3Copy( result, (&mat->col0 + col) );
-}
-
-static inline void vmathM3GetRow( VmathVector3 *result, const VmathMatrix3 *mat, int row )
-{
-    vmathV3MakeFromElems( result, vmathV3GetElem( &mat->col0, row ), vmathV3GetElem( &mat->col1, row ), vmathV3GetElem( &mat->col2, row ) );
-}
-
-static inline void vmathM3Transpose( VmathMatrix3 *result, const VmathMatrix3 *mat )
-{
-    vec_float4 tmp0, tmp1, res0, res1, res2;
-    tmp0 = spu_shuffle( mat->col0.vec128, mat->col2.vec128, _VECTORMATH_SHUF_XAYB );
-    tmp1 = spu_shuffle( mat->col0.vec128, mat->col2.vec128, _VECTORMATH_SHUF_ZCWD );
-    res0 = spu_shuffle( tmp0, mat->col1.vec128, _VECTORMATH_SHUF_XAYB );
-    res1 = spu_shuffle( tmp0, mat->col1.vec128, _VECTORMATH_SHUF_ZBW0 );
-    res2 = spu_shuffle( tmp1, mat->col1.vec128, _VECTORMATH_SHUF_XCY0 );
-    result->col0.vec128 = res0;
-    result->col1.vec128 = res1;
-    result->col2.vec128 = res2;
-}
-
-static inline void vmathM3Inverse( VmathMatrix3 *result, const VmathMatrix3 *mat )
-{
-    vec_float4 tmp0, tmp1, tmp2, tmp3, tmp4, dot, invdet, inv0, inv1, inv2;
-    tmp2 = _vmathVfCross( mat->col0.vec128, mat->col1.vec128 );
-    tmp0 = _vmathVfCross( mat->col1.vec128, mat->col2.vec128 );
-    tmp1 = _vmathVfCross( mat->col2.vec128, mat->col0.vec128 );
-    dot = _vmathVfDot3( tmp2, mat->col2.vec128 );
-    dot = spu_shuffle( dot, dot, (vec_uchar16)spu_splats(0x00010203) );
-    invdet = recipf4( dot );
-    tmp3 = spu_shuffle( tmp0, tmp2, _VECTORMATH_SHUF_XAYB );
-    tmp4 = spu_shuffle( tmp0, tmp2, _VECTORMATH_SHUF_ZCWD );
-    inv0 = spu_shuffle( tmp3, tmp1, _VECTORMATH_SHUF_XAYB );
-    inv1 = spu_shuffle( tmp3, tmp1, _VECTORMATH_SHUF_ZBW0 );
-    inv2 = spu_shuffle( tmp4, tmp1, _VECTORMATH_SHUF_XCY0 );
-    inv0 = spu_mul( inv0, invdet );
-    inv1 = spu_mul( inv1, invdet );
-    inv2 = spu_mul( inv2, invdet );
-    result->col0.vec128 = inv0;
-    result->col1.vec128 = inv1;
-    result->col2.vec128 = inv2;
-}
-
-static inline float vmathM3Determinant( const VmathMatrix3 *mat )
-{
-    VmathVector3 tmpV3_0;
-    vmathV3Cross( &tmpV3_0, &mat->col0, &mat->col1 );
-    return vmathV3Dot( &mat->col2, &tmpV3_0 );
-}
-
-static inline void vmathM3Add( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 )
-{
-    vmathV3Add( &result->col0, &mat0->col0, &mat1->col0 );
-    vmathV3Add( &result->col1, &mat0->col1, &mat1->col1 );
-    vmathV3Add( &result->col2, &mat0->col2, &mat1->col2 );
-}
-
-static inline void vmathM3Sub( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 )
-{
-    vmathV3Sub( &result->col0, &mat0->col0, &mat1->col0 );
-    vmathV3Sub( &result->col1, &mat0->col1, &mat1->col1 );
-    vmathV3Sub( &result->col2, &mat0->col2, &mat1->col2 );
-}
-
-static inline void vmathM3Neg( VmathMatrix3 *result, const VmathMatrix3 *mat )
-{
-    vmathV3Neg( &result->col0, &mat->col0 );
-    vmathV3Neg( &result->col1, &mat->col1 );
-    vmathV3Neg( &result->col2, &mat->col2 );
-}
-
-static inline void vmathM3AbsPerElem( VmathMatrix3 *result, const VmathMatrix3 *mat )
-{
-    vmathV3AbsPerElem( &result->col0, &mat->col0 );
-    vmathV3AbsPerElem( &result->col1, &mat->col1 );
-    vmathV3AbsPerElem( &result->col2, &mat->col2 );
-}
-
-static inline void vmathM3ScalarMul( VmathMatrix3 *result, const VmathMatrix3 *mat, float scalar )
-{
-    vmathV3ScalarMul( &result->col0, &mat->col0, scalar );
-    vmathV3ScalarMul( &result->col1, &mat->col1, scalar );
-    vmathV3ScalarMul( &result->col2, &mat->col2, scalar );
-}
-
-static inline void vmathM3MulV3( VmathVector3 *result, const VmathMatrix3 *mat, const VmathVector3 *vec )
-{
-    vec_float4 res;
-    vec_float4 xxxx, yyyy, zzzz;
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
-    xxxx = spu_shuffle( vec->vec128, vec->vec128, shuffle_xxxx );
-    yyyy = spu_shuffle( vec->vec128, vec->vec128, shuffle_yyyy );
-    zzzz = spu_shuffle( vec->vec128, vec->vec128, shuffle_zzzz );
-    res = spu_mul( mat->col0.vec128, xxxx );
-    res = spu_madd( mat->col1.vec128, yyyy, res );
-    res = spu_madd( mat->col2.vec128, zzzz, res );
-    result->vec128 = res;
-}
-
-static inline void vmathM3Mul( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 )
-{
-    VmathMatrix3 tmpResult;
-    vmathM3MulV3( &tmpResult.col0, mat0, &mat1->col0 );
-    vmathM3MulV3( &tmpResult.col1, mat0, &mat1->col1 );
-    vmathM3MulV3( &tmpResult.col2, mat0, &mat1->col2 );
-    vmathM3Copy( result, &tmpResult );
-}
-
-static inline void vmathM3MulPerElem( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 )
-{
-    vmathV3MulPerElem( &result->col0, &mat0->col0, &mat1->col0 );
-    vmathV3MulPerElem( &result->col1, &mat0->col1, &mat1->col1 );
-    vmathV3MulPerElem( &result->col2, &mat0->col2, &mat1->col2 );
-}
-
-static inline void vmathM3MakeIdentity( VmathMatrix3 *result )
-{
-    vmathV3MakeXAxis( &result->col0 );
-    vmathV3MakeYAxis( &result->col1 );
-    vmathV3MakeZAxis( &result->col2 );
-}
-
-static inline void vmathM3MakeRotationX( VmathMatrix3 *result, float radians )
-{
-    vec_float4 s, c, res1, res2;
-    vec_uint4 select_y, select_z;
-    vec_float4 zero;
-    select_y = (vec_uint4)spu_maskb(0x0f00);
-    select_z = (vec_uint4)spu_maskb(0x00f0);
-    zero = spu_splats(0.0f);
-    sincosf4( spu_splats(radians), &s, &c );
-    res1 = spu_sel( zero, c, select_y );
-    res1 = spu_sel( res1, s, select_z );
-    res2 = spu_sel( zero, negatef4(s), select_y );
-    res2 = spu_sel( res2, c, select_z );
-    vmathV3MakeXAxis( &result->col0 );
-    result->col1.vec128 = res1;
-    result->col2.vec128 = res2;
-}
-
-static inline void vmathM3MakeRotationY( VmathMatrix3 *result, float radians )
-{
-    vec_float4 s, c, res0, res2;
-    vec_uint4 select_x, select_z;
-    vec_float4 zero;
-    select_x = (vec_uint4)spu_maskb(0xf000);
-    select_z = (vec_uint4)spu_maskb(0x00f0);
-    zero = spu_splats(0.0f);
-    sincosf4( spu_splats(radians), &s, &c );
-    res0 = spu_sel( zero, c, select_x );
-    res0 = spu_sel( res0, negatef4(s), select_z );
-    res2 = spu_sel( zero, s, select_x );
-    res2 = spu_sel( res2, c, select_z );
-    result->col0.vec128 = res0;
-    vmathV3MakeYAxis( &result->col1 );
-    result->col2.vec128 = res2;
-}
-
-static inline void vmathM3MakeRotationZ( VmathMatrix3 *result, float radians )
-{
-    vec_float4 s, c, res0, res1;
-    vec_uint4 select_x, select_y;
-    vec_float4 zero;
-    select_x = (vec_uint4)spu_maskb(0xf000);
-    select_y = (vec_uint4)spu_maskb(0x0f00);
-    zero = spu_splats(0.0f);
-    sincosf4( spu_splats(radians), &s, &c );
-    res0 = spu_sel( zero, c, select_x );
-    res0 = spu_sel( res0, s, select_y );
-    res1 = spu_sel( zero, negatef4(s), select_x );
-    res1 = spu_sel( res1, c, select_y );
-    result->col0.vec128 = res0;
-    result->col1.vec128 = res1;
-    vmathV3MakeZAxis( &result->col2 );
-}
-
-static inline void vmathM3MakeRotationZYX( VmathMatrix3 *result, const VmathVector3 *radiansXYZ )
-{
-    vec_float4 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    angles = radiansXYZ->vec128;
-    angles = spu_insert( 0.0f, angles, 3 );
-    sincosf4( angles, &s, &c );
-    negS = negatef4( s );
-    Z0 = spu_shuffle( s, c, _VECTORMATH_SHUF_CZD0 );
-    Z1 = spu_shuffle( c, negS, _VECTORMATH_SHUF_CZD0 );
-    Y0 = spu_shuffle( negS, c, _VECTORMATH_SHUF_BBY0 );
-    Y1 = spu_shuffle( c, s, _VECTORMATH_SHUF_BBY0 );
-    X0 = spu_shuffle( s, s, shuffle_xxxx );
-    X1 = spu_shuffle( c, c, shuffle_xxxx );
-    tmp = spu_mul( Z0, Y1 );
-    result->col0.vec128 = spu_mul( Z0, Y0 );
-    result->col1.vec128 = spu_madd( Z1, X1, spu_mul( tmp, X0 ) );
-    result->col2.vec128 = spu_nmsub( Z1, X0, spu_mul( tmp, X1 ) );
-}
-
-static inline void vmathM3MakeRotationAxis( VmathMatrix3 *result, float radians, const VmathVector3 *unitVec )
-{
-    vec_float4 axis, s, c, oneMinusC, axisS, negAxisS, xxxx, yyyy, zzzz, tmp0, tmp1, tmp2;
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
-    axis = unitVec->vec128;
-    sincosf4( spu_splats( radians ), &s, &c );
-    xxxx = spu_shuffle( axis, axis, shuffle_xxxx );
-    yyyy = spu_shuffle( axis, axis, shuffle_yyyy );
-    zzzz = spu_shuffle( axis, axis, shuffle_zzzz );
-    oneMinusC = spu_sub( spu_splats(1.0f), c );
-    axisS = spu_mul( axis, s );
-    negAxisS = negatef4( axisS );
-    tmp0 = spu_shuffle( axisS, negAxisS, _VECTORMATH_SHUF_0ZB0 );
-    tmp1 = spu_shuffle( axisS, negAxisS, _VECTORMATH_SHUF_C0X0 );
-    tmp2 = spu_shuffle( axisS, negAxisS, _VECTORMATH_SHUF_YA00 );
-    tmp0 = spu_sel( tmp0, c, (vec_uint4)spu_maskb(0xf000) );
-    tmp1 = spu_sel( tmp1, c, (vec_uint4)spu_maskb(0x0f00) );
-    tmp2 = spu_sel( tmp2, c, (vec_uint4)spu_maskb(0x00f0) );
-    result->col0.vec128 = spu_madd( spu_mul( axis, xxxx ), oneMinusC, tmp0 );
-    result->col1.vec128 = spu_madd( spu_mul( axis, yyyy ), oneMinusC, tmp1 );
-    result->col2.vec128 = spu_madd( spu_mul( axis, zzzz ), oneMinusC, tmp2 );
-}
-
-static inline void vmathM3MakeRotationQ( VmathMatrix3 *result, const VmathQuat *unitQuat )
-{
-    vmathM3MakeFromQ( result, unitQuat );
-}
-
-static inline void vmathM3MakeScale( VmathMatrix3 *result, const VmathVector3 *scaleVec )
-{
-    vec_float4 zero = spu_splats(0.0f);
-    result->col0.vec128 = spu_sel( zero, scaleVec->vec128, (vec_uint4)spu_maskb(0xf000) );
-    result->col1.vec128 = spu_sel( zero, scaleVec->vec128, (vec_uint4)spu_maskb(0x0f00) );
-    result->col2.vec128 = spu_sel( zero, scaleVec->vec128, (vec_uint4)spu_maskb(0x00f0) );
-}
-
-static inline void vmathM3AppendScale( VmathMatrix3 *result, const VmathMatrix3 *mat, const VmathVector3 *scaleVec )
-{
-    vmathV3ScalarMul( &result->col0, &mat->col0, vmathV3GetX( scaleVec ) );
-    vmathV3ScalarMul( &result->col1, &mat->col1, vmathV3GetY( scaleVec ) );
-    vmathV3ScalarMul( &result->col2, &mat->col2, vmathV3GetZ( scaleVec ) );
-}
-
-static inline void vmathM3PrependScale( VmathMatrix3 *result, const VmathVector3 *scaleVec, const VmathMatrix3 *mat )
-{
-    vmathV3MulPerElem( &result->col0, &mat->col0, scaleVec );
-    vmathV3MulPerElem( &result->col1, &mat->col1, scaleVec );
-    vmathV3MulPerElem( &result->col2, &mat->col2, scaleVec );
-}
-
-static inline void vmathM3Select( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1, unsigned int select1 )
-{
-    vmathV3Select( &result->col0, &mat0->col0, &mat1->col0, select1 );
-    vmathV3Select( &result->col1, &mat0->col1, &mat1->col1, select1 );
-    vmathV3Select( &result->col2, &mat0->col2, &mat1->col2, select1 );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathM3Print( const VmathMatrix3 *mat )
-{
-    VmathVector3 tmpV3_0, tmpV3_1, tmpV3_2;
-    vmathM3GetRow( &tmpV3_0, mat, 0 );
-    vmathV3Print( &tmpV3_0 );
-    vmathM3GetRow( &tmpV3_1, mat, 1 );
-    vmathV3Print( &tmpV3_1 );
-    vmathM3GetRow( &tmpV3_2, mat, 2 );
-    vmathV3Print( &tmpV3_2 );
-}
-
-static inline void vmathM3Prints( const VmathMatrix3 *mat, const char *name )
-{
-    printf("%s:\n", name);
-    vmathM3Print( mat );
-}
-
-#endif
-
-static inline void vmathM4Copy( VmathMatrix4 *result, const VmathMatrix4 *mat )
-{
-    vmathV4Copy( &result->col0, &mat->col0 );
-    vmathV4Copy( &result->col1, &mat->col1 );
-    vmathV4Copy( &result->col2, &mat->col2 );
-    vmathV4Copy( &result->col3, &mat->col3 );
-}
-
-static inline void vmathM4MakeFromScalar( VmathMatrix4 *result, float scalar )
-{
-    vmathV4MakeFromScalar( &result->col0, scalar );
-    vmathV4MakeFromScalar( &result->col1, scalar );
-    vmathV4MakeFromScalar( &result->col2, scalar );
-    vmathV4MakeFromScalar( &result->col3, scalar );
-}
-
-static inline void vmathM4MakeFromT3( VmathMatrix4 *result, const VmathTransform3 *mat )
-{
-    vmathV4MakeFromV3Scalar( &result->col0, &mat->col0, 0.0f );
-    vmathV4MakeFromV3Scalar( &result->col1, &mat->col1, 0.0f );
-    vmathV4MakeFromV3Scalar( &result->col2, &mat->col2, 0.0f );
-    vmathV4MakeFromV3Scalar( &result->col3, &mat->col3, 1.0f );
-}
-
-static inline void vmathM4MakeFromCols( VmathMatrix4 *result, const VmathVector4 *_col0, const VmathVector4 *_col1, const VmathVector4 *_col2, const VmathVector4 *_col3 )
-{
-    vmathV4Copy( &result->col0, _col0 );
-    vmathV4Copy( &result->col1, _col1 );
-    vmathV4Copy( &result->col2, _col2 );
-    vmathV4Copy( &result->col3, _col3 );
-}
-
-static inline void vmathM4MakeFromM3V3( VmathMatrix4 *result, const VmathMatrix3 *mat, const VmathVector3 *translateVec )
-{
-    vmathV4MakeFromV3Scalar( &result->col0, &mat->col0, 0.0f );
-    vmathV4MakeFromV3Scalar( &result->col1, &mat->col1, 0.0f );
-    vmathV4MakeFromV3Scalar( &result->col2, &mat->col2, 0.0f );
-    vmathV4MakeFromV3Scalar( &result->col3, translateVec, 1.0f );
-}
-
-static inline void vmathM4MakeFromQV3( VmathMatrix4 *result, const VmathQuat *unitQuat, const VmathVector3 *translateVec )
-{
-    VmathMatrix3 mat;
-    vmathM3MakeFromQ( &mat, unitQuat );
-    vmathV4MakeFromV3Scalar( &result->col0, &mat.col0, 0.0f );
-    vmathV4MakeFromV3Scalar( &result->col1, &mat.col1, 0.0f );
-    vmathV4MakeFromV3Scalar( &result->col2, &mat.col2, 0.0f );
-    vmathV4MakeFromV3Scalar( &result->col3, translateVec, 1.0f );
-}
-
-static inline void vmathM4SetCol0( VmathMatrix4 *result, const VmathVector4 *_col0 )
-{
-    vmathV4Copy( &result->col0, _col0 );
-}
-
-static inline void vmathM4SetCol1( VmathMatrix4 *result, const VmathVector4 *_col1 )
-{
-    vmathV4Copy( &result->col1, _col1 );
-}
-
-static inline void vmathM4SetCol2( VmathMatrix4 *result, const VmathVector4 *_col2 )
-{
-    vmathV4Copy( &result->col2, _col2 );
-}
-
-static inline void vmathM4SetCol3( VmathMatrix4 *result, const VmathVector4 *_col3 )
-{
-    vmathV4Copy( &result->col3, _col3 );
-}
-
-static inline void vmathM4SetCol( VmathMatrix4 *result, int col, const VmathVector4 *vec )
-{
-    vmathV4Copy( (&result->col0 + col), vec );
-}
-
-static inline void vmathM4SetRow( VmathMatrix4 *result, int row, const VmathVector4 *vec )
-{
-    vmathV4SetElem( &result->col0, row, vmathV4GetElem( vec, 0 ) );
-    vmathV4SetElem( &result->col1, row, vmathV4GetElem( vec, 1 ) );
-    vmathV4SetElem( &result->col2, row, vmathV4GetElem( vec, 2 ) );
-    vmathV4SetElem( &result->col3, row, vmathV4GetElem( vec, 3 ) );
-}
-
-static inline void vmathM4SetElem( VmathMatrix4 *result, int col, int row, float val )
-{
-    VmathVector4 tmpV3_0;
-    vmathM4GetCol( &tmpV3_0, result, col );
-    vmathV4SetElem( &tmpV3_0, row, val );
-    vmathM4SetCol( result, col, &tmpV3_0 );
-}
-
-static inline float vmathM4GetElem( const VmathMatrix4 *mat, int col, int row )
-{
-    VmathVector4 tmpV4_0;
-    vmathM4GetCol( &tmpV4_0, mat, col );
-    return vmathV4GetElem( &tmpV4_0, row );
-}
-
-static inline void vmathM4GetCol0( VmathVector4 *result, const VmathMatrix4 *mat )
-{
-    vmathV4Copy( result, &mat->col0 );
-}
-
-static inline void vmathM4GetCol1( VmathVector4 *result, const VmathMatrix4 *mat )
-{
-    vmathV4Copy( result, &mat->col1 );
-}
-
-static inline void vmathM4GetCol2( VmathVector4 *result, const VmathMatrix4 *mat )
-{
-    vmathV4Copy( result, &mat->col2 );
-}
-
-static inline void vmathM4GetCol3( VmathVector4 *result, const VmathMatrix4 *mat )
-{
-    vmathV4Copy( result, &mat->col3 );
-}
-
-static inline void vmathM4GetCol( VmathVector4 *result, const VmathMatrix4 *mat, int col )
-{
-    vmathV4Copy( result, (&mat->col0 + col) );
-}
-
-static inline void vmathM4GetRow( VmathVector4 *result, const VmathMatrix4 *mat, int row )
-{
-    vmathV4MakeFromElems( result, vmathV4GetElem( &mat->col0, row ), vmathV4GetElem( &mat->col1, row ), vmathV4GetElem( &mat->col2, row ), vmathV4GetElem( &mat->col3, row ) );
-}
-
-static inline void vmathM4Transpose( VmathMatrix4 *result, const VmathMatrix4 *mat )
-{
-    vec_float4 tmp0, tmp1, tmp2, tmp3, res0, res1, res2, res3;
-    tmp0 = spu_shuffle( mat->col0.vec128, mat->col2.vec128, _VECTORMATH_SHUF_XAYB );
-    tmp1 = spu_shuffle( mat->col1.vec128, mat->col3.vec128, _VECTORMATH_SHUF_XAYB );
-    tmp2 = spu_shuffle( mat->col0.vec128, mat->col2.vec128, _VECTORMATH_SHUF_ZCWD );
-    tmp3 = spu_shuffle( mat->col1.vec128, mat->col3.vec128, _VECTORMATH_SHUF_ZCWD );
-    res0 = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_XAYB );
-    res1 = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_ZCWD );
-    res2 = spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_XAYB );
-    res3 = spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_ZCWD );
-    result->col0.vec128 = res0;
-    result->col1.vec128 = res1;
-    result->col2.vec128 = res2;
-    result->col3.vec128 = res3;
-}
-
-static inline void vmathM4Inverse( VmathMatrix4 *result, const VmathMatrix4 *mat )
-{
-    /* function implementation based on code from STIDC SDK:           */
-    /* --------------------------------------------------------------  */
-    /* PLEASE DO NOT MODIFY THIS SECTION                               */
-    /* This prolog section is automatically generated.                 */
-    /*                                                                 */
-    /* (C)Copyright                                                    */
-    /* Sony Computer Entertainment, Inc.,                              */
-    /* Toshiba Corporation,                                            */
-    /* International Business Machines Corporation,                    */
-    /* 2001,2002.                                                      */
-    /* S/T/I Confidential Information                                  */
-    /* --------------------------------------------------------------  */
-    vec_float4 in0, in1, in2, in3;
-    vec_float4 tmp0, tmp1, tmp2, tmp3;
-    vec_float4 cof0, cof1, cof2, cof3;
-    vec_float4 t0, t1, t2, t3;
-    vec_float4 t01, t02, t03, t12, t23;
-    vec_float4 t1r, t2r;
-    vec_float4 t01r, t02r, t03r, t12r, t23r;
-    vec_float4 t1r3, t1r3r;
-    vec_float4 det, det1, det2, det3, invdet;
-    in0 = mat->col0.vec128;
-    in1 = mat->col1.vec128;
-    in2 = mat->col2.vec128;
-    in3 = mat->col3.vec128;
-    /* Perform transform of the input matrix of the form:
-     *    A B C D
-     *    E F G H
-     *    I J K L
-     *    M N O P
-     *
-     * The pseudo transpose of the input matrix is trans:
-     *    A E I M
-     *    J N B F
-     *    C G K O
-     *    L P D H
-     */
-    tmp0 = spu_shuffle(in0, in1, _VECTORMATH_SHUF_XAZC);    /* A E C G */
-    tmp1 = spu_shuffle(in2, in3, _VECTORMATH_SHUF_XAZC);    /* I M K O */
-    tmp2 = spu_shuffle(in0, in1, _VECTORMATH_SHUF_YBWD);    /* B F D H */
-    tmp3 = spu_shuffle(in2, in3, _VECTORMATH_SHUF_YBWD);    /* J N L P */
-    t0 = spu_shuffle(tmp0, tmp1, _VECTORMATH_SHUF_XYAB);    /* A E I M */
-    t1 = spu_shuffle(tmp3, tmp2, _VECTORMATH_SHUF_XYAB);    /* J N B F */
-    t2 = spu_shuffle(tmp0, tmp1, _VECTORMATH_SHUF_ZWCD);    /* C G K O */
-    t3 = spu_shuffle(tmp3, tmp2, _VECTORMATH_SHUF_ZWCD);    /* L P D H */
-    /* Generate a cofactor matrix. The computed cofactors reside in
-     * cof0, cof1, cof2, cof3.
-     */
-    t23 = spu_mul(t2, t3);                        /* CL GP KD OH */
-    t23 = spu_shuffle(t23, t23, _VECTORMATH_SHUF_YXWZ);     /* GP CL OH KD */
-    cof0 = spu_mul(t1, t23);                      /* JGP NCL BOH FKD */
-    cof1 = spu_mul(t0, t23);                      /* AGP ECL IOH MKD */
-    t23r = spu_rlqwbyte(t23, 8);                  /* OH KD GP CL */
-    cof0 = spu_msub(t1, t23r, cof0);              /* JOH NKD BGP FCL  - cof0 */
-    cof1 = spu_msub(t0, t23r, cof1);              /* AOH EKD IGP MCL  - cof1 */
-    cof1 = spu_rlqwbyte(cof1, 8);                 /* IGP MCL AOH EKD - IOH MKD AGP ECL */
-
-    t12 = spu_mul(t1, t2);                        /* JC NG BK FO */
-    t12 = spu_shuffle(t12, t12, _VECTORMATH_SHUF_YXWZ);     /* NG JC FO BK */
-    cof0 = spu_madd(t3, t12, cof0);               /* LNG PJC DFO HBK + cof0 */
-    cof3 = spu_mul(t0, t12);                      /* ANG EJC IFO MBK */
-    t12r = spu_rlqwbyte(t12, 8);                  /* FO BK NG JC */
-    cof0 = spu_nmsub(t3, t12r, cof0);             /* cof0 - LFO PBK DNG HJC */
-    cof3 = spu_msub(t0, t12r, cof3);              /* AFO EBK ING MJC - cof3 */
-    cof3 = spu_rlqwbyte(cof3, 8);                 /* ING MJC AFO EBK - IFO MBK ANG EJC */
-    t1r = spu_rlqwbyte(t1, 8);                    /* B F J N */
-    t2r = spu_rlqwbyte(t2, 8);                    /* K O C G */
-    t1r3 = spu_mul(t1r, t3);                      /* BL FP JD NH */
-    t1r3 = spu_shuffle(t1r3, t1r3, _VECTORMATH_SHUF_YXWZ);  /* FP BL NH JD */
-    cof0 = spu_madd(t2r, t1r3, cof0);             /* KFP OBL CNH GJD + cof0 */
-    cof2 = spu_mul(t0, t1r3);                     /* AFP EBL INH MJD */
-    t1r3r = spu_rlqwbyte(t1r3, 8);                /* NH JD FP BL */
-    cof0 = spu_nmsub(t2r, t1r3r, cof0);           /* cof0 - KNH OJD CFP GBL */
-    cof2 = spu_msub(t0, t1r3r, cof2);             /* ANH EJD IFP MBL - cof2 */
-    cof2 = spu_rlqwbyte(cof2, 8);                 /* IFP MBL ANH EJD - INH MJD AFP EBL */
-    t01 = spu_mul(t0, t1);                                /* AJ EN IB MF */
-    t01 = spu_shuffle(t01, t01, _VECTORMATH_SHUF_YXWZ);     /* EN AJ MF IB */
-    cof2 = spu_madd(t3, t01, cof2);               /* LEN PAJ DMF HIB + cof2 */
-    cof3 = spu_msub(t2r, t01, cof3);              /* KEN OAJ CMF GIB - cof3 */
-    t01r = spu_rlqwbyte(t01, 8);                  /* MF IB EN AJ */
-    cof2 = spu_msub(t3, t01r, cof2);              /* LMF PIB DEN HAJ - cof2 */
-    cof3 = spu_nmsub(t2r, t01r, cof3);            /* cof3 - KMF OIB CEN GAJ */
-    t03 = spu_mul(t0, t3);                                /* AL EP ID MH */
-    t03 = spu_shuffle(t03, t03, _VECTORMATH_SHUF_YXWZ);     /* EP AL MH ID */
-    cof1 = spu_nmsub(t2r, t03, cof1);             /* cof1 - KEP OAL CMH GID */
-    cof2 = spu_madd(t1, t03, cof2);               /* JEP NAL BMH FID + cof2 */
-    t03r = spu_rlqwbyte(t03, 8);                  /* MH ID EP AL */
-    cof1 = spu_madd(t2r, t03r, cof1);             /* KMH OID CEP GAL + cof1 */
-    cof2 = spu_nmsub(t1, t03r, cof2);             /* cof2 - JMH NID BEP FAL */
-    t02 = spu_mul(t0, t2r);                       /* AK EO IC MG */
-    t02 = spu_shuffle(t02, t02, _VECTORMATH_SHUF_YXWZ);     /* E0 AK MG IC */
-    cof1 = spu_madd(t3, t02, cof1);               /* LEO PAK DMG HIC + cof1 */
-    cof3 = spu_nmsub(t1, t02, cof3);              /* cof3 - JEO NAK BMG FIC */
-    t02r = spu_rlqwbyte(t02, 8);                  /* MG IC EO AK */
-    cof1 = spu_nmsub(t3, t02r, cof1);             /* cof1 - LMG PIC DEO HAK */
-    cof3 = spu_madd(t1, t02r, cof3);              /* JMG NIC BEO FAK + cof3 */
-    /* Compute the determinant of the matrix
-     *
-     * det = sum_across(t0 * cof0);
-     *
-     * We perform a sum across the entire vector so that
-     * we don't have to splat the result when multiplying the
-     * cofactors by the inverse of the determinant.
-     */
-    det  = spu_mul(t0, cof0);
-    det1 = spu_rlqwbyte(det, 4);
-    det2 = spu_rlqwbyte(det, 8);
-    det3 = spu_rlqwbyte(det, 12);
-    det  = spu_add(det, det1);
-    det2 = spu_add(det2, det3);
-    det  = spu_add(det, det2);
-    /* Compute the reciprocal of the determinant.
-     */
-    invdet = recipf4(det);
-    /* Multiply the cofactors by the reciprocal of the determinant.
-     */
-    result->col0.vec128 = spu_mul(cof0, invdet);
-    result->col1.vec128 = spu_mul(cof1, invdet);
-    result->col2.vec128 = spu_mul(cof2, invdet);
-    result->col3.vec128 = spu_mul(cof3, invdet);
-}
-
-static inline void vmathM4AffineInverse( VmathMatrix4 *result, const VmathMatrix4 *mat )
-{
-    VmathTransform3 affineMat, tmpT3_0;
-    VmathVector3 tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3;
-    vmathV4GetXYZ( &tmpV3_0, &mat->col0 );
-    vmathT3SetCol0( &affineMat, &tmpV3_0 );
-    vmathV4GetXYZ( &tmpV3_1, &mat->col1 );
-    vmathT3SetCol1( &affineMat, &tmpV3_1 );
-    vmathV4GetXYZ( &tmpV3_2, &mat->col2 );
-    vmathT3SetCol2( &affineMat, &tmpV3_2 );
-    vmathV4GetXYZ( &tmpV3_3, &mat->col3 );
-    vmathT3SetCol3( &affineMat, &tmpV3_3 );
-    vmathT3Inverse( &tmpT3_0, &affineMat );
-    vmathM4MakeFromT3( result, &tmpT3_0 );
-}
-
-static inline void vmathM4OrthoInverse( VmathMatrix4 *result, const VmathMatrix4 *mat )
-{
-    VmathTransform3 affineMat, tmpT3_0;
-    VmathVector3 tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3;
-    vmathV4GetXYZ( &tmpV3_0, &mat->col0 );
-    vmathT3SetCol0( &affineMat, &tmpV3_0 );
-    vmathV4GetXYZ( &tmpV3_1, &mat->col1 );
-    vmathT3SetCol1( &affineMat, &tmpV3_1 );
-    vmathV4GetXYZ( &tmpV3_2, &mat->col2 );
-    vmathT3SetCol2( &affineMat, &tmpV3_2 );
-    vmathV4GetXYZ( &tmpV3_3, &mat->col3 );
-    vmathT3SetCol3( &affineMat, &tmpV3_3 );
-    vmathT3OrthoInverse( &tmpT3_0, &affineMat );
-    vmathM4MakeFromT3( result, &tmpT3_0 );
-}
-
-static inline float vmathM4Determinant( const VmathMatrix4 *mat )
-{
-    /* function implementation based on code from STIDC SDK:           */
-    /* --------------------------------------------------------------  */
-    /* PLEASE DO NOT MODIFY THIS SECTION                               */
-    /* This prolog section is automatically generated.                 */
-    /*                                                                 */
-    /* (C)Copyright                                                    */
-    /* Sony Computer Entertainment, Inc.,                              */
-    /* Toshiba Corporation,                                            */
-    /* International Business Machines Corporation,                    */
-    /* 2001,2002.                                                      */
-    /* S/T/I Confidential Information                                  */
-    /* --------------------------------------------------------------  */
-    vec_float4 in0, in1, in2, in3;
-    vec_float4 tmp0, tmp1, tmp2, tmp3;
-    vec_float4 cof0;
-    vec_float4 t0, t1, t2, t3;
-    vec_float4 t12, t23;
-    vec_float4 t1r, t2r;
-    vec_float4 t12r, t23r;
-    vec_float4 t1r3, t1r3r;
-    in0 = mat->col0.vec128;
-    in1 = mat->col1.vec128;
-    in2 = mat->col2.vec128;
-    in3 = mat->col3.vec128;
-    /* Perform transform of the input matrix of the form:
-     *    A B C D
-     *    E F G H
-     *    I J K L
-     *    M N O P
-     *
-     * The pseudo transpose of the input matrix is trans:
-     *    A E I M
-     *    J N B F
-     *    C G K O
-     *    L P D H
-     */
-    tmp0 = spu_shuffle(in0, in1, _VECTORMATH_SHUF_XAZC);    /* A E C G */
-    tmp1 = spu_shuffle(in2, in3, _VECTORMATH_SHUF_XAZC);    /* I M K O */
-    tmp2 = spu_shuffle(in0, in1, _VECTORMATH_SHUF_YBWD);    /* B F D H */
-    tmp3 = spu_shuffle(in2, in3, _VECTORMATH_SHUF_YBWD);    /* J N L P */
-    t0 = spu_shuffle(tmp0, tmp1, _VECTORMATH_SHUF_XYAB);    /* A E I M */
-    t1 = spu_shuffle(tmp3, tmp2, _VECTORMATH_SHUF_XYAB);    /* J N B F */
-    t2 = spu_shuffle(tmp0, tmp1, _VECTORMATH_SHUF_ZWCD);    /* C G K O */
-    t3 = spu_shuffle(tmp3, tmp2, _VECTORMATH_SHUF_ZWCD);    /* L P D H */
-    /* Generate a cofactor matrix. The computed cofactors reside in
-     * cof0, cof1, cof2, cof3.
-     */
-    t23 = spu_mul(t2, t3);                        /* CL GP KD OH */
-    t23 = spu_shuffle(t23, t23, _VECTORMATH_SHUF_YXWZ);     /* GP CL OH KD */
-    cof0 = spu_mul(t1, t23);                      /* JGP NCL BOH FKD */
-    t23r = spu_rlqwbyte(t23, 8);                  /* OH KD GP CL */
-    cof0 = spu_msub(t1, t23r, cof0);              /* JOH NKD BGP FCL  - cof0 */
-
-    t12 = spu_mul(t1, t2);                        /* JC NG BK FO */
-    t12 = spu_shuffle(t12, t12, _VECTORMATH_SHUF_YXWZ);     /* NG JC FO BK */
-    cof0 = spu_madd(t3, t12, cof0);               /* LNG PJC DFO HBK + cof0 */
-    t12r = spu_rlqwbyte(t12, 8);                  /* FO BK NG JC */
-    cof0 = spu_nmsub(t3, t12r, cof0);             /* cof0 - LFO PBK DNG HJC */
-    t1r = spu_rlqwbyte(t1, 8);                    /* B F J N */
-    t2r = spu_rlqwbyte(t2, 8);                    /* K O C G */
-    t1r3 = spu_mul(t1r, t3);                      /* BL FP JD NH */
-    t1r3 = spu_shuffle(t1r3, t1r3, _VECTORMATH_SHUF_YXWZ);  /* FP BL NH JD */
-    cof0 = spu_madd(t2r, t1r3, cof0);             /* KFP OBL CNH GJD + cof0 */
-    t1r3r = spu_rlqwbyte(t1r3, 8);                /* NH JD FP BL */
-    cof0 = spu_nmsub(t2r, t1r3r, cof0);           /* cof0 - KNH OJD CFP GBL */
-    return spu_extract( _vmathVfDot4(t0,cof0), 0 );
-}
-
-static inline void vmathM4Add( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 )
-{
-    vmathV4Add( &result->col0, &mat0->col0, &mat1->col0 );
-    vmathV4Add( &result->col1, &mat0->col1, &mat1->col1 );
-    vmathV4Add( &result->col2, &mat0->col2, &mat1->col2 );
-    vmathV4Add( &result->col3, &mat0->col3, &mat1->col3 );
-}
-
-static inline void vmathM4Sub( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 )
-{
-    vmathV4Sub( &result->col0, &mat0->col0, &mat1->col0 );
-    vmathV4Sub( &result->col1, &mat0->col1, &mat1->col1 );
-    vmathV4Sub( &result->col2, &mat0->col2, &mat1->col2 );
-    vmathV4Sub( &result->col3, &mat0->col3, &mat1->col3 );
-}
-
-static inline void vmathM4Neg( VmathMatrix4 *result, const VmathMatrix4 *mat )
-{
-    vmathV4Neg( &result->col0, &mat->col0 );
-    vmathV4Neg( &result->col1, &mat->col1 );
-    vmathV4Neg( &result->col2, &mat->col2 );
-    vmathV4Neg( &result->col3, &mat->col3 );
-}
-
-static inline void vmathM4AbsPerElem( VmathMatrix4 *result, const VmathMatrix4 *mat )
-{
-    vmathV4AbsPerElem( &result->col0, &mat->col0 );
-    vmathV4AbsPerElem( &result->col1, &mat->col1 );
-    vmathV4AbsPerElem( &result->col2, &mat->col2 );
-    vmathV4AbsPerElem( &result->col3, &mat->col3 );
-}
-
-static inline void vmathM4ScalarMul( VmathMatrix4 *result, const VmathMatrix4 *mat, float scalar )
-{
-    vmathV4ScalarMul( &result->col0, &mat->col0, scalar );
-    vmathV4ScalarMul( &result->col1, &mat->col1, scalar );
-    vmathV4ScalarMul( &result->col2, &mat->col2, scalar );
-    vmathV4ScalarMul( &result->col3, &mat->col3, scalar );
-}
-
-static inline void vmathM4MulV4( VmathVector4 *result, const VmathMatrix4 *mat, const VmathVector4 *vec )
-{
-    vec_float4 tmp0, tmp1, res;
-    vec_float4 xxxx, yyyy, zzzz, wwww;
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
-    vec_uchar16 shuffle_wwww = (vec_uchar16)spu_splats((int)0x0c0d0e0f);
-    xxxx = spu_shuffle( vec->vec128, vec->vec128, shuffle_xxxx );
-    yyyy = spu_shuffle( vec->vec128, vec->vec128, shuffle_yyyy );
-    zzzz = spu_shuffle( vec->vec128, vec->vec128, shuffle_zzzz );
-    wwww = spu_shuffle( vec->vec128, vec->vec128, shuffle_wwww );
-    tmp0 = spu_mul( mat->col0.vec128, xxxx );
-    tmp1 = spu_mul( mat->col1.vec128, yyyy );
-    tmp0 = spu_madd( mat->col2.vec128, zzzz, tmp0 );
-    tmp1 = spu_madd( mat->col3.vec128, wwww, tmp1 );
-    res = spu_add( tmp0, tmp1 );
-    result->vec128 = res;
-}
-
-static inline void vmathM4MulV3( VmathVector4 *result, const VmathMatrix4 *mat, const VmathVector3 *vec )
-{
-    vec_float4 res;
-    vec_float4 xxxx, yyyy, zzzz;
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
-    xxxx = spu_shuffle( vec->vec128, vec->vec128, shuffle_xxxx );
-    yyyy = spu_shuffle( vec->vec128, vec->vec128, shuffle_yyyy );
-    zzzz = spu_shuffle( vec->vec128, vec->vec128, shuffle_zzzz );
-    res = spu_mul( mat->col0.vec128, xxxx );
-    res = spu_madd( mat->col1.vec128, yyyy, res );
-    res = spu_madd( mat->col2.vec128, zzzz, res );
-    result->vec128 = res;
-}
-
-static inline void vmathM4MulP3( VmathVector4 *result, const VmathMatrix4 *mat, const VmathPoint3 *pnt )
-{
-    vec_float4 tmp0, tmp1, res;
-    vec_float4 xxxx, yyyy, zzzz;
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
-    xxxx = spu_shuffle( pnt->vec128, pnt->vec128, shuffle_xxxx );
-    yyyy = spu_shuffle( pnt->vec128, pnt->vec128, shuffle_yyyy );
-    zzzz = spu_shuffle( pnt->vec128, pnt->vec128, shuffle_zzzz );
-    tmp0 = spu_mul( mat->col0.vec128, xxxx );
-    tmp1 = spu_mul( mat->col1.vec128, yyyy );
-    tmp0 = spu_madd( mat->col2.vec128, zzzz, tmp0 );
-    tmp1 = spu_add( mat->col3.vec128, tmp1 );
-    res = spu_add( tmp0, tmp1 );
-    result->vec128 = res;
-}
-
-static inline void vmathM4Mul( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 )
-{
-    VmathMatrix4 tmpResult;
-    vmathM4MulV4( &tmpResult.col0, mat0, &mat1->col0 );
-    vmathM4MulV4( &tmpResult.col1, mat0, &mat1->col1 );
-    vmathM4MulV4( &tmpResult.col2, mat0, &mat1->col2 );
-    vmathM4MulV4( &tmpResult.col3, mat0, &mat1->col3 );
-    vmathM4Copy( result, &tmpResult );
-}
-
-static inline void vmathM4MulT3( VmathMatrix4 *result, const VmathMatrix4 *mat, const VmathTransform3 *tfrm1 )
-{
-    VmathMatrix4 tmpResult;
-    VmathPoint3 tmpP3_0;
-    vmathM4MulV3( &tmpResult.col0, mat, &tfrm1->col0 );
-    vmathM4MulV3( &tmpResult.col1, mat, &tfrm1->col1 );
-    vmathM4MulV3( &tmpResult.col2, mat, &tfrm1->col2 );
-    vmathP3MakeFromV3( &tmpP3_0, &tfrm1->col3 );
-    vmathM4MulP3( &tmpResult.col3, mat, &tmpP3_0 );
-    vmathM4Copy( result, &tmpResult );
-}
-
-static inline void vmathM4MulPerElem( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 )
-{
-    vmathV4MulPerElem( &result->col0, &mat0->col0, &mat1->col0 );
-    vmathV4MulPerElem( &result->col1, &mat0->col1, &mat1->col1 );
-    vmathV4MulPerElem( &result->col2, &mat0->col2, &mat1->col2 );
-    vmathV4MulPerElem( &result->col3, &mat0->col3, &mat1->col3 );
-}
-
-static inline void vmathM4MakeIdentity( VmathMatrix4 *result )
-{
-    vmathV4MakeXAxis( &result->col0 );
-    vmathV4MakeYAxis( &result->col1 );
-    vmathV4MakeZAxis( &result->col2 );
-    vmathV4MakeWAxis( &result->col3 );
-}
-
-static inline void vmathM4SetUpper3x3( VmathMatrix4 *result, const VmathMatrix3 *mat3 )
-{
-    vmathV4SetXYZ( &result->col0, &mat3->col0 );
-    vmathV4SetXYZ( &result->col1, &mat3->col1 );
-    vmathV4SetXYZ( &result->col2, &mat3->col2 );
-}
-
-static inline void vmathM4GetUpper3x3( VmathMatrix3 *result, const VmathMatrix4 *mat )
-{
-    vmathV4GetXYZ( &result->col0, &mat->col0 );
-    vmathV4GetXYZ( &result->col1, &mat->col1 );
-    vmathV4GetXYZ( &result->col2, &mat->col2 );
-}
-
-static inline void vmathM4SetTranslation( VmathMatrix4 *result, const VmathVector3 *translateVec )
-{
-    vmathV4SetXYZ( &result->col3, translateVec );
-}
-
-static inline void vmathM4GetTranslation( VmathVector3 *result, const VmathMatrix4 *mat )
-{
-    vmathV4GetXYZ( result, &mat->col3 );
-}
-
-static inline void vmathM4MakeRotationX( VmathMatrix4 *result, float radians )
-{
-    vec_float4 s, c, res1, res2;
-    vec_uint4 select_y, select_z;
-    vec_float4 zero;
-    select_y = (vec_uint4)spu_maskb(0x0f00);
-    select_z = (vec_uint4)spu_maskb(0x00f0);
-    zero = spu_splats(0.0f);
-    sincosf4( spu_splats(radians), &s, &c );
-    res1 = spu_sel( zero, c, select_y );
-    res1 = spu_sel( res1, s, select_z );
-    res2 = spu_sel( zero, negatef4(s), select_y );
-    res2 = spu_sel( res2, c, select_z );
-    vmathV4MakeXAxis( &result->col0 );
-    result->col1.vec128 = res1;
-    result->col2.vec128 = res2;
-    vmathV4MakeWAxis( &result->col3 );
-}
-
-static inline void vmathM4MakeRotationY( VmathMatrix4 *result, float radians )
-{
-    vec_float4 s, c, res0, res2;
-    vec_uint4 select_x, select_z;
-    vec_float4 zero;
-    select_x = (vec_uint4)spu_maskb(0xf000);
-    select_z = (vec_uint4)spu_maskb(0x00f0);
-    zero = spu_splats(0.0f);
-    sincosf4( spu_splats(radians), &s, &c );
-    res0 = spu_sel( zero, c, select_x );
-    res0 = spu_sel( res0, negatef4(s), select_z );
-    res2 = spu_sel( zero, s, select_x );
-    res2 = spu_sel( res2, c, select_z );
-    result->col0.vec128 = res0;
-    vmathV4MakeYAxis( &result->col1 );
-    result->col2.vec128 = res2;
-    vmathV4MakeWAxis( &result->col3 );
-}
-
-static inline void vmathM4MakeRotationZ( VmathMatrix4 *result, float radians )
-{
-    vec_float4 s, c, res0, res1;
-    vec_uint4 select_x, select_y;
-    vec_float4 zero;
-    select_x = (vec_uint4)spu_maskb(0xf000);
-    select_y = (vec_uint4)spu_maskb(0x0f00);
-    zero = spu_splats(0.0f);
-    sincosf4( spu_splats(radians), &s, &c );
-    res0 = spu_sel( zero, c, select_x );
-    res0 = spu_sel( res0, s, select_y );
-    res1 = spu_sel( zero, negatef4(s), select_x );
-    res1 = spu_sel( res1, c, select_y );
-    result->col0.vec128 = res0;
-    result->col1.vec128 = res1;
-    vmathV4MakeZAxis( &result->col2 );
-    vmathV4MakeWAxis( &result->col3 );
-}
-
-static inline void vmathM4MakeRotationZYX( VmathMatrix4 *result, const VmathVector3 *radiansXYZ )
-{
-    vec_float4 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    angles = radiansXYZ->vec128;
-    angles = spu_insert( 0.0f, angles, 3 );
-    sincosf4( angles, &s, &c );
-    negS = negatef4( s );
-    Z0 = spu_shuffle( s, c, _VECTORMATH_SHUF_CZD0 );
-    Z1 = spu_shuffle( c, negS, _VECTORMATH_SHUF_CZD0 );
-    Y0 = spu_shuffle( negS, c, _VECTORMATH_SHUF_BBY0 );
-    Y1 = spu_shuffle( c, s, _VECTORMATH_SHUF_BBY0 );
-    X0 = spu_shuffle( s, s, shuffle_xxxx );
-    X1 = spu_shuffle( c, c, shuffle_xxxx );
-    tmp = spu_mul( Z0, Y1 );
-    result->col0.vec128 = spu_mul( Z0, Y0 );
-    result->col1.vec128 = spu_madd( Z1, X1, spu_mul( tmp, X0 ) );
-    result->col2.vec128 = spu_nmsub( Z1, X0, spu_mul( tmp, X1 ) );
-    vmathV4MakeWAxis( &result->col3 );
-}
-
-static inline void vmathM4MakeRotationAxis( VmathMatrix4 *result, float radians, const VmathVector3 *unitVec )
-{
-    vec_float4 axis, s, c, oneMinusC, axisS, negAxisS, xxxx, yyyy, zzzz, tmp0, tmp1, tmp2, zeroW;
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
-    axis = unitVec->vec128;
-    sincosf4( spu_splats( radians ), &s, &c );
-    xxxx = spu_shuffle( axis, axis, shuffle_xxxx );
-    yyyy = spu_shuffle( axis, axis, shuffle_yyyy );
-    zzzz = spu_shuffle( axis, axis, shuffle_zzzz );
-    oneMinusC = spu_sub( spu_splats(1.0f), c );
-    axisS = spu_mul( axis, s );
-    negAxisS = negatef4( axisS );
-    tmp0 = spu_shuffle( axisS, negAxisS, _VECTORMATH_SHUF_0ZB0 );
-    tmp1 = spu_shuffle( axisS, negAxisS, _VECTORMATH_SHUF_C0X0 );
-    tmp2 = spu_shuffle( axisS, negAxisS, _VECTORMATH_SHUF_YA00 );
-    tmp0 = spu_sel( tmp0, c, (vec_uint4)spu_maskb(0xf000) );
-    tmp1 = spu_sel( tmp1, c, (vec_uint4)spu_maskb(0x0f00) );
-    tmp2 = spu_sel( tmp2, c, (vec_uint4)spu_maskb(0x00f0) );
-    zeroW = (vec_float4)spu_maskb(0x000f);
-    axis = spu_andc( axis, zeroW );
-    result->col0.vec128 = spu_madd( spu_mul( axis, xxxx ), oneMinusC, tmp0 );
-    result->col1.vec128 = spu_madd( spu_mul( axis, yyyy ), oneMinusC, tmp1 );
-    result->col2.vec128 = spu_madd( spu_mul( axis, zzzz ), oneMinusC, tmp2 );
-    vmathV4MakeWAxis( &result->col3 );
-}
-
-static inline void vmathM4MakeRotationQ( VmathMatrix4 *result, const VmathQuat *unitQuat )
-{
-    VmathTransform3 tmpT3_0;
-    vmathT3MakeRotationQ( &tmpT3_0, unitQuat );
-    vmathM4MakeFromT3( result, &tmpT3_0 );
-}
-
-static inline void vmathM4MakeScale( VmathMatrix4 *result, const VmathVector3 *scaleVec )
-{
-    vec_float4 zero = spu_splats(0.0f);
-    result->col0.vec128 = spu_sel( zero, scaleVec->vec128, (vec_uint4)spu_maskb(0xf000) );
-    result->col1.vec128 = spu_sel( zero, scaleVec->vec128, (vec_uint4)spu_maskb(0x0f00) );
-    result->col2.vec128 = spu_sel( zero, scaleVec->vec128, (vec_uint4)spu_maskb(0x00f0) );
-    vmathV4MakeWAxis( &result->col3 );
-}
-
-static inline void vmathM4AppendScale( VmathMatrix4 *result, const VmathMatrix4 *mat, const VmathVector3 *scaleVec )
-{
-    vmathV4ScalarMul( &result->col0, &mat->col0, vmathV3GetX( scaleVec ) );
-    vmathV4ScalarMul( &result->col1, &mat->col1, vmathV3GetY( scaleVec ) );
-    vmathV4ScalarMul( &result->col2, &mat->col2, vmathV3GetZ( scaleVec ) );
-    vmathV4Copy( &result->col3, &mat->col3 );
-}
-
-static inline void vmathM4PrependScale( VmathMatrix4 *result, const VmathVector3 *scaleVec, const VmathMatrix4 *mat )
-{
-    VmathVector4 scale4;
-    vmathV4MakeFromV3Scalar( &scale4, scaleVec, 1.0f );
-    vmathV4MulPerElem( &result->col0, &mat->col0, &scale4 );
-    vmathV4MulPerElem( &result->col1, &mat->col1, &scale4 );
-    vmathV4MulPerElem( &result->col2, &mat->col2, &scale4 );
-    vmathV4MulPerElem( &result->col3, &mat->col3, &scale4 );
-}
-
-static inline void vmathM4MakeTranslation( VmathMatrix4 *result, const VmathVector3 *translateVec )
-{
-    vmathV4MakeXAxis( &result->col0 );
-    vmathV4MakeYAxis( &result->col1 );
-    vmathV4MakeZAxis( &result->col2 );
-    vmathV4MakeFromV3Scalar( &result->col3, translateVec, 1.0f );
-}
-
-static inline void vmathM4MakeLookAt( VmathMatrix4 *result, const VmathPoint3 *eyePos, const VmathPoint3 *lookAtPos, const VmathVector3 *upVec )
-{
-    VmathMatrix4 m4EyeFrame;
-    VmathVector3 v3X, v3Y, v3Z, tmpV3_0, tmpV3_1;
-    VmathVector4 tmpV4_0, tmpV4_1, tmpV4_2, tmpV4_3;
-    vmathV3Normalize( &v3Y, upVec );
-    vmathP3Sub( &tmpV3_0, eyePos, lookAtPos );
-    vmathV3Normalize( &v3Z, &tmpV3_0 );
-    vmathV3Cross( &tmpV3_1, &v3Y, &v3Z );
-    vmathV3Normalize( &v3X, &tmpV3_1 );
-    vmathV3Cross( &v3Y, &v3Z, &v3X );
-    vmathV4MakeFromV3( &tmpV4_0, &v3X );
-    vmathV4MakeFromV3( &tmpV4_1, &v3Y );
-    vmathV4MakeFromV3( &tmpV4_2, &v3Z );
-    vmathV4MakeFromP3( &tmpV4_3, eyePos );
-    vmathM4MakeFromCols( &m4EyeFrame, &tmpV4_0, &tmpV4_1, &tmpV4_2, &tmpV4_3 );
-    vmathM4OrthoInverse( result, &m4EyeFrame );
-}
-
-static inline void vmathM4MakePerspective( VmathMatrix4 *result, float fovyRadians, float aspect, float zNear, float zFar )
-{
-    float f, rangeInv;
-    vec_float4 zero, col0, col1, col2, col3;
-    f = tanf( _VECTORMATH_PI_OVER_2 - fovyRadians * 0.5f );
-    rangeInv = 1.0f / ( zNear - zFar );
-    zero = spu_splats(0.0f);
-    col0 = zero;
-    col1 = zero;
-    col2 = zero;
-    col3 = zero;
-    col0 = spu_insert( f / aspect, col0, 0 );
-    col1 = spu_insert( f, col1, 1 );
-    col2 = spu_insert( ( zNear + zFar ) * rangeInv, col2, 2 );
-    col2 = spu_insert( -1.0f, col2, 3 );
-    col3 = spu_insert( zNear * zFar * rangeInv * 2.0f, col3, 2 );
-    result->col0.vec128 = col0;
-    result->col1.vec128 = col1;
-    result->col2.vec128 = col2;
-    result->col3.vec128 = col3;
-}
-
-static inline void vmathM4MakeFrustum( VmathMatrix4 *result, float left, float right, float bottom, float top, float zNear, float zFar )
-{
-    /* function implementation based on code from STIDC SDK:           */
-    /* --------------------------------------------------------------  */
-    /* PLEASE DO NOT MODIFY THIS SECTION                               */
-    /* This prolog section is automatically generated.                 */
-    /*                                                                 */
-    /* (C)Copyright                                                    */
-    /* Sony Computer Entertainment, Inc.,                              */
-    /* Toshiba Corporation,                                            */
-    /* International Business Machines Corporation,                    */
-    /* 2001,2002.                                                      */
-    /* S/T/I Confidential Information                                  */
-    /* --------------------------------------------------------------  */
-    vec_float4 lbf, rtn;
-    vec_float4 diff, sum, inv_diff;
-    vec_float4 diagonal, column, near2;
-    vec_float4 zero = spu_splats(0.0f);
-    lbf = spu_shuffle( spu_promote(left,0), spu_promote(zFar,0), _VECTORMATH_SHUF_XAYB );
-    rtn = spu_shuffle( spu_promote(right,0), spu_promote(zNear,0), _VECTORMATH_SHUF_XAYB );
-    lbf = spu_shuffle( lbf, spu_promote(bottom,0), _VECTORMATH_SHUF_XAYB );
-    rtn = spu_shuffle( rtn, spu_promote(top,0), _VECTORMATH_SHUF_XAYB );
-    diff = spu_sub( rtn, lbf );
-    sum  = spu_add( rtn, lbf );
-    inv_diff = recipf4( diff );
-    near2 = spu_splats( zNear );
-    near2 = spu_add( near2, near2 );
-    diagonal = spu_mul( near2, inv_diff );
-    column = spu_mul( sum, inv_diff );
-    result->col0.vec128 = spu_sel( zero, diagonal, (vec_uint4)spu_maskb(0xf000) );
-    result->col1.vec128 = spu_sel( zero, diagonal, (vec_uint4)spu_maskb(0x0f00) );
-    result->col2.vec128 = spu_sel( column, spu_splats(-1.0f), (vec_uint4)spu_maskb(0x000f) );
-    result->col3.vec128 = spu_sel( zero, spu_mul( diagonal, spu_splats(zFar) ), (vec_uint4)spu_maskb(0x00f0) );
-}
-
-static inline void vmathM4MakeOrthographic( VmathMatrix4 *result, float left, float right, float bottom, float top, float zNear, float zFar )
-{
-    /* function implementation based on code from STIDC SDK:           */
-    /* --------------------------------------------------------------  */
-    /* PLEASE DO NOT MODIFY THIS SECTION                               */
-    /* This prolog section is automatically generated.                 */
-    /*                                                                 */
-    /* (C)Copyright                                                    */
-    /* Sony Computer Entertainment, Inc.,                              */
-    /* Toshiba Corporation,                                            */
-    /* International Business Machines Corporation,                    */
-    /* 2001,2002.                                                      */
-    /* S/T/I Confidential Information                                  */
-    /* --------------------------------------------------------------  */
-    vec_float4 lbf, rtn;
-    vec_float4 diff, sum, inv_diff, neg_inv_diff;
-    vec_float4 diagonal, column;
-    vec_float4 zero = spu_splats(0.0f);
-    lbf = spu_shuffle( spu_promote(left,0), spu_promote(zFar,0), _VECTORMATH_SHUF_XAYB );
-    rtn = spu_shuffle( spu_promote(right,0), spu_promote(zNear,0), _VECTORMATH_SHUF_XAYB );
-    lbf = spu_shuffle( lbf, spu_promote(bottom,0), _VECTORMATH_SHUF_XAYB );
-    rtn = spu_shuffle( rtn, spu_promote(top,0), _VECTORMATH_SHUF_XAYB );
-    diff = spu_sub( rtn, lbf );
-    sum  = spu_add( rtn, lbf );
-    inv_diff = recipf4( diff );
-    neg_inv_diff = negatef4( inv_diff );
-    diagonal = spu_add( inv_diff, inv_diff );
-    column = spu_mul( sum, spu_sel( neg_inv_diff, inv_diff, (vec_uint4)spu_maskb(0x00f0) ) );
-    result->col0.vec128 = spu_sel( zero, diagonal, (vec_uint4)spu_maskb(0xf000) );
-    result->col1.vec128 = spu_sel( zero, diagonal, (vec_uint4)spu_maskb(0x0f00) );
-    result->col2.vec128 = spu_sel( zero, diagonal, (vec_uint4)spu_maskb(0x00f0) );
-    result->col3.vec128 = spu_sel( column, spu_splats(1.0f), (vec_uint4)spu_maskb(0x000f) );
-}
-
-static inline void vmathM4Select( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1, unsigned int select1 )
-{
-    vmathV4Select( &result->col0, &mat0->col0, &mat1->col0, select1 );
-    vmathV4Select( &result->col1, &mat0->col1, &mat1->col1, select1 );
-    vmathV4Select( &result->col2, &mat0->col2, &mat1->col2, select1 );
-    vmathV4Select( &result->col3, &mat0->col3, &mat1->col3, select1 );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathM4Print( const VmathMatrix4 *mat )
-{
-    VmathVector4 tmpV4_0, tmpV4_1, tmpV4_2, tmpV4_3;
-    vmathM4GetRow( &tmpV4_0, mat, 0 );
-    vmathV4Print( &tmpV4_0 );
-    vmathM4GetRow( &tmpV4_1, mat, 1 );
-    vmathV4Print( &tmpV4_1 );
-    vmathM4GetRow( &tmpV4_2, mat, 2 );
-    vmathV4Print( &tmpV4_2 );
-    vmathM4GetRow( &tmpV4_3, mat, 3 );
-    vmathV4Print( &tmpV4_3 );
-}
-
-static inline void vmathM4Prints( const VmathMatrix4 *mat, const char *name )
-{
-    printf("%s:\n", name);
-    vmathM4Print( mat );
-}
-
-#endif
-
-static inline void vmathT3Copy( VmathTransform3 *result, const VmathTransform3 *tfrm )
-{
-    vmathV3Copy( &result->col0, &tfrm->col0 );
-    vmathV3Copy( &result->col1, &tfrm->col1 );
-    vmathV3Copy( &result->col2, &tfrm->col2 );
-    vmathV3Copy( &result->col3, &tfrm->col3 );
-}
-
-static inline void vmathT3MakeFromScalar( VmathTransform3 *result, float scalar )
-{
-    vmathV3MakeFromScalar( &result->col0, scalar );
-    vmathV3MakeFromScalar( &result->col1, scalar );
-    vmathV3MakeFromScalar( &result->col2, scalar );
-    vmathV3MakeFromScalar( &result->col3, scalar );
-}
-
-static inline void vmathT3MakeFromCols( VmathTransform3 *result, const VmathVector3 *_col0, const VmathVector3 *_col1, const VmathVector3 *_col2, const VmathVector3 *_col3 )
-{
-    vmathV3Copy( &result->col0, _col0 );
-    vmathV3Copy( &result->col1, _col1 );
-    vmathV3Copy( &result->col2, _col2 );
-    vmathV3Copy( &result->col3, _col3 );
-}
-
-static inline void vmathT3MakeFromM3V3( VmathTransform3 *result, const VmathMatrix3 *tfrm, const VmathVector3 *translateVec )
-{
-    vmathT3SetUpper3x3( result, tfrm );
-    vmathT3SetTranslation( result, translateVec );
-}
-
-static inline void vmathT3MakeFromQV3( VmathTransform3 *result, const VmathQuat *unitQuat, const VmathVector3 *translateVec )
-{
-    VmathMatrix3 tmpM3_0;
-    vmathM3MakeFromQ( &tmpM3_0, unitQuat );
-    vmathT3SetUpper3x3( result, &tmpM3_0 );
-    vmathT3SetTranslation( result, translateVec );
-}
-
-static inline void vmathT3SetCol0( VmathTransform3 *result, const VmathVector3 *_col0 )
-{
-    vmathV3Copy( &result->col0, _col0 );
-}
-
-static inline void vmathT3SetCol1( VmathTransform3 *result, const VmathVector3 *_col1 )
-{
-    vmathV3Copy( &result->col1, _col1 );
-}
-
-static inline void vmathT3SetCol2( VmathTransform3 *result, const VmathVector3 *_col2 )
-{
-    vmathV3Copy( &result->col2, _col2 );
-}
-
-static inline void vmathT3SetCol3( VmathTransform3 *result, const VmathVector3 *_col3 )
-{
-    vmathV3Copy( &result->col3, _col3 );
-}
-
-static inline void vmathT3SetCol( VmathTransform3 *result, int col, const VmathVector3 *vec )
-{
-    vmathV3Copy( (&result->col0 + col), vec );
-}
-
-static inline void vmathT3SetRow( VmathTransform3 *result, int row, const VmathVector4 *vec )
-{
-    vmathV3SetElem( &result->col0, row, vmathV4GetElem( vec, 0 ) );
-    vmathV3SetElem( &result->col1, row, vmathV4GetElem( vec, 1 ) );
-    vmathV3SetElem( &result->col2, row, vmathV4GetElem( vec, 2 ) );
-    vmathV3SetElem( &result->col3, row, vmathV4GetElem( vec, 3 ) );
-}
-
-static inline void vmathT3SetElem( VmathTransform3 *result, int col, int row, float val )
-{
-    VmathVector3 tmpV3_0;
-    vmathT3GetCol( &tmpV3_0, result, col );
-    vmathV3SetElem( &tmpV3_0, row, val );
-    vmathT3SetCol( result, col, &tmpV3_0 );
-}
-
-static inline float vmathT3GetElem( const VmathTransform3 *tfrm, int col, int row )
-{
-    VmathVector3 tmpV3_0;
-    vmathT3GetCol( &tmpV3_0, tfrm, col );
-    return vmathV3GetElem( &tmpV3_0, row );
-}
-
-static inline void vmathT3GetCol0( VmathVector3 *result, const VmathTransform3 *tfrm )
-{
-    vmathV3Copy( result, &tfrm->col0 );
-}
-
-static inline void vmathT3GetCol1( VmathVector3 *result, const VmathTransform3 *tfrm )
-{
-    vmathV3Copy( result, &tfrm->col1 );
-}
-
-static inline void vmathT3GetCol2( VmathVector3 *result, const VmathTransform3 *tfrm )
-{
-    vmathV3Copy( result, &tfrm->col2 );
-}
-
-static inline void vmathT3GetCol3( VmathVector3 *result, const VmathTransform3 *tfrm )
-{
-    vmathV3Copy( result, &tfrm->col3 );
-}
-
-static inline void vmathT3GetCol( VmathVector3 *result, const VmathTransform3 *tfrm, int col )
-{
-    vmathV3Copy( result, (&tfrm->col0 + col) );
-}
-
-static inline void vmathT3GetRow( VmathVector4 *result, const VmathTransform3 *tfrm, int row )
-{
-    vmathV4MakeFromElems( result, vmathV3GetElem( &tfrm->col0, row ), vmathV3GetElem( &tfrm->col1, row ), vmathV3GetElem( &tfrm->col2, row ), vmathV3GetElem( &tfrm->col3, row ) );
-}
-
-static inline void vmathT3Inverse( VmathTransform3 *result, const VmathTransform3 *tfrm )
-{
-    vec_float4 inv0, inv1, inv2, inv3;
-    vec_float4 tmp0, tmp1, tmp2, tmp3, tmp4, dot, invdet;
-    vec_float4 xxxx, yyyy, zzzz;
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
-    tmp2 = _vmathVfCross( tfrm->col0.vec128, tfrm->col1.vec128 );
-    tmp0 = _vmathVfCross( tfrm->col1.vec128, tfrm->col2.vec128 );
-    tmp1 = _vmathVfCross( tfrm->col2.vec128, tfrm->col0.vec128 );
-    inv3 = negatef4( tfrm->col3.vec128 );
-    dot = _vmathVfDot3( tmp2, tfrm->col2.vec128 );
-    dot = spu_shuffle( dot, dot, shuffle_xxxx );
-    invdet = recipf4( dot );
-    tmp3 = spu_shuffle( tmp0, tmp2, _VECTORMATH_SHUF_XAYB );
-    tmp4 = spu_shuffle( tmp0, tmp2, _VECTORMATH_SHUF_ZCWD );
-    inv0 = spu_shuffle( tmp3, tmp1, _VECTORMATH_SHUF_XAYB );
-    xxxx = spu_shuffle( inv3, inv3, shuffle_xxxx );
-    inv1 = spu_shuffle( tmp3, tmp1, _VECTORMATH_SHUF_ZBW0 );
-    inv2 = spu_shuffle( tmp4, tmp1, _VECTORMATH_SHUF_XCY0 );
-    yyyy = spu_shuffle( inv3, inv3, shuffle_yyyy );
-    zzzz = spu_shuffle( inv3, inv3, shuffle_zzzz );
-    inv3 = spu_mul( inv0, xxxx );
-    inv3 = spu_madd( inv1, yyyy, inv3 );
-    inv3 = spu_madd( inv2, zzzz, inv3 );
-    inv0 = spu_mul( inv0, invdet );
-    inv1 = spu_mul( inv1, invdet );
-    inv2 = spu_mul( inv2, invdet );
-    inv3 = spu_mul( inv3, invdet );
-    result->col0.vec128 = inv0;
-    result->col1.vec128 = inv1;
-    result->col2.vec128 = inv2;
-    result->col3.vec128 = inv3;
-}
-
-static inline void vmathT3OrthoInverse( VmathTransform3 *result, const VmathTransform3 *tfrm )
-{
-    vec_float4 inv0, inv1, inv2, inv3;
-    vec_float4 tmp0, tmp1;
-    vec_float4 xxxx, yyyy, zzzz;
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
-    tmp0 = spu_shuffle( tfrm->col0.vec128, tfrm->col2.vec128, _VECTORMATH_SHUF_XAYB );
-    tmp1 = spu_shuffle( tfrm->col0.vec128, tfrm->col2.vec128, _VECTORMATH_SHUF_ZCWD );
-    inv3 = negatef4( tfrm->col3.vec128 );
-    inv0 = spu_shuffle( tmp0, tfrm->col1.vec128, _VECTORMATH_SHUF_XAYB );
-    xxxx = spu_shuffle( inv3, inv3, shuffle_xxxx );
-    inv1 = spu_shuffle( tmp0, tfrm->col1.vec128, _VECTORMATH_SHUF_ZBW0 );
-    inv2 = spu_shuffle( tmp1, tfrm->col1.vec128, _VECTORMATH_SHUF_XCY0 );
-    yyyy = spu_shuffle( inv3, inv3, shuffle_yyyy );
-    zzzz = spu_shuffle( inv3, inv3, shuffle_zzzz );
-    inv3 = spu_mul( inv0, xxxx );
-    inv3 = spu_madd( inv1, yyyy, inv3 );
-    inv3 = spu_madd( inv2, zzzz, inv3 );
-    result->col0.vec128 = inv0;
-    result->col1.vec128 = inv1;
-    result->col2.vec128 = inv2;
-    result->col3.vec128 = inv3;
-}
-
-static inline void vmathT3AbsPerElem( VmathTransform3 *result, const VmathTransform3 *tfrm )
-{
-    vmathV3AbsPerElem( &result->col0, &tfrm->col0 );
-    vmathV3AbsPerElem( &result->col1, &tfrm->col1 );
-    vmathV3AbsPerElem( &result->col2, &tfrm->col2 );
-    vmathV3AbsPerElem( &result->col3, &tfrm->col3 );
-}
-
-static inline void vmathT3MulV3( VmathVector3 *result, const VmathTransform3 *tfrm, const VmathVector3 *vec )
-{
-    vec_float4 res;
-    vec_float4 xxxx, yyyy, zzzz;
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
-    xxxx = spu_shuffle( vec->vec128, vec->vec128, shuffle_xxxx );
-    yyyy = spu_shuffle( vec->vec128, vec->vec128, shuffle_yyyy );
-    zzzz = spu_shuffle( vec->vec128, vec->vec128, shuffle_zzzz );
-    res = spu_mul( tfrm->col0.vec128, xxxx );
-    res = spu_madd( tfrm->col1.vec128, yyyy, res );
-    res = spu_madd( tfrm->col2.vec128, zzzz, res );
-    result->vec128 = res;
-}
-
-static inline void vmathT3MulP3( VmathPoint3 *result, const VmathTransform3 *tfrm, const VmathPoint3 *pnt )
-{
-    vec_float4 tmp0, tmp1, res;
-    vec_float4 xxxx, yyyy, zzzz;
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
-    xxxx = spu_shuffle( pnt->vec128, pnt->vec128, shuffle_xxxx );
-    yyyy = spu_shuffle( pnt->vec128, pnt->vec128, shuffle_yyyy );
-    zzzz = spu_shuffle( pnt->vec128, pnt->vec128, shuffle_zzzz );
-    tmp0 = spu_mul( tfrm->col0.vec128, xxxx );
-    tmp1 = spu_mul( tfrm->col1.vec128, yyyy );
-    tmp0 = spu_madd( tfrm->col2.vec128, zzzz, tmp0 );
-    tmp1 = spu_add( tfrm->col3.vec128, tmp1 );
-    res = spu_add( tmp0, tmp1 );
-    result->vec128 = res;
-}
-
-static inline void vmathT3Mul( VmathTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1 )
-{
-    VmathTransform3 tmpResult;
-    VmathPoint3 tmpP3_0, tmpP3_1;
-    vmathT3MulV3( &tmpResult.col0, tfrm0, &tfrm1->col0 );
-    vmathT3MulV3( &tmpResult.col1, tfrm0, &tfrm1->col1 );
-    vmathT3MulV3( &tmpResult.col2, tfrm0, &tfrm1->col2 );
-    vmathP3MakeFromV3( &tmpP3_0, &tfrm1->col3 );
-    vmathT3MulP3( &tmpP3_1, tfrm0, &tmpP3_0 );
-    vmathV3MakeFromP3( &tmpResult.col3, &tmpP3_1 );
-    vmathT3Copy( result, &tmpResult );
-}
-
-static inline void vmathT3MulPerElem( VmathTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1 )
-{
-    vmathV3MulPerElem( &result->col0, &tfrm0->col0, &tfrm1->col0 );
-    vmathV3MulPerElem( &result->col1, &tfrm0->col1, &tfrm1->col1 );
-    vmathV3MulPerElem( &result->col2, &tfrm0->col2, &tfrm1->col2 );
-    vmathV3MulPerElem( &result->col3, &tfrm0->col3, &tfrm1->col3 );
-}
-
-static inline void vmathT3MakeIdentity( VmathTransform3 *result )
-{
-    vmathV3MakeXAxis( &result->col0 );
-    vmathV3MakeYAxis( &result->col1 );
-    vmathV3MakeZAxis( &result->col2 );
-    vmathV3MakeFromScalar( &result->col3, 0.0f );
-}
-
-static inline void vmathT3SetUpper3x3( VmathTransform3 *result, const VmathMatrix3 *tfrm )
-{
-    vmathV3Copy( &result->col0, &tfrm->col0 );
-    vmathV3Copy( &result->col1, &tfrm->col1 );
-    vmathV3Copy( &result->col2, &tfrm->col2 );
-}
-
-static inline void vmathT3GetUpper3x3( VmathMatrix3 *result, const VmathTransform3 *tfrm )
-{
-    vmathM3MakeFromCols( result, &tfrm->col0, &tfrm->col1, &tfrm->col2 );
-}
-
-static inline void vmathT3SetTranslation( VmathTransform3 *result, const VmathVector3 *translateVec )
-{
-    vmathV3Copy( &result->col3, translateVec );
-}
-
-static inline void vmathT3GetTranslation( VmathVector3 *result, const VmathTransform3 *tfrm )
-{
-    vmathV3Copy( result, &tfrm->col3 );
-}
-
-static inline void vmathT3MakeRotationX( VmathTransform3 *result, float radians )
-{
-    vec_float4 s, c, res1, res2;
-    vec_uint4 select_y, select_z;
-    vec_float4 zero;
-    select_y = (vec_uint4)spu_maskb(0x0f00);
-    select_z = (vec_uint4)spu_maskb(0x00f0);
-    zero = spu_splats(0.0f);
-    sincosf4( spu_splats(radians), &s, &c );
-    res1 = spu_sel( zero, c, select_y );
-    res1 = spu_sel( res1, s, select_z );
-    res2 = spu_sel( zero, negatef4(s), select_y );
-    res2 = spu_sel( res2, c, select_z );
-    vmathV3MakeXAxis( &result->col0 );
-    result->col1.vec128 = res1;
-    result->col2.vec128 = res2;
-    vmathV3MakeFromScalar( &result->col3, 0.0f );
-}
-
-static inline void vmathT3MakeRotationY( VmathTransform3 *result, float radians )
-{
-    vec_float4 s, c, res0, res2;
-    vec_uint4 select_x, select_z;
-    vec_float4 zero;
-    select_x = (vec_uint4)spu_maskb(0xf000);
-    select_z = (vec_uint4)spu_maskb(0x00f0);
-    zero = spu_splats(0.0f);
-    sincosf4( spu_splats(radians), &s, &c );
-    res0 = spu_sel( zero, c, select_x );
-    res0 = spu_sel( res0, negatef4(s), select_z );
-    res2 = spu_sel( zero, s, select_x );
-    res2 = spu_sel( res2, c, select_z );
-    result->col0.vec128 = res0;
-    vmathV3MakeYAxis( &result->col1 );
-    result->col2.vec128 = res2;
-    vmathV3MakeFromScalar( &result->col3, 0.0f );
-}
-
-static inline void vmathT3MakeRotationZ( VmathTransform3 *result, float radians )
-{
-    vec_float4 s, c, res0, res1;
-    vec_uint4 select_x, select_y;
-    vec_float4 zero;
-    select_x = (vec_uint4)spu_maskb(0xf000);
-    select_y = (vec_uint4)spu_maskb(0x0f00);
-    zero = spu_splats(0.0f);
-    sincosf4( spu_splats(radians), &s, &c );
-    res0 = spu_sel( zero, c, select_x );
-    res0 = spu_sel( res0, s, select_y );
-    res1 = spu_sel( zero, negatef4(s), select_x );
-    res1 = spu_sel( res1, c, select_y );
-    result->col0.vec128 = res0;
-    result->col1.vec128 = res1;
-    vmathV3MakeZAxis( &result->col2 );
-    vmathV3MakeFromScalar( &result->col3, 0.0f );
-}
-
-static inline void vmathT3MakeRotationZYX( VmathTransform3 *result, const VmathVector3 *radiansXYZ )
-{
-    vec_float4 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    angles = radiansXYZ->vec128;
-    angles = spu_insert( 0.0f, angles, 3 );
-    sincosf4( angles, &s, &c );
-    negS = negatef4( s );
-    Z0 = spu_shuffle( s, c, _VECTORMATH_SHUF_CZD0 );
-    Z1 = spu_shuffle( c, negS, _VECTORMATH_SHUF_CZD0 );
-    Y0 = spu_shuffle( negS, c, _VECTORMATH_SHUF_BBY0 );
-    Y1 = spu_shuffle( c, s, _VECTORMATH_SHUF_BBY0 );
-    X0 = spu_shuffle( s, s, shuffle_xxxx );
-    X1 = spu_shuffle( c, c, shuffle_xxxx );
-    tmp = spu_mul( Z0, Y1 );
-    result->col0.vec128 = spu_mul( Z0, Y0 );
-    result->col1.vec128 = spu_madd( Z1, X1, spu_mul( tmp, X0 ) );
-    result->col2.vec128 = spu_nmsub( Z1, X0, spu_mul( tmp, X1 ) );
-    vmathV3MakeFromScalar( &result->col3, 0.0f );
-}
-
-static inline void vmathT3MakeRotationAxis( VmathTransform3 *result, float radians, const VmathVector3 *unitVec )
-{
-    VmathMatrix3 tmpM3_0;
-    VmathVector3 tmpV3_0;
-    vmathM3MakeRotationAxis( &tmpM3_0, radians, unitVec );
-    vmathV3MakeFromScalar( &tmpV3_0, 0.0f );
-    vmathT3MakeFromM3V3( result, &tmpM3_0, &tmpV3_0 );
-}
-
-static inline void vmathT3MakeRotationQ( VmathTransform3 *result, const VmathQuat *unitQuat )
-{
-    VmathMatrix3 tmpM3_0;
-    VmathVector3 tmpV3_0;
-    vmathM3MakeFromQ( &tmpM3_0, unitQuat );
-    vmathV3MakeFromScalar( &tmpV3_0, 0.0f );
-    vmathT3MakeFromM3V3( result, &tmpM3_0, &tmpV3_0 );
-}
-
-static inline void vmathT3MakeScale( VmathTransform3 *result, const VmathVector3 *scaleVec )
-{
-    vec_float4 zero = spu_splats(0.0f);
-    result->col0.vec128 = spu_sel( zero, scaleVec->vec128, (vec_uint4)spu_maskb(0xf000) );
-    result->col1.vec128 = spu_sel( zero, scaleVec->vec128, (vec_uint4)spu_maskb(0x0f00) );
-    result->col2.vec128 = spu_sel( zero, scaleVec->vec128, (vec_uint4)spu_maskb(0x00f0) );
-    vmathV3MakeFromScalar( &result->col3, 0.0f );
-}
-
-static inline void vmathT3AppendScale( VmathTransform3 *result, const VmathTransform3 *tfrm, const VmathVector3 *scaleVec )
-{
-    vmathV3ScalarMul( &result->col0, &tfrm->col0, vmathV3GetX( scaleVec ) );
-    vmathV3ScalarMul( &result->col1, &tfrm->col1, vmathV3GetY( scaleVec ) );
-    vmathV3ScalarMul( &result->col2, &tfrm->col2, vmathV3GetZ( scaleVec ) );
-    vmathV3Copy( &result->col3, &tfrm->col3 );
-}
-
-static inline void vmathT3PrependScale( VmathTransform3 *result, const VmathVector3 *scaleVec, const VmathTransform3 *tfrm )
-{
-    vmathV3MulPerElem( &result->col0, &tfrm->col0, scaleVec );
-    vmathV3MulPerElem( &result->col1, &tfrm->col1, scaleVec );
-    vmathV3MulPerElem( &result->col2, &tfrm->col2, scaleVec );
-    vmathV3MulPerElem( &result->col3, &tfrm->col3, scaleVec );
-}
-
-static inline void vmathT3MakeTranslation( VmathTransform3 *result, const VmathVector3 *translateVec )
-{
-    vmathV3MakeXAxis( &result->col0 );
-    vmathV3MakeYAxis( &result->col1 );
-    vmathV3MakeZAxis( &result->col2 );
-    vmathV3Copy( &result->col3, translateVec );
-}
-
-static inline void vmathT3Select( VmathTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1, unsigned int select1 )
-{
-    vmathV3Select( &result->col0, &tfrm0->col0, &tfrm1->col0, select1 );
-    vmathV3Select( &result->col1, &tfrm0->col1, &tfrm1->col1, select1 );
-    vmathV3Select( &result->col2, &tfrm0->col2, &tfrm1->col2, select1 );
-    vmathV3Select( &result->col3, &tfrm0->col3, &tfrm1->col3, select1 );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathT3Print( const VmathTransform3 *tfrm )
-{
-    VmathVector4 tmpV4_0, tmpV4_1, tmpV4_2;
-    vmathT3GetRow( &tmpV4_0, tfrm, 0 );
-    vmathV4Print( &tmpV4_0 );
-    vmathT3GetRow( &tmpV4_1, tfrm, 1 );
-    vmathV4Print( &tmpV4_1 );
-    vmathT3GetRow( &tmpV4_2, tfrm, 2 );
-    vmathV4Print( &tmpV4_2 );
-}
-
-static inline void vmathT3Prints( const VmathTransform3 *tfrm, const char *name )
-{
-    printf("%s:\n", name);
-    vmathT3Print( tfrm );
-}
-
-#endif
-
-static inline void vmathQMakeFromM3( VmathQuat *result, const VmathMatrix3 *tfrm )
-{
-    vec_float4 res;
-    vec_float4 col0, col1, col2;
-    vec_float4 xx_yy, xx_yy_zz_xx, yy_zz_xx_yy, zz_xx_yy_zz, diagSum, diagDiff;
-    vec_float4 zy_xz_yx, yz_zx_xy, sum, diff;
-    vec_float4 radicand, invSqrt, scale;
-    vec_float4 res0, res1, res2, res3;
-    vec_float4 xx, yy, zz;
-    vec_uint4 select_x = (vec_uint4)spu_maskb( 0xf000 );
-    vec_uint4 select_y = (vec_uint4)spu_maskb( 0x0f00 );
-    vec_uint4 select_z = (vec_uint4)spu_maskb( 0x00f0 );
-    vec_uint4 select_w = (vec_uint4)spu_maskb( 0x000f );
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((unsigned int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((unsigned int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((unsigned int)0x08090a0b);
-    vec_uchar16 shuffle_wwww = (vec_uchar16)spu_splats((unsigned int)0x0c0d0e0f);
-
-    col0 = tfrm->col0.vec128;
-    col1 = tfrm->col1.vec128;
-    col2 = tfrm->col2.vec128;
-
-    /* four cases: */
-    /* trace > 0 */
-    /* else */
-    /*    xx largest diagonal element */
-    /*    yy largest diagonal element */
-    /*    zz largest diagonal element */
-
-    /* compute quaternion for each case */
-
-    xx_yy = spu_sel( col0, col1, select_y );
-    xx_yy_zz_xx = spu_shuffle( xx_yy, col2, _VECTORMATH_SHUF_XYCX );
-    yy_zz_xx_yy = spu_shuffle( xx_yy, col2, _VECTORMATH_SHUF_YCXY );
-    zz_xx_yy_zz = spu_shuffle( xx_yy, col2, _VECTORMATH_SHUF_CXYC );
-
-    diagSum = spu_add( spu_add( xx_yy_zz_xx, yy_zz_xx_yy ), zz_xx_yy_zz );
-    diagDiff = spu_sub( spu_sub( xx_yy_zz_xx, yy_zz_xx_yy ), zz_xx_yy_zz );
-    radicand = spu_add( spu_sel( diagDiff, diagSum, select_w ), spu_splats(1.0f) );
-    invSqrt = rsqrtf4( radicand );
-
-    zy_xz_yx = spu_sel( col0, col1, select_z );
-    zy_xz_yx = spu_shuffle( zy_xz_yx, col2, _VECTORMATH_SHUF_ZAY0 );
-    yz_zx_xy = spu_sel( col0, col1, select_x );
-    yz_zx_xy = spu_shuffle( yz_zx_xy, col2, _VECTORMATH_SHUF_BZX0 );
-
-    sum = spu_add( zy_xz_yx, yz_zx_xy );
-    diff = spu_sub( zy_xz_yx, yz_zx_xy );
-
-    scale = spu_mul( invSqrt, spu_splats(0.5f) );
-    res0 = spu_shuffle( sum, diff, _VECTORMATH_SHUF_0ZYA );
-    res1 = spu_shuffle( sum, diff, _VECTORMATH_SHUF_Z0XB );
-    res2 = spu_shuffle( sum, diff, _VECTORMATH_SHUF_YX0C );
-    res3 = diff;
-    res0 = spu_sel( res0, radicand, select_x );
-    res1 = spu_sel( res1, radicand, select_y );
-    res2 = spu_sel( res2, radicand, select_z );
-    res3 = spu_sel( res3, radicand, select_w );
-    res0 = spu_mul( res0, spu_shuffle( scale, scale, shuffle_xxxx ) );
-    res1 = spu_mul( res1, spu_shuffle( scale, scale, shuffle_yyyy ) );
-    res2 = spu_mul( res2, spu_shuffle( scale, scale, shuffle_zzzz ) );
-    res3 = spu_mul( res3, spu_shuffle( scale, scale, shuffle_wwww ) );
-
-    /* determine case and select answer */
-
-    xx = spu_shuffle( col0, col0, shuffle_xxxx );
-    yy = spu_shuffle( col1, col1, shuffle_yyyy );
-    zz = spu_shuffle( col2, col2, shuffle_zzzz );
-    res = spu_sel( res0, res1, spu_cmpgt( yy, xx ) );
-    res = spu_sel( res, res2, spu_and( spu_cmpgt( zz, xx ), spu_cmpgt( zz, yy ) ) );
-    res = spu_sel( res, res3, spu_cmpgt( spu_shuffle( diagSum, diagSum, shuffle_xxxx ), spu_splats(0.0f) ) );
-    result->vec128 = res;
-}
-
-static inline void vmathV3Outer( VmathMatrix3 *result, const VmathVector3 *tfrm0, const VmathVector3 *tfrm1 )
-{
-    vmathV3ScalarMul( &result->col0, tfrm0, vmathV3GetX( tfrm1 ) );
-    vmathV3ScalarMul( &result->col1, tfrm0, vmathV3GetY( tfrm1 ) );
-    vmathV3ScalarMul( &result->col2, tfrm0, vmathV3GetZ( tfrm1 ) );
-}
-
-static inline void vmathV4Outer( VmathMatrix4 *result, const VmathVector4 *tfrm0, const VmathVector4 *tfrm1 )
-{
-    vmathV4ScalarMul( &result->col0, tfrm0, vmathV4GetX( tfrm1 ) );
-    vmathV4ScalarMul( &result->col1, tfrm0, vmathV4GetY( tfrm1 ) );
-    vmathV4ScalarMul( &result->col2, tfrm0, vmathV4GetZ( tfrm1 ) );
-    vmathV4ScalarMul( &result->col3, tfrm0, vmathV4GetW( tfrm1 ) );
-}
-
-static inline void vmathV3RowMul( VmathVector3 *result, const VmathVector3 *vec, const VmathMatrix3 *mat )
-{
-    vec_float4 tmp0, tmp1, mcol0, mcol1, mcol2, res;
-    vec_float4 xxxx, yyyy, zzzz;
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
-    tmp0 = spu_shuffle( mat->col0.vec128, mat->col2.vec128, _VECTORMATH_SHUF_XAYB );
-    tmp1 = spu_shuffle( mat->col0.vec128, mat->col2.vec128, _VECTORMATH_SHUF_ZCWD );
-    xxxx = spu_shuffle( vec->vec128, vec->vec128, shuffle_xxxx );
-    mcol0 = spu_shuffle( tmp0, mat->col1.vec128, _VECTORMATH_SHUF_XAYB );
-    mcol1 = spu_shuffle( tmp0, mat->col1.vec128, _VECTORMATH_SHUF_ZBW0 );
-    mcol2 = spu_shuffle( tmp1, mat->col1.vec128, _VECTORMATH_SHUF_XCY0 );
-    yyyy = spu_shuffle( vec->vec128, vec->vec128, shuffle_yyyy );
-    res = spu_mul( mcol0, xxxx );
-    zzzz = spu_shuffle( vec->vec128, vec->vec128, shuffle_zzzz );
-    res = spu_madd( mcol1, yyyy, res );
-    res = spu_madd( mcol2, zzzz, res );
-    result->vec128 = res;
-}
-
-static inline void vmathV3CrossMatrix( VmathMatrix3 *result, const VmathVector3 *vec )
-{
-    vec_float4 neg, res0, res1, res2;
-    neg = negatef4( vec->vec128 );
-    res0 = spu_shuffle( vec->vec128, neg, _VECTORMATH_SHUF_0ZB0 );
-    res1 = spu_shuffle( vec->vec128, neg, _VECTORMATH_SHUF_C0X0 );
-    res2 = spu_shuffle( vec->vec128, neg, _VECTORMATH_SHUF_YA00 );
-    result->col0.vec128 = res0;
-    result->col1.vec128 = res1;
-    result->col2.vec128 = res2;
-}
-
-static inline void vmathV3CrossMatrixMul( VmathMatrix3 *result, const VmathVector3 *vec, const VmathMatrix3 *mat )
-{
-    VmathVector3 tmpV3_0, tmpV3_1, tmpV3_2;
-    vmathV3Cross( &tmpV3_0, vec, &mat->col0 );
-    vmathV3Cross( &tmpV3_1, vec, &mat->col1 );
-    vmathV3Cross( &tmpV3_2, vec, &mat->col2 );
-    vmathM3MakeFromCols( result, &tmpV3_0, &tmpV3_1, &tmpV3_2 );
-}
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_MAT_AOS_C_H
+#define _VECTORMATH_MAT_AOS_C_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*-----------------------------------------------------------------------------
+ * Constants
+ * For shuffles, words are labeled [x,y,z,w] [a,b,c,d] (presumably the words of the first and second spu_shuffle operand); each macro name lists the label chosen for each of the four result words, built from the matching _VECTORMATH_SHUF_<label> constant.
+ */
+#define _VECTORMATH_SHUF_XAYB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_B })
+#define _VECTORMATH_SHUF_ZCWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_D })
+#define _VECTORMATH_SHUF_ZBW0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_0 })
+#define _VECTORMATH_SHUF_XCY0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_0 })
+#define _VECTORMATH_SHUF_XYAB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B })
+#define _VECTORMATH_SHUF_ZWCD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_D })
+#define _VECTORMATH_SHUF_0ZB0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_0 })     /* a 0 in a name stands for _VECTORMATH_SHUF_0, defined outside this section (presumably yields a zero word - TODO confirm) */
+#define _VECTORMATH_SHUF_C0X0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_0 })
+#define _VECTORMATH_SHUF_YA00 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_0 })
+#define _VECTORMATH_SHUF_XAZC ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_C })
+#define _VECTORMATH_SHUF_YXWZ ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_Z })
+#define _VECTORMATH_SHUF_YBWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_D })
+#define _VECTORMATH_SHUF_XYCX ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_X })
+#define _VECTORMATH_SHUF_YCXY ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y })
+#define _VECTORMATH_SHUF_CXYC ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_C })
+#define _VECTORMATH_SHUF_ZAY0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_0 })
+#define _VECTORMATH_SHUF_BZX0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_0 })
+#define _VECTORMATH_SHUF_0ZYA ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_A })
+#define _VECTORMATH_SHUF_Z0XB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_B })
+#define _VECTORMATH_SHUF_YX0C ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_C })
+#define _VECTORMATH_SHUF_CZD0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_D, _VECTORMATH_SHUF_0 })
+#define _VECTORMATH_SHUF_BBY0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_0 })
+#define _VECTORMATH_PI_OVER_2 1.570796327f /* pi/2 as a single-precision literal */
+
+/*-----------------------------------------------------------------------------
+ * Definitions
+ */
+static inline void vmathM3Copy( VmathMatrix3 *result, const VmathMatrix3 *mat )
+{ /* Copy mat into result, one column at a time through vmathV3Copy. */
+    vmathV3Copy( &result->col0, &mat->col0 );
+    vmathV3Copy( &result->col1, &mat->col1 );
+    vmathV3Copy( &result->col2, &mat->col2 );
+}
+
+static inline void vmathM3MakeFromScalar( VmathMatrix3 *result, float scalar )
+{ /* Initialize each of the three columns from the same scalar via vmathV3MakeFromScalar. */
+    vmathV3MakeFromScalar( &result->col0, scalar );
+    vmathV3MakeFromScalar( &result->col1, scalar );
+    vmathV3MakeFromScalar( &result->col2, scalar );
+}
+
+static inline void vmathM3MakeFromQ( VmathMatrix3 *result, const VmathQuat *unitQuat )
+{ /* Build the 3x3 rotation matrix for quaternion q = unitQuat. No normalization is done here; the parameter name implies the caller supplies a unit quaternion. Lane values below list words x,y,z only and assume the YZXW / ZXYW shuffles (defined outside this section) permute words as their names say. */
+    vec_float4 xyzw_2, wwww, yzxw, zxyw, yzxw_2, zxyw_2;
+    vec_float4 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
+    vec_uchar16 shuffle_wwww = (vec_uchar16)spu_splats((int)0x0c0d0e0f); /* byte indices 12..15 in every word: broadcast word w */
+    vec_uint4 select_x = (vec_uint4)spu_maskb(0xf000); /* select mask covering word x only */
+    vec_uint4 select_z = (vec_uint4)spu_maskb(0x00f0); /* select mask covering word z only */
+    xyzw_2 = spu_add( unitQuat->vec128, unitQuat->vec128 ); /* 2*q */
+    wwww = spu_shuffle( unitQuat->vec128, unitQuat->vec128, shuffle_wwww ); /* q.w in every word */
+    yzxw = spu_shuffle( unitQuat->vec128, unitQuat->vec128, _VECTORMATH_SHUF_YZXW );
+    zxyw = spu_shuffle( unitQuat->vec128, unitQuat->vec128, _VECTORMATH_SHUF_ZXYW );
+    yzxw_2 = spu_shuffle( xyzw_2, xyzw_2, _VECTORMATH_SHUF_YZXW );
+    zxyw_2 = spu_shuffle( xyzw_2, xyzw_2, _VECTORMATH_SHUF_ZXYW );
+    tmp0 = spu_mul( yzxw_2, wwww ); /* ( 2yw, 2zw, 2xw ) */
+    tmp1 = spu_nmsub( yzxw, yzxw_2, spu_splats(1.0f) ); /* ( 1-2yy, 1-2zz, 1-2xx ) */
+    tmp2 = spu_mul( yzxw, xyzw_2 ); /* ( 2xy, 2yz, 2zx ) */
+    tmp0 = spu_madd( zxyw, xyzw_2, tmp0 ); /* ( 2xz+2yw, 2xy+2zw, 2yz+2xw ) */
+    tmp1 = spu_nmsub( zxyw, zxyw_2, tmp1 ); /* ( 1-2yy-2zz, 1-2zz-2xx, 1-2xx-2yy ): the diagonal terms */
+    tmp2 = spu_nmsub( zxyw_2, wwww, tmp2 ); /* ( 2xy-2zw, 2yz-2xw, 2zx-2yw ) */
+    tmp3 = spu_sel( tmp0, tmp1, select_x ); /* word x from tmp1, the rest from tmp0 */
+    tmp4 = spu_sel( tmp1, tmp2, select_x ); /* word x from tmp2, the rest from tmp1 */
+    tmp5 = spu_sel( tmp2, tmp0, select_x ); /* word x from tmp0, the rest from tmp2 */
+    result->col0.vec128 = spu_sel( tmp3, tmp2, select_z ); /* ( 1-2yy-2zz, 2xy+2zw, 2zx-2yw ) */
+    result->col1.vec128 = spu_sel( tmp4, tmp0, select_z ); /* ( 2xy-2zw, 1-2zz-2xx, 2yz+2xw ) */
+    result->col2.vec128 = spu_sel( tmp5, tmp1, select_z ); /* ( 2xz+2yw, 2yz-2xw, 1-2xx-2yy ) */
+}
+
+static inline void vmathM3MakeFromCols( VmathMatrix3 *result, const VmathVector3 *_col0, const VmathVector3 *_col1, const VmathVector3 *_col2 )
+{ /* Assemble result from three column vectors, copied in order into col0, col1, col2. */
+    vmathV3Copy( &result->col0, _col0 );
+    vmathV3Copy( &result->col1, _col1 );
+    vmathV3Copy( &result->col2, _col2 );
+}
+
+static inline void vmathM3SetCol0( VmathMatrix3 *result, const VmathVector3 *_col0 )
+{ /* Replace column 0 of result with _col0; the other columns are left untouched. */
+    vmathV3Copy( &result->col0, _col0 );
+}
+
+static inline void vmathM3SetCol1( VmathMatrix3 *result, const VmathVector3 *_col1 )
+{ /* Replace column 1 of result with _col1; the other columns are left untouched. */
+    vmathV3Copy( &result->col1, _col1 );
+}
+
+static inline void vmathM3SetCol2( VmathMatrix3 *result, const VmathVector3 *_col2 )
+{ /* Replace column 2 of result with _col2; the other columns are left untouched. */
+    vmathV3Copy( &result->col2, _col2 );
+}
+
+static inline void vmathM3SetCol( VmathMatrix3 *result, int col, const VmathVector3 *vec )
+{ /* Overwrite the column numbered col with vec. col is used as given, with no range check. */
+    vmathV3Copy( (&result->col0 + col), vec ); /* steps col VmathVector3 objects past col0 - presumably relies on col0, col1, col2 being adjacent members; TODO confirm against the struct declaration */
+}
+
+static inline void vmathM3SetRow( VmathMatrix3 *result, int row, const VmathVector3 *vec )
+{ /* Overwrite one row: element number row of each column receives vec element 0, 1 and 2 respectively. */
+    vmathV3SetElem( &result->col0, row, vmathV3GetElem( vec, 0 ) );
+    vmathV3SetElem( &result->col1, row, vmathV3GetElem( vec, 1 ) );
+    vmathV3SetElem( &result->col2, row, vmathV3GetElem( vec, 2 ) );
+}
+
+static inline void vmathM3SetElem( VmathMatrix3 *result, int col, int row, float val )
+{ /* Set a single element by read-modify-write of its whole column. */
+    VmathVector3 tmpV3_0;
+    vmathM3GetCol( &tmpV3_0, result, col ); /* fetch the current column into a temporary */
+    vmathV3SetElem( &tmpV3_0, row, val ); /* patch the one element */
+    vmathM3SetCol( result, col, &tmpV3_0 ); /* store the column back */
+}
+
+static inline float vmathM3GetElem( const VmathMatrix3 *mat, int col, int row )
+{ /* Return the element at ( col, row ) by copying the column out and reading element row from the copy. */
+    VmathVector3 tmpV3_0;
+    vmathM3GetCol( &tmpV3_0, mat, col );
+    return vmathV3GetElem( &tmpV3_0, row );
+}
+
+static inline void vmathM3GetCol0( VmathVector3 *result, const VmathMatrix3 *mat )
+{ /* Copy column 0 of mat into result. */
+    vmathV3Copy( result, &mat->col0 );
+}
+
+static inline void vmathM3GetCol1( VmathVector3 *result, const VmathMatrix3 *mat )
+{ /* Copy column 1 of mat into result. */
+    vmathV3Copy( result, &mat->col1 );
+}
+
+static inline void vmathM3GetCol2( VmathVector3 *result, const VmathMatrix3 *mat )
+{ /* Copy column 2 of mat into result. */
+    vmathV3Copy( result, &mat->col2 );
+}
+
+static inline void vmathM3GetCol( VmathVector3 *result, const VmathMatrix3 *mat, int col )
+{ /* Copy the column numbered col into result. col is used as given, with no range check. */
+    vmathV3Copy( result, (&mat->col0 + col) ); /* steps col VmathVector3 objects past col0 - presumably relies on col0, col1, col2 being adjacent members; TODO confirm against the struct declaration */
+}
+
+static inline void vmathM3GetRow( VmathVector3 *result, const VmathMatrix3 *mat, int row )
+{ /* Gather one row: element number row of col0, col1 and col2 becomes the three elements of result. */
+    vmathV3MakeFromElems( result, vmathV3GetElem( &mat->col0, row ), vmathV3GetElem( &mat->col1, row ), vmathV3GetElem( &mat->col2, row ) );
+}
+
+static inline void vmathM3Transpose( VmathMatrix3 *result, const VmathMatrix3 *mat )
+{ /* Write the transpose of mat to result with word shuffles. All reads of mat finish before result is stored. Lane lists below use c0/c1/c2 for the columns of mat and assume spu_shuffle's first operand supplies words x,y,z,w and its second a,b,c,d. */
+    vec_float4 tmp0, tmp1, res0, res1, res2;
+    tmp0 = spu_shuffle( mat->col0.vec128, mat->col2.vec128, _VECTORMATH_SHUF_XAYB ); /* ( c0.x, c2.x, c0.y, c2.y ) */
+    tmp1 = spu_shuffle( mat->col0.vec128, mat->col2.vec128, _VECTORMATH_SHUF_ZCWD ); /* ( c0.z, c2.z, c0.w, c2.w ) */
+    res0 = spu_shuffle( tmp0, mat->col1.vec128, _VECTORMATH_SHUF_XAYB ); /* ( c0.x, c1.x, c2.x, c1.y ): row 0 in the first three words, fourth word is a leftover */
+    res1 = spu_shuffle( tmp0, mat->col1.vec128, _VECTORMATH_SHUF_ZBW0 ); /* ( c0.y, c1.y, c2.y, 0-slot ): row 1 */
+    res2 = spu_shuffle( tmp1, mat->col1.vec128, _VECTORMATH_SHUF_XCY0 ); /* ( c0.z, c1.z, c2.z, 0-slot ): row 2 */
+    result->col0.vec128 = res0;
+    result->col1.vec128 = res1;
+    result->col2.vec128 = res2;
+}
+
+static inline void vmathM3Inverse( VmathMatrix3 *result, const VmathMatrix3 *mat )
+{ /* Invert mat: the rows of the inverse are the pairwise column cross products, each scaled by 1/determinant. The determinant is not tested against zero. All reads of mat finish before result is stored. */
+    vec_float4 tmp0, tmp1, tmp2, tmp3, tmp4, dot, invdet, inv0, inv1, inv2;
+    tmp2 = _vmathVfCross( mat->col0.vec128, mat->col1.vec128 ); /* col0 x col1, going by the helper's name */
+    tmp0 = _vmathVfCross( mat->col1.vec128, mat->col2.vec128 ); /* col1 x col2 */
+    tmp1 = _vmathVfCross( mat->col2.vec128, mat->col0.vec128 ); /* col2 x col0 */
+    dot = _vmathVfDot3( tmp2, mat->col2.vec128 ); /* ( col0 x col1 ) . col2, i.e. the determinant */
+    dot = spu_shuffle( dot, dot, (vec_uchar16)spu_splats(0x00010203) ); /* byte indices 0..3 in every word: broadcast word 0 of dot */
+    invdet = recipf4( dot ); /* presumably the per-word reciprocal; its accuracy and zero handling are set by recipf4, which is defined outside this section */
+    tmp3 = spu_shuffle( tmp0, tmp2, _VECTORMATH_SHUF_XAYB ); /* this and the next four shuffles transpose ( tmp0, tmp1, tmp2 ), the same pattern vmathM3Transpose applies to ( col0, col1, col2 ) */
+    tmp4 = spu_shuffle( tmp0, tmp2, _VECTORMATH_SHUF_ZCWD );
+    inv0 = spu_shuffle( tmp3, tmp1, _VECTORMATH_SHUF_XAYB );
+    inv1 = spu_shuffle( tmp3, tmp1, _VECTORMATH_SHUF_ZBW0 );
+    inv2 = spu_shuffle( tmp4, tmp1, _VECTORMATH_SHUF_XCY0 );
+    inv0 = spu_mul( inv0, invdet ); /* scale every column by 1/determinant */
+    inv1 = spu_mul( inv1, invdet );
+    inv2 = spu_mul( inv2, invdet );
+    result->col0.vec128 = inv0;
+    result->col1.vec128 = inv1;
+    result->col2.vec128 = inv2;
+}
+
+static inline float vmathM3Determinant( const VmathMatrix3 *mat )
+{ /* Return the determinant of mat, computed as col2 . ( col0 x col1 ). */
+    VmathVector3 tmpV3_0;
+    vmathV3Cross( &tmpV3_0, &mat->col0, &mat->col1 ); /* tmpV3_0 = col0 x col1 */
+    return vmathV3Dot( &mat->col2, &tmpV3_0 );
+}
+
+static inline void vmathM3Add( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 )
+{ /* result = mat0 + mat1, applied column by column through vmathV3Add. */
+    vmathV3Add( &result->col0, &mat0->col0, &mat1->col0 );
+    vmathV3Add( &result->col1, &mat0->col1, &mat1->col1 );
+    vmathV3Add( &result->col2, &mat0->col2, &mat1->col2 );
+}
+
+static inline void vmathM3Sub( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 )
+{ /* result = mat0 - mat1, applied column by column through vmathV3Sub. */
+    vmathV3Sub( &result->col0, &mat0->col0, &mat1->col0 );
+    vmathV3Sub( &result->col1, &mat0->col1, &mat1->col1 );
+    vmathV3Sub( &result->col2, &mat0->col2, &mat1->col2 );
+}
+
+static inline void vmathM3Neg( VmathMatrix3 *result, const VmathMatrix3 *mat )
+{ /* result = -mat, applied column by column through vmathV3Neg. */
+    vmathV3Neg( &result->col0, &mat->col0 );
+    vmathV3Neg( &result->col1, &mat->col1 );
+    vmathV3Neg( &result->col2, &mat->col2 );
+}
+
+static inline void vmathM3AbsPerElem( VmathMatrix3 *result, const VmathMatrix3 *mat )
+{ /* Apply vmathV3AbsPerElem to each column of mat and store the outcome in the matching column of result. */
+    vmathV3AbsPerElem( &result->col0, &mat->col0 );
+    vmathV3AbsPerElem( &result->col1, &mat->col1 );
+    vmathV3AbsPerElem( &result->col2, &mat->col2 );
+}
+
+static inline void vmathM3ScalarMul( VmathMatrix3 *result, const VmathMatrix3 *mat, float scalar )
+{ /* result = mat * scalar, applied column by column through vmathV3ScalarMul. */
+    vmathV3ScalarMul( &result->col0, &mat->col0, scalar );
+    vmathV3ScalarMul( &result->col1, &mat->col1, scalar );
+    vmathV3ScalarMul( &result->col2, &mat->col2, scalar );
+}
+
+static inline void vmathM3MulV3( VmathVector3 *result, const VmathMatrix3 *mat, const VmathVector3 *vec )
+{ /* Matrix times column vector: result = col0*vec.x + col1*vec.y + col2*vec.z. The sum is formed in a local and stored last. */
+    vec_float4 res;
+    vec_float4 xxxx, yyyy, zzzz;
+    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203); /* byte indices 0..3 in every word: broadcast word x */
+    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607); /* byte indices 4..7 in every word: broadcast word y */
+    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b); /* byte indices 8..11 in every word: broadcast word z */
+    xxxx = spu_shuffle( vec->vec128, vec->vec128, shuffle_xxxx );
+    yyyy = spu_shuffle( vec->vec128, vec->vec128, shuffle_yyyy );
+    zzzz = spu_shuffle( vec->vec128, vec->vec128, shuffle_zzzz );
+    res = spu_mul( mat->col0.vec128, xxxx ); /* col0 * x */
+    res = spu_madd( mat->col1.vec128, yyyy, res ); /* + col1 * y */
+    res = spu_madd( mat->col2.vec128, zzzz, res ); /* + col2 * z */
+    result->vec128 = res;
+}
+
+static inline void vmathM3Mul( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 )
+{ /* result = mat0 * mat1: each output column is mat0 applied to the matching column of mat1. */
+    VmathMatrix3 tmpResult; /* the product is built here so result is written only after both inputs have been fully read */
+    vmathM3MulV3( &tmpResult.col0, mat0, &mat1->col0 );
+    vmathM3MulV3( &tmpResult.col1, mat0, &mat1->col1 );
+    vmathM3MulV3( &tmpResult.col2, mat0, &mat1->col2 );
+    vmathM3Copy( result, &tmpResult );
+}
+
+static inline void vmathM3MulPerElem( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 )
+{ /* Element-wise product (not a matrix product): vmathV3MulPerElem is applied to each pair of matching columns. */
+    vmathV3MulPerElem( &result->col0, &mat0->col0, &mat1->col0 );
+    vmathV3MulPerElem( &result->col1, &mat0->col1, &mat1->col1 );
+    vmathV3MulPerElem( &result->col2, &mat0->col2, &mat1->col2 );
+}
+
+static inline void vmathM3MakeIdentity( VmathMatrix3 *result )
+{ /* Set result to the identity: the columns are the X, Y and Z axis vectors produced by the vmathV3Make*Axis helpers. */
+    vmathV3MakeXAxis( &result->col0 );
+    vmathV3MakeYAxis( &result->col1 );
+    vmathV3MakeZAxis( &result->col2 );
+}
+
+static inline void vmathM3MakeRotationX( VmathMatrix3 *result, float radians )
+{ /* Build a rotation about the X axis: col0 = X axis, col1 = ( 0, c, s ), col2 = ( 0, -s, c ), with s and c being the outputs of sincosf4 for radians (presumably sine and cosine). */
+    vec_float4 s, c, res1, res2;
+    vec_uint4 select_y, select_z;
+    vec_float4 zero;
+    select_y = (vec_uint4)spu_maskb(0x0f00); /* select mask covering word y only */
+    select_z = (vec_uint4)spu_maskb(0x00f0); /* select mask covering word z only */
+    zero = spu_splats(0.0f);
+    sincosf4( spu_splats(radians), &s, &c ); /* the angle is replicated to all four words, so every word of s and of c holds the same value */
+    res1 = spu_sel( zero, c, select_y ); /* ( 0, c, 0, 0 ) */
+    res1 = spu_sel( res1, s, select_z ); /* ( 0, c, s, 0 ) */
+    res2 = spu_sel( zero, negatef4(s), select_y ); /* ( 0, -s, 0, 0 ), assuming negatef4 negates each word */
+    res2 = spu_sel( res2, c, select_z ); /* ( 0, -s, c, 0 ) */
+    vmathV3MakeXAxis( &result->col0 );
+    result->col1.vec128 = res1;
+    result->col2.vec128 = res2;
+}
+
+/*
+ * Rotation about the Y axis by `radians`:
+ *   col0 = (cos, 0, -sin), col1 = Y axis, col2 = (sin, 0, cos).
+ * Lanes that are not selected below keep the value 0.
+ */
+static inline void vmathM3MakeRotationY( VmathMatrix3 *result, float radians )
+{
+    /* select masks covering the x word and the z word of a quadword */
+    vec_uint4 maskX = (vec_uint4)spu_maskb(0xf000);
+    vec_uint4 maskZ = (vec_uint4)spu_maskb(0x00f0);
+    vec_float4 zeros = spu_splats(0.0f);
+    vec_float4 sinA, cosA;
+    sincosf4( spu_splats(radians), &sinA, &cosA );
+    result->col0.vec128 = spu_sel( spu_sel( zeros, cosA, maskX ), negatef4(sinA), maskZ );
+    vmathV3MakeYAxis( &result->col1 );
+    result->col2.vec128 = spu_sel( spu_sel( zeros, sinA, maskX ), cosA, maskZ );
+}
+
+/*
+ * Rotation about the Z axis by `radians`:
+ *   col0 = (cos, sin, 0), col1 = (-sin, cos, 0), col2 = Z axis.
+ * Lanes that are not selected below keep the value 0.
+ */
+static inline void vmathM3MakeRotationZ( VmathMatrix3 *result, float radians )
+{
+    /* select masks covering the x word and the y word of a quadword */
+    vec_uint4 maskX = (vec_uint4)spu_maskb(0xf000);
+    vec_uint4 maskY = (vec_uint4)spu_maskb(0x0f00);
+    vec_float4 zeros = spu_splats(0.0f);
+    vec_float4 sinA, cosA;
+    sincosf4( spu_splats(radians), &sinA, &cosA );
+    result->col0.vec128 = spu_sel( spu_sel( zeros, cosA, maskX ), sinA, maskY );
+    result->col1.vec128 = spu_sel( spu_sel( zeros, negatef4(sinA), maskX ), cosA, maskY );
+    vmathV3MakeZAxis( &result->col2 );
+}
+
+/*
+ * Build a 3x3 rotation matrix from the three angles packed in radiansXYZ.
+ * NOTE(review): going by the name this is presumably the product Rz * Ry * Rx;
+ * the lane layouts come from _VECTORMATH_SHUF_* constants defined elsewhere,
+ * so confirm against those definitions.
+ */
+static inline void vmathM3MakeRotationZYX( VmathMatrix3 *result, const VmathVector3 *radiansXYZ )
+{
+    vec_float4 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
+    /* shuffle pattern that replicates word 0 of the first operand into every lane */
+    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
+    angles = radiansXYZ->vec128;
+    /* overwrite lane 3 (not one of the three angles) with 0 before taking sin/cos */
+    angles = spu_insert( 0.0f, angles, 3 );
+    sincosf4( angles, &s, &c );
+    negS = negatef4( s );
+    /* Z0/Z1 and Y0/Y1 combine sine and cosine lanes as dictated by the shuffle constants */
+    Z0 = spu_shuffle( s, c, _VECTORMATH_SHUF_CZD0 );
+    Z1 = spu_shuffle( c, negS, _VECTORMATH_SHUF_CZD0 );
+    Y0 = spu_shuffle( negS, c, _VECTORMATH_SHUF_BBY0 );
+    Y1 = spu_shuffle( c, s, _VECTORMATH_SHUF_BBY0 );
+    /* X0 = sine of the x angle in every lane, X1 = cosine of the x angle in every lane */
+    X0 = spu_shuffle( s, s, shuffle_xxxx );
+    X1 = spu_shuffle( c, c, shuffle_xxxx );
+    tmp = spu_mul( Z0, Y1 );
+    result->col0.vec128 = spu_mul( Z0, Y0 );
+    /* col1 = Z1*X1 + tmp*X0 */
+    result->col1.vec128 = spu_madd( Z1, X1, spu_mul( tmp, X0 ) );
+    /* col2 = tmp*X1 - Z1*X0 */
+    result->col2.vec128 = spu_nmsub( Z1, X0, spu_mul( tmp, X1 ) );
+}
+
+/*
+ * Build a 3x3 rotation of `radians` about the axis unitVec.
+ * Column i is  axis * axis[i] * (1 - cos)  plus a vector that holds cos in
+ * lane i and signed axis*sin terms in the remaining lanes; which terms go
+ * where is decided by the _VECTORMATH_SHUF_* constants (defined elsewhere).
+ * NOTE(review): the parameter name implies unitVec is expected to be
+ * normalized; this function does not normalize it.
+ */
+static inline void vmathM3MakeRotationAxis( VmathMatrix3 *result, float radians, const VmathVector3 *unitVec )
+{
+    vec_float4 axis, s, c, oneMinusC, axisS, negAxisS, xxxx, yyyy, zzzz, tmp0, tmp1, tmp2;
+    /* shuffle patterns that replicate word 0, 1 or 2 of the source into every lane */
+    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
+    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
+    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
+    axis = unitVec->vec128;
+    sincosf4( spu_splats( radians ), &s, &c );
+    xxxx = spu_shuffle( axis, axis, shuffle_xxxx );
+    yyyy = spu_shuffle( axis, axis, shuffle_yyyy );
+    zzzz = spu_shuffle( axis, axis, shuffle_zzzz );
+    oneMinusC = spu_sub( spu_splats(1.0f), c );
+    axisS = spu_mul( axis, s );
+    negAxisS = negatef4( axisS );
+    /* gather +axis*sin / -axis*sin terms for each column */
+    tmp0 = spu_shuffle( axisS, negAxisS, _VECTORMATH_SHUF_0ZB0 );
+    tmp1 = spu_shuffle( axisS, negAxisS, _VECTORMATH_SHUF_C0X0 );
+    tmp2 = spu_shuffle( axisS, negAxisS, _VECTORMATH_SHUF_YA00 );
+    /* drop cos into the diagonal lane: x of tmp0, y of tmp1, z of tmp2 */
+    tmp0 = spu_sel( tmp0, c, (vec_uint4)spu_maskb(0xf000) );
+    tmp1 = spu_sel( tmp1, c, (vec_uint4)spu_maskb(0x0f00) );
+    tmp2 = spu_sel( tmp2, c, (vec_uint4)spu_maskb(0x00f0) );
+    /* column i = (axis * axis[i]) * (1 - cos) + tmp_i */
+    result->col0.vec128 = spu_madd( spu_mul( axis, xxxx ), oneMinusC, tmp0 );
+    result->col1.vec128 = spu_madd( spu_mul( axis, yyyy ), oneMinusC, tmp1 );
+    result->col2.vec128 = spu_madd( spu_mul( axis, zzzz ), oneMinusC, tmp2 );
+}
+
+/*
+ * Rotation matrix from a quaternion; simply forwards to vmathM3MakeFromQ.
+ */
+static inline void vmathM3MakeRotationQ( VmathMatrix3 *result, const VmathQuat *unitQuat )
+{
+    const VmathQuat *rotation = unitQuat;
+    vmathM3MakeFromQ( result, rotation );
+}
+
+/*
+ * Scale matrix: scaleVec's x, y and z are placed on the diagonal
+ * (col0.x, col1.y, col2.z); every other lane is 0.
+ */
+static inline void vmathM3MakeScale( VmathMatrix3 *result, const VmathVector3 *scaleVec )
+{
+    /* select masks for the x, y and z words of a quadword */
+    vec_uint4 pickX = (vec_uint4)spu_maskb(0xf000);
+    vec_uint4 pickY = (vec_uint4)spu_maskb(0x0f00);
+    vec_uint4 pickZ = (vec_uint4)spu_maskb(0x00f0);
+    vec_float4 zeros = spu_splats(0.0f);
+    result->col0.vec128 = spu_sel( zeros, scaleVec->vec128, pickX );
+    result->col1.vec128 = spu_sel( zeros, scaleVec->vec128, pickY );
+    result->col2.vec128 = spu_sel( zeros, scaleVec->vec128, pickZ );
+}
+
+/*
+ * Scale the columns of mat: column 0 by scaleVec.x, column 1 by scaleVec.y,
+ * column 2 by scaleVec.z.
+ */
+static inline void vmathM3AppendScale( VmathMatrix3 *result, const VmathMatrix3 *mat, const VmathVector3 *scaleVec )
+{
+    float factor;
+    factor = vmathV3GetX( scaleVec );
+    vmathV3ScalarMul( &result->col0, &mat->col0, factor );
+    factor = vmathV3GetY( scaleVec );
+    vmathV3ScalarMul( &result->col1, &mat->col1, factor );
+    factor = vmathV3GetZ( scaleVec );
+    vmathV3ScalarMul( &result->col2, &mat->col2, factor );
+}
+
+/*
+ * Multiply every column of mat per element by scaleVec, i.e. scale the rows
+ * of mat by scaleVec's x, y and z.
+ */
+static inline void vmathM3PrependScale( VmathMatrix3 *result, const VmathVector3 *scaleVec, const VmathMatrix3 *mat )
+{
+    const VmathMatrix3 *src = mat;
+    vmathV3MulPerElem( &result->col0, &src->col0, scaleVec );
+    vmathV3MulPerElem( &result->col1, &src->col1, scaleVec );
+    vmathV3MulPerElem( &result->col2, &src->col2, scaleVec );
+}
+
+/*
+ * Column-wise select between mat0 and mat1: select1 is passed unchanged to
+ * vmathV3Select for each of the three columns.
+ */
+static inline void vmathM3Select( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1, unsigned int select1 )
+{
+    const VmathMatrix3 *first = mat0;
+    const VmathMatrix3 *second = mat1;
+    vmathV3Select( &result->col0, &first->col0, &second->col0, select1 );
+    vmathV3Select( &result->col1, &first->col1, &second->col1, select1 );
+    vmathV3Select( &result->col2, &first->col2, &second->col2, select1 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Debug helper: prints the matrix one row at a time (rows 0, 1, 2), each row
+ * fetched with vmathM3GetRow and printed with vmathV3Print.
+ */
+static inline void vmathM3Print( const VmathMatrix3 *mat )
+{
+    VmathVector3 rowVec;
+    int row;
+    for ( row = 0; row < 3; ++row ) {
+        vmathM3GetRow( &rowVec, mat, row );
+        vmathV3Print( &rowVec );
+    }
+}
+
+/*
+ * Debug helper: prints `name` followed by a colon and newline, then the
+ * matrix via vmathM3Print.
+ */
+static inline void vmathM3Prints( const VmathMatrix3 *mat, const char *name )
+{
+    const char *label = name;
+    printf("%s:\n", label);
+    vmathM3Print( mat );
+}
+
+#endif
+
+/*
+ * Copy all four columns of mat into result.
+ */
+static inline void vmathM4Copy( VmathMatrix4 *result, const VmathMatrix4 *mat )
+{
+    const VmathMatrix4 *src = mat;
+    vmathV4Copy( &result->col0, &src->col0 );
+    vmathV4Copy( &result->col1, &src->col1 );
+    vmathV4Copy( &result->col2, &src->col2 );
+    vmathV4Copy( &result->col3, &src->col3 );
+}
+
+/*
+ * Initialize every column of result from the same scalar via
+ * vmathV4MakeFromScalar.
+ */
+static inline void vmathM4MakeFromScalar( VmathMatrix4 *result, float scalar )
+{
+    const float fill = scalar;
+    vmathV4MakeFromScalar( &result->col0, fill );
+    vmathV4MakeFromScalar( &result->col1, fill );
+    vmathV4MakeFromScalar( &result->col2, fill );
+    vmathV4MakeFromScalar( &result->col3, fill );
+}
+
+/*
+ * Expand a 3x4 transform into a 4x4 matrix.  Columns 0-2 are built with a
+ * scalar of 0.0f and column 3 with 1.0f (the scalar is presumably the new
+ * w component -- see vmathV4MakeFromV3Scalar).
+ */
+static inline void vmathM4MakeFromT3( VmathMatrix4 *result, const VmathTransform3 *mat )
+{
+    const VmathTransform3 *tfrm = mat;
+    vmathV4MakeFromV3Scalar( &result->col0, &tfrm->col0, 0.0f );
+    vmathV4MakeFromV3Scalar( &result->col1, &tfrm->col1, 0.0f );
+    vmathV4MakeFromV3Scalar( &result->col2, &tfrm->col2, 0.0f );
+    vmathV4MakeFromV3Scalar( &result->col3, &tfrm->col3, 1.0f );
+}
+
+/*
+ * Assemble a 4x4 matrix from four column vectors, copied in order.
+ */
+static inline void vmathM4MakeFromCols( VmathMatrix4 *result, const VmathVector4 *_col0, const VmathVector4 *_col1, const VmathVector4 *_col2, const VmathVector4 *_col3 )
+{
+    VmathMatrix4 *out = result;
+    vmathV4Copy( &out->col0, _col0 );
+    vmathV4Copy( &out->col1, _col1 );
+    vmathV4Copy( &out->col2, _col2 );
+    vmathV4Copy( &out->col3, _col3 );
+}
+
+/*
+ * Build a 4x4 matrix from a 3x3 matrix and a translation vector.  The three
+ * matrix columns are extended with a scalar of 0.0f and translateVec with
+ * 1.0f (presumably the w component -- see vmathV4MakeFromV3Scalar).
+ */
+static inline void vmathM4MakeFromM3V3( VmathMatrix4 *result, const VmathMatrix3 *mat, const VmathVector3 *translateVec )
+{
+    const VmathMatrix3 *upper = mat;
+    vmathV4MakeFromV3Scalar( &result->col0, &upper->col0, 0.0f );
+    vmathV4MakeFromV3Scalar( &result->col1, &upper->col1, 0.0f );
+    vmathV4MakeFromV3Scalar( &result->col2, &upper->col2, 0.0f );
+    vmathV4MakeFromV3Scalar( &result->col3, translateVec, 1.0f );
+}
+
+/*
+ * Build a 4x4 matrix from a quaternion and a translation vector.  The
+ * quaternion is first converted to a 3x3 matrix with vmathM3MakeFromQ; its
+ * columns are extended with a scalar of 0.0f and translateVec with 1.0f.
+ */
+static inline void vmathM4MakeFromQV3( VmathMatrix4 *result, const VmathQuat *unitQuat, const VmathVector3 *translateVec )
+{
+    VmathMatrix3 rot;
+    vmathM3MakeFromQ( &rot, unitQuat );
+    vmathV4MakeFromV3Scalar( &result->col0, &rot.col0, 0.0f );
+    vmathV4MakeFromV3Scalar( &result->col1, &rot.col1, 0.0f );
+    vmathV4MakeFromV3Scalar( &result->col2, &rot.col2, 0.0f );
+    vmathV4MakeFromV3Scalar( &result->col3, translateVec, 1.0f );
+}
+
+/* Overwrite column 0 of result with _col0. */
+static inline void vmathM4SetCol0( VmathMatrix4 *result, const VmathVector4 *_col0 )
+{
+    VmathVector4 *dst = &result->col0;
+    vmathV4Copy( dst, _col0 );
+}
+
+/* Overwrite column 1 of result with _col1. */
+static inline void vmathM4SetCol1( VmathMatrix4 *result, const VmathVector4 *_col1 )
+{
+    VmathVector4 *dst = &result->col1;
+    vmathV4Copy( dst, _col1 );
+}
+
+/* Overwrite column 2 of result with _col2. */
+static inline void vmathM4SetCol2( VmathMatrix4 *result, const VmathVector4 *_col2 )
+{
+    VmathVector4 *dst = &result->col2;
+    vmathV4Copy( dst, _col2 );
+}
+
+/* Overwrite column 3 of result with _col3. */
+static inline void vmathM4SetCol3( VmathMatrix4 *result, const VmathVector4 *_col3 )
+{
+    VmathVector4 *dst = &result->col3;
+    vmathV4Copy( dst, _col3 );
+}
+
+/*
+ * Overwrite the column selected by `col` with vec.  The column is located by
+ * pointer arithmetic from col0; `col` is not range-checked.
+ */
+static inline void vmathM4SetCol( VmathMatrix4 *result, int col, const VmathVector4 *vec )
+{
+    VmathVector4 *dst = &result->col0 + col;
+    vmathV4Copy( dst, vec );
+}
+
+/*
+ * Overwrite one row of the matrix: element `row` of column i receives
+ * element i of vec, for i = 0..3, in that order.
+ */
+static inline void vmathM4SetRow( VmathMatrix4 *result, int row, const VmathVector4 *vec )
+{
+    float elem;
+    elem = vmathV4GetElem( vec, 0 );
+    vmathV4SetElem( &result->col0, row, elem );
+    elem = vmathV4GetElem( vec, 1 );
+    vmathV4SetElem( &result->col1, row, elem );
+    elem = vmathV4GetElem( vec, 2 );
+    vmathV4SetElem( &result->col2, row, elem );
+    elem = vmathV4GetElem( vec, 3 );
+    vmathV4SetElem( &result->col3, row, elem );
+}
+
+/*
+ * Set a single element: fetch column `col`, replace its element `row` with
+ * val, and store the column back.
+ */
+static inline void vmathM4SetElem( VmathMatrix4 *result, int col, int row, float val )
+{
+    VmathVector4 column;
+    vmathM4GetCol( &column, result, col );
+    vmathV4SetElem( &column, row, val );
+    vmathM4SetCol( result, col, &column );
+}
+
+/*
+ * Return the element at (col, row): fetch column `col`, then read its
+ * element `row`.
+ */
+static inline float vmathM4GetElem( const VmathMatrix4 *mat, int col, int row )
+{
+    VmathVector4 column;
+    float value;
+    vmathM4GetCol( &column, mat, col );
+    value = vmathV4GetElem( &column, row );
+    return value;
+}
+
+/* Copy column 0 of mat into result. */
+static inline void vmathM4GetCol0( VmathVector4 *result, const VmathMatrix4 *mat )
+{
+    const VmathVector4 *src = &mat->col0;
+    vmathV4Copy( result, src );
+}
+
+/* Copy column 1 of mat into result. */
+static inline void vmathM4GetCol1( VmathVector4 *result, const VmathMatrix4 *mat )
+{
+    const VmathVector4 *src = &mat->col1;
+    vmathV4Copy( result, src );
+}
+
+/* Copy column 2 of mat into result. */
+static inline void vmathM4GetCol2( VmathVector4 *result, const VmathMatrix4 *mat )
+{
+    const VmathVector4 *src = &mat->col2;
+    vmathV4Copy( result, src );
+}
+
+/* Copy column 3 of mat into result. */
+static inline void vmathM4GetCol3( VmathVector4 *result, const VmathMatrix4 *mat )
+{
+    const VmathVector4 *src = &mat->col3;
+    vmathV4Copy( result, src );
+}
+
+/*
+ * Copy the column selected by `col` into result.  The column is located by
+ * pointer arithmetic from col0; `col` is not range-checked.
+ */
+static inline void vmathM4GetCol( VmathVector4 *result, const VmathMatrix4 *mat, int col )
+{
+    const VmathVector4 *src = &mat->col0 + col;
+    vmathV4Copy( result, src );
+}
+
+/*
+ * Gather one row of the matrix: element `row` of each of the four columns
+ * becomes, in order, an element of result.
+ */
+static inline void vmathM4GetRow( VmathVector4 *result, const VmathMatrix4 *mat, int row )
+{
+    float e0 = vmathV4GetElem( &mat->col0, row );
+    float e1 = vmathV4GetElem( &mat->col1, row );
+    float e2 = vmathV4GetElem( &mat->col2, row );
+    float e3 = vmathV4GetElem( &mat->col3, row );
+    vmathV4MakeFromElems( result, e0, e1, e2, e3 );
+}
+
+/*
+ * Transpose a 4x4 matrix with two rounds of shuffles.  All four source
+ * columns are consumed in the first round, before any column of result is
+ * stored.
+ */
+static inline void vmathM4Transpose( VmathMatrix4 *result, const VmathMatrix4 *mat )
+{
+    vec_float4 lo02, lo13, hi02, hi13;
+    /* round 1: pair column 0 with column 2, and column 1 with column 3 */
+    lo02 = spu_shuffle( mat->col0.vec128, mat->col2.vec128, _VECTORMATH_SHUF_XAYB );
+    lo13 = spu_shuffle( mat->col1.vec128, mat->col3.vec128, _VECTORMATH_SHUF_XAYB );
+    hi02 = spu_shuffle( mat->col0.vec128, mat->col2.vec128, _VECTORMATH_SHUF_ZCWD );
+    hi13 = spu_shuffle( mat->col1.vec128, mat->col3.vec128, _VECTORMATH_SHUF_ZCWD );
+    /* round 2: pair the intermediates to form the output columns */
+    result->col0.vec128 = spu_shuffle( lo02, lo13, _VECTORMATH_SHUF_XAYB );
+    result->col1.vec128 = spu_shuffle( lo02, lo13, _VECTORMATH_SHUF_ZCWD );
+    result->col2.vec128 = spu_shuffle( hi02, hi13, _VECTORMATH_SHUF_XAYB );
+    result->col3.vec128 = spu_shuffle( hi02, hi13, _VECTORMATH_SHUF_ZCWD );
+}
+
+/*
+ * General 4x4 matrix inverse.
+ * The four input columns are rearranged into a "pseudo transpose" (t0..t3),
+ * four cofactor vectors cof0..cof3 are accumulated from pairwise products of
+ * those vectors, the determinant is summed across t0 * cof0, and each
+ * cofactor vector is scaled by recipf4(det) to give a result column.
+ * All input columns are loaded into locals before result is written.
+ * NOTE: there is no singularity check; a zero determinant is handed to
+ * recipf4 as is.
+ */
+static inline void vmathM4Inverse( VmathMatrix4 *result, const VmathMatrix4 *mat )
+{
+    /* function implementation based on code from STIDC SDK:           */
+    /* --------------------------------------------------------------  */
+    /* PLEASE DO NOT MODIFY THIS SECTION                               */
+    /* This prolog section is automatically generated.                 */
+    /*                                                                 */
+    /* (C)Copyright                                                    */
+    /* Sony Computer Entertainment, Inc.,                              */
+    /* Toshiba Corporation,                                            */
+    /* International Business Machines Corporation,                    */
+    /* 2001,2002.                                                      */
+    /* S/T/I Confidential Information                                  */
+    /* --------------------------------------------------------------  */
+    vec_float4 in0, in1, in2, in3;
+    vec_float4 tmp0, tmp1, tmp2, tmp3;
+    vec_float4 cof0, cof1, cof2, cof3;
+    vec_float4 t0, t1, t2, t3;
+    vec_float4 t01, t02, t03, t12, t23;
+    vec_float4 t1r, t2r;
+    vec_float4 t01r, t02r, t03r, t12r, t23r;
+    vec_float4 t1r3, t1r3r;
+    vec_float4 det, det1, det2, det3, invdet;
+    in0 = mat->col0.vec128;
+    in1 = mat->col1.vec128;
+    in2 = mat->col2.vec128;
+    in3 = mat->col3.vec128;
+    /* Perform transform of the input matrix of the form:
+     *    A B C D
+     *    E F G H
+     *    I J K L
+     *    M N O P
+     *
+     * The pseudo transpose of the input matrix is trans:
+     *    A E I M
+     *    J N B F
+     *    C G K O
+     *    L P D H
+     */
+    tmp0 = spu_shuffle(in0, in1, _VECTORMATH_SHUF_XAZC);    /* A E C G */
+    tmp1 = spu_shuffle(in2, in3, _VECTORMATH_SHUF_XAZC);    /* I M K O */
+    tmp2 = spu_shuffle(in0, in1, _VECTORMATH_SHUF_YBWD);    /* B F D H */
+    tmp3 = spu_shuffle(in2, in3, _VECTORMATH_SHUF_YBWD);    /* J N L P */
+    t0 = spu_shuffle(tmp0, tmp1, _VECTORMATH_SHUF_XYAB);    /* A E I M */
+    t1 = spu_shuffle(tmp3, tmp2, _VECTORMATH_SHUF_XYAB);    /* J N B F */
+    t2 = spu_shuffle(tmp0, tmp1, _VECTORMATH_SHUF_ZWCD);    /* C G K O */
+    t3 = spu_shuffle(tmp3, tmp2, _VECTORMATH_SHUF_ZWCD);    /* L P D H */
+    /* Generate a cofactor matrix. The computed cofactors reside in
+     * cof0, cof1, cof2, cof3.
+     *
+     * Each group below multiplies two of the rearranged vectors, swaps
+     * adjacent lanes (YXWZ), and also uses that product rotated by 8 bytes
+     * (two lanes) so both halves of every 2x2 sub-determinant are available.
+     */
+    t23 = spu_mul(t2, t3);                        /* CL GP KD OH */
+    t23 = spu_shuffle(t23, t23, _VECTORMATH_SHUF_YXWZ);     /* GP CL OH KD */
+    cof0 = spu_mul(t1, t23);                      /* JGP NCL BOH FKD */
+    cof1 = spu_mul(t0, t23);                      /* AGP ECL IOH MKD */
+    t23r = spu_rlqwbyte(t23, 8);                  /* OH KD GP CL */
+    cof0 = spu_msub(t1, t23r, cof0);              /* JOH NKD BGP FCL  - cof0 */
+    cof1 = spu_msub(t0, t23r, cof1);              /* AOH EKD IGP MCL  - cof1 */
+    cof1 = spu_rlqwbyte(cof1, 8);                 /* IGP MCL AOH EKD - IOH MKD AGP ECL */
+
+    t12 = spu_mul(t1, t2);                        /* JC NG BK FO */
+    t12 = spu_shuffle(t12, t12, _VECTORMATH_SHUF_YXWZ);     /* NG JC FO BK */
+    cof0 = spu_madd(t3, t12, cof0);               /* LNG PJC DFO HBK + cof0 */
+    cof3 = spu_mul(t0, t12);                      /* ANG EJC IFO MBK */
+    t12r = spu_rlqwbyte(t12, 8);                  /* FO BK NG JC */
+    cof0 = spu_nmsub(t3, t12r, cof0);             /* cof0 - LFO PBK DNG HJC */
+    cof3 = spu_msub(t0, t12r, cof3);              /* AFO EBK ING MJC - cof3 */
+    cof3 = spu_rlqwbyte(cof3, 8);                 /* ING MJC AFO EBK - IFO MBK ANG EJC */
+    t1r = spu_rlqwbyte(t1, 8);                    /* B F J N */
+    t2r = spu_rlqwbyte(t2, 8);                    /* K O C G */
+    t1r3 = spu_mul(t1r, t3);                      /* BL FP JD NH */
+    t1r3 = spu_shuffle(t1r3, t1r3, _VECTORMATH_SHUF_YXWZ);  /* FP BL NH JD */
+    cof0 = spu_madd(t2r, t1r3, cof0);             /* KFP OBL CNH GJD + cof0 */
+    cof2 = spu_mul(t0, t1r3);                     /* AFP EBL INH MJD */
+    t1r3r = spu_rlqwbyte(t1r3, 8);                /* NH JD FP BL */
+    cof0 = spu_nmsub(t2r, t1r3r, cof0);           /* cof0 - KNH OJD CFP GBL */
+    cof2 = spu_msub(t0, t1r3r, cof2);             /* ANH EJD IFP MBL - cof2 */
+    cof2 = spu_rlqwbyte(cof2, 8);                 /* IFP MBL ANH EJD - INH MJD AFP EBL */
+    t01 = spu_mul(t0, t1);                                /* AJ EN IB MF */
+    t01 = spu_shuffle(t01, t01, _VECTORMATH_SHUF_YXWZ);     /* EN AJ MF IB */
+    cof2 = spu_madd(t3, t01, cof2);               /* LEN PAJ DMF HIB + cof2 */
+    cof3 = spu_msub(t2r, t01, cof3);              /* KEN OAJ CMF GIB - cof3 */
+    t01r = spu_rlqwbyte(t01, 8);                  /* MF IB EN AJ */
+    cof2 = spu_msub(t3, t01r, cof2);              /* LMF PIB DEN HAJ - cof2 */
+    cof3 = spu_nmsub(t2r, t01r, cof3);            /* cof3 - KMF OIB CEN GAJ */
+    t03 = spu_mul(t0, t3);                                /* AL EP ID MH */
+    t03 = spu_shuffle(t03, t03, _VECTORMATH_SHUF_YXWZ);     /* EP AL MH ID */
+    cof1 = spu_nmsub(t2r, t03, cof1);             /* cof1 - KEP OAL CMH GID */
+    cof2 = spu_madd(t1, t03, cof2);               /* JEP NAL BMH FID + cof2 */
+    t03r = spu_rlqwbyte(t03, 8);                  /* MH ID EP AL */
+    cof1 = spu_madd(t2r, t03r, cof1);             /* KMH OID CEP GAL + cof1 */
+    cof2 = spu_nmsub(t1, t03r, cof2);             /* cof2 - JMH NID BEP FAL */
+    t02 = spu_mul(t0, t2r);                       /* AK EO IC MG */
+    t02 = spu_shuffle(t02, t02, _VECTORMATH_SHUF_YXWZ);     /* EO AK MG IC */
+    cof1 = spu_madd(t3, t02, cof1);               /* LEO PAK DMG HIC + cof1 */
+    cof3 = spu_nmsub(t1, t02, cof3);              /* cof3 - JEO NAK BMG FIC */
+    t02r = spu_rlqwbyte(t02, 8);                  /* MG IC EO AK */
+    cof1 = spu_nmsub(t3, t02r, cof1);             /* cof1 - LMG PIC DEO HAK */
+    cof3 = spu_madd(t1, t02r, cof3);              /* JMG NIC BEO FAK + cof3 */
+    /* Compute the determinant of the matrix
+     *
+     * det = sum_across(t0 * cof0);
+     *
+     * We perform a sum across the entire vector so that
+     * we don't have to splat the result when multiplying the
+     * cofactors by the inverse of the determinant.
+     *
+     * The three rotations (by 4, 8 and 12 bytes) bring every lane of the
+     * product to every position, so after the adds all four lanes of det
+     * hold the same total.
+     */
+    det  = spu_mul(t0, cof0);
+    det1 = spu_rlqwbyte(det, 4);
+    det2 = spu_rlqwbyte(det, 8);
+    det3 = spu_rlqwbyte(det, 12);
+    det  = spu_add(det, det1);
+    det2 = spu_add(det2, det3);
+    det  = spu_add(det, det2);
+    /* Compute the reciprocal of the determinant.
+     */
+    invdet = recipf4(det);
+    /* Multiply the cofactors by the reciprocal of the determinant.
+     */
+    result->col0.vec128 = spu_mul(cof0, invdet);
+    result->col1.vec128 = spu_mul(cof1, invdet);
+    result->col2.vec128 = spu_mul(cof2, invdet);
+    result->col3.vec128 = spu_mul(cof3, invdet);
+}
+
+/*
+ * Inverse of a 4x4 matrix treated as an affine transform.  The xyz part of
+ * each of the four columns (as returned by vmathV4GetXYZ) is packed into a
+ * VmathTransform3, inverted with vmathT3Inverse, and expanded back to 4x4
+ * through vmathM4MakeFromT3.
+ */
+static inline void vmathM4AffineInverse( VmathMatrix4 *result, const VmathMatrix4 *mat )
+{
+    VmathTransform3 xform, xformInv;
+    VmathVector3 colXYZ;
+    vmathV4GetXYZ( &colXYZ, &mat->col0 );
+    vmathT3SetCol0( &xform, &colXYZ );
+    vmathV4GetXYZ( &colXYZ, &mat->col1 );
+    vmathT3SetCol1( &xform, &colXYZ );
+    vmathV4GetXYZ( &colXYZ, &mat->col2 );
+    vmathT3SetCol2( &xform, &colXYZ );
+    vmathV4GetXYZ( &colXYZ, &mat->col3 );
+    vmathT3SetCol3( &xform, &colXYZ );
+    vmathT3Inverse( &xformInv, &xform );
+    vmathM4MakeFromT3( result, &xformInv );
+}
+
+/*
+ * Inverse of a 4x4 matrix via vmathT3OrthoInverse.  The xyz part of each of
+ * the four columns (as returned by vmathV4GetXYZ) is packed into a
+ * VmathTransform3, inverted, and expanded back to 4x4 through
+ * vmathM4MakeFromT3.
+ * NOTE(review): per the name the upper 3x3 is presumably expected to be
+ * orthogonal -- confirm against vmathT3OrthoInverse.
+ */
+static inline void vmathM4OrthoInverse( VmathMatrix4 *result, const VmathMatrix4 *mat )
+{
+    VmathTransform3 xform, xformInv;
+    VmathVector3 colXYZ;
+    vmathV4GetXYZ( &colXYZ, &mat->col0 );
+    vmathT3SetCol0( &xform, &colXYZ );
+    vmathV4GetXYZ( &colXYZ, &mat->col1 );
+    vmathT3SetCol1( &xform, &colXYZ );
+    vmathV4GetXYZ( &colXYZ, &mat->col2 );
+    vmathT3SetCol2( &xform, &colXYZ );
+    vmathV4GetXYZ( &colXYZ, &mat->col3 );
+    vmathT3SetCol3( &xform, &colXYZ );
+    vmathT3OrthoInverse( &xformInv, &xform );
+    vmathM4MakeFromT3( result, &xformInv );
+}
+
+/*
+ * Determinant of a 4x4 matrix.
+ * The columns are rearranged into a "pseudo transpose" (t0..t3), a single
+ * cofactor vector cof0 is accumulated from pairwise products of t1, t2 and
+ * t3, and the return value is lane 0 of _vmathVfDot4(t0, cof0).
+ */
+static inline float vmathM4Determinant( const VmathMatrix4 *mat )
+{
+    /* function implementation based on code from STIDC SDK:           */
+    /* --------------------------------------------------------------  */
+    /* PLEASE DO NOT MODIFY THIS SECTION                               */
+    /* This prolog section is automatically generated.                 */
+    /*                                                                 */
+    /* (C)Copyright                                                    */
+    /* Sony Computer Entertainment, Inc.,                              */
+    /* Toshiba Corporation,                                            */
+    /* International Business Machines Corporation,                    */
+    /* 2001,2002.                                                      */
+    /* S/T/I Confidential Information                                  */
+    /* --------------------------------------------------------------  */
+    vec_float4 in0, in1, in2, in3;
+    vec_float4 tmp0, tmp1, tmp2, tmp3;
+    vec_float4 cof0;
+    vec_float4 t0, t1, t2, t3;
+    vec_float4 t12, t23;
+    vec_float4 t1r, t2r;
+    vec_float4 t12r, t23r;
+    vec_float4 t1r3, t1r3r;
+    in0 = mat->col0.vec128;
+    in1 = mat->col1.vec128;
+    in2 = mat->col2.vec128;
+    in3 = mat->col3.vec128;
+    /* Perform transform of the input matrix of the form:
+     *    A B C D
+     *    E F G H
+     *    I J K L
+     *    M N O P
+     *
+     * The pseudo transpose of the input matrix is trans:
+     *    A E I M
+     *    J N B F
+     *    C G K O
+     *    L P D H
+     */
+    tmp0 = spu_shuffle(in0, in1, _VECTORMATH_SHUF_XAZC);    /* A E C G */
+    tmp1 = spu_shuffle(in2, in3, _VECTORMATH_SHUF_XAZC);    /* I M K O */
+    tmp2 = spu_shuffle(in0, in1, _VECTORMATH_SHUF_YBWD);    /* B F D H */
+    tmp3 = spu_shuffle(in2, in3, _VECTORMATH_SHUF_YBWD);    /* J N L P */
+    t0 = spu_shuffle(tmp0, tmp1, _VECTORMATH_SHUF_XYAB);    /* A E I M */
+    t1 = spu_shuffle(tmp3, tmp2, _VECTORMATH_SHUF_XYAB);    /* J N B F */
+    t2 = spu_shuffle(tmp0, tmp1, _VECTORMATH_SHUF_ZWCD);    /* C G K O */
+    t3 = spu_shuffle(tmp3, tmp2, _VECTORMATH_SHUF_ZWCD);    /* L P D H */
+    /* Generate the first cofactor vector. Only cof0 is computed here;
+     * it is the only one the determinant needs.
+     */
+    t23 = spu_mul(t2, t3);                        /* CL GP KD OH */
+    t23 = spu_shuffle(t23, t23, _VECTORMATH_SHUF_YXWZ);     /* GP CL OH KD */
+    cof0 = spu_mul(t1, t23);                      /* JGP NCL BOH FKD */
+    t23r = spu_rlqwbyte(t23, 8);                  /* OH KD GP CL */
+    cof0 = spu_msub(t1, t23r, cof0);              /* JOH NKD BGP FCL  - cof0 */
+
+    t12 = spu_mul(t1, t2);                        /* JC NG BK FO */
+    t12 = spu_shuffle(t12, t12, _VECTORMATH_SHUF_YXWZ);     /* NG JC FO BK */
+    cof0 = spu_madd(t3, t12, cof0);               /* LNG PJC DFO HBK + cof0 */
+    t12r = spu_rlqwbyte(t12, 8);                  /* FO BK NG JC */
+    cof0 = spu_nmsub(t3, t12r, cof0);             /* cof0 - LFO PBK DNG HJC */
+    t1r = spu_rlqwbyte(t1, 8);                    /* B F J N */
+    t2r = spu_rlqwbyte(t2, 8);                    /* K O C G */
+    t1r3 = spu_mul(t1r, t3);                      /* BL FP JD NH */
+    t1r3 = spu_shuffle(t1r3, t1r3, _VECTORMATH_SHUF_YXWZ);  /* FP BL NH JD */
+    cof0 = spu_madd(t2r, t1r3, cof0);             /* KFP OBL CNH GJD + cof0 */
+    t1r3r = spu_rlqwbyte(t1r3, 8);                /* NH JD FP BL */
+    cof0 = spu_nmsub(t2r, t1r3r, cof0);           /* cof0 - KNH OJD CFP GBL */
+    /* determinant = dot(t0, cof0); take lane 0 of the helper's result */
+    return spu_extract( _vmathVfDot4(t0,cof0), 0 );
+}
+
+/*
+ * Column-wise sum of two 4x4 matrices (vmathV4Add on each column pair).
+ */
+static inline void vmathM4Add( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 )
+{
+    const VmathMatrix4 *lhs = mat0;
+    const VmathMatrix4 *rhs = mat1;
+    vmathV4Add( &result->col0, &lhs->col0, &rhs->col0 );
+    vmathV4Add( &result->col1, &lhs->col1, &rhs->col1 );
+    vmathV4Add( &result->col2, &lhs->col2, &rhs->col2 );
+    vmathV4Add( &result->col3, &lhs->col3, &rhs->col3 );
+}
+
+/*
+ * Column-wise difference mat0 - mat1 of two 4x4 matrices (vmathV4Sub on each
+ * column pair).
+ */
+static inline void vmathM4Sub( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 )
+{
+    const VmathMatrix4 *lhs = mat0;
+    const VmathMatrix4 *rhs = mat1;
+    vmathV4Sub( &result->col0, &lhs->col0, &rhs->col0 );
+    vmathV4Sub( &result->col1, &lhs->col1, &rhs->col1 );
+    vmathV4Sub( &result->col2, &lhs->col2, &rhs->col2 );
+    vmathV4Sub( &result->col3, &lhs->col3, &rhs->col3 );
+}
+
+/*
+ * Negate a 4x4 matrix column by column (vmathV4Neg on each column).
+ */
+static inline void vmathM4Neg( VmathMatrix4 *result, const VmathMatrix4 *mat )
+{
+    const VmathMatrix4 *src = mat;
+    vmathV4Neg( &result->col0, &src->col0 );
+    vmathV4Neg( &result->col1, &src->col1 );
+    vmathV4Neg( &result->col2, &src->col2 );
+    vmathV4Neg( &result->col3, &src->col3 );
+}
+
+/*
+ * Per-element absolute value of a 4x4 matrix (vmathV4AbsPerElem on each
+ * column).
+ */
+static inline void vmathM4AbsPerElem( VmathMatrix4 *result, const VmathMatrix4 *mat )
+{
+    const VmathMatrix4 *src = mat;
+    vmathV4AbsPerElem( &result->col0, &src->col0 );
+    vmathV4AbsPerElem( &result->col1, &src->col1 );
+    vmathV4AbsPerElem( &result->col2, &src->col2 );
+    vmathV4AbsPerElem( &result->col3, &src->col3 );
+}
+
+/*
+ * Multiply every column of a 4x4 matrix by the same scalar (vmathV4ScalarMul
+ * on each column).
+ */
+static inline void vmathM4ScalarMul( VmathMatrix4 *result, const VmathMatrix4 *mat, float scalar )
+{
+    const VmathMatrix4 *src = mat;
+    const float factor = scalar;
+    vmathV4ScalarMul( &result->col0, &src->col0, factor );
+    vmathV4ScalarMul( &result->col1, &src->col1, factor );
+    vmathV4ScalarMul( &result->col2, &src->col2, factor );
+    vmathV4ScalarMul( &result->col3, &src->col3, factor );
+}
+
+/*
+ * Multiply a 4x4 matrix by a 4-D vector:
+ *   result = (col0 * x + col2 * z) + (col1 * y + col3 * w)
+ * The two partial sums are accumulated independently and added at the end.
+ * Every input is read before result is stored.
+ */
+static inline void vmathM4MulV4( VmathVector4 *result, const VmathMatrix4 *mat, const VmathVector4 *vec )
+{
+    /* shuffle patterns that replicate word 0, 1, 2 or 3 of the source into every lane */
+    vec_uchar16 splatX = (vec_uchar16)spu_splats((int)0x00010203);
+    vec_uchar16 splatY = (vec_uchar16)spu_splats((int)0x04050607);
+    vec_uchar16 splatZ = (vec_uchar16)spu_splats((int)0x08090a0b);
+    vec_uchar16 splatW = (vec_uchar16)spu_splats((int)0x0c0d0e0f);
+    vec_float4 v = vec->vec128;
+    vec_float4 sumXZ, sumYW;
+    sumXZ = spu_mul( mat->col0.vec128, spu_shuffle( v, v, splatX ) );
+    sumYW = spu_mul( mat->col1.vec128, spu_shuffle( v, v, splatY ) );
+    sumXZ = spu_madd( mat->col2.vec128, spu_shuffle( v, v, splatZ ), sumXZ );
+    sumYW = spu_madd( mat->col3.vec128, spu_shuffle( v, v, splatW ), sumYW );
+    result->vec128 = spu_add( sumXZ, sumYW );
+}
+
+/*
+ * Multiply a 4x4 matrix by a 3-D vector:
+ *   result = col0 * x + col1 * y + col2 * z
+ * Column 3 of the matrix does not contribute.  Every input is read before
+ * result is stored.
+ */
+static inline void vmathM4MulV3( VmathVector4 *result, const VmathMatrix4 *mat, const VmathVector3 *vec )
+{
+    /* shuffle patterns that replicate word 0, 1 or 2 of the source into every lane */
+    vec_uchar16 splatX = (vec_uchar16)spu_splats((int)0x00010203);
+    vec_uchar16 splatY = (vec_uchar16)spu_splats((int)0x04050607);
+    vec_uchar16 splatZ = (vec_uchar16)spu_splats((int)0x08090a0b);
+    vec_float4 v = vec->vec128;
+    vec_float4 acc;
+    acc = spu_mul( mat->col0.vec128, spu_shuffle( v, v, splatX ) );
+    acc = spu_madd( mat->col1.vec128, spu_shuffle( v, v, splatY ), acc );
+    acc = spu_madd( mat->col2.vec128, spu_shuffle( v, v, splatZ ), acc );
+    result->vec128 = acc;
+}
+
+/*
+ * Multiply a 4x4 matrix by a 3-D point:
+ *   result = (col0 * x + col2 * z) + (col3 + col1 * y)
+ * Column 3 is added unscaled.  Every input is read before result is stored.
+ */
+static inline void vmathM4MulP3( VmathVector4 *result, const VmathMatrix4 *mat, const VmathPoint3 *pnt )
+{
+    /* shuffle patterns that replicate word 0, 1 or 2 of the source into every lane */
+    vec_uchar16 splatX = (vec_uchar16)spu_splats((int)0x00010203);
+    vec_uchar16 splatY = (vec_uchar16)spu_splats((int)0x04050607);
+    vec_uchar16 splatZ = (vec_uchar16)spu_splats((int)0x08090a0b);
+    vec_float4 p = pnt->vec128;
+    vec_float4 sumXZ, sumYT;
+    sumXZ = spu_mul( mat->col0.vec128, spu_shuffle( p, p, splatX ) );
+    sumYT = spu_mul( mat->col1.vec128, spu_shuffle( p, p, splatY ) );
+    sumXZ = spu_madd( mat->col2.vec128, spu_shuffle( p, p, splatZ ), sumXZ );
+    sumYT = spu_add( mat->col3.vec128, sumYT );
+    result->vec128 = spu_add( sumXZ, sumYT );
+}
+
+/*
+ * Matrix product result = mat0 * mat1, formed one column at a time with
+ * vmathM4MulV4.  The product is assembled in a local and copied out last.
+ */
+static inline void vmathM4Mul( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 )
+{
+    VmathMatrix4 product;
+    const VmathMatrix4 *lhs = mat0;
+    vmathM4MulV4( &product.col0, lhs, &mat1->col0 );
+    vmathM4MulV4( &product.col1, lhs, &mat1->col1 );
+    vmathM4MulV4( &product.col2, lhs, &mat1->col2 );
+    vmathM4MulV4( &product.col3, lhs, &mat1->col3 );
+    vmathM4Copy( result, &product );
+}
+
+/*
+ * Product of a 4x4 matrix and a 3x4 transform.  Columns 0-2 of tfrm1 are
+ * multiplied as vectors (vmathM4MulV3); column 3 is converted to a point and
+ * multiplied with vmathM4MulP3.  The product is assembled in a local and
+ * copied out last.
+ */
+static inline void vmathM4MulT3( VmathMatrix4 *result, const VmathMatrix4 *mat, const VmathTransform3 *tfrm1 )
+{
+    VmathMatrix4 product;
+    VmathPoint3 origin;
+    vmathM4MulV3( &product.col0, mat, &tfrm1->col0 );
+    vmathM4MulV3( &product.col1, mat, &tfrm1->col1 );
+    vmathM4MulV3( &product.col2, mat, &tfrm1->col2 );
+    vmathP3MakeFromV3( &origin, &tfrm1->col3 );
+    vmathM4MulP3( &product.col3, mat, &origin );
+    vmathM4Copy( result, &product );
+}
+
+/*
+ * Per-element product of two 4x4 matrices: each result column is
+ * vmathV4MulPerElem applied to the matching columns of mat0 and mat1.
+ */
+static inline void vmathM4MulPerElem( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 )
+{
+    const VmathMatrix4 *lhs = mat0;
+    const VmathMatrix4 *rhs = mat1;
+    vmathV4MulPerElem( &result->col0, &lhs->col0, &rhs->col0 );
+    vmathV4MulPerElem( &result->col1, &lhs->col1, &rhs->col1 );
+    vmathV4MulPerElem( &result->col2, &lhs->col2, &rhs->col2 );
+    vmathV4MulPerElem( &result->col3, &lhs->col3, &rhs->col3 );
+}
+
+/*
+ * Fill result with the X, Y, Z and W axis vectors as columns 0, 1, 2 and 3.
+ */
+static inline void vmathM4MakeIdentity( VmathMatrix4 *result )
+{
+    VmathVector4 *xCol = &result->col0;
+    VmathVector4 *yCol = &result->col1;
+    VmathVector4 *zCol = &result->col2;
+    VmathVector4 *wCol = &result->col3;
+    vmathV4MakeXAxis( xCol );
+    vmathV4MakeYAxis( yCol );
+    vmathV4MakeZAxis( zCol );
+    vmathV4MakeWAxis( wCol );
+}
+
+/*
+ * Replace the xyz part of columns 0-2 with the columns of mat3 (via
+ * vmathV4SetXYZ).  Column 3 is not touched.
+ */
+static inline void vmathM4SetUpper3x3( VmathMatrix4 *result, const VmathMatrix3 *mat3 )
+{
+    const VmathMatrix3 *upper = mat3;
+    vmathV4SetXYZ( &result->col0, &upper->col0 );
+    vmathV4SetXYZ( &result->col1, &upper->col1 );
+    vmathV4SetXYZ( &result->col2, &upper->col2 );
+}
+
+/*
+ * Extract the xyz part of columns 0-2 of mat into a 3x3 matrix (via
+ * vmathV4GetXYZ).
+ */
+static inline void vmathM4GetUpper3x3( VmathMatrix3 *result, const VmathMatrix4 *mat )
+{
+    const VmathMatrix4 *src = mat;
+    vmathV4GetXYZ( &result->col0, &src->col0 );
+    vmathV4GetXYZ( &result->col1, &src->col1 );
+    vmathV4GetXYZ( &result->col2, &src->col2 );
+}
+
+/* Replace the xyz part of column 3 with translateVec (via vmathV4SetXYZ). */
+static inline void vmathM4SetTranslation( VmathMatrix4 *result, const VmathVector3 *translateVec )
+{
+    VmathVector4 *lastCol = &result->col3;
+    vmathV4SetXYZ( lastCol, translateVec );
+}
+
+/* Extract the xyz part of column 3 of mat (via vmathV4GetXYZ). */
+static inline void vmathM4GetTranslation( VmathVector3 *result, const VmathMatrix4 *mat )
+{
+    const VmathVector4 *lastCol = &mat->col3;
+    vmathV4GetXYZ( result, lastCol );
+}
+
+/*
+ * 4x4 rotation about the X axis by `radians`:
+ *   col0 = X axis, col1 = (0, cos, sin, 0), col2 = (0, -sin, cos, 0),
+ *   col3 = W axis.
+ * Lanes that are not selected below keep the value 0.
+ */
+static inline void vmathM4MakeRotationX( VmathMatrix4 *result, float radians )
+{
+    /* select masks covering the y word and the z word of a quadword */
+    vec_uint4 maskY = (vec_uint4)spu_maskb(0x0f00);
+    vec_uint4 maskZ = (vec_uint4)spu_maskb(0x00f0);
+    vec_float4 zeros = spu_splats(0.0f);
+    vec_float4 sinA, cosA;
+    sincosf4( spu_splats(radians), &sinA, &cosA );
+    vmathV4MakeXAxis( &result->col0 );
+    result->col1.vec128 = spu_sel( spu_sel( zeros, cosA, maskY ), sinA, maskZ );
+    result->col2.vec128 = spu_sel( spu_sel( zeros, negatef4(sinA), maskY ), cosA, maskZ );
+    vmathV4MakeWAxis( &result->col3 );
+}
+
+/*
+ * 4x4 rotation about the Y axis by `radians`:
+ *   col0 = (cos, 0, -sin, 0), col1 = Y axis, col2 = (sin, 0, cos, 0),
+ *   col3 = W axis.
+ * Lanes that are not selected below keep the value 0.
+ */
+static inline void vmathM4MakeRotationY( VmathMatrix4 *result, float radians )
+{
+    /* select masks covering the x word and the z word of a quadword */
+    vec_uint4 maskX = (vec_uint4)spu_maskb(0xf000);
+    vec_uint4 maskZ = (vec_uint4)spu_maskb(0x00f0);
+    vec_float4 zeros = spu_splats(0.0f);
+    vec_float4 sinA, cosA;
+    sincosf4( spu_splats(radians), &sinA, &cosA );
+    result->col0.vec128 = spu_sel( spu_sel( zeros, cosA, maskX ), negatef4(sinA), maskZ );
+    vmathV4MakeYAxis( &result->col1 );
+    result->col2.vec128 = spu_sel( spu_sel( zeros, sinA, maskX ), cosA, maskZ );
+    vmathV4MakeWAxis( &result->col3 );
+}
+
+/*
+ * Build a 4x4 matrix from an angle in radians:
+ *   col0 = (cos, sin, 0, 0), col1 = (-sin, cos, 0, 0), col2 = Z axis,
+ *   col3 = W axis.
+ */
+static inline void vmathM4MakeRotationZ( VmathMatrix4 *result, float radians )
+{
+    vec_uint4 laneX = (vec_uint4)spu_maskb(0xf000);
+    vec_uint4 laneY = (vec_uint4)spu_maskb(0x0f00);
+    vec_float4 zeros = spu_splats(0.0f);
+    vec_float4 sinV, cosV, xCol, yCol;
+
+    sincosf4( spu_splats(radians), &sinV, &cosV );
+
+    /* cosine in the x lane, sine in the y lane, zero elsewhere */
+    xCol = spu_sel( spu_sel( zeros, cosV, laneX ), sinV, laneY );
+    /* negated sine in the x lane, cosine in the y lane, zero elsewhere */
+    yCol = spu_sel( spu_sel( zeros, negatef4(sinV), laneX ), cosV, laneY );
+
+    result->col0.vec128 = xCol;
+    result->col1.vec128 = yCol;
+    vmathV4MakeZAxis( &result->col2 );
+    vmathV4MakeWAxis( &result->col3 );
+}
+
+/*
+ * Build a 4x4 rotation matrix from three angles (radians) packed one per
+ * lane in radiansXYZ.  Columns 0..2 are products of sin/cos terms of those
+ * angles; column 3 is set through vmathV4MakeWAxis.
+ * NOTE(review): the function name suggests a Z*Y*X composition; the exact
+ * convention depends on the _VECTORMATH_SHUF_* patterns, which are defined
+ * outside this section - confirm there.
+ */
+static inline void vmathM4MakeRotationZYX( VmathMatrix4 *result, const VmathVector3 *radiansXYZ )
+{
+    vec_float4 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
+    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
+    angles = radiansXYZ->vec128;
+    /* force lane 3 of the angle vector to zero before taking sin/cos */
+    angles = spu_insert( 0.0f, angles, 3 );
+    sincosf4( angles, &s, &c );
+    negS = negatef4( s );
+    /* Z* and Y* combine lanes of sin/cos using patterns defined elsewhere */
+    Z0 = spu_shuffle( s, c, _VECTORMATH_SHUF_CZD0 );
+    Z1 = spu_shuffle( c, negS, _VECTORMATH_SHUF_CZD0 );
+    Y0 = spu_shuffle( negS, c, _VECTORMATH_SHUF_BBY0 );
+    Y1 = spu_shuffle( c, s, _VECTORMATH_SHUF_BBY0 );
+    /* X0 / X1: lane 0 of s / c replicated to every lane */
+    X0 = spu_shuffle( s, s, shuffle_xxxx );
+    X1 = spu_shuffle( c, c, shuffle_xxxx );
+    tmp = spu_mul( Z0, Y1 );
+    result->col0.vec128 = spu_mul( Z0, Y0 );
+    /* col1 = Z1*X1 + tmp*X0 */
+    result->col1.vec128 = spu_madd( Z1, X1, spu_mul( tmp, X0 ) );
+    /* col2 = tmp*X1 - Z1*X0 */
+    result->col2.vec128 = spu_nmsub( Z1, X0, spu_mul( tmp, X1 ) );
+    vmathV4MakeWAxis( &result->col3 );
+}
+
+/*
+ * Build a 4x4 rotation matrix for a rotation of `radians` about unitVec.
+ * Each of columns 0..2 is computed as
+ *     axis * axis[i] * (1 - cos) + tmp[i]
+ * where tmp[i] carries cos in lane i and +/- axis*sin terms in the other
+ * lanes.  Column 3 is set through vmathV4MakeWAxis.
+ * NOTE(review): the parameter name implies unitVec must be normalized; the
+ * code does not normalize it.
+ */
+static inline void vmathM4MakeRotationAxis( VmathMatrix4 *result, float radians, const VmathVector3 *unitVec )
+{
+    vec_float4 axis, s, c, oneMinusC, axisS, negAxisS, xxxx, yyyy, zzzz, tmp0, tmp1, tmp2, zeroW;
+    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
+    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
+    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
+    axis = unitVec->vec128;
+    sincosf4( spu_splats( radians ), &s, &c );
+    /* replicate each axis component across all lanes */
+    xxxx = spu_shuffle( axis, axis, shuffle_xxxx );
+    yyyy = spu_shuffle( axis, axis, shuffle_yyyy );
+    zzzz = spu_shuffle( axis, axis, shuffle_zzzz );
+    oneMinusC = spu_sub( spu_splats(1.0f), c );
+    axisS = spu_mul( axis, s );
+    negAxisS = negatef4( axisS );
+    /* off-diagonal terms picked from axis*sin and -(axis*sin); shuffle patterns are defined elsewhere */
+    tmp0 = spu_shuffle( axisS, negAxisS, _VECTORMATH_SHUF_0ZB0 );
+    tmp1 = spu_shuffle( axisS, negAxisS, _VECTORMATH_SHUF_C0X0 );
+    tmp2 = spu_shuffle( axisS, negAxisS, _VECTORMATH_SHUF_YA00 );
+    /* place cos in lane 0 / 1 / 2 of tmp0 / tmp1 / tmp2 respectively */
+    tmp0 = spu_sel( tmp0, c, (vec_uint4)spu_maskb(0xf000) );
+    tmp1 = spu_sel( tmp1, c, (vec_uint4)spu_maskb(0x0f00) );
+    tmp2 = spu_sel( tmp2, c, (vec_uint4)spu_maskb(0x00f0) );
+    /* clear lane 3 of axis so the products below contribute nothing there */
+    zeroW = (vec_float4)spu_maskb(0x000f);
+    axis = spu_andc( axis, zeroW );
+    result->col0.vec128 = spu_madd( spu_mul( axis, xxxx ), oneMinusC, tmp0 );
+    result->col1.vec128 = spu_madd( spu_mul( axis, yyyy ), oneMinusC, tmp1 );
+    result->col2.vec128 = spu_madd( spu_mul( axis, zzzz ), oneMinusC, tmp2 );
+    vmathV4MakeWAxis( &result->col3 );
+}
+
+/*
+ * Build a 4x4 matrix from a quaternion by first building a VmathTransform3
+ * with vmathT3MakeRotationQ and then converting it with vmathM4MakeFromT3.
+ */
+static inline void vmathM4MakeRotationQ( VmathMatrix4 *result, const VmathQuat *unitQuat )
+{
+    VmathTransform3 rotation;
+
+    vmathT3MakeRotationQ( &rotation, unitQuat );
+    vmathM4MakeFromT3( result, &rotation );
+}
+
+/*
+ * Build a 4x4 scale matrix: column i (i = 0..2) holds lane i of scaleVec in
+ * lane i and zero elsewhere; column 3 is set through vmathV4MakeWAxis.
+ */
+static inline void vmathM4MakeScale( VmathMatrix4 *result, const VmathVector3 *scaleVec )
+{
+    vec_uint4 laneX = (vec_uint4)spu_maskb(0xf000);
+    vec_uint4 laneY = (vec_uint4)spu_maskb(0x0f00);
+    vec_uint4 laneZ = (vec_uint4)spu_maskb(0x00f0);
+    vec_float4 zeros = spu_splats(0.0f);
+
+    /* scaleVec is re-read for every column, exactly as the stores are ordered */
+    result->col0.vec128 = spu_sel( zeros, scaleVec->vec128, laneX );
+    result->col1.vec128 = spu_sel( zeros, scaleVec->vec128, laneY );
+    result->col2.vec128 = spu_sel( zeros, scaleVec->vec128, laneZ );
+    vmathV4MakeWAxis( &result->col3 );
+}
+
+/*
+ * Scale columns 0, 1 and 2 of mat by the x, y and z components of scaleVec
+ * respectively and copy column 3 unchanged.
+ */
+static inline void vmathM4AppendScale( VmathMatrix4 *result, const VmathMatrix4 *mat, const VmathVector3 *scaleVec )
+{
+    float factor;
+
+    factor = vmathV3GetX( scaleVec );
+    vmathV4ScalarMul( &result->col0, &mat->col0, factor );
+
+    factor = vmathV3GetY( scaleVec );
+    vmathV4ScalarMul( &result->col1, &mat->col1, factor );
+
+    factor = vmathV3GetZ( scaleVec );
+    vmathV4ScalarMul( &result->col2, &mat->col2, factor );
+
+    vmathV4Copy( &result->col3, &mat->col3 );
+}
+
+/*
+ * Multiply every column of mat element-wise by a 4-D vector made from
+ * scaleVec and the scalar 1.0f (via vmathV4MakeFromV3Scalar).
+ */
+static inline void vmathM4PrependScale( VmathMatrix4 *result, const VmathVector3 *scaleVec, const VmathMatrix4 *mat )
+{
+    VmathVector4 perElemScale;
+
+    vmathV4MakeFromV3Scalar( &perElemScale, scaleVec, 1.0f );
+
+    vmathV4MulPerElem( &result->col0, &mat->col0, &perElemScale );
+    vmathV4MulPerElem( &result->col1, &mat->col1, &perElemScale );
+    vmathV4MulPerElem( &result->col2, &mat->col2, &perElemScale );
+    vmathV4MulPerElem( &result->col3, &mat->col3, &perElemScale );
+}
+
+/*
+ * Build a 4x4 translation matrix: columns 0..2 are the X, Y and Z axes and
+ * column 3 is made from translateVec with the scalar 1.0f.
+ */
+static inline void vmathM4MakeTranslation( VmathMatrix4 *result, const VmathVector3 *translateVec )
+{
+    VmathVector4 *translationCol = &result->col3;
+
+    vmathV4MakeXAxis( &result->col0 );
+    vmathV4MakeYAxis( &result->col1 );
+    vmathV4MakeZAxis( &result->col2 );
+    vmathV4MakeFromV3Scalar( translationCol, translateVec, 1.0f );
+}
+
+/*
+ * Build a viewing matrix from an eye position, a target position and an up
+ * vector.  An eye frame is assembled from three axes and the eye position,
+ * and the result is vmathM4OrthoInverse of that frame.
+ */
+static inline void vmathM4MakeLookAt( VmathMatrix4 *result, const VmathPoint3 *eyePos, const VmathPoint3 *lookAtPos, const VmathVector3 *upVec )
+{
+    VmathMatrix4 eyeFrame;
+    VmathVector3 axisX, axisY, axisZ, eyeMinusTarget, crossYZ;
+    VmathVector4 colX, colY, colZ, colEye;
+
+    /* Z axis: normalized (eye - target); Y starts as the normalized up vector */
+    vmathV3Normalize( &axisY, upVec );
+    vmathP3Sub( &eyeMinusTarget, eyePos, lookAtPos );
+    vmathV3Normalize( &axisZ, &eyeMinusTarget );
+
+    /* X axis: normalized Y x Z; Y is then rebuilt as Z x X */
+    vmathV3Cross( &crossYZ, &axisY, &axisZ );
+    vmathV3Normalize( &axisX, &crossYZ );
+    vmathV3Cross( &axisY, &axisZ, &axisX );
+
+    vmathV4MakeFromV3( &colX, &axisX );
+    vmathV4MakeFromV3( &colY, &axisY );
+    vmathV4MakeFromV3( &colZ, &axisZ );
+    vmathV4MakeFromP3( &colEye, eyePos );
+
+    vmathM4MakeFromCols( &eyeFrame, &colX, &colY, &colZ, &colEye );
+    vmathM4OrthoInverse( result, &eyeFrame );
+}
+
+/*
+ * Build a perspective projection matrix.
+ * With f = tanf( _VECTORMATH_PI_OVER_2 - fovyRadians/2 ) and
+ * rangeInv = 1/(zNear - zFar):
+ *   col0 = (f/aspect, 0, 0, 0)
+ *   col1 = (0, f, 0, 0)
+ *   col2 = (0, 0, (zNear+zFar)*rangeInv, -1)
+ *   col3 = (0, 0, zNear*zFar*rangeInv*2, 0)
+ */
+static inline void vmathM4MakePerspective( VmathMatrix4 *result, float fovyRadians, float aspect, float zNear, float zFar )
+{
+    float yScale, invRange;
+    vec_float4 zeros, xCol, yCol, zCol, wCol;
+
+    yScale = tanf( _VECTORMATH_PI_OVER_2 - fovyRadians * 0.5f );
+    invRange = 1.0f / ( zNear - zFar );
+    zeros = spu_splats(0.0f);
+
+    xCol = spu_insert( yScale / aspect, zeros, 0 );
+    yCol = spu_insert( yScale, zeros, 1 );
+    zCol = spu_insert( -1.0f, spu_insert( ( zNear + zFar ) * invRange, zeros, 2 ), 3 );
+    wCol = spu_insert( zNear * zFar * invRange * 2.0f, zeros, 2 );
+
+    result->col0.vec128 = xCol;
+    result->col1.vec128 = yCol;
+    result->col2.vec128 = zCol;
+    result->col3.vec128 = wCol;
+}
+
+/*
+ * Build a perspective projection matrix from the six frustum planes.
+ * Writing l,r,b,t,n,f for left,right,bottom,top,zNear,zFar, the stores at
+ * the end of the function produce:
+ *   col0 = (2n/(r-l), 0, 0, 0)
+ *   col1 = (0, 2n/(t-b), 0, 0)
+ *   col2 = ((r+l)/(r-l), (t+b)/(t-b), (n+f)/(n-f), -1)
+ *   col3 = (0, 0, 2nf/(n-f), 0)
+ * NOTE(review): the lane layout of lbf/rtn assumes _VECTORMATH_SHUF_XAYB
+ * interleaves the first two lanes of its operands (pattern defined outside
+ * this section) - confirm.  No check is made for r==l, t==b or n==f.
+ */
+static inline void vmathM4MakeFrustum( VmathMatrix4 *result, float left, float right, float bottom, float top, float zNear, float zFar )
+{
+    /* function implementation based on code from STIDC SDK:           */
+    /* --------------------------------------------------------------  */
+    /* PLEASE DO NOT MODIFY THIS SECTION                               */
+    /* This prolog section is automatically generated.                 */
+    /*                                                                 */
+    /* (C)Copyright                                                    */
+    /* Sony Computer Entertainment, Inc.,                              */
+    /* Toshiba Corporation,                                            */
+    /* International Business Machines Corporation,                    */
+    /* 2001,2002.                                                      */
+    /* S/T/I Confidential Information                                  */
+    /* --------------------------------------------------------------  */
+    vec_float4 lbf, rtn;
+    vec_float4 diff, sum, inv_diff;
+    vec_float4 diagonal, column, near2;
+    vec_float4 zero = spu_splats(0.0f);
+    /* pack lbf = (left, bottom, zFar, -) and rtn = (right, top, zNear, -) */
+    lbf = spu_shuffle( spu_promote(left,0), spu_promote(zFar,0), _VECTORMATH_SHUF_XAYB );
+    rtn = spu_shuffle( spu_promote(right,0), spu_promote(zNear,0), _VECTORMATH_SHUF_XAYB );
+    lbf = spu_shuffle( lbf, spu_promote(bottom,0), _VECTORMATH_SHUF_XAYB );
+    rtn = spu_shuffle( rtn, spu_promote(top,0), _VECTORMATH_SHUF_XAYB );
+    diff = spu_sub( rtn, lbf );
+    sum  = spu_add( rtn, lbf );
+    inv_diff = recipf4( diff );
+    /* near2 = 2 * zNear in every lane */
+    near2 = spu_splats( zNear );
+    near2 = spu_add( near2, near2 );
+    diagonal = spu_mul( near2, inv_diff );
+    column = spu_mul( sum, inv_diff );
+    /* keep one lane of `diagonal` per column; col2 gets -1 in lane 3 */
+    result->col0.vec128 = spu_sel( zero, diagonal, (vec_uint4)spu_maskb(0xf000) );
+    result->col1.vec128 = spu_sel( zero, diagonal, (vec_uint4)spu_maskb(0x0f00) );
+    result->col2.vec128 = spu_sel( column, spu_splats(-1.0f), (vec_uint4)spu_maskb(0x000f) );
+    result->col3.vec128 = spu_sel( zero, spu_mul( diagonal, spu_splats(zFar) ), (vec_uint4)spu_maskb(0x00f0) );
+}
+
+/*
+ * Build an orthographic projection matrix from the six clipping planes.
+ * Writing l,r,b,t,n,f for left,right,bottom,top,zNear,zFar, the stores at
+ * the end of the function produce:
+ *   col0 = (2/(r-l), 0, 0, 0)
+ *   col1 = (0, 2/(t-b), 0, 0)
+ *   col2 = (0, 0, 2/(n-f), 0)
+ *   col3 = (-(r+l)/(r-l), -(t+b)/(t-b), (n+f)/(n-f), 1)
+ * NOTE(review): the lane layout of lbf/rtn assumes _VECTORMATH_SHUF_XAYB
+ * interleaves the first two lanes of its operands (pattern defined outside
+ * this section) - confirm.  No check is made for r==l, t==b or n==f.
+ */
+static inline void vmathM4MakeOrthographic( VmathMatrix4 *result, float left, float right, float bottom, float top, float zNear, float zFar )
+{
+    /* function implementation based on code from STIDC SDK:           */
+    /* --------------------------------------------------------------  */
+    /* PLEASE DO NOT MODIFY THIS SECTION                               */
+    /* This prolog section is automatically generated.                 */
+    /*                                                                 */
+    /* (C)Copyright                                                    */
+    /* Sony Computer Entertainment, Inc.,                              */
+    /* Toshiba Corporation,                                            */
+    /* International Business Machines Corporation,                    */
+    /* 2001,2002.                                                      */
+    /* S/T/I Confidential Information                                  */
+    /* --------------------------------------------------------------  */
+    vec_float4 lbf, rtn;
+    vec_float4 diff, sum, inv_diff, neg_inv_diff;
+    vec_float4 diagonal, column;
+    vec_float4 zero = spu_splats(0.0f);
+    /* pack lbf = (left, bottom, zFar, -) and rtn = (right, top, zNear, -) */
+    lbf = spu_shuffle( spu_promote(left,0), spu_promote(zFar,0), _VECTORMATH_SHUF_XAYB );
+    rtn = spu_shuffle( spu_promote(right,0), spu_promote(zNear,0), _VECTORMATH_SHUF_XAYB );
+    lbf = spu_shuffle( lbf, spu_promote(bottom,0), _VECTORMATH_SHUF_XAYB );
+    rtn = spu_shuffle( rtn, spu_promote(top,0), _VECTORMATH_SHUF_XAYB );
+    diff = spu_sub( rtn, lbf );
+    sum  = spu_add( rtn, lbf );
+    inv_diff = recipf4( diff );
+    neg_inv_diff = negatef4( inv_diff );
+    /* diagonal = 2 / diff */
+    diagonal = spu_add( inv_diff, inv_diff );
+    /* sum/diff, negated in every lane except lane 2 */
+    column = spu_mul( sum, spu_sel( neg_inv_diff, inv_diff, (vec_uint4)spu_maskb(0x00f0) ) );
+    /* keep one lane of `diagonal` per column; col3 gets 1 in lane 3 */
+    result->col0.vec128 = spu_sel( zero, diagonal, (vec_uint4)spu_maskb(0xf000) );
+    result->col1.vec128 = spu_sel( zero, diagonal, (vec_uint4)spu_maskb(0x0f00) );
+    result->col2.vec128 = spu_sel( zero, diagonal, (vec_uint4)spu_maskb(0x00f0) );
+    result->col3.vec128 = spu_sel( column, spu_splats(1.0f), (vec_uint4)spu_maskb(0x000f) );
+}
+
+/*
+ * Column-wise select between two 4x4 matrices: every column of result is
+ * produced by vmathV4Select from the matching columns of mat0 and mat1 with
+ * the same select1 flag.
+ */
+static inline void vmathM4Select( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1, unsigned int select1 )
+{
+    const VmathVector4 *from0, *from1;
+
+    from0 = &mat0->col0;
+    from1 = &mat1->col0;
+    vmathV4Select( &result->col0, from0, from1, select1 );
+
+    from0 = &mat0->col1;
+    from1 = &mat1->col1;
+    vmathV4Select( &result->col1, from0, from1, select1 );
+
+    from0 = &mat0->col2;
+    from1 = &mat1->col2;
+    vmathV4Select( &result->col2, from0, from1, select1 );
+
+    from0 = &mat0->col3;
+    from1 = &mat1->col3;
+    vmathV4Select( &result->col3, from0, from1, select1 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+/* Print rows 0..3 of a 4x4 matrix, one vmathV4Print call per row. */
+static inline void vmathM4Print( const VmathMatrix4 *mat )
+{
+    VmathVector4 rowVec;
+    int row;
+
+    for ( row = 0; row < 4; row++ )
+    {
+        vmathM4GetRow( &rowVec, mat, row );
+        vmathV4Print( &rowVec );
+    }
+}
+
+/* Print `name` followed by a colon and newline, then the matrix rows. */
+static inline void vmathM4Prints( const VmathMatrix4 *mat, const char *name )
+{
+    const char *label = name;
+
+    printf("%s:\n", label);
+    vmathM4Print( mat );
+}
+
+#endif
+
+/* Copy all four columns of tfrm into result. */
+static inline void vmathT3Copy( VmathTransform3 *result, const VmathTransform3 *tfrm )
+{
+    int col;
+
+    /* columns are addressed relative to col0, as vmathT3SetCol/GetCol do */
+    for ( col = 0; col < 4; col++ )
+    {
+        vmathV3Copy( (&result->col0 + col), (&tfrm->col0 + col) );
+    }
+}
+
+/* Set every column of result with vmathV3MakeFromScalar( scalar ). */
+static inline void vmathT3MakeFromScalar( VmathTransform3 *result, float scalar )
+{
+    int col;
+
+    /* columns are addressed relative to col0, as vmathT3SetCol/GetCol do */
+    for ( col = 0; col < 4; col++ )
+    {
+        vmathV3MakeFromScalar( (&result->col0 + col), scalar );
+    }
+}
+
+/* Build a transform by copying the four supplied column vectors, in order. */
+static inline void vmathT3MakeFromCols( VmathTransform3 *result, const VmathVector3 *_col0, const VmathVector3 *_col1, const VmathVector3 *_col2, const VmathVector3 *_col3 )
+{
+    VmathVector3 *dst0 = &result->col0;
+    VmathVector3 *dst1 = &result->col1;
+    VmathVector3 *dst2 = &result->col2;
+    VmathVector3 *dst3 = &result->col3;
+
+    vmathV3Copy( dst0, _col0 );
+    vmathV3Copy( dst1, _col1 );
+    vmathV3Copy( dst2, _col2 );
+    vmathV3Copy( dst3, _col3 );
+}
+
+/*
+ * Build a transform from a 3x3 matrix (columns 0..2) and a translation
+ * vector (column 3).
+ */
+static inline void vmathT3MakeFromM3V3( VmathTransform3 *result, const VmathMatrix3 *tfrm, const VmathVector3 *translateVec )
+{
+    /* upper 3x3 part */
+    vmathV3Copy( &result->col0, &tfrm->col0 );
+    vmathV3Copy( &result->col1, &tfrm->col1 );
+    vmathV3Copy( &result->col2, &tfrm->col2 );
+    /* translation */
+    vmathV3Copy( &result->col3, translateVec );
+}
+
+/*
+ * Build a transform from a quaternion and a translation vector: the
+ * quaternion is converted with vmathM3MakeFromQ into columns 0..2, and
+ * translateVec becomes column 3.
+ */
+static inline void vmathT3MakeFromQV3( VmathTransform3 *result, const VmathQuat *unitQuat, const VmathVector3 *translateVec )
+{
+    VmathMatrix3 rotation;
+
+    vmathM3MakeFromQ( &rotation, unitQuat );
+    /* upper 3x3 part */
+    vmathV3Copy( &result->col0, &rotation.col0 );
+    vmathV3Copy( &result->col1, &rotation.col1 );
+    vmathV3Copy( &result->col2, &rotation.col2 );
+    /* translation */
+    vmathV3Copy( &result->col3, translateVec );
+}
+
+/* Replace column 0 of result with _col0. */
+static inline void vmathT3SetCol0( VmathTransform3 *result, const VmathVector3 *_col0 )
+{
+    VmathVector3 *dst = &result->col0;
+
+    vmathV3Copy( dst, _col0 );
+}
+
+/* Replace column 1 of result with _col1. */
+static inline void vmathT3SetCol1( VmathTransform3 *result, const VmathVector3 *_col1 )
+{
+    VmathVector3 *dst = &result->col1;
+
+    vmathV3Copy( dst, _col1 );
+}
+
+/* Replace column 2 of result with _col2. */
+static inline void vmathT3SetCol2( VmathTransform3 *result, const VmathVector3 *_col2 )
+{
+    VmathVector3 *dst = &result->col2;
+
+    vmathV3Copy( dst, _col2 );
+}
+
+/* Replace column 3 of result with _col3. */
+static inline void vmathT3SetCol3( VmathTransform3 *result, const VmathVector3 *_col3 )
+{
+    VmathVector3 *dst = &result->col3;
+
+    vmathV3Copy( dst, _col3 );
+}
+
+/*
+ * Replace column `col` of result with vec.  The column is addressed as an
+ * offset from col0; `col` is not range-checked.
+ */
+static inline void vmathT3SetCol( VmathTransform3 *result, int col, const VmathVector3 *vec )
+{
+    VmathVector3 *firstCol = &result->col0;
+
+    vmathV3Copy( &firstCol[col], vec );
+}
+
+/*
+ * Replace row `row` of result: element i of vec is stored into element
+ * `row` of column i, for i = 0..3.
+ */
+static inline void vmathT3SetRow( VmathTransform3 *result, int row, const VmathVector4 *vec )
+{
+    int col;
+
+    /* columns are addressed relative to col0, as vmathT3SetCol/GetCol do */
+    for ( col = 0; col < 4; col++ )
+    {
+        vmathV3SetElem( (&result->col0 + col), row, vmathV4GetElem( vec, col ) );
+    }
+}
+
+/*
+ * Set a single element: fetch column `col`, change its element `row` to
+ * val, and write the column back.
+ */
+static inline void vmathT3SetElem( VmathTransform3 *result, int col, int row, float val )
+{
+    VmathVector3 column;
+
+    vmathT3GetCol( &column, result, col );
+    vmathV3SetElem( &column, row, val );
+    vmathT3SetCol( result, col, &column );
+}
+
+/* Return element `row` of column `col` of tfrm. */
+static inline float vmathT3GetElem( const VmathTransform3 *tfrm, int col, int row )
+{
+    VmathVector3 column;
+    float elem;
+
+    vmathT3GetCol( &column, tfrm, col );
+    elem = vmathV3GetElem( &column, row );
+    return elem;
+}
+
+/* Copy column 0 of tfrm into result. */
+static inline void vmathT3GetCol0( VmathVector3 *result, const VmathTransform3 *tfrm )
+{
+    const VmathVector3 *src = &tfrm->col0;
+
+    vmathV3Copy( result, src );
+}
+
+/* Copy column 1 of tfrm into result. */
+static inline void vmathT3GetCol1( VmathVector3 *result, const VmathTransform3 *tfrm )
+{
+    const VmathVector3 *src = &tfrm->col1;
+
+    vmathV3Copy( result, src );
+}
+
+/* Copy column 2 of tfrm into result. */
+static inline void vmathT3GetCol2( VmathVector3 *result, const VmathTransform3 *tfrm )
+{
+    const VmathVector3 *src = &tfrm->col2;
+
+    vmathV3Copy( result, src );
+}
+
+/* Copy column 3 of tfrm into result. */
+static inline void vmathT3GetCol3( VmathVector3 *result, const VmathTransform3 *tfrm )
+{
+    const VmathVector3 *src = &tfrm->col3;
+
+    vmathV3Copy( result, src );
+}
+
+/*
+ * Copy column `col` of tfrm into result.  The column is addressed as an
+ * offset from col0; `col` is not range-checked.
+ */
+static inline void vmathT3GetCol( VmathVector3 *result, const VmathTransform3 *tfrm, int col )
+{
+    const VmathVector3 *firstCol = &tfrm->col0;
+
+    vmathV3Copy( result, &firstCol[col] );
+}
+
+/*
+ * Gather row `row` of tfrm into a 4-D vector: element i of the result is
+ * element `row` of column i.
+ */
+static inline void vmathT3GetRow( VmathVector4 *result, const VmathTransform3 *tfrm, int row )
+{
+    float e0, e1, e2, e3;
+
+    e0 = vmathV3GetElem( &tfrm->col0, row );
+    e1 = vmathV3GetElem( &tfrm->col1, row );
+    e2 = vmathV3GetElem( &tfrm->col2, row );
+    e3 = vmathV3GetElem( &tfrm->col3, row );
+    vmathV4MakeFromElems( result, e0, e1, e2, e3 );
+}
+
+/*
+ * General inverse of a 3x4 transform.
+ * The three cross products of the upper-3x3 columns are computed, the
+ * determinant is taken as dot3( col0 x col1, col2 ), and the rearranged
+ * cross products are scaled by its reciprocal.  The new column 3 is the
+ * inverted 3x3 part applied to the negated original column 3.
+ * No check is made for a zero determinant.
+ * NOTE(review): the rearrangement of tmp0..tmp2 into inv0..inv2 looks like a
+ * 3x3 transpose; it relies on _VECTORMATH_SHUF_* patterns defined outside
+ * this section - confirm.
+ */
+static inline void vmathT3Inverse( VmathTransform3 *result, const VmathTransform3 *tfrm )
+{
+    vec_float4 inv0, inv1, inv2, inv3;
+    vec_float4 tmp0, tmp1, tmp2, tmp3, tmp4, dot, invdet;
+    vec_float4 xxxx, yyyy, zzzz;
+    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
+    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
+    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
+    /* pairwise cross products of the first three columns */
+    tmp2 = _vmathVfCross( tfrm->col0.vec128, tfrm->col1.vec128 );
+    tmp0 = _vmathVfCross( tfrm->col1.vec128, tfrm->col2.vec128 );
+    tmp1 = _vmathVfCross( tfrm->col2.vec128, tfrm->col0.vec128 );
+    inv3 = negatef4( tfrm->col3.vec128 );
+    /* determinant, replicated to all lanes, then its reciprocal */
+    dot = _vmathVfDot3( tmp2, tfrm->col2.vec128 );
+    dot = spu_shuffle( dot, dot, shuffle_xxxx );
+    invdet = recipf4( dot );
+    /* rearrange the cross products into the (unscaled) inverse columns */
+    tmp3 = spu_shuffle( tmp0, tmp2, _VECTORMATH_SHUF_XAYB );
+    tmp4 = spu_shuffle( tmp0, tmp2, _VECTORMATH_SHUF_ZCWD );
+    inv0 = spu_shuffle( tmp3, tmp1, _VECTORMATH_SHUF_XAYB );
+    xxxx = spu_shuffle( inv3, inv3, shuffle_xxxx );
+    inv1 = spu_shuffle( tmp3, tmp1, _VECTORMATH_SHUF_ZBW0 );
+    inv2 = spu_shuffle( tmp4, tmp1, _VECTORMATH_SHUF_XCY0 );
+    yyyy = spu_shuffle( inv3, inv3, shuffle_yyyy );
+    zzzz = spu_shuffle( inv3, inv3, shuffle_zzzz );
+    /* inv3 = inv0*(-t.x) + inv1*(-t.y) + inv2*(-t.z), t = original column 3 */
+    inv3 = spu_mul( inv0, xxxx );
+    inv3 = spu_madd( inv1, yyyy, inv3 );
+    inv3 = spu_madd( inv2, zzzz, inv3 );
+    /* scale everything by 1/determinant */
+    inv0 = spu_mul( inv0, invdet );
+    inv1 = spu_mul( inv1, invdet );
+    inv2 = spu_mul( inv2, invdet );
+    inv3 = spu_mul( inv3, invdet );
+    result->col0.vec128 = inv0;
+    result->col1.vec128 = inv1;
+    result->col2.vec128 = inv2;
+    result->col3.vec128 = inv3;
+}
+
+/*
+ * Inverse of a 3x4 transform computed without a determinant: columns 0..2
+ * of the result are a rearrangement of the input's columns 0..2, and the
+ * new column 3 is that rearranged 3x3 part applied to the negated original
+ * column 3.
+ * NOTE(review): the name and the absence of any scaling imply the upper 3x3
+ * is expected to be orthonormal, and the rearrangement looks like a 3x3
+ * transpose; both depend on _VECTORMATH_SHUF_* patterns defined outside
+ * this section - confirm.
+ */
+static inline void vmathT3OrthoInverse( VmathTransform3 *result, const VmathTransform3 *tfrm )
+{
+    vec_float4 inv0, inv1, inv2, inv3;
+    vec_float4 tmp0, tmp1;
+    vec_float4 xxxx, yyyy, zzzz;
+    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
+    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
+    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
+    /* rearrange columns 0..2 into inv0..inv2 */
+    tmp0 = spu_shuffle( tfrm->col0.vec128, tfrm->col2.vec128, _VECTORMATH_SHUF_XAYB );
+    tmp1 = spu_shuffle( tfrm->col0.vec128, tfrm->col2.vec128, _VECTORMATH_SHUF_ZCWD );
+    inv3 = negatef4( tfrm->col3.vec128 );
+    inv0 = spu_shuffle( tmp0, tfrm->col1.vec128, _VECTORMATH_SHUF_XAYB );
+    xxxx = spu_shuffle( inv3, inv3, shuffle_xxxx );
+    inv1 = spu_shuffle( tmp0, tfrm->col1.vec128, _VECTORMATH_SHUF_ZBW0 );
+    inv2 = spu_shuffle( tmp1, tfrm->col1.vec128, _VECTORMATH_SHUF_XCY0 );
+    yyyy = spu_shuffle( inv3, inv3, shuffle_yyyy );
+    zzzz = spu_shuffle( inv3, inv3, shuffle_zzzz );
+    /* inv3 = inv0*(-t.x) + inv1*(-t.y) + inv2*(-t.z), t = original column 3 */
+    inv3 = spu_mul( inv0, xxxx );
+    inv3 = spu_madd( inv1, yyyy, inv3 );
+    inv3 = spu_madd( inv2, zzzz, inv3 );
+    result->col0.vec128 = inv0;
+    result->col1.vec128 = inv1;
+    result->col2.vec128 = inv2;
+    result->col3.vec128 = inv3;
+}
+
+/* Apply vmathV3AbsPerElem to each of the four columns of tfrm. */
+static inline void vmathT3AbsPerElem( VmathTransform3 *result, const VmathTransform3 *tfrm )
+{
+    int col;
+
+    /* columns are addressed relative to col0, as vmathT3SetCol/GetCol do */
+    for ( col = 0; col < 4; col++ )
+    {
+        vmathV3AbsPerElem( (&result->col0 + col), (&tfrm->col0 + col) );
+    }
+}
+
+/*
+ * Multiply a vector by the upper 3x3 part of a transform:
+ *   result = col0*vec.x + col1*vec.y + col2*vec.z
+ * Column 3 of tfrm is not used.
+ */
+static inline void vmathT3MulV3( VmathVector3 *result, const VmathTransform3 *tfrm, const VmathVector3 *vec )
+{
+    vec_uchar16 splatX = (vec_uchar16)spu_splats((int)0x00010203);
+    vec_uchar16 splatY = (vec_uchar16)spu_splats((int)0x04050607);
+    vec_uchar16 splatZ = (vec_uchar16)spu_splats((int)0x08090a0b);
+    vec_float4 vx, vy, vz;
+
+    /* replicate each component of vec across all lanes */
+    vx = spu_shuffle( vec->vec128, vec->vec128, splatX );
+    vy = spu_shuffle( vec->vec128, vec->vec128, splatY );
+    vz = spu_shuffle( vec->vec128, vec->vec128, splatZ );
+
+    result->vec128 = spu_madd( tfrm->col2.vec128, vz,
+                     spu_madd( tfrm->col1.vec128, vy,
+                     spu_mul( tfrm->col0.vec128, vx ) ) );
+}
+
+/*
+ * Transform a point:
+ *   result = (col0*pnt.x + col2*pnt.z) + (col3 + col1*pnt.y)
+ * The grouping above mirrors the order in which the terms are summed.
+ */
+static inline void vmathT3MulP3( VmathPoint3 *result, const VmathTransform3 *tfrm, const VmathPoint3 *pnt )
+{
+    vec_uchar16 splatX = (vec_uchar16)spu_splats((int)0x00010203);
+    vec_uchar16 splatY = (vec_uchar16)spu_splats((int)0x04050607);
+    vec_uchar16 splatZ = (vec_uchar16)spu_splats((int)0x08090a0b);
+    vec_float4 px, py, pz, partXZ, partYT;
+
+    /* replicate each component of pnt across all lanes */
+    px = spu_shuffle( pnt->vec128, pnt->vec128, splatX );
+    py = spu_shuffle( pnt->vec128, pnt->vec128, splatY );
+    pz = spu_shuffle( pnt->vec128, pnt->vec128, splatZ );
+
+    partXZ = spu_madd( tfrm->col2.vec128, pz, spu_mul( tfrm->col0.vec128, px ) );
+    partYT = spu_add( tfrm->col3.vec128, spu_mul( tfrm->col1.vec128, py ) );
+
+    result->vec128 = spu_add( partXZ, partYT );
+}
+
+/*
+ * Compose two transforms (tfrm0 applied to tfrm1): columns 0..2 of tfrm1
+ * are transformed as vectors and column 3 as a point.  The product is built
+ * in a temporary and copied out last, so result may be the same object as
+ * either input.
+ */
+static inline void vmathT3Mul( VmathTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1 )
+{
+    VmathTransform3 product;
+    VmathPoint3 originIn, originOut;
+
+    vmathT3MulV3( &product.col0, tfrm0, &tfrm1->col0 );
+    vmathT3MulV3( &product.col1, tfrm0, &tfrm1->col1 );
+    vmathT3MulV3( &product.col2, tfrm0, &tfrm1->col2 );
+
+    /* column 3 goes through the point path */
+    vmathP3MakeFromV3( &originIn, &tfrm1->col3 );
+    vmathT3MulP3( &originOut, tfrm0, &originIn );
+    vmathV3MakeFromP3( &product.col3, &originOut );
+
+    vmathT3Copy( result, &product );
+}
+
+/* Element-wise product of two transforms, one column at a time. */
+static inline void vmathT3MulPerElem( VmathTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1 )
+{
+    int col;
+
+    /* columns are addressed relative to col0, as vmathT3SetCol/GetCol do */
+    for ( col = 0; col < 4; col++ )
+    {
+        vmathV3MulPerElem( (&result->col0 + col), (&tfrm0->col0 + col), (&tfrm1->col0 + col) );
+    }
+}
+
+/*
+ * Set result to the identity transform: columns 0..2 are the X, Y and Z
+ * axes and column 3 is made from the scalar 0.0f.
+ */
+static inline void vmathT3MakeIdentity( VmathTransform3 *result )
+{
+    VmathVector3 *translationCol = &result->col3;
+
+    vmathV3MakeXAxis( &result->col0 );
+    vmathV3MakeYAxis( &result->col1 );
+    vmathV3MakeZAxis( &result->col2 );
+    vmathV3MakeFromScalar( translationCol, 0.0f );
+}
+
+/* Copy the three columns of a 3x3 matrix into columns 0..2 of result; column 3 is untouched. */
+static inline void vmathT3SetUpper3x3( VmathTransform3 *result, const VmathMatrix3 *tfrm )
+{
+    const VmathVector3 *src0 = &tfrm->col0;
+    const VmathVector3 *src1 = &tfrm->col1;
+    const VmathVector3 *src2 = &tfrm->col2;
+
+    vmathV3Copy( &result->col0, src0 );
+    vmathV3Copy( &result->col1, src1 );
+    vmathV3Copy( &result->col2, src2 );
+}
+
+/* Build a 3x3 matrix from columns 0..2 of tfrm. */
+static inline void vmathT3GetUpper3x3( VmathMatrix3 *result, const VmathTransform3 *tfrm )
+{
+    const VmathVector3 *c0 = &tfrm->col0;
+    const VmathVector3 *c1 = &tfrm->col1;
+    const VmathVector3 *c2 = &tfrm->col2;
+
+    vmathM3MakeFromCols( result, c0, c1, c2 );
+}
+
+/* Replace column 3 of result with translateVec. */
+static inline void vmathT3SetTranslation( VmathTransform3 *result, const VmathVector3 *translateVec )
+{
+    VmathVector3 *translationCol = &result->col3;
+
+    vmathV3Copy( translationCol, translateVec );
+}
+
+/* Copy column 3 of tfrm into result. */
+static inline void vmathT3GetTranslation( VmathVector3 *result, const VmathTransform3 *tfrm )
+{
+    const VmathVector3 *translationCol = &tfrm->col3;
+
+    vmathV3Copy( result, translationCol );
+}
+
+/*
+ * Build a transform from an angle in radians:
+ *   col0 = X axis, col1 = (0, cos, sin, 0), col2 = (0, -sin, cos, 0),
+ *   col3 = made from the scalar 0.0f.
+ */
+static inline void vmathT3MakeRotationX( VmathTransform3 *result, float radians )
+{
+    vec_uint4 laneY = (vec_uint4)spu_maskb(0x0f00);
+    vec_uint4 laneZ = (vec_uint4)spu_maskb(0x00f0);
+    vec_float4 zeros = spu_splats(0.0f);
+    vec_float4 sinV, cosV, yCol, zCol;
+
+    sincosf4( spu_splats(radians), &sinV, &cosV );
+
+    /* cosine in the y lane, sine in the z lane, zero elsewhere */
+    yCol = spu_sel( spu_sel( zeros, cosV, laneY ), sinV, laneZ );
+    /* negated sine in the y lane, cosine in the z lane, zero elsewhere */
+    zCol = spu_sel( spu_sel( zeros, negatef4(sinV), laneY ), cosV, laneZ );
+
+    vmathV3MakeXAxis( &result->col0 );
+    result->col1.vec128 = yCol;
+    result->col2.vec128 = zCol;
+    vmathV3MakeFromScalar( &result->col3, 0.0f );
+}
+
+/*
+ * Build a transform from an angle in radians:
+ *   col0 = (cos, 0, -sin, 0), col1 = Y axis, col2 = (sin, 0, cos, 0),
+ *   col3 = made from the scalar 0.0f.
+ */
+static inline void vmathT3MakeRotationY( VmathTransform3 *result, float radians )
+{
+    vec_uint4 laneX = (vec_uint4)spu_maskb(0xf000);
+    vec_uint4 laneZ = (vec_uint4)spu_maskb(0x00f0);
+    vec_float4 zeros = spu_splats(0.0f);
+    vec_float4 sinV, cosV, xCol, zCol;
+
+    sincosf4( spu_splats(radians), &sinV, &cosV );
+
+    /* cosine in the x lane, negated sine in the z lane, zero elsewhere */
+    xCol = spu_sel( spu_sel( zeros, cosV, laneX ), negatef4(sinV), laneZ );
+    /* sine in the x lane, cosine in the z lane, zero elsewhere */
+    zCol = spu_sel( spu_sel( zeros, sinV, laneX ), cosV, laneZ );
+
+    result->col0.vec128 = xCol;
+    vmathV3MakeYAxis( &result->col1 );
+    result->col2.vec128 = zCol;
+    vmathV3MakeFromScalar( &result->col3, 0.0f );
+}
+
+/*
+ * Build a transform from an angle in radians:
+ *   col0 = (cos, sin, 0, 0), col1 = (-sin, cos, 0, 0), col2 = Z axis,
+ *   col3 = made from the scalar 0.0f.
+ */
+static inline void vmathT3MakeRotationZ( VmathTransform3 *result, float radians )
+{
+    vec_uint4 laneX = (vec_uint4)spu_maskb(0xf000);
+    vec_uint4 laneY = (vec_uint4)spu_maskb(0x0f00);
+    vec_float4 zeros = spu_splats(0.0f);
+    vec_float4 sinV, cosV, xCol, yCol;
+
+    sincosf4( spu_splats(radians), &sinV, &cosV );
+
+    /* cosine in the x lane, sine in the y lane, zero elsewhere */
+    xCol = spu_sel( spu_sel( zeros, cosV, laneX ), sinV, laneY );
+    /* negated sine in the x lane, cosine in the y lane, zero elsewhere */
+    yCol = spu_sel( spu_sel( zeros, negatef4(sinV), laneX ), cosV, laneY );
+
+    result->col0.vec128 = xCol;
+    result->col1.vec128 = yCol;
+    vmathV3MakeZAxis( &result->col2 );
+    vmathV3MakeFromScalar( &result->col3, 0.0f );
+}
+
+/*
+ * Build a rotation transform from three angles (radians) packed one per
+ * lane in radiansXYZ.  Columns 0..2 are products of sin/cos terms of those
+ * angles; column 3 is made from the scalar 0.0f.
+ * NOTE(review): the function name suggests a Z*Y*X composition; the exact
+ * convention depends on the _VECTORMATH_SHUF_* patterns, which are defined
+ * outside this section - confirm there.
+ */
+static inline void vmathT3MakeRotationZYX( VmathTransform3 *result, const VmathVector3 *radiansXYZ )
+{
+    vec_float4 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
+    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
+    angles = radiansXYZ->vec128;
+    /* force lane 3 of the angle vector to zero before taking sin/cos */
+    angles = spu_insert( 0.0f, angles, 3 );
+    sincosf4( angles, &s, &c );
+    negS = negatef4( s );
+    /* Z* and Y* combine lanes of sin/cos using patterns defined elsewhere */
+    Z0 = spu_shuffle( s, c, _VECTORMATH_SHUF_CZD0 );
+    Z1 = spu_shuffle( c, negS, _VECTORMATH_SHUF_CZD0 );
+    Y0 = spu_shuffle( negS, c, _VECTORMATH_SHUF_BBY0 );
+    Y1 = spu_shuffle( c, s, _VECTORMATH_SHUF_BBY0 );
+    /* X0 / X1: lane 0 of s / c replicated to every lane */
+    X0 = spu_shuffle( s, s, shuffle_xxxx );
+    X1 = spu_shuffle( c, c, shuffle_xxxx );
+    tmp = spu_mul( Z0, Y1 );
+    result->col0.vec128 = spu_mul( Z0, Y0 );
+    /* col1 = Z1*X1 + tmp*X0 */
+    result->col1.vec128 = spu_madd( Z1, X1, spu_mul( tmp, X0 ) );
+    /* col2 = tmp*X1 - Z1*X0 */
+    result->col2.vec128 = spu_nmsub( Z1, X0, spu_mul( tmp, X1 ) );
+    vmathV3MakeFromScalar( &result->col3, 0.0f );
+}
+
+/*
+ * Build a rotation transform about unitVec: the 3x3 part comes from
+ * vmathM3MakeRotationAxis and the translation is a vector made from the
+ * scalar 0.0f.
+ */
+static inline void vmathT3MakeRotationAxis( VmathTransform3 *result, float radians, const VmathVector3 *unitVec )
+{
+    VmathMatrix3 rotation;
+    VmathVector3 noTranslation;
+
+    vmathM3MakeRotationAxis( &rotation, radians, unitVec );
+    vmathV3MakeFromScalar( &noTranslation, 0.0f );
+    vmathT3MakeFromM3V3( result, &rotation, &noTranslation );
+}
+
+/*
+ * Build a rotation transform from a quaternion: the 3x3 part comes from
+ * vmathM3MakeFromQ and the translation is a vector made from the scalar
+ * 0.0f.
+ */
+static inline void vmathT3MakeRotationQ( VmathTransform3 *result, const VmathQuat *unitQuat )
+{
+    VmathMatrix3 rotation;
+    VmathVector3 noTranslation;
+
+    vmathM3MakeFromQ( &rotation, unitQuat );
+    vmathV3MakeFromScalar( &noTranslation, 0.0f );
+    vmathT3MakeFromM3V3( result, &rotation, &noTranslation );
+}
+
+/*
+ * Build a scale transform: column i (i = 0..2) holds lane i of scaleVec in
+ * lane i and zero elsewhere; column 3 is made from the scalar 0.0f.
+ */
+static inline void vmathT3MakeScale( VmathTransform3 *result, const VmathVector3 *scaleVec )
+{
+    vec_uint4 laneX = (vec_uint4)spu_maskb(0xf000);
+    vec_uint4 laneY = (vec_uint4)spu_maskb(0x0f00);
+    vec_uint4 laneZ = (vec_uint4)spu_maskb(0x00f0);
+    vec_float4 zeros = spu_splats(0.0f);
+
+    /* scaleVec is re-read for every column, exactly as the stores are ordered */
+    result->col0.vec128 = spu_sel( zeros, scaleVec->vec128, laneX );
+    result->col1.vec128 = spu_sel( zeros, scaleVec->vec128, laneY );
+    result->col2.vec128 = spu_sel( zeros, scaleVec->vec128, laneZ );
+    vmathV3MakeFromScalar( &result->col3, 0.0f );
+}
+
+/*
+ * Scale columns 0, 1 and 2 of tfrm by the x, y and z components of scaleVec
+ * respectively and copy column 3 unchanged.
+ */
+static inline void vmathT3AppendScale( VmathTransform3 *result, const VmathTransform3 *tfrm, const VmathVector3 *scaleVec )
+{
+    float factor;
+
+    /* each component is read immediately before the column it scales */
+    factor = vmathV3GetX( scaleVec );
+    vmathV3ScalarMul( &result->col0, &tfrm->col0, factor );
+
+    factor = vmathV3GetY( scaleVec );
+    vmathV3ScalarMul( &result->col1, &tfrm->col1, factor );
+
+    factor = vmathV3GetZ( scaleVec );
+    vmathV3ScalarMul( &result->col2, &tfrm->col2, factor );
+
+    vmathV3Copy( &result->col3, &tfrm->col3 );
+}
+
+/* Multiply every column of tfrm (including column 3) element-wise by scaleVec. */
+static inline void vmathT3PrependScale( VmathTransform3 *result, const VmathVector3 *scaleVec, const VmathTransform3 *tfrm )
+{
+    int col;
+
+    /* columns are addressed relative to col0, as vmathT3SetCol/GetCol do */
+    for ( col = 0; col < 4; col++ )
+    {
+        vmathV3MulPerElem( (&result->col0 + col), (&tfrm->col0 + col), scaleVec );
+    }
+}
+
+/*
+ * Build a translation transform: columns 0..2 are the X, Y and Z axes and
+ * column 3 is a copy of translateVec.
+ */
+static inline void vmathT3MakeTranslation( VmathTransform3 *result, const VmathVector3 *translateVec )
+{
+    VmathVector3 *translationCol = &result->col3;
+
+    vmathV3MakeXAxis( &result->col0 );
+    vmathV3MakeYAxis( &result->col1 );
+    vmathV3MakeZAxis( &result->col2 );
+    vmathV3Copy( translationCol, translateVec );
+}
+
+/*
+ * Column-wise select between two transforms: every column of result is
+ * produced by vmathV3Select from the matching columns of tfrm0 and tfrm1
+ * with the same select1 flag.
+ */
+static inline void vmathT3Select( VmathTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1, unsigned int select1 )
+{
+    int col;
+
+    /* columns are addressed relative to col0, as vmathT3SetCol/GetCol do */
+    for ( col = 0; col < 4; col++ )
+    {
+        vmathV3Select( (&result->col0 + col), (&tfrm0->col0 + col), (&tfrm1->col0 + col), select1 );
+    }
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+/* Print rows 0..2 of a transform, one vmathV4Print call per row. */
+static inline void vmathT3Print( const VmathTransform3 *tfrm )
+{
+    VmathVector4 rowVec;
+    int row;
+
+    for ( row = 0; row < 3; row++ )
+    {
+        vmathT3GetRow( &rowVec, tfrm, row );
+        vmathV4Print( &rowVec );
+    }
+}
+
+/* Print `name` followed by a colon and newline, then the transform rows. */
+static inline void vmathT3Prints( const VmathTransform3 *tfrm, const char *name )
+{
+    const char *label = name;
+
+    printf("%s:\n", label);
+    vmathT3Print( tfrm );
+}
+
+#endif
+
+/*
+ * Convert a 3x3 matrix to a quaternion without branching.
+ * Four candidate quaternions are computed, one per case listed below, and
+ * the final value is chosen with lane selects based on comparisons of the
+ * diagonal elements and the diagonal sum.
+ * NOTE(review): the result is only a meaningful rotation quaternion if tfrm
+ * is a rotation matrix; the code does not verify this.  The lane layouts
+ * named by the local variables rely on _VECTORMATH_SHUF_* patterns defined
+ * outside this section.
+ */
+static inline void vmathQMakeFromM3( VmathQuat *result, const VmathMatrix3 *tfrm )
+{
+    vec_float4 res;
+    vec_float4 col0, col1, col2;
+    vec_float4 xx_yy, xx_yy_zz_xx, yy_zz_xx_yy, zz_xx_yy_zz, diagSum, diagDiff;
+    vec_float4 zy_xz_yx, yz_zx_xy, sum, diff;
+    vec_float4 radicand, invSqrt, scale;
+    vec_float4 res0, res1, res2, res3;
+    vec_float4 xx, yy, zz;
+    vec_uint4 select_x = (vec_uint4)spu_maskb( 0xf000 );
+    vec_uint4 select_y = (vec_uint4)spu_maskb( 0x0f00 );
+    vec_uint4 select_z = (vec_uint4)spu_maskb( 0x00f0 );
+    vec_uint4 select_w = (vec_uint4)spu_maskb( 0x000f );
+    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((unsigned int)0x00010203);
+    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((unsigned int)0x04050607);
+    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((unsigned int)0x08090a0b);
+    vec_uchar16 shuffle_wwww = (vec_uchar16)spu_splats((unsigned int)0x0c0d0e0f);
+
+    col0 = tfrm->col0.vec128;
+    col1 = tfrm->col1.vec128;
+    col2 = tfrm->col2.vec128;
+
+    /* four cases: */
+    /* trace > 0 */
+    /* else */
+    /*    xx largest diagonal element */
+    /*    yy largest diagonal element */
+    /*    zz largest diagonal element */
+
+    /* compute quaternion for each case */
+
+    /* rotations of the diagonal elements, used to form sums and differences */
+    xx_yy = spu_sel( col0, col1, select_y );
+    xx_yy_zz_xx = spu_shuffle( xx_yy, col2, _VECTORMATH_SHUF_XYCX );
+    yy_zz_xx_yy = spu_shuffle( xx_yy, col2, _VECTORMATH_SHUF_YCXY );
+    zz_xx_yy_zz = spu_shuffle( xx_yy, col2, _VECTORMATH_SHUF_CXYC );
+
+    diagSum = spu_add( spu_add( xx_yy_zz_xx, yy_zz_xx_yy ), zz_xx_yy_zz );
+    diagDiff = spu_sub( spu_sub( xx_yy_zz_xx, yy_zz_xx_yy ), zz_xx_yy_zz );
+    /* radicand: diagDiff + 1 in lanes 0..2, diagSum + 1 in lane 3 */
+    radicand = spu_add( spu_sel( diagDiff, diagSum, select_w ), spu_splats(1.0f) );
+    invSqrt = rsqrtf4( radicand );
+
+    /* off-diagonal elements, gathered so they can be added and subtracted lane-wise */
+    zy_xz_yx = spu_sel( col0, col1, select_z );
+    zy_xz_yx = spu_shuffle( zy_xz_yx, col2, _VECTORMATH_SHUF_ZAY0 );
+    yz_zx_xy = spu_sel( col0, col1, select_x );
+    yz_zx_xy = spu_shuffle( yz_zx_xy, col2, _VECTORMATH_SHUF_BZX0 );
+
+    sum = spu_add( zy_xz_yx, yz_zx_xy );
+    diff = spu_sub( zy_xz_yx, yz_zx_xy );
+
+    /* scale = 0.5 / sqrt(radicand), one value per case */
+    scale = spu_mul( invSqrt, spu_splats(0.5f) );
+    res0 = spu_shuffle( sum, diff, _VECTORMATH_SHUF_0ZYA );
+    res1 = spu_shuffle( sum, diff, _VECTORMATH_SHUF_Z0XB );
+    res2 = spu_shuffle( sum, diff, _VECTORMATH_SHUF_YX0C );
+    res3 = diff;
+    /* candidate i takes radicand in lane i, then is multiplied by scale lane i */
+    res0 = spu_sel( res0, radicand, select_x );
+    res1 = spu_sel( res1, radicand, select_y );
+    res2 = spu_sel( res2, radicand, select_z );
+    res3 = spu_sel( res3, radicand, select_w );
+    res0 = spu_mul( res0, spu_shuffle( scale, scale, shuffle_xxxx ) );
+    res1 = spu_mul( res1, spu_shuffle( scale, scale, shuffle_yyyy ) );
+    res2 = spu_mul( res2, spu_shuffle( scale, scale, shuffle_zzzz ) );
+    res3 = spu_mul( res3, spu_shuffle( scale, scale, shuffle_wwww ) );
+
+    /* determine case and select answer */
+
+    xx = spu_shuffle( col0, col0, shuffle_xxxx );
+    yy = spu_shuffle( col1, col1, shuffle_yyyy );
+    zz = spu_shuffle( col2, col2, shuffle_zzzz );
+    /* res0 by default; res1 if yy > xx; res2 if zz exceeds both; res3 if lane 0 of diagSum > 0 */
+    res = spu_sel( res0, res1, spu_cmpgt( yy, xx ) );
+    res = spu_sel( res, res2, spu_and( spu_cmpgt( zz, xx ), spu_cmpgt( zz, yy ) ) );
+    res = spu_sel( res, res3, spu_cmpgt( spu_shuffle( diagSum, diagSum, shuffle_xxxx ), spu_splats(0.0f) ) );
+    result->vec128 = res;
+}
+
+static inline void vmathV3Outer( VmathMatrix3 *result, const VmathVector3 *tfrm0, const VmathVector3 *tfrm1 )
+{
+    vmathV3ScalarMul( &result->col0, tfrm0, vmathV3GetX( tfrm1 ) );
+    vmathV3ScalarMul( &result->col1, tfrm0, vmathV3GetY( tfrm1 ) );
+    vmathV3ScalarMul( &result->col2, tfrm0, vmathV3GetZ( tfrm1 ) );
+}
+
+static inline void vmathV4Outer( VmathMatrix4 *result, const VmathVector4 *tfrm0, const VmathVector4 *tfrm1 )
+{
+    vmathV4ScalarMul( &result->col0, tfrm0, vmathV4GetX( tfrm1 ) );
+    vmathV4ScalarMul( &result->col1, tfrm0, vmathV4GetY( tfrm1 ) );
+    vmathV4ScalarMul( &result->col2, tfrm0, vmathV4GetZ( tfrm1 ) );
+    vmathV4ScalarMul( &result->col3, tfrm0, vmathV4GetW( tfrm1 ) );
+}
+
+static inline void vmathV3RowMul( VmathVector3 *result, const VmathVector3 *vec, const VmathMatrix3 *mat )
+{
+    vec_float4 tmp0, tmp1, mcol0, mcol1, mcol2, res;
+    vec_float4 xxxx, yyyy, zzzz;
+    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
+    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
+    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
+    tmp0 = spu_shuffle( mat->col0.vec128, mat->col2.vec128, _VECTORMATH_SHUF_XAYB );
+    tmp1 = spu_shuffle( mat->col0.vec128, mat->col2.vec128, _VECTORMATH_SHUF_ZCWD );
+    xxxx = spu_shuffle( vec->vec128, vec->vec128, shuffle_xxxx );
+    mcol0 = spu_shuffle( tmp0, mat->col1.vec128, _VECTORMATH_SHUF_XAYB );
+    mcol1 = spu_shuffle( tmp0, mat->col1.vec128, _VECTORMATH_SHUF_ZBW0 );
+    mcol2 = spu_shuffle( tmp1, mat->col1.vec128, _VECTORMATH_SHUF_XCY0 );
+    yyyy = spu_shuffle( vec->vec128, vec->vec128, shuffle_yyyy );
+    res = spu_mul( mcol0, xxxx );
+    zzzz = spu_shuffle( vec->vec128, vec->vec128, shuffle_zzzz );
+    res = spu_madd( mcol1, yyyy, res );
+    res = spu_madd( mcol2, zzzz, res );
+    result->vec128 = res;
+}
+
+static inline void vmathV3CrossMatrix( VmathMatrix3 *result, const VmathVector3 *vec )
+{
+    vec_float4 neg, res0, res1, res2;
+    neg = negatef4( vec->vec128 );
+    res0 = spu_shuffle( vec->vec128, neg, _VECTORMATH_SHUF_0ZB0 );
+    res1 = spu_shuffle( vec->vec128, neg, _VECTORMATH_SHUF_C0X0 );
+    res2 = spu_shuffle( vec->vec128, neg, _VECTORMATH_SHUF_YA00 );
+    result->col0.vec128 = res0;
+    result->col1.vec128 = res1;
+    result->col2.vec128 = res2;
+}
+
+static inline void vmathV3CrossMatrixMul( VmathMatrix3 *result, const VmathVector3 *vec, const VmathMatrix3 *mat )
+{
+    VmathVector3 tmpV3_0, tmpV3_1, tmpV3_2;
+    vmathV3Cross( &tmpV3_0, vec, &mat->col0 );
+    vmathV3Cross( &tmpV3_1, vec, &mat->col1 );
+    vmathV3Cross( &tmpV3_2, vec, &mat->col2 );
+    vmathM3MakeFromCols( result, &tmpV3_0, &tmpV3_1, &tmpV3_2 );
+}
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/spu/c/mat_aos_v.h b/Extras/vectormathlibrary/include/vectormath/spu/c/mat_aos_v.h
index 986612ebe..330dfda38 100644
--- a/Extras/vectormathlibrary/include/vectormath/spu/c/mat_aos_v.h
+++ b/Extras/vectormathlibrary/include/vectormath/spu/c/mat_aos_v.h
@@ -1,1029 +1,1029 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_MAT_AOS_V_C_H
-#define _VECTORMATH_MAT_AOS_V_C_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/*-----------------------------------------------------------------------------
- * Constants
- * for shuffles, words are labeled [x,y,z,w] [a,b,c,d]
- */
-#define _VECTORMATH_SHUF_XAYB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_B })
-#define _VECTORMATH_SHUF_ZCWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_D })
-#define _VECTORMATH_SHUF_ZBW0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_0 })
-#define _VECTORMATH_SHUF_XCY0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_0 })
-#define _VECTORMATH_SHUF_XYAB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B })
-#define _VECTORMATH_SHUF_ZWCD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_D })
-#define _VECTORMATH_SHUF_0ZB0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_0 })     
-#define _VECTORMATH_SHUF_C0X0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_0 })
-#define _VECTORMATH_SHUF_YA00 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_0 })
-#define _VECTORMATH_SHUF_XAZC ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_C })
-#define _VECTORMATH_SHUF_YXWZ ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_Z })
-#define _VECTORMATH_SHUF_YBWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_D })
-#define _VECTORMATH_SHUF_XYCX ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_X })
-#define _VECTORMATH_SHUF_YCXY ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y })
-#define _VECTORMATH_SHUF_CXYC ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_C })
-#define _VECTORMATH_SHUF_ZAY0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_0 })
-#define _VECTORMATH_SHUF_BZX0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_0 })
-#define _VECTORMATH_SHUF_0ZYA ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_A })
-#define _VECTORMATH_SHUF_Z0XB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_B })
-#define _VECTORMATH_SHUF_YX0C ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_C })
-#define _VECTORMATH_SHUF_CZD0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_D, _VECTORMATH_SHUF_0 })
-#define _VECTORMATH_SHUF_BBY0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_0 })
-#define _VECTORMATH_PI_OVER_2 1.570796327f
-
-/*-----------------------------------------------------------------------------
- * Definitions
- */
-static inline VmathMatrix3 vmathM3MakeFromScalar_V( float scalar )
-{
-    VmathMatrix3 result;
-    vmathM3MakeFromScalar(&result, scalar);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3MakeFromQ_V( VmathQuat unitQuat )
-{
-    VmathMatrix3 result;
-    vmathM3MakeFromQ(&result, &unitQuat);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3MakeFromCols_V( VmathVector3 _col0, VmathVector3 _col1, VmathVector3 _col2 )
-{
-    VmathMatrix3 result;
-    vmathM3MakeFromCols(&result, &_col0, &_col1, &_col2);
-    return result;
-}
-
-static inline void vmathM3SetCol0_V( VmathMatrix3 *result, VmathVector3 _col0 )
-{
-    vmathM3SetCol0(result, &_col0);
-}
-
-static inline void vmathM3SetCol1_V( VmathMatrix3 *result, VmathVector3 _col1 )
-{
-    vmathM3SetCol1(result, &_col1);
-}
-
-static inline void vmathM3SetCol2_V( VmathMatrix3 *result, VmathVector3 _col2 )
-{
-    vmathM3SetCol2(result, &_col2);
-}
-
-static inline void vmathM3SetCol_V( VmathMatrix3 *result, int col, VmathVector3 vec )
-{
-    vmathM3SetCol(result, col, &vec);
-}
-
-static inline void vmathM3SetRow_V( VmathMatrix3 *result, int row, VmathVector3 vec )
-{
-    vmathM3SetRow(result, row, &vec);
-}
-
-static inline void vmathM3SetElem_V( VmathMatrix3 *result, int col, int row, float val )
-{
-    vmathM3SetElem(result, col, row, val);
-}
-
-static inline float vmathM3GetElem_V( VmathMatrix3 mat, int col, int row )
-{
-    return vmathM3GetElem(&mat, col, row);
-}
-
-static inline VmathVector3 vmathM3GetCol0_V( VmathMatrix3 mat )
-{
-    VmathVector3 result;
-    vmathM3GetCol0(&result, &mat);
-    return result;
-}
-
-static inline VmathVector3 vmathM3GetCol1_V( VmathMatrix3 mat )
-{
-    VmathVector3 result;
-    vmathM3GetCol1(&result, &mat);
-    return result;
-}
-
-static inline VmathVector3 vmathM3GetCol2_V( VmathMatrix3 mat )
-{
-    VmathVector3 result;
-    vmathM3GetCol2(&result, &mat);
-    return result;
-}
-
-static inline VmathVector3 vmathM3GetCol_V( VmathMatrix3 mat, int col )
-{
-    VmathVector3 result;
-    vmathM3GetCol(&result, &mat, col);
-    return result;
-}
-
-static inline VmathVector3 vmathM3GetRow_V( VmathMatrix3 mat, int row )
-{
-    VmathVector3 result;
-    vmathM3GetRow(&result, &mat, row);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3Transpose_V( VmathMatrix3 mat )
-{
-    VmathMatrix3 result;
-    vmathM3Transpose(&result, &mat);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3Inverse_V( VmathMatrix3 mat )
-{
-    VmathMatrix3 result;
-    vmathM3Inverse(&result, &mat);
-    return result;
-}
-
-static inline float vmathM3Determinant_V( VmathMatrix3 mat )
-{
-    return vmathM3Determinant(&mat);
-}
-
-static inline VmathMatrix3 vmathM3Add_V( VmathMatrix3 mat0, VmathMatrix3 mat1 )
-{
-    VmathMatrix3 result;
-    vmathM3Add(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3Sub_V( VmathMatrix3 mat0, VmathMatrix3 mat1 )
-{
-    VmathMatrix3 result;
-    vmathM3Sub(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3Neg_V( VmathMatrix3 mat )
-{
-    VmathMatrix3 result;
-    vmathM3Neg(&result, &mat);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3AbsPerElem_V( VmathMatrix3 mat )
-{
-    VmathMatrix3 result;
-    vmathM3AbsPerElem(&result, &mat);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3ScalarMul_V( VmathMatrix3 mat, float scalar )
-{
-    VmathMatrix3 result;
-    vmathM3ScalarMul(&result, &mat, scalar);
-    return result;
-}
-
-static inline VmathVector3 vmathM3MulV3_V( VmathMatrix3 mat, VmathVector3 vec )
-{
-    VmathVector3 result;
-    vmathM3MulV3(&result, &mat, &vec);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3Mul_V( VmathMatrix3 mat0, VmathMatrix3 mat1 )
-{
-    VmathMatrix3 result;
-    vmathM3Mul(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3MulPerElem_V( VmathMatrix3 mat0, VmathMatrix3 mat1 )
-{
-    VmathMatrix3 result;
-    vmathM3MulPerElem(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3MakeIdentity_V( )
-{
-    VmathMatrix3 result;
-    vmathM3MakeIdentity(&result);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3MakeRotationX_V( float radians )
-{
-    VmathMatrix3 result;
-    vmathM3MakeRotationX(&result, radians);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3MakeRotationY_V( float radians )
-{
-    VmathMatrix3 result;
-    vmathM3MakeRotationY(&result, radians);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3MakeRotationZ_V( float radians )
-{
-    VmathMatrix3 result;
-    vmathM3MakeRotationZ(&result, radians);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3MakeRotationZYX_V( VmathVector3 radiansXYZ )
-{
-    VmathMatrix3 result;
-    vmathM3MakeRotationZYX(&result, &radiansXYZ);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3MakeRotationAxis_V( float radians, VmathVector3 unitVec )
-{
-    VmathMatrix3 result;
-    vmathM3MakeRotationAxis(&result, radians, &unitVec);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3MakeRotationQ_V( VmathQuat unitQuat )
-{
-    VmathMatrix3 result;
-    vmathM3MakeRotationQ(&result, &unitQuat);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3MakeScale_V( VmathVector3 scaleVec )
-{
-    VmathMatrix3 result;
-    vmathM3MakeScale(&result, &scaleVec);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3AppendScale_V( VmathMatrix3 mat, VmathVector3 scaleVec )
-{
-    VmathMatrix3 result;
-    vmathM3AppendScale(&result, &mat, &scaleVec);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3PrependScale_V( VmathVector3 scaleVec, VmathMatrix3 mat )
-{
-    VmathMatrix3 result;
-    vmathM3PrependScale(&result, &scaleVec, &mat);
-    return result;
-}
-
-static inline VmathMatrix3 vmathM3Select_V( VmathMatrix3 mat0, VmathMatrix3 mat1, unsigned int select1 )
-{
-    VmathMatrix3 result;
-    vmathM3Select(&result, &mat0, &mat1, select1);
-    return result;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathM3Print_V( VmathMatrix3 mat )
-{
-    vmathM3Print(&mat);
-}
-
-static inline void vmathM3Prints_V( VmathMatrix3 mat, const char *name )
-{
-    vmathM3Prints(&mat, name);
-}
-
-#endif
-
-static inline VmathMatrix4 vmathM4MakeFromScalar_V( float scalar )
-{
-    VmathMatrix4 result;
-    vmathM4MakeFromScalar(&result, scalar);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeFromT3_V( VmathTransform3 mat )
-{
-    VmathMatrix4 result;
-    vmathM4MakeFromT3(&result, &mat);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeFromCols_V( VmathVector4 _col0, VmathVector4 _col1, VmathVector4 _col2, VmathVector4 _col3 )
-{
-    VmathMatrix4 result;
-    vmathM4MakeFromCols(&result, &_col0, &_col1, &_col2, &_col3);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeFromM3V3_V( VmathMatrix3 mat, VmathVector3 translateVec )
-{
-    VmathMatrix4 result;
-    vmathM4MakeFromM3V3(&result, &mat, &translateVec);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeFromQV3_V( VmathQuat unitQuat, VmathVector3 translateVec )
-{
-    VmathMatrix4 result;
-    vmathM4MakeFromQV3(&result, &unitQuat, &translateVec);
-    return result;
-}
-
-static inline void vmathM4SetCol0_V( VmathMatrix4 *result, VmathVector4 _col0 )
-{
-    vmathM4SetCol0(result, &_col0);
-}
-
-static inline void vmathM4SetCol1_V( VmathMatrix4 *result, VmathVector4 _col1 )
-{
-    vmathM4SetCol1(result, &_col1);
-}
-
-static inline void vmathM4SetCol2_V( VmathMatrix4 *result, VmathVector4 _col2 )
-{
-    vmathM4SetCol2(result, &_col2);
-}
-
-static inline void vmathM4SetCol3_V( VmathMatrix4 *result, VmathVector4 _col3 )
-{
-    vmathM4SetCol3(result, &_col3);
-}
-
-static inline void vmathM4SetCol_V( VmathMatrix4 *result, int col, VmathVector4 vec )
-{
-    vmathM4SetCol(result, col, &vec);
-}
-
-static inline void vmathM4SetRow_V( VmathMatrix4 *result, int row, VmathVector4 vec )
-{
-    vmathM4SetRow(result, row, &vec);
-}
-
-static inline void vmathM4SetElem_V( VmathMatrix4 *result, int col, int row, float val )
-{
-    vmathM4SetElem(result, col, row, val);
-}
-
-static inline float vmathM4GetElem_V( VmathMatrix4 mat, int col, int row )
-{
-    return vmathM4GetElem(&mat, col, row);
-}
-
-static inline VmathVector4 vmathM4GetCol0_V( VmathMatrix4 mat )
-{
-    VmathVector4 result;
-    vmathM4GetCol0(&result, &mat);
-    return result;
-}
-
-static inline VmathVector4 vmathM4GetCol1_V( VmathMatrix4 mat )
-{
-    VmathVector4 result;
-    vmathM4GetCol1(&result, &mat);
-    return result;
-}
-
-static inline VmathVector4 vmathM4GetCol2_V( VmathMatrix4 mat )
-{
-    VmathVector4 result;
-    vmathM4GetCol2(&result, &mat);
-    return result;
-}
-
-static inline VmathVector4 vmathM4GetCol3_V( VmathMatrix4 mat )
-{
-    VmathVector4 result;
-    vmathM4GetCol3(&result, &mat);
-    return result;
-}
-
-static inline VmathVector4 vmathM4GetCol_V( VmathMatrix4 mat, int col )
-{
-    VmathVector4 result;
-    vmathM4GetCol(&result, &mat, col);
-    return result;
-}
-
-static inline VmathVector4 vmathM4GetRow_V( VmathMatrix4 mat, int row )
-{
-    VmathVector4 result;
-    vmathM4GetRow(&result, &mat, row);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4Transpose_V( VmathMatrix4 mat )
-{
-    VmathMatrix4 result;
-    vmathM4Transpose(&result, &mat);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4Inverse_V( VmathMatrix4 mat )
-{
-    VmathMatrix4 result;
-    vmathM4Inverse(&result, &mat);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4AffineInverse_V( VmathMatrix4 mat )
-{
-    VmathMatrix4 result;
-    vmathM4AffineInverse(&result, &mat);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4OrthoInverse_V( VmathMatrix4 mat )
-{
-    VmathMatrix4 result;
-    vmathM4OrthoInverse(&result, &mat);
-    return result;
-}
-
-static inline float vmathM4Determinant_V( VmathMatrix4 mat )
-{
-    return vmathM4Determinant(&mat);
-}
-
-static inline VmathMatrix4 vmathM4Add_V( VmathMatrix4 mat0, VmathMatrix4 mat1 )
-{
-    VmathMatrix4 result;
-    vmathM4Add(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4Sub_V( VmathMatrix4 mat0, VmathMatrix4 mat1 )
-{
-    VmathMatrix4 result;
-    vmathM4Sub(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4Neg_V( VmathMatrix4 mat )
-{
-    VmathMatrix4 result;
-    vmathM4Neg(&result, &mat);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4AbsPerElem_V( VmathMatrix4 mat )
-{
-    VmathMatrix4 result;
-    vmathM4AbsPerElem(&result, &mat);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4ScalarMul_V( VmathMatrix4 mat, float scalar )
-{
-    VmathMatrix4 result;
-    vmathM4ScalarMul(&result, &mat, scalar);
-    return result;
-}
-
-static inline VmathVector4 vmathM4MulV4_V( VmathMatrix4 mat, VmathVector4 vec )
-{
-    VmathVector4 result;
-    vmathM4MulV4(&result, &mat, &vec);
-    return result;
-}
-
-static inline VmathVector4 vmathM4MulV3_V( VmathMatrix4 mat, VmathVector3 vec )
-{
-    VmathVector4 result;
-    vmathM4MulV3(&result, &mat, &vec);
-    return result;
-}
-
-static inline VmathVector4 vmathM4MulP3_V( VmathMatrix4 mat, VmathPoint3 pnt )
-{
-    VmathVector4 result;
-    vmathM4MulP3(&result, &mat, &pnt);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4Mul_V( VmathMatrix4 mat0, VmathMatrix4 mat1 )
-{
-    VmathMatrix4 result;
-    vmathM4Mul(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MulT3_V( VmathMatrix4 mat, VmathTransform3 tfrm1 )
-{
-    VmathMatrix4 result;
-    vmathM4MulT3(&result, &mat, &tfrm1);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MulPerElem_V( VmathMatrix4 mat0, VmathMatrix4 mat1 )
-{
-    VmathMatrix4 result;
-    vmathM4MulPerElem(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeIdentity_V( )
-{
-    VmathMatrix4 result;
-    vmathM4MakeIdentity(&result);
-    return result;
-}
-
-static inline void vmathM4SetUpper3x3_V( VmathMatrix4 *result, VmathMatrix3 mat3 )
-{
-    vmathM4SetUpper3x3(result, &mat3);
-}
-
-static inline VmathMatrix3 vmathM4GetUpper3x3_V( VmathMatrix4 mat )
-{
-    VmathMatrix3 result;
-    vmathM4GetUpper3x3(&result, &mat);
-    return result;
-}
-
-static inline void vmathM4SetTranslation_V( VmathMatrix4 *result, VmathVector3 translateVec )
-{
-    vmathM4SetTranslation(result, &translateVec);
-}
-
-static inline VmathVector3 vmathM4GetTranslation_V( VmathMatrix4 mat )
-{
-    VmathVector3 result;
-    vmathM4GetTranslation(&result, &mat);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeRotationX_V( float radians )
-{
-    VmathMatrix4 result;
-    vmathM4MakeRotationX(&result, radians);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeRotationY_V( float radians )
-{
-    VmathMatrix4 result;
-    vmathM4MakeRotationY(&result, radians);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeRotationZ_V( float radians )
-{
-    VmathMatrix4 result;
-    vmathM4MakeRotationZ(&result, radians);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeRotationZYX_V( VmathVector3 radiansXYZ )
-{
-    VmathMatrix4 result;
-    vmathM4MakeRotationZYX(&result, &radiansXYZ);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeRotationAxis_V( float radians, VmathVector3 unitVec )
-{
-    VmathMatrix4 result;
-    vmathM4MakeRotationAxis(&result, radians, &unitVec);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeRotationQ_V( VmathQuat unitQuat )
-{
-    VmathMatrix4 result;
-    vmathM4MakeRotationQ(&result, &unitQuat);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeScale_V( VmathVector3 scaleVec )
-{
-    VmathMatrix4 result;
-    vmathM4MakeScale(&result, &scaleVec);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4AppendScale_V( VmathMatrix4 mat, VmathVector3 scaleVec )
-{
-    VmathMatrix4 result;
-    vmathM4AppendScale(&result, &mat, &scaleVec);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4PrependScale_V( VmathVector3 scaleVec, VmathMatrix4 mat )
-{
-    VmathMatrix4 result;
-    vmathM4PrependScale(&result, &scaleVec, &mat);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeTranslation_V( VmathVector3 translateVec )
-{
-    VmathMatrix4 result;
-    vmathM4MakeTranslation(&result, &translateVec);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeLookAt_V( VmathPoint3 eyePos, VmathPoint3 lookAtPos, VmathVector3 upVec )
-{
-    VmathMatrix4 result;
-    vmathM4MakeLookAt(&result, &eyePos, &lookAtPos, &upVec);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakePerspective_V( float fovyRadians, float aspect, float zNear, float zFar )
-{
-    VmathMatrix4 result;
-    vmathM4MakePerspective(&result, fovyRadians, aspect, zNear, zFar);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeFrustum_V( float left, float right, float bottom, float top, float zNear, float zFar )
-{
-    VmathMatrix4 result;
-    vmathM4MakeFrustum(&result, left, right, bottom, top, zNear, zFar);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4MakeOrthographic_V( float left, float right, float bottom, float top, float zNear, float zFar )
-{
-    VmathMatrix4 result;
-    vmathM4MakeOrthographic(&result, left, right, bottom, top, zNear, zFar);
-    return result;
-}
-
-static inline VmathMatrix4 vmathM4Select_V( VmathMatrix4 mat0, VmathMatrix4 mat1, unsigned int select1 )
-{
-    VmathMatrix4 result;
-    vmathM4Select(&result, &mat0, &mat1, select1);
-    return result;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathM4Print_V( VmathMatrix4 mat )
-{
-    vmathM4Print(&mat);
-}
-
-static inline void vmathM4Prints_V( VmathMatrix4 mat, const char *name )
-{
-    vmathM4Prints(&mat, name);
-}
-
-#endif
-
-static inline VmathTransform3 vmathT3MakeFromScalar_V( float scalar )
-{
-    VmathTransform3 result;
-    vmathT3MakeFromScalar(&result, scalar);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MakeFromCols_V( VmathVector3 _col0, VmathVector3 _col1, VmathVector3 _col2, VmathVector3 _col3 )
-{
-    VmathTransform3 result;
-    vmathT3MakeFromCols(&result, &_col0, &_col1, &_col2, &_col3);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MakeFromM3V3_V( VmathMatrix3 tfrm, VmathVector3 translateVec )
-{
-    VmathTransform3 result;
-    vmathT3MakeFromM3V3(&result, &tfrm, &translateVec);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MakeFromQV3_V( VmathQuat unitQuat, VmathVector3 translateVec )
-{
-    VmathTransform3 result;
-    vmathT3MakeFromQV3(&result, &unitQuat, &translateVec);
-    return result;
-}
-
-static inline void vmathT3SetCol0_V( VmathTransform3 *result, VmathVector3 _col0 )
-{
-    vmathT3SetCol0(result, &_col0);
-}
-
-static inline void vmathT3SetCol1_V( VmathTransform3 *result, VmathVector3 _col1 )
-{
-    vmathT3SetCol1(result, &_col1);
-}
-
-static inline void vmathT3SetCol2_V( VmathTransform3 *result, VmathVector3 _col2 )
-{
-    vmathT3SetCol2(result, &_col2);
-}
-
-static inline void vmathT3SetCol3_V( VmathTransform3 *result, VmathVector3 _col3 )
-{
-    vmathT3SetCol3(result, &_col3);
-}
-
-static inline void vmathT3SetCol_V( VmathTransform3 *result, int col, VmathVector3 vec )
-{
-    vmathT3SetCol(result, col, &vec);
-}
-
-static inline void vmathT3SetRow_V( VmathTransform3 *result, int row, VmathVector4 vec )
-{
-    vmathT3SetRow(result, row, &vec);
-}
-
-static inline void vmathT3SetElem_V( VmathTransform3 *result, int col, int row, float val )
-{
-    vmathT3SetElem(result, col, row, val);
-}
-
-static inline float vmathT3GetElem_V( VmathTransform3 tfrm, int col, int row )
-{
-    return vmathT3GetElem(&tfrm, col, row);
-}
-
-static inline VmathVector3 vmathT3GetCol0_V( VmathTransform3 tfrm )
-{
-    VmathVector3 result;
-    vmathT3GetCol0(&result, &tfrm);
-    return result;
-}
-
-static inline VmathVector3 vmathT3GetCol1_V( VmathTransform3 tfrm )
-{
-    VmathVector3 result;
-    vmathT3GetCol1(&result, &tfrm);
-    return result;
-}
-
-static inline VmathVector3 vmathT3GetCol2_V( VmathTransform3 tfrm )
-{
-    VmathVector3 result;
-    vmathT3GetCol2(&result, &tfrm);
-    return result;
-}
-
-static inline VmathVector3 vmathT3GetCol3_V( VmathTransform3 tfrm )
-{
-    VmathVector3 result;
-    vmathT3GetCol3(&result, &tfrm);
-    return result;
-}
-
-static inline VmathVector3 vmathT3GetCol_V( VmathTransform3 tfrm, int col )
-{
-    VmathVector3 result;
-    vmathT3GetCol(&result, &tfrm, col);
-    return result;
-}
-
-static inline VmathVector4 vmathT3GetRow_V( VmathTransform3 tfrm, int row )
-{
-    VmathVector4 result;
-    vmathT3GetRow(&result, &tfrm, row);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3Inverse_V( VmathTransform3 tfrm )
-{
-    VmathTransform3 result;
-    vmathT3Inverse(&result, &tfrm);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3OrthoInverse_V( VmathTransform3 tfrm )
-{
-    VmathTransform3 result;
-    vmathT3OrthoInverse(&result, &tfrm);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3AbsPerElem_V( VmathTransform3 tfrm )
-{
-    VmathTransform3 result;
-    vmathT3AbsPerElem(&result, &tfrm);
-    return result;
-}
-
-static inline VmathVector3 vmathT3MulV3_V( VmathTransform3 tfrm, VmathVector3 vec )
-{
-    VmathVector3 result;
-    vmathT3MulV3(&result, &tfrm, &vec);
-    return result;
-}
-
-static inline VmathPoint3 vmathT3MulP3_V( VmathTransform3 tfrm, VmathPoint3 pnt )
-{
-    VmathPoint3 result;
-    vmathT3MulP3(&result, &tfrm, &pnt);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3Mul_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1 )
-{
-    VmathTransform3 result;
-    vmathT3Mul(&result, &tfrm0, &tfrm1);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MulPerElem_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1 )
-{
-    VmathTransform3 result;
-    vmathT3MulPerElem(&result, &tfrm0, &tfrm1);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MakeIdentity_V( )
-{
-    VmathTransform3 result;
-    vmathT3MakeIdentity(&result);
-    return result;
-}
-
-static inline void vmathT3SetUpper3x3_V( VmathTransform3 *result, VmathMatrix3 tfrm )
-{
-    vmathT3SetUpper3x3(result, &tfrm);
-}
-
-static inline VmathMatrix3 vmathT3GetUpper3x3_V( VmathTransform3 tfrm )
-{
-    VmathMatrix3 result;
-    vmathT3GetUpper3x3(&result, &tfrm);
-    return result;
-}
-
-static inline void vmathT3SetTranslation_V( VmathTransform3 *result, VmathVector3 translateVec )
-{
-    vmathT3SetTranslation(result, &translateVec);
-}
-
-static inline VmathVector3 vmathT3GetTranslation_V( VmathTransform3 tfrm )
-{
-    VmathVector3 result;
-    vmathT3GetTranslation(&result, &tfrm);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MakeRotationX_V( float radians )
-{
-    VmathTransform3 result;
-    vmathT3MakeRotationX(&result, radians);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MakeRotationY_V( float radians )
-{
-    VmathTransform3 result;
-    vmathT3MakeRotationY(&result, radians);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MakeRotationZ_V( float radians )
-{
-    VmathTransform3 result;
-    vmathT3MakeRotationZ(&result, radians);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MakeRotationZYX_V( VmathVector3 radiansXYZ )
-{
-    VmathTransform3 result;
-    vmathT3MakeRotationZYX(&result, &radiansXYZ);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MakeRotationAxis_V( float radians, VmathVector3 unitVec )
-{
-    VmathTransform3 result;
-    vmathT3MakeRotationAxis(&result, radians, &unitVec);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MakeRotationQ_V( VmathQuat unitQuat )
-{
-    VmathTransform3 result;
-    vmathT3MakeRotationQ(&result, &unitQuat);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MakeScale_V( VmathVector3 scaleVec )
-{
-    VmathTransform3 result;
-    vmathT3MakeScale(&result, &scaleVec);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3AppendScale_V( VmathTransform3 tfrm, VmathVector3 scaleVec )
-{
-    VmathTransform3 result;
-    vmathT3AppendScale(&result, &tfrm, &scaleVec);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3PrependScale_V( VmathVector3 scaleVec, VmathTransform3 tfrm )
-{
-    VmathTransform3 result;
-    vmathT3PrependScale(&result, &scaleVec, &tfrm);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3MakeTranslation_V( VmathVector3 translateVec )
-{
-    VmathTransform3 result;
-    vmathT3MakeTranslation(&result, &translateVec);
-    return result;
-}
-
-static inline VmathTransform3 vmathT3Select_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1, unsigned int select1 )
-{
-    VmathTransform3 result;
-    vmathT3Select(&result, &tfrm0, &tfrm1, select1);
-    return result;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathT3Print_V( VmathTransform3 tfrm )
-{
-    vmathT3Print(&tfrm);
-}
-
-static inline void vmathT3Prints_V( VmathTransform3 tfrm, const char *name )
-{
-    vmathT3Prints(&tfrm, name);
-}
-
-#endif
-
-static inline VmathQuat vmathQMakeFromM3_V( VmathMatrix3 tfrm )
-{
-    VmathQuat result;
-    vmathQMakeFromM3(&result, &tfrm);
-    return result;
-}
-
-static inline VmathMatrix3 vmathV3Outer_V( VmathVector3 tfrm0, VmathVector3 tfrm1 )
-{
-    VmathMatrix3 result;
-    vmathV3Outer(&result, &tfrm0, &tfrm1);
-    return result;
-}
-
-static inline VmathMatrix4 vmathV4Outer_V( VmathVector4 tfrm0, VmathVector4 tfrm1 )
-{
-    VmathMatrix4 result;
-    vmathV4Outer(&result, &tfrm0, &tfrm1);
-    return result;
-}
-
-static inline VmathVector3 vmathV3RowMul_V( VmathVector3 vec, VmathMatrix3 mat )
-{
-    VmathVector3 result;
-    vmathV3RowMul(&result, &vec, &mat);
-    return result;
-}
-
-static inline VmathMatrix3 vmathV3CrossMatrix_V( VmathVector3 vec )
-{
-    VmathMatrix3 result;
-    vmathV3CrossMatrix(&result, &vec);
-    return result;
-}
-
-static inline VmathMatrix3 vmathV3CrossMatrixMul_V( VmathVector3 vec, VmathMatrix3 mat )
-{
-    VmathMatrix3 result;
-    vmathV3CrossMatrixMul(&result, &vec, &mat);
-    return result;
-}
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_MAT_AOS_V_C_H
+#define _VECTORMATH_MAT_AOS_V_C_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*-----------------------------------------------------------------------------
+ * Constants
+ * for shuffles, words are labeled [x,y,z,w] [a,b,c,d]; each macro name spells its four word selectors in order (0 = _VECTORMATH_SHUF_0)
+ */
+#define _VECTORMATH_SHUF_XAYB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_B })
+#define _VECTORMATH_SHUF_ZCWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_D })
+#define _VECTORMATH_SHUF_ZBW0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_0 })
+#define _VECTORMATH_SHUF_XCY0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_0 })
+#define _VECTORMATH_SHUF_XYAB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B })
+#define _VECTORMATH_SHUF_ZWCD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_D })
+#define _VECTORMATH_SHUF_0ZB0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_0 })
+#define _VECTORMATH_SHUF_C0X0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_0 })
+#define _VECTORMATH_SHUF_YA00 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_0 })
+#define _VECTORMATH_SHUF_XAZC ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_C })
+#define _VECTORMATH_SHUF_YXWZ ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_Z })
+#define _VECTORMATH_SHUF_YBWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_D })
+#define _VECTORMATH_SHUF_XYCX ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_X })
+#define _VECTORMATH_SHUF_YCXY ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y })
+#define _VECTORMATH_SHUF_CXYC ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_C })
+#define _VECTORMATH_SHUF_ZAY0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_0 })
+#define _VECTORMATH_SHUF_BZX0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_0 })
+#define _VECTORMATH_SHUF_0ZYA ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_A })
+#define _VECTORMATH_SHUF_Z0XB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_B })
+#define _VECTORMATH_SHUF_YX0C ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_C })
+#define _VECTORMATH_SHUF_CZD0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_D, _VECTORMATH_SHUF_0 })
+#define _VECTORMATH_SHUF_BBY0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_0 })
+#define _VECTORMATH_PI_OVER_2 1.570796327f
+
+/*-----------------------------------------------------------------------------
+ * Definitions
+ */
+static inline VmathMatrix3 vmathM3MakeFromScalar_V( float scalar )
+{
+    VmathMatrix3 result;
+    vmathM3MakeFromScalar(&result, scalar);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3MakeFromQ_V( VmathQuat unitQuat )
+{
+    VmathMatrix3 result;
+    vmathM3MakeFromQ(&result, &unitQuat);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3MakeFromCols_V( VmathVector3 _col0, VmathVector3 _col1, VmathVector3 _col2 )
+{
+    VmathMatrix3 result;
+    vmathM3MakeFromCols(&result, &_col0, &_col1, &_col2);
+    return result;
+}
+
+static inline void vmathM3SetCol0_V( VmathMatrix3 *result, VmathVector3 _col0 )
+{
+    vmathM3SetCol0(result, &_col0);
+}
+
+static inline void vmathM3SetCol1_V( VmathMatrix3 *result, VmathVector3 _col1 )
+{
+    vmathM3SetCol1(result, &_col1);
+}
+
+static inline void vmathM3SetCol2_V( VmathMatrix3 *result, VmathVector3 _col2 )
+{
+    vmathM3SetCol2(result, &_col2);
+}
+
+static inline void vmathM3SetCol_V( VmathMatrix3 *result, int col, VmathVector3 vec )
+{
+    vmathM3SetCol(result, col, &vec);
+}
+
+static inline void vmathM3SetRow_V( VmathMatrix3 *result, int row, VmathVector3 vec )
+{
+    vmathM3SetRow(result, row, &vec);
+}
+
+static inline void vmathM3SetElem_V( VmathMatrix3 *result, int col, int row, float val )
+{
+    vmathM3SetElem(result, col, row, val);
+}
+
+static inline float vmathM3GetElem_V( VmathMatrix3 mat, int col, int row )
+{
+    return vmathM3GetElem(&mat, col, row);
+}
+
+static inline VmathVector3 vmathM3GetCol0_V( VmathMatrix3 mat )
+{
+    VmathVector3 result;
+    vmathM3GetCol0(&result, &mat);
+    return result;
+}
+
+static inline VmathVector3 vmathM3GetCol1_V( VmathMatrix3 mat )
+{
+    VmathVector3 result;
+    vmathM3GetCol1(&result, &mat);
+    return result;
+}
+
+static inline VmathVector3 vmathM3GetCol2_V( VmathMatrix3 mat )
+{
+    VmathVector3 result;
+    vmathM3GetCol2(&result, &mat);
+    return result;
+}
+
+static inline VmathVector3 vmathM3GetCol_V( VmathMatrix3 mat, int col )
+{
+    VmathVector3 result;
+    vmathM3GetCol(&result, &mat, col);
+    return result;
+}
+
+static inline VmathVector3 vmathM3GetRow_V( VmathMatrix3 mat, int row )
+{
+    VmathVector3 result;
+    vmathM3GetRow(&result, &mat, row);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3Transpose_V( VmathMatrix3 mat )
+{
+    VmathMatrix3 result;
+    vmathM3Transpose(&result, &mat);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3Inverse_V( VmathMatrix3 mat )
+{
+    VmathMatrix3 result;
+    vmathM3Inverse(&result, &mat);
+    return result;
+}
+
+static inline float vmathM3Determinant_V( VmathMatrix3 mat )
+{
+    return vmathM3Determinant(&mat);
+}
+
+static inline VmathMatrix3 vmathM3Add_V( VmathMatrix3 mat0, VmathMatrix3 mat1 )
+{
+    VmathMatrix3 result;
+    vmathM3Add(&result, &mat0, &mat1);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3Sub_V( VmathMatrix3 mat0, VmathMatrix3 mat1 )
+{
+    VmathMatrix3 result;
+    vmathM3Sub(&result, &mat0, &mat1);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3Neg_V( VmathMatrix3 mat )
+{
+    VmathMatrix3 result;
+    vmathM3Neg(&result, &mat);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3AbsPerElem_V( VmathMatrix3 mat )
+{
+    VmathMatrix3 result;
+    vmathM3AbsPerElem(&result, &mat);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3ScalarMul_V( VmathMatrix3 mat, float scalar )
+{
+    VmathMatrix3 result;
+    vmathM3ScalarMul(&result, &mat, scalar);
+    return result;
+}
+
+static inline VmathVector3 vmathM3MulV3_V( VmathMatrix3 mat, VmathVector3 vec )
+{
+    VmathVector3 result;
+    vmathM3MulV3(&result, &mat, &vec);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3Mul_V( VmathMatrix3 mat0, VmathMatrix3 mat1 )
+{
+    VmathMatrix3 result;
+    vmathM3Mul(&result, &mat0, &mat1);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3MulPerElem_V( VmathMatrix3 mat0, VmathMatrix3 mat1 )
+{
+    VmathMatrix3 result;
+    vmathM3MulPerElem(&result, &mat0, &mat1);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3MakeIdentity_V( void ) /* explicit void: a real prototype when compiled as C */
+{
+    VmathMatrix3 result;          /* filled in through the pointer-based call below */
+    vmathM3MakeIdentity(&result);
+    return result;                /* handed back to the caller by value */
+}
+
+static inline VmathMatrix3 vmathM3MakeRotationX_V( float radians )
+{
+    VmathMatrix3 result;
+    vmathM3MakeRotationX(&result, radians);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3MakeRotationY_V( float radians )
+{
+    VmathMatrix3 result;
+    vmathM3MakeRotationY(&result, radians);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3MakeRotationZ_V( float radians )
+{
+    VmathMatrix3 result;
+    vmathM3MakeRotationZ(&result, radians);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3MakeRotationZYX_V( VmathVector3 radiansXYZ )
+{
+    VmathMatrix3 result;
+    vmathM3MakeRotationZYX(&result, &radiansXYZ);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3MakeRotationAxis_V( float radians, VmathVector3 unitVec )
+{
+    VmathMatrix3 result;
+    vmathM3MakeRotationAxis(&result, radians, &unitVec);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3MakeRotationQ_V( VmathQuat unitQuat )
+{
+    VmathMatrix3 result;
+    vmathM3MakeRotationQ(&result, &unitQuat);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3MakeScale_V( VmathVector3 scaleVec )
+{
+    VmathMatrix3 result;
+    vmathM3MakeScale(&result, &scaleVec);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3AppendScale_V( VmathMatrix3 mat, VmathVector3 scaleVec )
+{
+    VmathMatrix3 result;
+    vmathM3AppendScale(&result, &mat, &scaleVec);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3PrependScale_V( VmathVector3 scaleVec, VmathMatrix3 mat )
+{
+    VmathMatrix3 result;
+    vmathM3PrependScale(&result, &scaleVec, &mat);
+    return result;
+}
+
+static inline VmathMatrix3 vmathM3Select_V( VmathMatrix3 mat0, VmathMatrix3 mat1, unsigned int select1 )
+{
+    VmathMatrix3 result;
+    vmathM3Select(&result, &mat0, &mat1, select1);
+    return result;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathM3Print_V( VmathMatrix3 mat )
+{
+    vmathM3Print(&mat); /* forward the by-value copy to the pointer-based API */
+}
+
+static inline void vmathM3Prints_V( VmathMatrix3 mat, const char *name )
+{
+    vmathM3Prints(&mat, name); /* same forwarding; name is passed through unchanged */
+}
+
+#endif /* _VECTORMATH_DEBUG */
+
+static inline VmathMatrix4 vmathM4MakeFromScalar_V( float scalar )
+{
+    VmathMatrix4 result;
+    vmathM4MakeFromScalar(&result, scalar);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeFromT3_V( VmathTransform3 mat )
+{
+    VmathMatrix4 result;
+    vmathM4MakeFromT3(&result, &mat);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeFromCols_V( VmathVector4 _col0, VmathVector4 _col1, VmathVector4 _col2, VmathVector4 _col3 )
+{
+    VmathMatrix4 result;
+    vmathM4MakeFromCols(&result, &_col0, &_col1, &_col2, &_col3);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeFromM3V3_V( VmathMatrix3 mat, VmathVector3 translateVec )
+{
+    VmathMatrix4 result;
+    vmathM4MakeFromM3V3(&result, &mat, &translateVec);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeFromQV3_V( VmathQuat unitQuat, VmathVector3 translateVec )
+{
+    VmathMatrix4 result;
+    vmathM4MakeFromQV3(&result, &unitQuat, &translateVec);
+    return result;
+}
+
+static inline void vmathM4SetCol0_V( VmathMatrix4 *result, VmathVector4 _col0 )
+{
+    vmathM4SetCol0(result, &_col0);
+}
+
+static inline void vmathM4SetCol1_V( VmathMatrix4 *result, VmathVector4 _col1 )
+{
+    vmathM4SetCol1(result, &_col1);
+}
+
+static inline void vmathM4SetCol2_V( VmathMatrix4 *result, VmathVector4 _col2 )
+{
+    vmathM4SetCol2(result, &_col2);
+}
+
+static inline void vmathM4SetCol3_V( VmathMatrix4 *result, VmathVector4 _col3 )
+{
+    vmathM4SetCol3(result, &_col3);
+}
+
+static inline void vmathM4SetCol_V( VmathMatrix4 *result, int col, VmathVector4 vec )
+{
+    vmathM4SetCol(result, col, &vec);
+}
+
+static inline void vmathM4SetRow_V( VmathMatrix4 *result, int row, VmathVector4 vec )
+{
+    vmathM4SetRow(result, row, &vec);
+}
+
+static inline void vmathM4SetElem_V( VmathMatrix4 *result, int col, int row, float val )
+{
+    vmathM4SetElem(result, col, row, val);
+}
+
+static inline float vmathM4GetElem_V( VmathMatrix4 mat, int col, int row )
+{
+    return vmathM4GetElem(&mat, col, row);
+}
+
+static inline VmathVector4 vmathM4GetCol0_V( VmathMatrix4 mat )
+{
+    VmathVector4 result;
+    vmathM4GetCol0(&result, &mat);
+    return result;
+}
+
+static inline VmathVector4 vmathM4GetCol1_V( VmathMatrix4 mat )
+{
+    VmathVector4 result;
+    vmathM4GetCol1(&result, &mat);
+    return result;
+}
+
+static inline VmathVector4 vmathM4GetCol2_V( VmathMatrix4 mat )
+{
+    VmathVector4 result;
+    vmathM4GetCol2(&result, &mat);
+    return result;
+}
+
+static inline VmathVector4 vmathM4GetCol3_V( VmathMatrix4 mat )
+{
+    VmathVector4 result;
+    vmathM4GetCol3(&result, &mat);
+    return result;
+}
+
+static inline VmathVector4 vmathM4GetCol_V( VmathMatrix4 mat, int col )
+{
+    VmathVector4 result;
+    vmathM4GetCol(&result, &mat, col);
+    return result;
+}
+
+static inline VmathVector4 vmathM4GetRow_V( VmathMatrix4 mat, int row )
+{
+    VmathVector4 result;
+    vmathM4GetRow(&result, &mat, row);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4Transpose_V( VmathMatrix4 mat )
+{
+    VmathMatrix4 result;
+    vmathM4Transpose(&result, &mat);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4Inverse_V( VmathMatrix4 mat )
+{
+    VmathMatrix4 result;
+    vmathM4Inverse(&result, &mat);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4AffineInverse_V( VmathMatrix4 mat )
+{
+    VmathMatrix4 result;
+    vmathM4AffineInverse(&result, &mat);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4OrthoInverse_V( VmathMatrix4 mat )
+{
+    VmathMatrix4 result;
+    vmathM4OrthoInverse(&result, &mat);
+    return result;
+}
+
+static inline float vmathM4Determinant_V( VmathMatrix4 mat )
+{
+    return vmathM4Determinant(&mat);
+}
+
+static inline VmathMatrix4 vmathM4Add_V( VmathMatrix4 mat0, VmathMatrix4 mat1 )
+{
+    VmathMatrix4 result;
+    vmathM4Add(&result, &mat0, &mat1);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4Sub_V( VmathMatrix4 mat0, VmathMatrix4 mat1 )
+{
+    VmathMatrix4 result;
+    vmathM4Sub(&result, &mat0, &mat1);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4Neg_V( VmathMatrix4 mat )
+{
+    VmathMatrix4 result;
+    vmathM4Neg(&result, &mat);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4AbsPerElem_V( VmathMatrix4 mat )
+{
+    VmathMatrix4 result;
+    vmathM4AbsPerElem(&result, &mat);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4ScalarMul_V( VmathMatrix4 mat, float scalar )
+{
+    VmathMatrix4 result;
+    vmathM4ScalarMul(&result, &mat, scalar);
+    return result;
+}
+
+static inline VmathVector4 vmathM4MulV4_V( VmathMatrix4 mat, VmathVector4 vec )
+{
+    VmathVector4 result;
+    vmathM4MulV4(&result, &mat, &vec);
+    return result;
+}
+
+static inline VmathVector4 vmathM4MulV3_V( VmathMatrix4 mat, VmathVector3 vec )
+{
+    VmathVector4 result;
+    vmathM4MulV3(&result, &mat, &vec);
+    return result;
+}
+
+static inline VmathVector4 vmathM4MulP3_V( VmathMatrix4 mat, VmathPoint3 pnt )
+{
+    VmathVector4 result;
+    vmathM4MulP3(&result, &mat, &pnt);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4Mul_V( VmathMatrix4 mat0, VmathMatrix4 mat1 )
+{
+    VmathMatrix4 result;
+    vmathM4Mul(&result, &mat0, &mat1);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MulT3_V( VmathMatrix4 mat, VmathTransform3 tfrm1 )
+{
+    VmathMatrix4 result;
+    vmathM4MulT3(&result, &mat, &tfrm1);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MulPerElem_V( VmathMatrix4 mat0, VmathMatrix4 mat1 )
+{
+    VmathMatrix4 result;
+    vmathM4MulPerElem(&result, &mat0, &mat1);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeIdentity_V( void ) /* explicit void: a real prototype when compiled as C */
+{
+    VmathMatrix4 result;          /* filled in through the pointer-based call below */
+    vmathM4MakeIdentity(&result);
+    return result;                /* handed back to the caller by value */
+}
+
+static inline void vmathM4SetUpper3x3_V( VmathMatrix4 *result, VmathMatrix3 mat3 )
+{
+    vmathM4SetUpper3x3(result, &mat3);
+}
+
+static inline VmathMatrix3 vmathM4GetUpper3x3_V( VmathMatrix4 mat )
+{
+    VmathMatrix3 result;
+    vmathM4GetUpper3x3(&result, &mat);
+    return result;
+}
+
+static inline void vmathM4SetTranslation_V( VmathMatrix4 *result, VmathVector3 translateVec )
+{
+    vmathM4SetTranslation(result, &translateVec);
+}
+
+static inline VmathVector3 vmathM4GetTranslation_V( VmathMatrix4 mat )
+{
+    VmathVector3 result;
+    vmathM4GetTranslation(&result, &mat);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeRotationX_V( float radians )
+{
+    VmathMatrix4 result;
+    vmathM4MakeRotationX(&result, radians);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeRotationY_V( float radians )
+{
+    VmathMatrix4 result;
+    vmathM4MakeRotationY(&result, radians);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeRotationZ_V( float radians )
+{
+    VmathMatrix4 result;
+    vmathM4MakeRotationZ(&result, radians);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeRotationZYX_V( VmathVector3 radiansXYZ )
+{
+    VmathMatrix4 result;
+    vmathM4MakeRotationZYX(&result, &radiansXYZ);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeRotationAxis_V( float radians, VmathVector3 unitVec )
+{
+    VmathMatrix4 result;
+    vmathM4MakeRotationAxis(&result, radians, &unitVec);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeRotationQ_V( VmathQuat unitQuat )
+{
+    VmathMatrix4 result;
+    vmathM4MakeRotationQ(&result, &unitQuat);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeScale_V( VmathVector3 scaleVec )
+{
+    VmathMatrix4 result;
+    vmathM4MakeScale(&result, &scaleVec);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4AppendScale_V( VmathMatrix4 mat, VmathVector3 scaleVec )
+{
+    VmathMatrix4 result;
+    vmathM4AppendScale(&result, &mat, &scaleVec);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4PrependScale_V( VmathVector3 scaleVec, VmathMatrix4 mat )
+{
+    VmathMatrix4 result;
+    vmathM4PrependScale(&result, &scaleVec, &mat);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeTranslation_V( VmathVector3 translateVec )
+{
+    VmathMatrix4 result;
+    vmathM4MakeTranslation(&result, &translateVec);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeLookAt_V( VmathPoint3 eyePos, VmathPoint3 lookAtPos, VmathVector3 upVec )
+{
+    VmathMatrix4 result;
+    vmathM4MakeLookAt(&result, &eyePos, &lookAtPos, &upVec);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakePerspective_V( float fovyRadians, float aspect, float zNear, float zFar )
+{
+    VmathMatrix4 result;
+    vmathM4MakePerspective(&result, fovyRadians, aspect, zNear, zFar);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeFrustum_V( float left, float right, float bottom, float top, float zNear, float zFar )
+{
+    VmathMatrix4 result;
+    vmathM4MakeFrustum(&result, left, right, bottom, top, zNear, zFar);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4MakeOrthographic_V( float left, float right, float bottom, float top, float zNear, float zFar )
+{
+    VmathMatrix4 result;
+    vmathM4MakeOrthographic(&result, left, right, bottom, top, zNear, zFar);
+    return result;
+}
+
+static inline VmathMatrix4 vmathM4Select_V( VmathMatrix4 mat0, VmathMatrix4 mat1, unsigned int select1 )
+{
+    VmathMatrix4 result;
+    vmathM4Select(&result, &mat0, &mat1, select1);
+    return result;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathM4Print_V( VmathMatrix4 mat )
+{
+    vmathM4Print(&mat); /* forward the by-value copy to the pointer-based API */
+}
+
+static inline void vmathM4Prints_V( VmathMatrix4 mat, const char *name )
+{
+    vmathM4Prints(&mat, name); /* same forwarding; name is passed through unchanged */
+}
+
+#endif /* _VECTORMATH_DEBUG */
+
+static inline VmathTransform3 vmathT3MakeFromScalar_V( float scalar )
+{
+    VmathTransform3 result;
+    vmathT3MakeFromScalar(&result, scalar);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3MakeFromCols_V( VmathVector3 _col0, VmathVector3 _col1, VmathVector3 _col2, VmathVector3 _col3 )
+{
+    VmathTransform3 result;
+    vmathT3MakeFromCols(&result, &_col0, &_col1, &_col2, &_col3);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3MakeFromM3V3_V( VmathMatrix3 tfrm, VmathVector3 translateVec )
+{
+    VmathTransform3 result;
+    vmathT3MakeFromM3V3(&result, &tfrm, &translateVec);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3MakeFromQV3_V( VmathQuat unitQuat, VmathVector3 translateVec )
+{
+    VmathTransform3 result;
+    vmathT3MakeFromQV3(&result, &unitQuat, &translateVec);
+    return result;
+}
+
+static inline void vmathT3SetCol0_V( VmathTransform3 *result, VmathVector3 _col0 )
+{
+    vmathT3SetCol0(result, &_col0);
+}
+
+static inline void vmathT3SetCol1_V( VmathTransform3 *result, VmathVector3 _col1 )
+{
+    vmathT3SetCol1(result, &_col1);
+}
+
+static inline void vmathT3SetCol2_V( VmathTransform3 *result, VmathVector3 _col2 )
+{
+    vmathT3SetCol2(result, &_col2);
+}
+
+static inline void vmathT3SetCol3_V( VmathTransform3 *result, VmathVector3 _col3 )
+{
+    vmathT3SetCol3(result, &_col3);
+}
+
+static inline void vmathT3SetCol_V( VmathTransform3 *result, int col, VmathVector3 vec )
+{
+    vmathT3SetCol(result, col, &vec);
+}
+
+static inline void vmathT3SetRow_V( VmathTransform3 *result, int row, VmathVector4 vec )
+{
+    vmathT3SetRow(result, row, &vec);
+}
+
+static inline void vmathT3SetElem_V( VmathTransform3 *result, int col, int row, float val )
+{
+    vmathT3SetElem(result, col, row, val);
+}
+
+static inline float vmathT3GetElem_V( VmathTransform3 tfrm, int col, int row )
+{
+    return vmathT3GetElem(&tfrm, col, row);
+}
+
+static inline VmathVector3 vmathT3GetCol0_V( VmathTransform3 tfrm )
+{
+    VmathVector3 result;
+    vmathT3GetCol0(&result, &tfrm);
+    return result;
+}
+
+static inline VmathVector3 vmathT3GetCol1_V( VmathTransform3 tfrm )
+{
+    VmathVector3 result;
+    vmathT3GetCol1(&result, &tfrm);
+    return result;
+}
+
+static inline VmathVector3 vmathT3GetCol2_V( VmathTransform3 tfrm )
+{
+    VmathVector3 result;
+    vmathT3GetCol2(&result, &tfrm);
+    return result;
+}
+
+static inline VmathVector3 vmathT3GetCol3_V( VmathTransform3 tfrm )
+{
+    VmathVector3 result;
+    vmathT3GetCol3(&result, &tfrm);
+    return result;
+}
+
+static inline VmathVector3 vmathT3GetCol_V( VmathTransform3 tfrm, int col )
+{
+    VmathVector3 result;
+    vmathT3GetCol(&result, &tfrm, col);
+    return result;
+}
+
+static inline VmathVector4 vmathT3GetRow_V( VmathTransform3 tfrm, int row )
+{
+    VmathVector4 result;
+    vmathT3GetRow(&result, &tfrm, row);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3Inverse_V( VmathTransform3 tfrm )
+{
+    VmathTransform3 result;
+    vmathT3Inverse(&result, &tfrm);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3OrthoInverse_V( VmathTransform3 tfrm )
+{
+    VmathTransform3 result;
+    vmathT3OrthoInverse(&result, &tfrm);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3AbsPerElem_V( VmathTransform3 tfrm )
+{
+    VmathTransform3 result;
+    vmathT3AbsPerElem(&result, &tfrm);
+    return result;
+}
+
+static inline VmathVector3 vmathT3MulV3_V( VmathTransform3 tfrm, VmathVector3 vec )
+{
+    VmathVector3 result;
+    vmathT3MulV3(&result, &tfrm, &vec);
+    return result;
+}
+
+static inline VmathPoint3 vmathT3MulP3_V( VmathTransform3 tfrm, VmathPoint3 pnt )
+{
+    VmathPoint3 result;
+    vmathT3MulP3(&result, &tfrm, &pnt);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3Mul_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1 )
+{
+    VmathTransform3 result;
+    vmathT3Mul(&result, &tfrm0, &tfrm1);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3MulPerElem_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1 )
+{
+    VmathTransform3 result;
+    vmathT3MulPerElem(&result, &tfrm0, &tfrm1);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3MakeIdentity_V( void ) /* explicit void: a real prototype when compiled as C */
+{
+    VmathTransform3 result;       /* filled in through the pointer-based call below */
+    vmathT3MakeIdentity(&result);
+    return result;                /* handed back to the caller by value */
+}
+
+static inline void vmathT3SetUpper3x3_V( VmathTransform3 *result, VmathMatrix3 tfrm )
+{
+    vmathT3SetUpper3x3(result, &tfrm);
+}
+
+static inline VmathMatrix3 vmathT3GetUpper3x3_V( VmathTransform3 tfrm )
+{
+    VmathMatrix3 result;
+    vmathT3GetUpper3x3(&result, &tfrm);
+    return result;
+}
+
+static inline void vmathT3SetTranslation_V( VmathTransform3 *result, VmathVector3 translateVec )
+{
+    vmathT3SetTranslation(result, &translateVec);
+}
+
+static inline VmathVector3 vmathT3GetTranslation_V( VmathTransform3 tfrm )
+{
+    VmathVector3 result;
+    vmathT3GetTranslation(&result, &tfrm);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3MakeRotationX_V( float radians )
+{
+    VmathTransform3 result;
+    vmathT3MakeRotationX(&result, radians);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3MakeRotationY_V( float radians )
+{
+    VmathTransform3 result;
+    vmathT3MakeRotationY(&result, radians);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3MakeRotationZ_V( float radians )
+{
+    VmathTransform3 result;
+    vmathT3MakeRotationZ(&result, radians);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3MakeRotationZYX_V( VmathVector3 radiansXYZ )
+{
+    VmathTransform3 result;
+    vmathT3MakeRotationZYX(&result, &radiansXYZ);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3MakeRotationAxis_V( float radians, VmathVector3 unitVec )
+{
+    VmathTransform3 result;
+    vmathT3MakeRotationAxis(&result, radians, &unitVec);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3MakeRotationQ_V( VmathQuat unitQuat )
+{
+    VmathTransform3 result;
+    vmathT3MakeRotationQ(&result, &unitQuat);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3MakeScale_V( VmathVector3 scaleVec )
+{
+    VmathTransform3 result;
+    vmathT3MakeScale(&result, &scaleVec);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3AppendScale_V( VmathTransform3 tfrm, VmathVector3 scaleVec )
+{
+    VmathTransform3 result;
+    vmathT3AppendScale(&result, &tfrm, &scaleVec);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3PrependScale_V( VmathVector3 scaleVec, VmathTransform3 tfrm )
+{
+    VmathTransform3 result;
+    vmathT3PrependScale(&result, &scaleVec, &tfrm);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3MakeTranslation_V( VmathVector3 translateVec )
+{
+    VmathTransform3 result;
+    vmathT3MakeTranslation(&result, &translateVec);
+    return result;
+}
+
+static inline VmathTransform3 vmathT3Select_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1, unsigned int select1 )
+{
+    VmathTransform3 result;
+    vmathT3Select(&result, &tfrm0, &tfrm1, select1);
+    return result;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathT3Print_V( VmathTransform3 tfrm )
+{
+    vmathT3Print(&tfrm); /* forward the by-value copy to the pointer-based API */
+}
+
+static inline void vmathT3Prints_V( VmathTransform3 tfrm, const char *name )
+{
+    vmathT3Prints(&tfrm, name); /* same forwarding; name is passed through unchanged */
+}
+
+#endif /* _VECTORMATH_DEBUG */
+
+static inline VmathQuat vmathQMakeFromM3_V( VmathMatrix3 tfrm )
+{
+    VmathQuat result;
+    vmathQMakeFromM3(&result, &tfrm);
+    return result;
+}
+
+static inline VmathMatrix3 vmathV3Outer_V( VmathVector3 tfrm0, VmathVector3 tfrm1 )
+{
+    VmathMatrix3 result;
+    vmathV3Outer(&result, &tfrm0, &tfrm1);
+    return result;
+}
+
+static inline VmathMatrix4 vmathV4Outer_V( VmathVector4 tfrm0, VmathVector4 tfrm1 )
+{
+    VmathMatrix4 result;
+    vmathV4Outer(&result, &tfrm0, &tfrm1);
+    return result;
+}
+
+static inline VmathVector3 vmathV3RowMul_V( VmathVector3 vec, VmathMatrix3 mat )
+{
+    VmathVector3 result;
+    vmathV3RowMul(&result, &vec, &mat);
+    return result;
+}
+
+static inline VmathMatrix3 vmathV3CrossMatrix_V( VmathVector3 vec )
+{
+    VmathMatrix3 result;
+    vmathV3CrossMatrix(&result, &vec);
+    return result;
+}
+
+static inline VmathMatrix3 vmathV3CrossMatrixMul_V( VmathVector3 vec, VmathMatrix3 mat )
+{
+    VmathMatrix3 result;
+    vmathV3CrossMatrixMul(&result, &vec, &mat);
+    return result;
+}
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif /* __cplusplus */
+
+#endif /* _VECTORMATH_MAT_AOS_V_C_H */
diff --git a/Extras/vectormathlibrary/include/vectormath/spu/c/mat_soa.h b/Extras/vectormathlibrary/include/vectormath/spu/c/mat_soa.h
index c2c3734e0..32234b7e9 100644
--- a/Extras/vectormathlibrary/include/vectormath/spu/c/mat_soa.h
+++ b/Extras/vectormathlibrary/include/vectormath/spu/c/mat_soa.h
@@ -1,1493 +1,1493 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_MAT_SOA_C_H
-#define _VECTORMATH_MAT_SOA_C_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/*-----------------------------------------------------------------------------
- * Constants
- */
-#define _VECTORMATH_PI_OVER_2 1.570796327f
-
-/*-----------------------------------------------------------------------------
- * Definitions
- */
-static inline void vmathSoaM3Copy( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat )
-{
-    vmathSoaV3Copy( &result->col0, &mat->col0 );
-    vmathSoaV3Copy( &result->col1, &mat->col1 );
-    vmathSoaV3Copy( &result->col2, &mat->col2 );
-}
-
-static inline void vmathSoaM3MakeFromScalar( VmathSoaMatrix3 *result, vec_float4 scalar )
-{
-    vmathSoaV3MakeFromScalar( &result->col0, scalar );
-    vmathSoaV3MakeFromScalar( &result->col1, scalar );
-    vmathSoaV3MakeFromScalar( &result->col2, scalar );
-}
-
-static inline void vmathSoaM3MakeFromQ( VmathSoaMatrix3 *result, const VmathSoaQuat *unitQuat )
-{
-    vec_float4 qx, qy, qz, qw, qx2, qy2, qz2, qxqx2, qyqy2, qzqz2, qxqy2, qyqz2, qzqw2, qxqz2, qyqw2, qxqw2;
-    qx = unitQuat->x;
-    qy = unitQuat->y;
-    qz = unitQuat->z;
-    qw = unitQuat->w;
-    qx2 = spu_add( qx, qx );
-    qy2 = spu_add( qy, qy );
-    qz2 = spu_add( qz, qz );
-    qxqx2 = spu_mul( qx, qx2 );
-    qxqy2 = spu_mul( qx, qy2 );
-    qxqz2 = spu_mul( qx, qz2 );
-    qxqw2 = spu_mul( qw, qx2 );
-    qyqy2 = spu_mul( qy, qy2 );
-    qyqz2 = spu_mul( qy, qz2 );
-    qyqw2 = spu_mul( qw, qy2 );
-    qzqz2 = spu_mul( qz, qz2 );
-    qzqw2 = spu_mul( qw, qz2 );
-    vmathSoaV3MakeFromElems( &result->col0, spu_sub( spu_sub( spu_splats(1.0f), qyqy2 ), qzqz2 ), spu_add( qxqy2, qzqw2 ), spu_sub( qxqz2, qyqw2 ) );
-    vmathSoaV3MakeFromElems( &result->col1, spu_sub( qxqy2, qzqw2 ), spu_sub( spu_sub( spu_splats(1.0f), qxqx2 ), qzqz2 ), spu_add( qyqz2, qxqw2 ) );
-    vmathSoaV3MakeFromElems( &result->col2, spu_add( qxqz2, qyqw2 ), spu_sub( qyqz2, qxqw2 ), spu_sub( spu_sub( spu_splats(1.0f), qxqx2 ), qyqy2 ) );
-}
-
-static inline void vmathSoaM3MakeFromCols( VmathSoaMatrix3 *result, const VmathSoaVector3 *_col0, const VmathSoaVector3 *_col1, const VmathSoaVector3 *_col2 )
-{
-    vmathSoaV3Copy( &result->col0, _col0 );
-    vmathSoaV3Copy( &result->col1, _col1 );
-    vmathSoaV3Copy( &result->col2, _col2 );
-}
-
-static inline void vmathSoaM3MakeFromAos( VmathSoaMatrix3 *result, const VmathMatrix3 *mat )
-{
-    vmathSoaV3MakeFromAos( &result->col0, &mat->col0 );
-    vmathSoaV3MakeFromAos( &result->col1, &mat->col1 );
-    vmathSoaV3MakeFromAos( &result->col2, &mat->col2 );
-}
-
-static inline void vmathSoaM3MakeFrom4Aos( VmathSoaMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1, const VmathMatrix3 *mat2, const VmathMatrix3 *mat3 )
-{
-    vmathSoaV3MakeFrom4Aos( &result->col0, &mat0->col0, &mat1->col0, &mat2->col0, &mat3->col0 );
-    vmathSoaV3MakeFrom4Aos( &result->col1, &mat0->col1, &mat1->col1, &mat2->col1, &mat3->col1 );
-    vmathSoaV3MakeFrom4Aos( &result->col2, &mat0->col2, &mat1->col2, &mat2->col2, &mat3->col2 );
-}
-
-static inline void vmathSoaM3Get4Aos( const VmathSoaMatrix3 *mat, VmathMatrix3 *result0, VmathMatrix3 *result1, VmathMatrix3 *result2, VmathMatrix3 *result3 )
-{
-    vmathSoaV3Get4Aos( &mat->col0, &result0->col0, &result1->col0, &result2->col0, &result3->col0 );
-    vmathSoaV3Get4Aos( &mat->col1, &result0->col1, &result1->col1, &result2->col1, &result3->col1 );
-    vmathSoaV3Get4Aos( &mat->col2, &result0->col2, &result1->col2, &result2->col2, &result3->col2 );
-}
-
-static inline void vmathSoaM3SetCol0( VmathSoaMatrix3 *result, const VmathSoaVector3 *_col0 )
-{
-    vmathSoaV3Copy( &result->col0, _col0 );
-}
-
-static inline void vmathSoaM3SetCol1( VmathSoaMatrix3 *result, const VmathSoaVector3 *_col1 )
-{
-    vmathSoaV3Copy( &result->col1, _col1 );
-}
-
-static inline void vmathSoaM3SetCol2( VmathSoaMatrix3 *result, const VmathSoaVector3 *_col2 )
-{
-    vmathSoaV3Copy( &result->col2, _col2 );
-}
-
-static inline void vmathSoaM3SetCol( VmathSoaMatrix3 *result, int col, const VmathSoaVector3 *vec )
-{
-    vmathSoaV3Copy( (&result->col0 + col), vec );
-}
-
-static inline void vmathSoaM3SetRow( VmathSoaMatrix3 *result, int row, const VmathSoaVector3 *vec )
-{
-    vmathSoaV3SetElem( &result->col0, row, vmathSoaV3GetElem( vec, 0 ) );
-    vmathSoaV3SetElem( &result->col1, row, vmathSoaV3GetElem( vec, 1 ) );
-    vmathSoaV3SetElem( &result->col2, row, vmathSoaV3GetElem( vec, 2 ) );
-}
-
-static inline void vmathSoaM3SetElem( VmathSoaMatrix3 *result, int col, int row, vec_float4 val )
-{
-    VmathSoaVector3 tmpV3_0;
-    vmathSoaM3GetCol( &tmpV3_0, result, col );
-    vmathSoaV3SetElem( &tmpV3_0, row, val );
-    vmathSoaM3SetCol( result, col, &tmpV3_0 );
-}
-
-static inline vec_float4 vmathSoaM3GetElem( const VmathSoaMatrix3 *mat, int col, int row )
-{
-    VmathSoaVector3 tmpV3_0;
-    vmathSoaM3GetCol( &tmpV3_0, mat, col );
-    return vmathSoaV3GetElem( &tmpV3_0, row );
-}
-
-static inline void vmathSoaM3GetCol0( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat )
-{
-    vmathSoaV3Copy( result, &mat->col0 );
-}
-
-static inline void vmathSoaM3GetCol1( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat )
-{
-    vmathSoaV3Copy( result, &mat->col1 );
-}
-
-static inline void vmathSoaM3GetCol2( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat )
-{
-    vmathSoaV3Copy( result, &mat->col2 );
-}
-
-static inline void vmathSoaM3GetCol( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat, int col )
-{
-    vmathSoaV3Copy( result, (&mat->col0 + col) );
-}
-
-static inline void vmathSoaM3GetRow( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat, int row )
-{
-    vmathSoaV3MakeFromElems( result, vmathSoaV3GetElem( &mat->col0, row ), vmathSoaV3GetElem( &mat->col1, row ), vmathSoaV3GetElem( &mat->col2, row ) );
-}
-
-static inline void vmathSoaM3Transpose( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat )
-{
-    VmathSoaMatrix3 tmpResult;
-    vmathSoaV3MakeFromElems( &tmpResult.col0, mat->col0.x, mat->col1.x, mat->col2.x );
-    vmathSoaV3MakeFromElems( &tmpResult.col1, mat->col0.y, mat->col1.y, mat->col2.y );
-    vmathSoaV3MakeFromElems( &tmpResult.col2, mat->col0.z, mat->col1.z, mat->col2.z );
-    vmathSoaM3Copy( result, &tmpResult );
-}
-
-static inline void vmathSoaM3Inverse( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat )
-{
-    VmathSoaVector3 tmp0, tmp1, tmp2;
-    vec_float4 detinv;
-    vmathSoaV3Cross( &tmp0, &mat->col1, &mat->col2 );
-    vmathSoaV3Cross( &tmp1, &mat->col2, &mat->col0 );
-    vmathSoaV3Cross( &tmp2, &mat->col0, &mat->col1 );
-    detinv = recipf4( vmathSoaV3Dot( &mat->col2, &tmp2 ) );
-    vmathSoaV3MakeFromElems( &result->col0, spu_mul( tmp0.x, detinv ), spu_mul( tmp1.x, detinv ), spu_mul( tmp2.x, detinv ) );
-    vmathSoaV3MakeFromElems( &result->col1, spu_mul( tmp0.y, detinv ), spu_mul( tmp1.y, detinv ), spu_mul( tmp2.y, detinv ) );
-    vmathSoaV3MakeFromElems( &result->col2, spu_mul( tmp0.z, detinv ), spu_mul( tmp1.z, detinv ), spu_mul( tmp2.z, detinv ) );
-}
-
-static inline vec_float4 vmathSoaM3Determinant( const VmathSoaMatrix3 *mat )
-{
-    VmathSoaVector3 tmpV3_0;
-    vmathSoaV3Cross( &tmpV3_0, &mat->col0, &mat->col1 );
-    return vmathSoaV3Dot( &mat->col2, &tmpV3_0 );
-}
-
-static inline void vmathSoaM3Add( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1 )
-{
-    vmathSoaV3Add( &result->col0, &mat0->col0, &mat1->col0 );
-    vmathSoaV3Add( &result->col1, &mat0->col1, &mat1->col1 );
-    vmathSoaV3Add( &result->col2, &mat0->col2, &mat1->col2 );
-}
-
-static inline void vmathSoaM3Sub( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1 )
-{
-    vmathSoaV3Sub( &result->col0, &mat0->col0, &mat1->col0 );
-    vmathSoaV3Sub( &result->col1, &mat0->col1, &mat1->col1 );
-    vmathSoaV3Sub( &result->col2, &mat0->col2, &mat1->col2 );
-}
-
-static inline void vmathSoaM3Neg( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat )
-{
-    vmathSoaV3Neg( &result->col0, &mat->col0 );
-    vmathSoaV3Neg( &result->col1, &mat->col1 );
-    vmathSoaV3Neg( &result->col2, &mat->col2 );
-}
-
-static inline void vmathSoaM3AbsPerElem( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat )
-{
-    vmathSoaV3AbsPerElem( &result->col0, &mat->col0 );
-    vmathSoaV3AbsPerElem( &result->col1, &mat->col1 );
-    vmathSoaV3AbsPerElem( &result->col2, &mat->col2 );
-}
-
-static inline void vmathSoaM3ScalarMul( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat, vec_float4 scalar )
-{
-    vmathSoaV3ScalarMul( &result->col0, &mat->col0, scalar );
-    vmathSoaV3ScalarMul( &result->col1, &mat->col1, scalar );
-    vmathSoaV3ScalarMul( &result->col2, &mat->col2, scalar );
-}
-
-static inline void vmathSoaM3MulV3( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat, const VmathSoaVector3 *vec )
-{
-    vec_float4 tmpX, tmpY, tmpZ;
-    tmpX = spu_add( spu_add( spu_mul( mat->col0.x, vec->x ), spu_mul( mat->col1.x, vec->y ) ), spu_mul( mat->col2.x, vec->z ) );
-    tmpY = spu_add( spu_add( spu_mul( mat->col0.y, vec->x ), spu_mul( mat->col1.y, vec->y ) ), spu_mul( mat->col2.y, vec->z ) );
-    tmpZ = spu_add( spu_add( spu_mul( mat->col0.z, vec->x ), spu_mul( mat->col1.z, vec->y ) ), spu_mul( mat->col2.z, vec->z ) );
-    vmathSoaV3MakeFromElems( result, tmpX, tmpY, tmpZ );
-}
-
-static inline void vmathSoaM3Mul( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1 )
-{
-    VmathSoaMatrix3 tmpResult;
-    vmathSoaM3MulV3( &tmpResult.col0, mat0, &mat1->col0 );
-    vmathSoaM3MulV3( &tmpResult.col1, mat0, &mat1->col1 );
-    vmathSoaM3MulV3( &tmpResult.col2, mat0, &mat1->col2 );
-    vmathSoaM3Copy( result, &tmpResult );
-}
-
-static inline void vmathSoaM3MulPerElem( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1 )
-{
-    vmathSoaV3MulPerElem( &result->col0, &mat0->col0, &mat1->col0 );
-    vmathSoaV3MulPerElem( &result->col1, &mat0->col1, &mat1->col1 );
-    vmathSoaV3MulPerElem( &result->col2, &mat0->col2, &mat1->col2 );
-}
-
-static inline void vmathSoaM3MakeIdentity( VmathSoaMatrix3 *result )
-{
-    vmathSoaV3MakeXAxis( &result->col0 );
-    vmathSoaV3MakeYAxis( &result->col1 );
-    vmathSoaV3MakeZAxis( &result->col2 );
-}
-
-static inline void vmathSoaM3MakeRotationX( VmathSoaMatrix3 *result, vec_float4 radians )
-{
-    vec_float4 s, c;
-    sincosf4( radians, &s, &c );
-    vmathSoaV3MakeXAxis( &result->col0 );
-    vmathSoaV3MakeFromElems( &result->col1, spu_splats(0.0f), c, s );
-    vmathSoaV3MakeFromElems( &result->col2, spu_splats(0.0f), negatef4( s ), c );
-}
-
-static inline void vmathSoaM3MakeRotationY( VmathSoaMatrix3 *result, vec_float4 radians )
-{
-    vec_float4 s, c;
-    sincosf4( radians, &s, &c );
-    vmathSoaV3MakeFromElems( &result->col0, c, spu_splats(0.0f), negatef4( s ) );
-    vmathSoaV3MakeYAxis( &result->col1 );
-    vmathSoaV3MakeFromElems( &result->col2, s, spu_splats(0.0f), c );
-}
-
-static inline void vmathSoaM3MakeRotationZ( VmathSoaMatrix3 *result, vec_float4 radians )
-{
-    vec_float4 s, c;
-    sincosf4( radians, &s, &c );
-    vmathSoaV3MakeFromElems( &result->col0, c, s, spu_splats(0.0f) );
-    vmathSoaV3MakeFromElems( &result->col1, negatef4( s ), c, spu_splats(0.0f) );
-    vmathSoaV3MakeZAxis( &result->col2 );
-}
-
-static inline void vmathSoaM3MakeRotationZYX( VmathSoaMatrix3 *result, const VmathSoaVector3 *radiansXYZ )
-{
-    vec_float4 sX, cX, sY, cY, sZ, cZ, tmp0, tmp1;
-    sincosf4( radiansXYZ->x, &sX, &cX );
-    sincosf4( radiansXYZ->y, &sY, &cY );
-    sincosf4( radiansXYZ->z, &sZ, &cZ );
-    tmp0 = spu_mul( cZ, sY );
-    tmp1 = spu_mul( sZ, sY );
-    vmathSoaV3MakeFromElems( &result->col0, spu_mul( cZ, cY ), spu_mul( sZ, cY ), negatef4( sY ) );
-    vmathSoaV3MakeFromElems( &result->col1, spu_sub( spu_mul( tmp0, sX ), spu_mul( sZ, cX ) ), spu_add( spu_mul( tmp1, sX ), spu_mul( cZ, cX ) ), spu_mul( cY, sX ) );
-    vmathSoaV3MakeFromElems( &result->col2, spu_add( spu_mul( tmp0, cX ), spu_mul( sZ, sX ) ), spu_sub( spu_mul( tmp1, cX ), spu_mul( cZ, sX ) ), spu_mul( cY, cX ) );
-}
-
-static inline void vmathSoaM3MakeRotationAxis( VmathSoaMatrix3 *result, vec_float4 radians, const VmathSoaVector3 *unitVec )
-{
-    vec_float4 x, y, z, s, c, oneMinusC, xy, yz, zx;
-    sincosf4( radians, &s, &c );
-    x = unitVec->x;
-    y = unitVec->y;
-    z = unitVec->z;
-    xy = spu_mul( x, y );
-    yz = spu_mul( y, z );
-    zx = spu_mul( z, x );
-    oneMinusC = spu_sub( spu_splats(1.0f), c );
-    vmathSoaV3MakeFromElems( &result->col0, spu_add( spu_mul( spu_mul( x, x ), oneMinusC ), c ), spu_add( spu_mul( xy, oneMinusC ), spu_mul( z, s ) ), spu_sub( spu_mul( zx, oneMinusC ), spu_mul( y, s ) ) );
-    vmathSoaV3MakeFromElems( &result->col1, spu_sub( spu_mul( xy, oneMinusC ), spu_mul( z, s ) ), spu_add( spu_mul( spu_mul( y, y ), oneMinusC ), c ), spu_add( spu_mul( yz, oneMinusC ), spu_mul( x, s ) ) );
-    vmathSoaV3MakeFromElems( &result->col2, spu_add( spu_mul( zx, oneMinusC ), spu_mul( y, s ) ), spu_sub( spu_mul( yz, oneMinusC ), spu_mul( x, s ) ), spu_add( spu_mul( spu_mul( z, z ), oneMinusC ), c ) );
-}
-
-static inline void vmathSoaM3MakeRotationQ( VmathSoaMatrix3 *result, const VmathSoaQuat *unitQuat )
-{
-    vmathSoaM3MakeFromQ( result, unitQuat );
-}
-
-static inline void vmathSoaM3MakeScale( VmathSoaMatrix3 *result, const VmathSoaVector3 *scaleVec )
-{
-    vmathSoaV3MakeFromElems( &result->col0, scaleVec->x, spu_splats(0.0f), spu_splats(0.0f) );
-    vmathSoaV3MakeFromElems( &result->col1, spu_splats(0.0f), scaleVec->y, spu_splats(0.0f) );
-    vmathSoaV3MakeFromElems( &result->col2, spu_splats(0.0f), spu_splats(0.0f), scaleVec->z );
-}
-
-static inline void vmathSoaM3AppendScale( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat, const VmathSoaVector3 *scaleVec )
-{
-    vmathSoaV3ScalarMul( &result->col0, &mat->col0, vmathSoaV3GetX( scaleVec ) );
-    vmathSoaV3ScalarMul( &result->col1, &mat->col1, vmathSoaV3GetY( scaleVec ) );
-    vmathSoaV3ScalarMul( &result->col2, &mat->col2, vmathSoaV3GetZ( scaleVec ) );
-}
-
-static inline void vmathSoaM3PrependScale( VmathSoaMatrix3 *result, const VmathSoaVector3 *scaleVec, const VmathSoaMatrix3 *mat )
-{
-    vmathSoaV3MulPerElem( &result->col0, &mat->col0, scaleVec );
-    vmathSoaV3MulPerElem( &result->col1, &mat->col1, scaleVec );
-    vmathSoaV3MulPerElem( &result->col2, &mat->col2, scaleVec );
-}
-
-static inline void vmathSoaM3Select( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1, vec_uint4 select1 )
-{
-    vmathSoaV3Select( &result->col0, &mat0->col0, &mat1->col0, select1 );
-    vmathSoaV3Select( &result->col1, &mat0->col1, &mat1->col1, select1 );
-    vmathSoaV3Select( &result->col2, &mat0->col2, &mat1->col2, select1 );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathSoaM3Print( const VmathSoaMatrix3 *mat )
-{
-    VmathMatrix3 mat0, mat1, mat2, mat3;
-    vmathSoaM3Get4Aos( mat, &mat0, &mat1, &mat2, &mat3 );
-    printf("slot 0:\n");
-    vmathM3Print( &mat0 );
-    printf("slot 1:\n");
-    vmathM3Print( &mat1 );
-    printf("slot 2:\n");
-    vmathM3Print( &mat2 );
-    printf("slot 3:\n");
-    vmathM3Print( &mat3 );
-}
-
-static inline void vmathSoaM3Prints( const VmathSoaMatrix3 *mat, const char *name )
-{
-    printf("%s:\n", name);
-    vmathSoaM3Print( mat );
-}
-
-#endif
-
-static inline void vmathSoaM4Copy( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat )
-{
-    vmathSoaV4Copy( &result->col0, &mat->col0 );
-    vmathSoaV4Copy( &result->col1, &mat->col1 );
-    vmathSoaV4Copy( &result->col2, &mat->col2 );
-    vmathSoaV4Copy( &result->col3, &mat->col3 );
-}
-
-static inline void vmathSoaM4MakeFromScalar( VmathSoaMatrix4 *result, vec_float4 scalar )
-{
-    vmathSoaV4MakeFromScalar( &result->col0, scalar );
-    vmathSoaV4MakeFromScalar( &result->col1, scalar );
-    vmathSoaV4MakeFromScalar( &result->col2, scalar );
-    vmathSoaV4MakeFromScalar( &result->col3, scalar );
-}
-
-static inline void vmathSoaM4MakeFromT3( VmathSoaMatrix4 *result, const VmathSoaTransform3 *mat )
-{
-    vmathSoaV4MakeFromV3Scalar( &result->col0, &mat->col0, spu_splats(0.0f) );
-    vmathSoaV4MakeFromV3Scalar( &result->col1, &mat->col1, spu_splats(0.0f) );
-    vmathSoaV4MakeFromV3Scalar( &result->col2, &mat->col2, spu_splats(0.0f) );
-    vmathSoaV4MakeFromV3Scalar( &result->col3, &mat->col3, spu_splats(1.0f) );
-}
-
-static inline void vmathSoaM4MakeFromCols( VmathSoaMatrix4 *result, const VmathSoaVector4 *_col0, const VmathSoaVector4 *_col1, const VmathSoaVector4 *_col2, const VmathSoaVector4 *_col3 )
-{
-    vmathSoaV4Copy( &result->col0, _col0 );
-    vmathSoaV4Copy( &result->col1, _col1 );
-    vmathSoaV4Copy( &result->col2, _col2 );
-    vmathSoaV4Copy( &result->col3, _col3 );
-}
-
-static inline void vmathSoaM4MakeFromM3V3( VmathSoaMatrix4 *result, const VmathSoaMatrix3 *mat, const VmathSoaVector3 *translateVec )
-{
-    vmathSoaV4MakeFromV3Scalar( &result->col0, &mat->col0, spu_splats(0.0f) );
-    vmathSoaV4MakeFromV3Scalar( &result->col1, &mat->col1, spu_splats(0.0f) );
-    vmathSoaV4MakeFromV3Scalar( &result->col2, &mat->col2, spu_splats(0.0f) );
-    vmathSoaV4MakeFromV3Scalar( &result->col3, translateVec, spu_splats(1.0f) );
-}
-
-static inline void vmathSoaM4MakeFromQV3( VmathSoaMatrix4 *result, const VmathSoaQuat *unitQuat, const VmathSoaVector3 *translateVec )
-{
-    VmathSoaMatrix3 mat;
-    vmathSoaM3MakeFromQ( &mat, unitQuat );
-    vmathSoaV4MakeFromV3Scalar( &result->col0, &mat.col0, spu_splats(0.0f) );
-    vmathSoaV4MakeFromV3Scalar( &result->col1, &mat.col1, spu_splats(0.0f) );
-    vmathSoaV4MakeFromV3Scalar( &result->col2, &mat.col2, spu_splats(0.0f) );
-    vmathSoaV4MakeFromV3Scalar( &result->col3, translateVec, spu_splats(1.0f) );
-}
-
-static inline void vmathSoaM4MakeFromAos( VmathSoaMatrix4 *result, const VmathMatrix4 *mat )
-{
-    vmathSoaV4MakeFromAos( &result->col0, &mat->col0 );
-    vmathSoaV4MakeFromAos( &result->col1, &mat->col1 );
-    vmathSoaV4MakeFromAos( &result->col2, &mat->col2 );
-    vmathSoaV4MakeFromAos( &result->col3, &mat->col3 );
-}
-
-static inline void vmathSoaM4MakeFrom4Aos( VmathSoaMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1, const VmathMatrix4 *mat2, const VmathMatrix4 *mat3 )
-{
-    vmathSoaV4MakeFrom4Aos( &result->col0, &mat0->col0, &mat1->col0, &mat2->col0, &mat3->col0 );
-    vmathSoaV4MakeFrom4Aos( &result->col1, &mat0->col1, &mat1->col1, &mat2->col1, &mat3->col1 );
-    vmathSoaV4MakeFrom4Aos( &result->col2, &mat0->col2, &mat1->col2, &mat2->col2, &mat3->col2 );
-    vmathSoaV4MakeFrom4Aos( &result->col3, &mat0->col3, &mat1->col3, &mat2->col3, &mat3->col3 );
-}
-
-static inline void vmathSoaM4Get4Aos( const VmathSoaMatrix4 *mat, VmathMatrix4 *result0, VmathMatrix4 *result1, VmathMatrix4 *result2, VmathMatrix4 *result3 )
-{
-    vmathSoaV4Get4Aos( &mat->col0, &result0->col0, &result1->col0, &result2->col0, &result3->col0 );
-    vmathSoaV4Get4Aos( &mat->col1, &result0->col1, &result1->col1, &result2->col1, &result3->col1 );
-    vmathSoaV4Get4Aos( &mat->col2, &result0->col2, &result1->col2, &result2->col2, &result3->col2 );
-    vmathSoaV4Get4Aos( &mat->col3, &result0->col3, &result1->col3, &result2->col3, &result3->col3 );
-}
-
-static inline void vmathSoaM4SetCol0( VmathSoaMatrix4 *result, const VmathSoaVector4 *_col0 )
-{
-    vmathSoaV4Copy( &result->col0, _col0 );
-}
-
-static inline void vmathSoaM4SetCol1( VmathSoaMatrix4 *result, const VmathSoaVector4 *_col1 )
-{
-    vmathSoaV4Copy( &result->col1, _col1 );
-}
-
-static inline void vmathSoaM4SetCol2( VmathSoaMatrix4 *result, const VmathSoaVector4 *_col2 )
-{
-    vmathSoaV4Copy( &result->col2, _col2 );
-}
-
-static inline void vmathSoaM4SetCol3( VmathSoaMatrix4 *result, const VmathSoaVector4 *_col3 )
-{
-    vmathSoaV4Copy( &result->col3, _col3 );
-}
-
-static inline void vmathSoaM4SetCol( VmathSoaMatrix4 *result, int col, const VmathSoaVector4 *vec )
-{
-    vmathSoaV4Copy( (&result->col0 + col), vec );
-}
-
-static inline void vmathSoaM4SetRow( VmathSoaMatrix4 *result, int row, const VmathSoaVector4 *vec )
-{
-    vmathSoaV4SetElem( &result->col0, row, vmathSoaV4GetElem( vec, 0 ) );
-    vmathSoaV4SetElem( &result->col1, row, vmathSoaV4GetElem( vec, 1 ) );
-    vmathSoaV4SetElem( &result->col2, row, vmathSoaV4GetElem( vec, 2 ) );
-    vmathSoaV4SetElem( &result->col3, row, vmathSoaV4GetElem( vec, 3 ) );
-}
-
-static inline void vmathSoaM4SetElem( VmathSoaMatrix4 *result, int col, int row, vec_float4 val )
-{
-    VmathSoaVector4 tmpV3_0;
-    vmathSoaM4GetCol( &tmpV3_0, result, col );
-    vmathSoaV4SetElem( &tmpV3_0, row, val );
-    vmathSoaM4SetCol( result, col, &tmpV3_0 );
-}
-
-static inline vec_float4 vmathSoaM4GetElem( const VmathSoaMatrix4 *mat, int col, int row )
-{
-    VmathSoaVector4 tmpV4_0;
-    vmathSoaM4GetCol( &tmpV4_0, mat, col );
-    return vmathSoaV4GetElem( &tmpV4_0, row );
-}
-
-static inline void vmathSoaM4GetCol0( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat )
-{
-    vmathSoaV4Copy( result, &mat->col0 );
-}
-
-static inline void vmathSoaM4GetCol1( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat )
-{
-    vmathSoaV4Copy( result, &mat->col1 );
-}
-
-static inline void vmathSoaM4GetCol2( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat )
-{
-    vmathSoaV4Copy( result, &mat->col2 );
-}
-
-static inline void vmathSoaM4GetCol3( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat )
-{
-    vmathSoaV4Copy( result, &mat->col3 );
-}
-
-static inline void vmathSoaM4GetCol( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, int col )
-{
-    vmathSoaV4Copy( result, (&mat->col0 + col) );
-}
-
-static inline void vmathSoaM4GetRow( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, int row )
-{
-    vmathSoaV4MakeFromElems( result, vmathSoaV4GetElem( &mat->col0, row ), vmathSoaV4GetElem( &mat->col1, row ), vmathSoaV4GetElem( &mat->col2, row ), vmathSoaV4GetElem( &mat->col3, row ) );
-}
-
-static inline void vmathSoaM4Transpose( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat )
-{
-    VmathSoaMatrix4 tmpResult;
-    vmathSoaV4MakeFromElems( &tmpResult.col0, mat->col0.x, mat->col1.x, mat->col2.x, mat->col3.x );
-    vmathSoaV4MakeFromElems( &tmpResult.col1, mat->col0.y, mat->col1.y, mat->col2.y, mat->col3.y );
-    vmathSoaV4MakeFromElems( &tmpResult.col2, mat->col0.z, mat->col1.z, mat->col2.z, mat->col3.z );
-    vmathSoaV4MakeFromElems( &tmpResult.col3, mat->col0.w, mat->col1.w, mat->col2.w, mat->col3.w );
-    vmathSoaM4Copy( result, &tmpResult );
-}
-
-static inline void vmathSoaM4Inverse( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat )
-{
-    VmathSoaVector4 res0, res1, res2, res3;
-    vec_float4 mA, mB, mC, mD, mE, mF, mG, mH, mI, mJ, mK, mL, mM, mN, mO, mP, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, detInv;
-    mA = mat->col0.x;
-    mB = mat->col0.y;
-    mC = mat->col0.z;
-    mD = mat->col0.w;
-    mE = mat->col1.x;
-    mF = mat->col1.y;
-    mG = mat->col1.z;
-    mH = mat->col1.w;
-    mI = mat->col2.x;
-    mJ = mat->col2.y;
-    mK = mat->col2.z;
-    mL = mat->col2.w;
-    mM = mat->col3.x;
-    mN = mat->col3.y;
-    mO = mat->col3.z;
-    mP = mat->col3.w;
-    tmp0 = spu_sub( spu_mul( mK, mD ), spu_mul( mC, mL ) );
-    tmp1 = spu_sub( spu_mul( mO, mH ), spu_mul( mG, mP ) );
-    tmp2 = spu_sub( spu_mul( mB, mK ), spu_mul( mJ, mC ) );
-    tmp3 = spu_sub( spu_mul( mF, mO ), spu_mul( mN, mG ) );
-    tmp4 = spu_sub( spu_mul( mJ, mD ), spu_mul( mB, mL ) );
-    tmp5 = spu_sub( spu_mul( mN, mH ), spu_mul( mF, mP ) );
-    vmathSoaV4SetX( &res0, spu_sub( spu_sub( spu_mul( mJ, tmp1 ), spu_mul( mL, tmp3 ) ), spu_mul( mK, tmp5 ) ) );
-    vmathSoaV4SetY( &res0, spu_sub( spu_sub( spu_mul( mN, tmp0 ), spu_mul( mP, tmp2 ) ), spu_mul( mO, tmp4 ) ) );
-    vmathSoaV4SetZ( &res0, spu_sub( spu_add( spu_mul( mD, tmp3 ), spu_mul( mC, tmp5 ) ), spu_mul( mB, tmp1 ) ) );
-    vmathSoaV4SetW( &res0, spu_sub( spu_add( spu_mul( mH, tmp2 ), spu_mul( mG, tmp4 ) ), spu_mul( mF, tmp0 ) ) );
-    detInv = recipf4( spu_add( spu_add( spu_add( spu_mul( mA, res0.x ), spu_mul( mE, res0.y ) ), spu_mul( mI, res0.z ) ), spu_mul( mM, res0.w ) ) );
-    vmathSoaV4SetX( &res1, spu_mul( mI, tmp1 ) );
-    vmathSoaV4SetY( &res1, spu_mul( mM, tmp0 ) );
-    vmathSoaV4SetZ( &res1, spu_mul( mA, tmp1 ) );
-    vmathSoaV4SetW( &res1, spu_mul( mE, tmp0 ) );
-    vmathSoaV4SetX( &res3, spu_mul( mI, tmp3 ) );
-    vmathSoaV4SetY( &res3, spu_mul( mM, tmp2 ) );
-    vmathSoaV4SetZ( &res3, spu_mul( mA, tmp3 ) );
-    vmathSoaV4SetW( &res3, spu_mul( mE, tmp2 ) );
-    vmathSoaV4SetX( &res2, spu_mul( mI, tmp5 ) );
-    vmathSoaV4SetY( &res2, spu_mul( mM, tmp4 ) );
-    vmathSoaV4SetZ( &res2, spu_mul( mA, tmp5 ) );
-    vmathSoaV4SetW( &res2, spu_mul( mE, tmp4 ) );
-    tmp0 = spu_sub( spu_mul( mI, mB ), spu_mul( mA, mJ ) );
-    tmp1 = spu_sub( spu_mul( mM, mF ), spu_mul( mE, mN ) );
-    tmp2 = spu_sub( spu_mul( mI, mD ), spu_mul( mA, mL ) );
-    tmp3 = spu_sub( spu_mul( mM, mH ), spu_mul( mE, mP ) );
-    tmp4 = spu_sub( spu_mul( mI, mC ), spu_mul( mA, mK ) );
-    tmp5 = spu_sub( spu_mul( mM, mG ), spu_mul( mE, mO ) );
-    vmathSoaV4SetX( &res2, spu_add( spu_sub( spu_mul( mL, tmp1 ), spu_mul( mJ, tmp3 ) ), res2.x ) );
-    vmathSoaV4SetY( &res2, spu_add( spu_sub( spu_mul( mP, tmp0 ), spu_mul( mN, tmp2 ) ), res2.y ) );
-    vmathSoaV4SetZ( &res2, spu_sub( spu_sub( spu_mul( mB, tmp3 ), spu_mul( mD, tmp1 ) ), res2.z ) );
-    vmathSoaV4SetW( &res2, spu_sub( spu_sub( spu_mul( mF, tmp2 ), spu_mul( mH, tmp0 ) ), res2.w ) );
-    vmathSoaV4SetX( &res3, spu_add( spu_sub( spu_mul( mJ, tmp5 ), spu_mul( mK, tmp1 ) ), res3.x ) );
-    vmathSoaV4SetY( &res3, spu_add( spu_sub( spu_mul( mN, tmp4 ), spu_mul( mO, tmp0 ) ), res3.y ) );
-    vmathSoaV4SetZ( &res3, spu_sub( spu_sub( spu_mul( mC, tmp1 ), spu_mul( mB, tmp5 ) ), res3.z ) );
-    vmathSoaV4SetW( &res3, spu_sub( spu_sub( spu_mul( mG, tmp0 ), spu_mul( mF, tmp4 ) ), res3.w ) );
-    vmathSoaV4SetX( &res1, spu_sub( spu_sub( spu_mul( mK, tmp3 ), spu_mul( mL, tmp5 ) ), res1.x ) );
-    vmathSoaV4SetY( &res1, spu_sub( spu_sub( spu_mul( mO, tmp2 ), spu_mul( mP, tmp4 ) ), res1.y ) );
-    vmathSoaV4SetZ( &res1, spu_add( spu_sub( spu_mul( mD, tmp5 ), spu_mul( mC, tmp3 ) ), res1.z ) );
-    vmathSoaV4SetW( &res1, spu_add( spu_sub( spu_mul( mH, tmp4 ), spu_mul( mG, tmp2 ) ), res1.w ) );
-    vmathSoaV4ScalarMul( &result->col0, &res0, detInv );
-    vmathSoaV4ScalarMul( &result->col1, &res1, detInv );
-    vmathSoaV4ScalarMul( &result->col2, &res2, detInv );
-    vmathSoaV4ScalarMul( &result->col3, &res3, detInv );
-}
-
-static inline void vmathSoaM4AffineInverse( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat )
-{
-    VmathSoaTransform3 affineMat, tmpT3_0;
-    VmathSoaVector3 tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3;
-    vmathSoaV4GetXYZ( &tmpV3_0, &mat->col0 );
-    vmathSoaT3SetCol0( &affineMat, &tmpV3_0 );
-    vmathSoaV4GetXYZ( &tmpV3_1, &mat->col1 );
-    vmathSoaT3SetCol1( &affineMat, &tmpV3_1 );
-    vmathSoaV4GetXYZ( &tmpV3_2, &mat->col2 );
-    vmathSoaT3SetCol2( &affineMat, &tmpV3_2 );
-    vmathSoaV4GetXYZ( &tmpV3_3, &mat->col3 );
-    vmathSoaT3SetCol3( &affineMat, &tmpV3_3 );
-    vmathSoaT3Inverse( &tmpT3_0, &affineMat );
-    vmathSoaM4MakeFromT3( result, &tmpT3_0 );
-}
-
-static inline void vmathSoaM4OrthoInverse( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat )
-{
-    VmathSoaTransform3 affineMat, tmpT3_0;
-    VmathSoaVector3 tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3;
-    vmathSoaV4GetXYZ( &tmpV3_0, &mat->col0 );
-    vmathSoaT3SetCol0( &affineMat, &tmpV3_0 );
-    vmathSoaV4GetXYZ( &tmpV3_1, &mat->col1 );
-    vmathSoaT3SetCol1( &affineMat, &tmpV3_1 );
-    vmathSoaV4GetXYZ( &tmpV3_2, &mat->col2 );
-    vmathSoaT3SetCol2( &affineMat, &tmpV3_2 );
-    vmathSoaV4GetXYZ( &tmpV3_3, &mat->col3 );
-    vmathSoaT3SetCol3( &affineMat, &tmpV3_3 );
-    vmathSoaT3OrthoInverse( &tmpT3_0, &affineMat );
-    vmathSoaM4MakeFromT3( result, &tmpT3_0 );
-}
-
-static inline vec_float4 vmathSoaM4Determinant( const VmathSoaMatrix4 *mat )
-{
-    vec_float4 dx, dy, dz, dw, mA, mB, mC, mD, mE, mF, mG, mH, mI, mJ, mK, mL, mM, mN, mO, mP, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
-    mA = mat->col0.x;
-    mB = mat->col0.y;
-    mC = mat->col0.z;
-    mD = mat->col0.w;
-    mE = mat->col1.x;
-    mF = mat->col1.y;
-    mG = mat->col1.z;
-    mH = mat->col1.w;
-    mI = mat->col2.x;
-    mJ = mat->col2.y;
-    mK = mat->col2.z;
-    mL = mat->col2.w;
-    mM = mat->col3.x;
-    mN = mat->col3.y;
-    mO = mat->col3.z;
-    mP = mat->col3.w;
-    tmp0 = spu_sub( spu_mul( mK, mD ), spu_mul( mC, mL ) );
-    tmp1 = spu_sub( spu_mul( mO, mH ), spu_mul( mG, mP ) );
-    tmp2 = spu_sub( spu_mul( mB, mK ), spu_mul( mJ, mC ) );
-    tmp3 = spu_sub( spu_mul( mF, mO ), spu_mul( mN, mG ) );
-    tmp4 = spu_sub( spu_mul( mJ, mD ), spu_mul( mB, mL ) );
-    tmp5 = spu_sub( spu_mul( mN, mH ), spu_mul( mF, mP ) );
-    dx = spu_sub( spu_sub( spu_mul( mJ, tmp1 ), spu_mul( mL, tmp3 ) ), spu_mul( mK, tmp5 ) );
-    dy = spu_sub( spu_sub( spu_mul( mN, tmp0 ), spu_mul( mP, tmp2 ) ), spu_mul( mO, tmp4 ) );
-    dz = spu_sub( spu_add( spu_mul( mD, tmp3 ), spu_mul( mC, tmp5 ) ), spu_mul( mB, tmp1 ) );
-    dw = spu_sub( spu_add( spu_mul( mH, tmp2 ), spu_mul( mG, tmp4 ) ), spu_mul( mF, tmp0 ) );
-    return spu_add( spu_add( spu_add( spu_mul( mA, dx ), spu_mul( mE, dy ) ), spu_mul( mI, dz ) ), spu_mul( mM, dw ) );
-}
-
-static inline void vmathSoaM4Add( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1 )
-{
-    vmathSoaV4Add( &result->col0, &mat0->col0, &mat1->col0 );
-    vmathSoaV4Add( &result->col1, &mat0->col1, &mat1->col1 );
-    vmathSoaV4Add( &result->col2, &mat0->col2, &mat1->col2 );
-    vmathSoaV4Add( &result->col3, &mat0->col3, &mat1->col3 );
-}
-
-static inline void vmathSoaM4Sub( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1 )
-{
-    vmathSoaV4Sub( &result->col0, &mat0->col0, &mat1->col0 );
-    vmathSoaV4Sub( &result->col1, &mat0->col1, &mat1->col1 );
-    vmathSoaV4Sub( &result->col2, &mat0->col2, &mat1->col2 );
-    vmathSoaV4Sub( &result->col3, &mat0->col3, &mat1->col3 );
-}
-
-static inline void vmathSoaM4Neg( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat )
-{
-    vmathSoaV4Neg( &result->col0, &mat->col0 );
-    vmathSoaV4Neg( &result->col1, &mat->col1 );
-    vmathSoaV4Neg( &result->col2, &mat->col2 );
-    vmathSoaV4Neg( &result->col3, &mat->col3 );
-}
-
-static inline void vmathSoaM4AbsPerElem( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat )
-{
-    vmathSoaV4AbsPerElem( &result->col0, &mat->col0 );
-    vmathSoaV4AbsPerElem( &result->col1, &mat->col1 );
-    vmathSoaV4AbsPerElem( &result->col2, &mat->col2 );
-    vmathSoaV4AbsPerElem( &result->col3, &mat->col3 );
-}
-
-static inline void vmathSoaM4ScalarMul( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat, vec_float4 scalar )
-{
-    vmathSoaV4ScalarMul( &result->col0, &mat->col0, scalar );
-    vmathSoaV4ScalarMul( &result->col1, &mat->col1, scalar );
-    vmathSoaV4ScalarMul( &result->col2, &mat->col2, scalar );
-    vmathSoaV4ScalarMul( &result->col3, &mat->col3, scalar );
-}
-
-static inline void vmathSoaM4MulV4( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, const VmathSoaVector4 *vec )
-{
-    vec_float4 tmpX, tmpY, tmpZ, tmpW;
-    tmpX = spu_add( spu_add( spu_add( spu_mul( mat->col0.x, vec->x ), spu_mul( mat->col1.x, vec->y ) ), spu_mul( mat->col2.x, vec->z ) ), spu_mul( mat->col3.x, vec->w ) );
-    tmpY = spu_add( spu_add( spu_add( spu_mul( mat->col0.y, vec->x ), spu_mul( mat->col1.y, vec->y ) ), spu_mul( mat->col2.y, vec->z ) ), spu_mul( mat->col3.y, vec->w ) );
-    tmpZ = spu_add( spu_add( spu_add( spu_mul( mat->col0.z, vec->x ), spu_mul( mat->col1.z, vec->y ) ), spu_mul( mat->col2.z, vec->z ) ), spu_mul( mat->col3.z, vec->w ) );
-    tmpW = spu_add( spu_add( spu_add( spu_mul( mat->col0.w, vec->x ), spu_mul( mat->col1.w, vec->y ) ), spu_mul( mat->col2.w, vec->z ) ), spu_mul( mat->col3.w, vec->w ) );
-    vmathSoaV4MakeFromElems( result, tmpX, tmpY, tmpZ, tmpW );
-}
-
-static inline void vmathSoaM4MulV3( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, const VmathSoaVector3 *vec )
-{
-    result->x = spu_add( spu_add( spu_mul( mat->col0.x, vec->x ), spu_mul( mat->col1.x, vec->y ) ), spu_mul( mat->col2.x, vec->z ) );
-    result->y = spu_add( spu_add( spu_mul( mat->col0.y, vec->x ), spu_mul( mat->col1.y, vec->y ) ), spu_mul( mat->col2.y, vec->z ) );
-    result->z = spu_add( spu_add( spu_mul( mat->col0.z, vec->x ), spu_mul( mat->col1.z, vec->y ) ), spu_mul( mat->col2.z, vec->z ) );
-    result->w = spu_add( spu_add( spu_mul( mat->col0.w, vec->x ), spu_mul( mat->col1.w, vec->y ) ), spu_mul( mat->col2.w, vec->z ) );
-}
-
-static inline void vmathSoaM4MulP3( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, const VmathSoaPoint3 *pnt )
-{
-    result->x = spu_add( spu_add( spu_add( spu_mul( mat->col0.x, pnt->x ), spu_mul( mat->col1.x, pnt->y ) ), spu_mul( mat->col2.x, pnt->z ) ), mat->col3.x );
-    result->y = spu_add( spu_add( spu_add( spu_mul( mat->col0.y, pnt->x ), spu_mul( mat->col1.y, pnt->y ) ), spu_mul( mat->col2.y, pnt->z ) ), mat->col3.y );
-    result->z = spu_add( spu_add( spu_add( spu_mul( mat->col0.z, pnt->x ), spu_mul( mat->col1.z, pnt->y ) ), spu_mul( mat->col2.z, pnt->z ) ), mat->col3.z );
-    result->w = spu_add( spu_add( spu_add( spu_mul( mat->col0.w, pnt->x ), spu_mul( mat->col1.w, pnt->y ) ), spu_mul( mat->col2.w, pnt->z ) ), mat->col3.w );
-}
-
-static inline void vmathSoaM4Mul( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1 )
-{
-    VmathSoaMatrix4 tmpResult;
-    vmathSoaM4MulV4( &tmpResult.col0, mat0, &mat1->col0 );
-    vmathSoaM4MulV4( &tmpResult.col1, mat0, &mat1->col1 );
-    vmathSoaM4MulV4( &tmpResult.col2, mat0, &mat1->col2 );
-    vmathSoaM4MulV4( &tmpResult.col3, mat0, &mat1->col3 );
-    vmathSoaM4Copy( result, &tmpResult );
-}
-
-static inline void vmathSoaM4MulT3( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat, const VmathSoaTransform3 *tfrm1 )
-{
-    VmathSoaMatrix4 tmpResult;
-    VmathSoaPoint3 tmpP3_0;
-    vmathSoaM4MulV3( &tmpResult.col0, mat, &tfrm1->col0 );
-    vmathSoaM4MulV3( &tmpResult.col1, mat, &tfrm1->col1 );
-    vmathSoaM4MulV3( &tmpResult.col2, mat, &tfrm1->col2 );
-    vmathSoaP3MakeFromV3( &tmpP3_0, &tfrm1->col3 );
-    vmathSoaM4MulP3( &tmpResult.col3, mat, &tmpP3_0 );
-    vmathSoaM4Copy( result, &tmpResult );
-}
-
-static inline void vmathSoaM4MulPerElem( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1 )
-{
-    vmathSoaV4MulPerElem( &result->col0, &mat0->col0, &mat1->col0 );
-    vmathSoaV4MulPerElem( &result->col1, &mat0->col1, &mat1->col1 );
-    vmathSoaV4MulPerElem( &result->col2, &mat0->col2, &mat1->col2 );
-    vmathSoaV4MulPerElem( &result->col3, &mat0->col3, &mat1->col3 );
-}
-
-static inline void vmathSoaM4MakeIdentity( VmathSoaMatrix4 *result )
-{
-    vmathSoaV4MakeXAxis( &result->col0 );
-    vmathSoaV4MakeYAxis( &result->col1 );
-    vmathSoaV4MakeZAxis( &result->col2 );
-    vmathSoaV4MakeWAxis( &result->col3 );
-}
-
-static inline void vmathSoaM4SetUpper3x3( VmathSoaMatrix4 *result, const VmathSoaMatrix3 *mat3 )
-{
-    vmathSoaV4SetXYZ( &result->col0, &mat3->col0 );
-    vmathSoaV4SetXYZ( &result->col1, &mat3->col1 );
-    vmathSoaV4SetXYZ( &result->col2, &mat3->col2 );
-}
-
-static inline void vmathSoaM4GetUpper3x3( VmathSoaMatrix3 *result, const VmathSoaMatrix4 *mat )
-{
-    vmathSoaV4GetXYZ( &result->col0, &mat->col0 );
-    vmathSoaV4GetXYZ( &result->col1, &mat->col1 );
-    vmathSoaV4GetXYZ( &result->col2, &mat->col2 );
-}
-
-static inline void vmathSoaM4SetTranslation( VmathSoaMatrix4 *result, const VmathSoaVector3 *translateVec )
-{
-    vmathSoaV4SetXYZ( &result->col3, translateVec );
-}
-
-static inline void vmathSoaM4GetTranslation( VmathSoaVector3 *result, const VmathSoaMatrix4 *mat )
-{
-    vmathSoaV4GetXYZ( result, &mat->col3 );
-}
-
-static inline void vmathSoaM4MakeRotationX( VmathSoaMatrix4 *result, vec_float4 radians )
-{
-    vec_float4 s, c;
-    sincosf4( radians, &s, &c );
-    vmathSoaV4MakeXAxis( &result->col0 );
-    vmathSoaV4MakeFromElems( &result->col1, spu_splats(0.0f), c, s, spu_splats(0.0f) );
-    vmathSoaV4MakeFromElems( &result->col2, spu_splats(0.0f), negatef4( s ), c, spu_splats(0.0f) );
-    vmathSoaV4MakeWAxis( &result->col3 );
-}
-
-static inline void vmathSoaM4MakeRotationY( VmathSoaMatrix4 *result, vec_float4 radians )
-{
-    vec_float4 s, c;
-    sincosf4( radians, &s, &c );
-    vmathSoaV4MakeFromElems( &result->col0, c, spu_splats(0.0f), negatef4( s ), spu_splats(0.0f) );
-    vmathSoaV4MakeYAxis( &result->col1 );
-    vmathSoaV4MakeFromElems( &result->col2, s, spu_splats(0.0f), c, spu_splats(0.0f) );
-    vmathSoaV4MakeWAxis( &result->col3 );
-}
-
-static inline void vmathSoaM4MakeRotationZ( VmathSoaMatrix4 *result, vec_float4 radians )
-{
-    vec_float4 s, c;
-    sincosf4( radians, &s, &c );
-    vmathSoaV4MakeFromElems( &result->col0, c, s, spu_splats(0.0f), spu_splats(0.0f) );
-    vmathSoaV4MakeFromElems( &result->col1, negatef4( s ), c, spu_splats(0.0f), spu_splats(0.0f) );
-    vmathSoaV4MakeZAxis( &result->col2 );
-    vmathSoaV4MakeWAxis( &result->col3 );
-}
-
-static inline void vmathSoaM4MakeRotationZYX( VmathSoaMatrix4 *result, const VmathSoaVector3 *radiansXYZ )
-{
-    vec_float4 sX, cX, sY, cY, sZ, cZ, tmp0, tmp1;
-    sincosf4( radiansXYZ->x, &sX, &cX );
-    sincosf4( radiansXYZ->y, &sY, &cY );
-    sincosf4( radiansXYZ->z, &sZ, &cZ );
-    tmp0 = spu_mul( cZ, sY );
-    tmp1 = spu_mul( sZ, sY );
-    vmathSoaV4MakeFromElems( &result->col0, spu_mul( cZ, cY ), spu_mul( sZ, cY ), negatef4( sY ), spu_splats(0.0f) );
-    vmathSoaV4MakeFromElems( &result->col1, spu_sub( spu_mul( tmp0, sX ), spu_mul( sZ, cX ) ), spu_add( spu_mul( tmp1, sX ), spu_mul( cZ, cX ) ), spu_mul( cY, sX ), spu_splats(0.0f) );
-    vmathSoaV4MakeFromElems( &result->col2, spu_add( spu_mul( tmp0, cX ), spu_mul( sZ, sX ) ), spu_sub( spu_mul( tmp1, cX ), spu_mul( cZ, sX ) ), spu_mul( cY, cX ), spu_splats(0.0f) );
-    vmathSoaV4MakeWAxis( &result->col3 );
-}
-
-static inline void vmathSoaM4MakeRotationAxis( VmathSoaMatrix4 *result, vec_float4 radians, const VmathSoaVector3 *unitVec )
-{
-    vec_float4 x, y, z, s, c, oneMinusC, xy, yz, zx;
-    sincosf4( radians, &s, &c );
-    x = unitVec->x;
-    y = unitVec->y;
-    z = unitVec->z;
-    xy = spu_mul( x, y );
-    yz = spu_mul( y, z );
-    zx = spu_mul( z, x );
-    oneMinusC = spu_sub( spu_splats(1.0f), c );
-    vmathSoaV4MakeFromElems( &result->col0, spu_add( spu_mul( spu_mul( x, x ), oneMinusC ), c ), spu_add( spu_mul( xy, oneMinusC ), spu_mul( z, s ) ), spu_sub( spu_mul( zx, oneMinusC ), spu_mul( y, s ) ), spu_splats(0.0f) );
-    vmathSoaV4MakeFromElems( &result->col1, spu_sub( spu_mul( xy, oneMinusC ), spu_mul( z, s ) ), spu_add( spu_mul( spu_mul( y, y ), oneMinusC ), c ), spu_add( spu_mul( yz, oneMinusC ), spu_mul( x, s ) ), spu_splats(0.0f) );
-    vmathSoaV4MakeFromElems( &result->col2, spu_add( spu_mul( zx, oneMinusC ), spu_mul( y, s ) ), spu_sub( spu_mul( yz, oneMinusC ), spu_mul( x, s ) ), spu_add( spu_mul( spu_mul( z, z ), oneMinusC ), c ), spu_splats(0.0f) );
-    vmathSoaV4MakeWAxis( &result->col3 );
-}
-
-static inline void vmathSoaM4MakeRotationQ( VmathSoaMatrix4 *result, const VmathSoaQuat *unitQuat )
-{
-    VmathSoaTransform3 tmpT3_0;
-    vmathSoaT3MakeRotationQ( &tmpT3_0, unitQuat );
-    vmathSoaM4MakeFromT3( result, &tmpT3_0 );
-}
-
-static inline void vmathSoaM4MakeScale( VmathSoaMatrix4 *result, const VmathSoaVector3 *scaleVec )
-{
-    vmathSoaV4MakeFromElems( &result->col0, scaleVec->x, spu_splats(0.0f), spu_splats(0.0f), spu_splats(0.0f) );
-    vmathSoaV4MakeFromElems( &result->col1, spu_splats(0.0f), scaleVec->y, spu_splats(0.0f), spu_splats(0.0f) );
-    vmathSoaV4MakeFromElems( &result->col2, spu_splats(0.0f), spu_splats(0.0f), scaleVec->z, spu_splats(0.0f) );
-    vmathSoaV4MakeWAxis( &result->col3 );
-}
-
-static inline void vmathSoaM4AppendScale( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat, const VmathSoaVector3 *scaleVec )
-{
-    vmathSoaV4ScalarMul( &result->col0, &mat->col0, vmathSoaV3GetX( scaleVec ) );
-    vmathSoaV4ScalarMul( &result->col1, &mat->col1, vmathSoaV3GetY( scaleVec ) );
-    vmathSoaV4ScalarMul( &result->col2, &mat->col2, vmathSoaV3GetZ( scaleVec ) );
-    vmathSoaV4Copy( &result->col3, &mat->col3 );
-}
-
-static inline void vmathSoaM4PrependScale( VmathSoaMatrix4 *result, const VmathSoaVector3 *scaleVec, const VmathSoaMatrix4 *mat )
-{
-    VmathSoaVector4 scale4;
-    vmathSoaV4MakeFromV3Scalar( &scale4, scaleVec, spu_splats(1.0f) );
-    vmathSoaV4MulPerElem( &result->col0, &mat->col0, &scale4 );
-    vmathSoaV4MulPerElem( &result->col1, &mat->col1, &scale4 );
-    vmathSoaV4MulPerElem( &result->col2, &mat->col2, &scale4 );
-    vmathSoaV4MulPerElem( &result->col3, &mat->col3, &scale4 );
-}
-
-static inline void vmathSoaM4MakeTranslation( VmathSoaMatrix4 *result, const VmathSoaVector3 *translateVec )
-{
-    vmathSoaV4MakeXAxis( &result->col0 );
-    vmathSoaV4MakeYAxis( &result->col1 );
-    vmathSoaV4MakeZAxis( &result->col2 );
-    vmathSoaV4MakeFromV3Scalar( &result->col3, translateVec, spu_splats(1.0f) );
-}
-
-static inline void vmathSoaM4MakeLookAt( VmathSoaMatrix4 *result, const VmathSoaPoint3 *eyePos, const VmathSoaPoint3 *lookAtPos, const VmathSoaVector3 *upVec )
-{
-    VmathSoaMatrix4 m4EyeFrame;
-    VmathSoaVector3 v3X, v3Y, v3Z, tmpV3_0, tmpV3_1;
-    VmathSoaVector4 tmpV4_0, tmpV4_1, tmpV4_2, tmpV4_3;
-    vmathSoaV3Normalize( &v3Y, upVec );
-    vmathSoaP3Sub( &tmpV3_0, eyePos, lookAtPos );
-    vmathSoaV3Normalize( &v3Z, &tmpV3_0 );
-    vmathSoaV3Cross( &tmpV3_1, &v3Y, &v3Z );
-    vmathSoaV3Normalize( &v3X, &tmpV3_1 );
-    vmathSoaV3Cross( &v3Y, &v3Z, &v3X );
-    vmathSoaV4MakeFromV3( &tmpV4_0, &v3X );
-    vmathSoaV4MakeFromV3( &tmpV4_1, &v3Y );
-    vmathSoaV4MakeFromV3( &tmpV4_2, &v3Z );
-    vmathSoaV4MakeFromP3( &tmpV4_3, eyePos );
-    vmathSoaM4MakeFromCols( &m4EyeFrame, &tmpV4_0, &tmpV4_1, &tmpV4_2, &tmpV4_3 );
-    vmathSoaM4OrthoInverse( result, &m4EyeFrame );
-}
-
-static inline void vmathSoaM4MakePerspective( VmathSoaMatrix4 *result, vec_float4 fovyRadians, vec_float4 aspect, vec_float4 zNear, vec_float4 zFar )
-{
-    vec_float4 f, rangeInv;
-    f = tanf4( spu_sub( spu_splats( _VECTORMATH_PI_OVER_2 ), spu_mul( spu_splats(0.5f), fovyRadians ) ) );
-    rangeInv = recipf4( spu_sub( zNear, zFar ) );
-    vmathSoaV4MakeFromElems( &result->col0, divf4( f, aspect ), spu_splats(0.0f), spu_splats(0.0f), spu_splats(0.0f) );
-    vmathSoaV4MakeFromElems( &result->col1, spu_splats(0.0f), f, spu_splats(0.0f), spu_splats(0.0f) );
-    vmathSoaV4MakeFromElems( &result->col2, spu_splats(0.0f), spu_splats(0.0f), spu_mul( spu_add( zNear, zFar ), rangeInv ), spu_splats(-1.0f) );
-    vmathSoaV4MakeFromElems( &result->col3, spu_splats(0.0f), spu_splats(0.0f), spu_mul( spu_mul( spu_mul( zNear, zFar ), rangeInv ), spu_splats(2.0f) ), spu_splats(0.0f) );
-}
-
-static inline void vmathSoaM4MakeFrustum( VmathSoaMatrix4 *result, vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar )
-{
-    vec_float4 sum_rl, sum_tb, sum_nf, inv_rl, inv_tb, inv_nf, n2;
-    sum_rl = spu_add( right, left );
-    sum_tb = spu_add( top, bottom );
-    sum_nf = spu_add( zNear, zFar );
-    inv_rl = recipf4( spu_sub( right, left ) );
-    inv_tb = recipf4( spu_sub( top, bottom ) );
-    inv_nf = recipf4( spu_sub( zNear, zFar ) );
-    n2 = spu_add( zNear, zNear );
-    vmathSoaV4MakeFromElems( &result->col0, spu_mul( n2, inv_rl ), spu_splats(0.0f), spu_splats(0.0f), spu_splats(0.0f) );
-    vmathSoaV4MakeFromElems( &result->col1, spu_splats(0.0f), spu_mul( n2, inv_tb ), spu_splats(0.0f), spu_splats(0.0f) );
-    vmathSoaV4MakeFromElems( &result->col2, spu_mul( sum_rl, inv_rl ), spu_mul( sum_tb, inv_tb ), spu_mul( sum_nf, inv_nf ), spu_splats(-1.0f) );
-    vmathSoaV4MakeFromElems( &result->col3, spu_splats(0.0f), spu_splats(0.0f), spu_mul( spu_mul( n2, inv_nf ), zFar ), spu_splats(0.0f) );
-}
-
-static inline void vmathSoaM4MakeOrthographic( VmathSoaMatrix4 *result, vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar )
-{
-    vec_float4 sum_rl, sum_tb, sum_nf, inv_rl, inv_tb, inv_nf;
-    sum_rl = spu_add( right, left );
-    sum_tb = spu_add( top, bottom );
-    sum_nf = spu_add( zNear, zFar );
-    inv_rl = recipf4( spu_sub( right, left ) );
-    inv_tb = recipf4( spu_sub( top, bottom ) );
-    inv_nf = recipf4( spu_sub( zNear, zFar ) );
-    vmathSoaV4MakeFromElems( &result->col0, spu_add( inv_rl, inv_rl ), spu_splats(0.0f), spu_splats(0.0f), spu_splats(0.0f) );
-    vmathSoaV4MakeFromElems( &result->col1, spu_splats(0.0f), spu_add( inv_tb, inv_tb ), spu_splats(0.0f), spu_splats(0.0f) );
-    vmathSoaV4MakeFromElems( &result->col2, spu_splats(0.0f), spu_splats(0.0f), spu_add( inv_nf, inv_nf ), spu_splats(0.0f) );
-    vmathSoaV4MakeFromElems( &result->col3, spu_mul( negatef4( sum_rl ), inv_rl ), spu_mul( negatef4( sum_tb ), inv_tb ), spu_mul( sum_nf, inv_nf ), spu_splats(1.0f) );
-}
-
-static inline void vmathSoaM4Select( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1, vec_uint4 select1 )
-{
-    vmathSoaV4Select( &result->col0, &mat0->col0, &mat1->col0, select1 );
-    vmathSoaV4Select( &result->col1, &mat0->col1, &mat1->col1, select1 );
-    vmathSoaV4Select( &result->col2, &mat0->col2, &mat1->col2, select1 );
-    vmathSoaV4Select( &result->col3, &mat0->col3, &mat1->col3, select1 );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathSoaM4Print( const VmathSoaMatrix4 *mat )
-{
-    VmathMatrix4 mat0, mat1, mat2, mat3;
-    vmathSoaM4Get4Aos( mat, &mat0, &mat1, &mat2, &mat3 );
-    printf("slot 0:\n");
-    vmathM4Print( &mat0 );
-    printf("slot 1:\n");
-    vmathM4Print( &mat1 );
-    printf("slot 2:\n");
-    vmathM4Print( &mat2 );
-    printf("slot 3:\n");
-    vmathM4Print( &mat3 );
-}
-
-static inline void vmathSoaM4Prints( const VmathSoaMatrix4 *mat, const char *name )
-{
-    printf("%s:\n", name);
-    vmathSoaM4Print( mat );
-}
-
-#endif
-
-static inline void vmathSoaT3Copy( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm )
-{
-    vmathSoaV3Copy( &result->col0, &tfrm->col0 );
-    vmathSoaV3Copy( &result->col1, &tfrm->col1 );
-    vmathSoaV3Copy( &result->col2, &tfrm->col2 );
-    vmathSoaV3Copy( &result->col3, &tfrm->col3 );
-}
-
-static inline void vmathSoaT3MakeFromScalar( VmathSoaTransform3 *result, vec_float4 scalar )
-{
-    vmathSoaV3MakeFromScalar( &result->col0, scalar );
-    vmathSoaV3MakeFromScalar( &result->col1, scalar );
-    vmathSoaV3MakeFromScalar( &result->col2, scalar );
-    vmathSoaV3MakeFromScalar( &result->col3, scalar );
-}
-
-static inline void vmathSoaT3MakeFromCols( VmathSoaTransform3 *result, const VmathSoaVector3 *_col0, const VmathSoaVector3 *_col1, const VmathSoaVector3 *_col2, const VmathSoaVector3 *_col3 )
-{
-    vmathSoaV3Copy( &result->col0, _col0 );
-    vmathSoaV3Copy( &result->col1, _col1 );
-    vmathSoaV3Copy( &result->col2, _col2 );
-    vmathSoaV3Copy( &result->col3, _col3 );
-}
-
-static inline void vmathSoaT3MakeFromM3V3( VmathSoaTransform3 *result, const VmathSoaMatrix3 *tfrm, const VmathSoaVector3 *translateVec )
-{
-    vmathSoaT3SetUpper3x3( result, tfrm );
-    vmathSoaT3SetTranslation( result, translateVec );
-}
-
-static inline void vmathSoaT3MakeFromQV3( VmathSoaTransform3 *result, const VmathSoaQuat *unitQuat, const VmathSoaVector3 *translateVec )
-{
-    VmathSoaMatrix3 tmpM3_0;
-    vmathSoaM3MakeFromQ( &tmpM3_0, unitQuat );
-    vmathSoaT3SetUpper3x3( result, &tmpM3_0 );
-    vmathSoaT3SetTranslation( result, translateVec );
-}
-
-static inline void vmathSoaT3MakeFromAos( VmathSoaTransform3 *result, const VmathTransform3 *tfrm )
-{
-    vmathSoaV3MakeFromAos( &result->col0, &tfrm->col0 );
-    vmathSoaV3MakeFromAos( &result->col1, &tfrm->col1 );
-    vmathSoaV3MakeFromAos( &result->col2, &tfrm->col2 );
-    vmathSoaV3MakeFromAos( &result->col3, &tfrm->col3 );
-}
-
-static inline void vmathSoaT3MakeFrom4Aos( VmathSoaTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1, const VmathTransform3 *tfrm2, const VmathTransform3 *tfrm3 )
-{
-    vmathSoaV3MakeFrom4Aos( &result->col0, &tfrm0->col0, &tfrm1->col0, &tfrm2->col0, &tfrm3->col0 );
-    vmathSoaV3MakeFrom4Aos( &result->col1, &tfrm0->col1, &tfrm1->col1, &tfrm2->col1, &tfrm3->col1 );
-    vmathSoaV3MakeFrom4Aos( &result->col2, &tfrm0->col2, &tfrm1->col2, &tfrm2->col2, &tfrm3->col2 );
-    vmathSoaV3MakeFrom4Aos( &result->col3, &tfrm0->col3, &tfrm1->col3, &tfrm2->col3, &tfrm3->col3 );
-}
-
-static inline void vmathSoaT3Get4Aos( const VmathSoaTransform3 *tfrm, VmathTransform3 *result0, VmathTransform3 *result1, VmathTransform3 *result2, VmathTransform3 *result3 )
-{
-    vmathSoaV3Get4Aos( &tfrm->col0, &result0->col0, &result1->col0, &result2->col0, &result3->col0 );
-    vmathSoaV3Get4Aos( &tfrm->col1, &result0->col1, &result1->col1, &result2->col1, &result3->col1 );
-    vmathSoaV3Get4Aos( &tfrm->col2, &result0->col2, &result1->col2, &result2->col2, &result3->col2 );
-    vmathSoaV3Get4Aos( &tfrm->col3, &result0->col3, &result1->col3, &result2->col3, &result3->col3 );
-}
-
-static inline void vmathSoaT3SetCol0( VmathSoaTransform3 *result, const VmathSoaVector3 *_col0 )
-{
-    vmathSoaV3Copy( &result->col0, _col0 );
-}
-
-static inline void vmathSoaT3SetCol1( VmathSoaTransform3 *result, const VmathSoaVector3 *_col1 )
-{
-    vmathSoaV3Copy( &result->col1, _col1 );
-}
-
-static inline void vmathSoaT3SetCol2( VmathSoaTransform3 *result, const VmathSoaVector3 *_col2 )
-{
-    vmathSoaV3Copy( &result->col2, _col2 );
-}
-
-static inline void vmathSoaT3SetCol3( VmathSoaTransform3 *result, const VmathSoaVector3 *_col3 )
-{
-    vmathSoaV3Copy( &result->col3, _col3 );
-}
-
-static inline void vmathSoaT3SetCol( VmathSoaTransform3 *result, int col, const VmathSoaVector3 *vec )
-{
-    vmathSoaV3Copy( (&result->col0 + col), vec );
-}
-
-static inline void vmathSoaT3SetRow( VmathSoaTransform3 *result, int row, const VmathSoaVector4 *vec )
-{
-    vmathSoaV3SetElem( &result->col0, row, vmathSoaV4GetElem( vec, 0 ) );
-    vmathSoaV3SetElem( &result->col1, row, vmathSoaV4GetElem( vec, 1 ) );
-    vmathSoaV3SetElem( &result->col2, row, vmathSoaV4GetElem( vec, 2 ) );
-    vmathSoaV3SetElem( &result->col3, row, vmathSoaV4GetElem( vec, 3 ) );
-}
-
-static inline void vmathSoaT3SetElem( VmathSoaTransform3 *result, int col, int row, vec_float4 val )
-{
-    VmathSoaVector3 tmpV3_0;
-    vmathSoaT3GetCol( &tmpV3_0, result, col );
-    vmathSoaV3SetElem( &tmpV3_0, row, val );
-    vmathSoaT3SetCol( result, col, &tmpV3_0 );
-}
-
-static inline vec_float4 vmathSoaT3GetElem( const VmathSoaTransform3 *tfrm, int col, int row )
-{
-    VmathSoaVector3 tmpV3_0;
-    vmathSoaT3GetCol( &tmpV3_0, tfrm, col );
-    return vmathSoaV3GetElem( &tmpV3_0, row );
-}
-
-static inline void vmathSoaT3GetCol0( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm )
-{
-    vmathSoaV3Copy( result, &tfrm->col0 );
-}
-
-static inline void vmathSoaT3GetCol1( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm )
-{
-    vmathSoaV3Copy( result, &tfrm->col1 );
-}
-
-static inline void vmathSoaT3GetCol2( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm )
-{
-    vmathSoaV3Copy( result, &tfrm->col2 );
-}
-
-static inline void vmathSoaT3GetCol3( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm )
-{
-    vmathSoaV3Copy( result, &tfrm->col3 );
-}
-
-static inline void vmathSoaT3GetCol( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm, int col )
-{
-    vmathSoaV3Copy( result, (&tfrm->col0 + col) );
-}
-
-static inline void vmathSoaT3GetRow( VmathSoaVector4 *result, const VmathSoaTransform3 *tfrm, int row )
-{
-    vmathSoaV4MakeFromElems( result, vmathSoaV3GetElem( &tfrm->col0, row ), vmathSoaV3GetElem( &tfrm->col1, row ), vmathSoaV3GetElem( &tfrm->col2, row ), vmathSoaV3GetElem( &tfrm->col3, row ) );
-}
-
-static inline void vmathSoaT3Inverse( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm )
-{
-    VmathSoaVector3 tmp0, tmp1, tmp2, inv0, inv1, inv2, tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3, tmpV3_4, tmpV3_5;
-    vec_float4 detinv;
-    vmathSoaV3Cross( &tmp0, &tfrm->col1, &tfrm->col2 );
-    vmathSoaV3Cross( &tmp1, &tfrm->col2, &tfrm->col0 );
-    vmathSoaV3Cross( &tmp2, &tfrm->col0, &tfrm->col1 );
-    detinv = recipf4( vmathSoaV3Dot( &tfrm->col2, &tmp2 ) );
-    vmathSoaV3MakeFromElems( &inv0, spu_mul( tmp0.x, detinv ), spu_mul( tmp1.x, detinv ), spu_mul( tmp2.x, detinv ) );
-    vmathSoaV3MakeFromElems( &inv1, spu_mul( tmp0.y, detinv ), spu_mul( tmp1.y, detinv ), spu_mul( tmp2.y, detinv ) );
-    vmathSoaV3MakeFromElems( &inv2, spu_mul( tmp0.z, detinv ), spu_mul( tmp1.z, detinv ), spu_mul( tmp2.z, detinv ) );
-    vmathSoaV3Copy( &result->col0, &inv0 );
-    vmathSoaV3Copy( &result->col1, &inv1 );
-    vmathSoaV3Copy( &result->col2, &inv2 );
-    vmathSoaV3ScalarMul( &tmpV3_0, &inv0, tfrm->col3.x );
-    vmathSoaV3ScalarMul( &tmpV3_1, &inv1, tfrm->col3.y );
-    vmathSoaV3ScalarMul( &tmpV3_2, &inv2, tfrm->col3.z );
-    vmathSoaV3Add( &tmpV3_3, &tmpV3_1, &tmpV3_2 );
-    vmathSoaV3Add( &tmpV3_4, &tmpV3_0, &tmpV3_3 );
-    vmathSoaV3Neg( &tmpV3_5, &tmpV3_4 );
-    vmathSoaV3Copy( &result->col3, &tmpV3_5 );
-}
-
-static inline void vmathSoaT3OrthoInverse( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm )
-{
-    VmathSoaVector3 inv0, inv1, inv2, tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3, tmpV3_4, tmpV3_5;
-    vmathSoaV3MakeFromElems( &inv0, tfrm->col0.x, tfrm->col1.x, tfrm->col2.x );
-    vmathSoaV3MakeFromElems( &inv1, tfrm->col0.y, tfrm->col1.y, tfrm->col2.y );
-    vmathSoaV3MakeFromElems( &inv2, tfrm->col0.z, tfrm->col1.z, tfrm->col2.z );
-    vmathSoaV3Copy( &result->col0, &inv0 );
-    vmathSoaV3Copy( &result->col1, &inv1 );
-    vmathSoaV3Copy( &result->col2, &inv2 );
-    vmathSoaV3ScalarMul( &tmpV3_0, &inv0, tfrm->col3.x );
-    vmathSoaV3ScalarMul( &tmpV3_1, &inv1, tfrm->col3.y );
-    vmathSoaV3ScalarMul( &tmpV3_2, &inv2, tfrm->col3.z );
-    vmathSoaV3Add( &tmpV3_3, &tmpV3_1, &tmpV3_2 );
-    vmathSoaV3Add( &tmpV3_4, &tmpV3_0, &tmpV3_3 );
-    vmathSoaV3Neg( &tmpV3_5, &tmpV3_4 );
-    vmathSoaV3Copy( &result->col3, &tmpV3_5 );
-}
-
-static inline void vmathSoaT3AbsPerElem( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm )
-{
-    vmathSoaV3AbsPerElem( &result->col0, &tfrm->col0 );
-    vmathSoaV3AbsPerElem( &result->col1, &tfrm->col1 );
-    vmathSoaV3AbsPerElem( &result->col2, &tfrm->col2 );
-    vmathSoaV3AbsPerElem( &result->col3, &tfrm->col3 );
-}
-
-static inline void vmathSoaT3MulV3( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm, const VmathSoaVector3 *vec )
-{
-    vec_float4 tmpX, tmpY, tmpZ;
-    tmpX = spu_add( spu_add( spu_mul( tfrm->col0.x, vec->x ), spu_mul( tfrm->col1.x, vec->y ) ), spu_mul( tfrm->col2.x, vec->z ) );
-    tmpY = spu_add( spu_add( spu_mul( tfrm->col0.y, vec->x ), spu_mul( tfrm->col1.y, vec->y ) ), spu_mul( tfrm->col2.y, vec->z ) );
-    tmpZ = spu_add( spu_add( spu_mul( tfrm->col0.z, vec->x ), spu_mul( tfrm->col1.z, vec->y ) ), spu_mul( tfrm->col2.z, vec->z ) );
-    vmathSoaV3MakeFromElems( result, tmpX, tmpY, tmpZ );
-}
-
-static inline void vmathSoaT3MulP3( VmathSoaPoint3 *result, const VmathSoaTransform3 *tfrm, const VmathSoaPoint3 *pnt )
-{
-    vec_float4 tmpX, tmpY, tmpZ;
-    tmpX = spu_add( spu_add( spu_add( spu_mul( tfrm->col0.x, pnt->x ), spu_mul( tfrm->col1.x, pnt->y ) ), spu_mul( tfrm->col2.x, pnt->z ) ), tfrm->col3.x );
-    tmpY = spu_add( spu_add( spu_add( spu_mul( tfrm->col0.y, pnt->x ), spu_mul( tfrm->col1.y, pnt->y ) ), spu_mul( tfrm->col2.y, pnt->z ) ), tfrm->col3.y );
-    tmpZ = spu_add( spu_add( spu_add( spu_mul( tfrm->col0.z, pnt->x ), spu_mul( tfrm->col1.z, pnt->y ) ), spu_mul( tfrm->col2.z, pnt->z ) ), tfrm->col3.z );
-    vmathSoaP3MakeFromElems( result, tmpX, tmpY, tmpZ );
-}
-
-static inline void vmathSoaT3Mul( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm0, const VmathSoaTransform3 *tfrm1 )
-{
-    VmathSoaTransform3 tmpResult;
-    VmathSoaPoint3 tmpP3_0, tmpP3_1;
-    vmathSoaT3MulV3( &tmpResult.col0, tfrm0, &tfrm1->col0 );
-    vmathSoaT3MulV3( &tmpResult.col1, tfrm0, &tfrm1->col1 );
-    vmathSoaT3MulV3( &tmpResult.col2, tfrm0, &tfrm1->col2 );
-    vmathSoaP3MakeFromV3( &tmpP3_0, &tfrm1->col3 );
-    vmathSoaT3MulP3( &tmpP3_1, tfrm0, &tmpP3_0 );
-    vmathSoaV3MakeFromP3( &tmpResult.col3, &tmpP3_1 );
-    vmathSoaT3Copy( result, &tmpResult );
-}
-
-static inline void vmathSoaT3MulPerElem( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm0, const VmathSoaTransform3 *tfrm1 )
-{
-    vmathSoaV3MulPerElem( &result->col0, &tfrm0->col0, &tfrm1->col0 );
-    vmathSoaV3MulPerElem( &result->col1, &tfrm0->col1, &tfrm1->col1 );
-    vmathSoaV3MulPerElem( &result->col2, &tfrm0->col2, &tfrm1->col2 );
-    vmathSoaV3MulPerElem( &result->col3, &tfrm0->col3, &tfrm1->col3 );
-}
-
-static inline void vmathSoaT3MakeIdentity( VmathSoaTransform3 *result )
-{
-    vmathSoaV3MakeXAxis( &result->col0 );
-    vmathSoaV3MakeYAxis( &result->col1 );
-    vmathSoaV3MakeZAxis( &result->col2 );
-    vmathSoaV3MakeFromScalar( &result->col3, spu_splats(0.0f) );
-}
-
-static inline void vmathSoaT3SetUpper3x3( VmathSoaTransform3 *result, const VmathSoaMatrix3 *tfrm )
-{
-    vmathSoaV3Copy( &result->col0, &tfrm->col0 );
-    vmathSoaV3Copy( &result->col1, &tfrm->col1 );
-    vmathSoaV3Copy( &result->col2, &tfrm->col2 );
-}
-
-static inline void vmathSoaT3GetUpper3x3( VmathSoaMatrix3 *result, const VmathSoaTransform3 *tfrm )
-{
-    vmathSoaM3MakeFromCols( result, &tfrm->col0, &tfrm->col1, &tfrm->col2 );
-}
-
-static inline void vmathSoaT3SetTranslation( VmathSoaTransform3 *result, const VmathSoaVector3 *translateVec )
-{
-    vmathSoaV3Copy( &result->col3, translateVec );
-}
-
-static inline void vmathSoaT3GetTranslation( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm )
-{
-    vmathSoaV3Copy( result, &tfrm->col3 );
-}
-
-static inline void vmathSoaT3MakeRotationX( VmathSoaTransform3 *result, vec_float4 radians )
-{
-    vec_float4 s, c;
-    sincosf4( radians, &s, &c );
-    vmathSoaV3MakeXAxis( &result->col0 );
-    vmathSoaV3MakeFromElems( &result->col1, spu_splats(0.0f), c, s );
-    vmathSoaV3MakeFromElems( &result->col2, spu_splats(0.0f), negatef4( s ), c );
-    vmathSoaV3MakeFromScalar( &result->col3, spu_splats(0.0f) );
-}
-
-static inline void vmathSoaT3MakeRotationY( VmathSoaTransform3 *result, vec_float4 radians )
-{
-    vec_float4 s, c;
-    sincosf4( radians, &s, &c );
-    vmathSoaV3MakeFromElems( &result->col0, c, spu_splats(0.0f), negatef4( s ) );
-    vmathSoaV3MakeYAxis( &result->col1 );
-    vmathSoaV3MakeFromElems( &result->col2, s, spu_splats(0.0f), c );
-    vmathSoaV3MakeFromScalar( &result->col3, spu_splats(0.0f) );
-}
-
-static inline void vmathSoaT3MakeRotationZ( VmathSoaTransform3 *result, vec_float4 radians )
-{
-    vec_float4 s, c;
-    sincosf4( radians, &s, &c );
-    vmathSoaV3MakeFromElems( &result->col0, c, s, spu_splats(0.0f) );
-    vmathSoaV3MakeFromElems( &result->col1, negatef4( s ), c, spu_splats(0.0f) );
-    vmathSoaV3MakeZAxis( &result->col2 );
-    vmathSoaV3MakeFromScalar( &result->col3, spu_splats(0.0f) );
-}
-
-static inline void vmathSoaT3MakeRotationZYX( VmathSoaTransform3 *result, const VmathSoaVector3 *radiansXYZ )
-{
-    vec_float4 sX, cX, sY, cY, sZ, cZ, tmp0, tmp1;
-    sincosf4( radiansXYZ->x, &sX, &cX );
-    sincosf4( radiansXYZ->y, &sY, &cY );
-    sincosf4( radiansXYZ->z, &sZ, &cZ );
-    tmp0 = spu_mul( cZ, sY );
-    tmp1 = spu_mul( sZ, sY );
-    vmathSoaV3MakeFromElems( &result->col0, spu_mul( cZ, cY ), spu_mul( sZ, cY ), negatef4( sY ) );
-    vmathSoaV3MakeFromElems( &result->col1, spu_sub( spu_mul( tmp0, sX ), spu_mul( sZ, cX ) ), spu_add( spu_mul( tmp1, sX ), spu_mul( cZ, cX ) ), spu_mul( cY, sX ) );
-    vmathSoaV3MakeFromElems( &result->col2, spu_add( spu_mul( tmp0, cX ), spu_mul( sZ, sX ) ), spu_sub( spu_mul( tmp1, cX ), spu_mul( cZ, sX ) ), spu_mul( cY, cX ) );
-    vmathSoaV3MakeFromScalar( &result->col3, spu_splats(0.0f) );
-}
-
-static inline void vmathSoaT3MakeRotationAxis( VmathSoaTransform3 *result, vec_float4 radians, const VmathSoaVector3 *unitVec )
-{
-    VmathSoaMatrix3 tmpM3_0;
-    VmathSoaVector3 tmpV3_0;
-    vmathSoaM3MakeRotationAxis( &tmpM3_0, radians, unitVec );
-    vmathSoaV3MakeFromScalar( &tmpV3_0, spu_splats(0.0f) );
-    vmathSoaT3MakeFromM3V3( result, &tmpM3_0, &tmpV3_0 );
-}
-
-static inline void vmathSoaT3MakeRotationQ( VmathSoaTransform3 *result, const VmathSoaQuat *unitQuat )
-{
-    VmathSoaMatrix3 tmpM3_0;
-    VmathSoaVector3 tmpV3_0;
-    vmathSoaM3MakeFromQ( &tmpM3_0, unitQuat );
-    vmathSoaV3MakeFromScalar( &tmpV3_0, spu_splats(0.0f) );
-    vmathSoaT3MakeFromM3V3( result, &tmpM3_0, &tmpV3_0 );
-}
-
-static inline void vmathSoaT3MakeScale( VmathSoaTransform3 *result, const VmathSoaVector3 *scaleVec )
-{
-    vmathSoaV3MakeFromElems( &result->col0, scaleVec->x, spu_splats(0.0f), spu_splats(0.0f) );
-    vmathSoaV3MakeFromElems( &result->col1, spu_splats(0.0f), scaleVec->y, spu_splats(0.0f) );
-    vmathSoaV3MakeFromElems( &result->col2, spu_splats(0.0f), spu_splats(0.0f), scaleVec->z );
-    vmathSoaV3MakeFromScalar( &result->col3, spu_splats(0.0f) );
-}
-
-static inline void vmathSoaT3AppendScale( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm, const VmathSoaVector3 *scaleVec )
-{
-    vmathSoaV3ScalarMul( &result->col0, &tfrm->col0, vmathSoaV3GetX( scaleVec ) );
-    vmathSoaV3ScalarMul( &result->col1, &tfrm->col1, vmathSoaV3GetY( scaleVec ) );
-    vmathSoaV3ScalarMul( &result->col2, &tfrm->col2, vmathSoaV3GetZ( scaleVec ) );
-    vmathSoaV3Copy( &result->col3, &tfrm->col3 );
-}
-
-static inline void vmathSoaT3PrependScale( VmathSoaTransform3 *result, const VmathSoaVector3 *scaleVec, const VmathSoaTransform3 *tfrm )
-{
-    vmathSoaV3MulPerElem( &result->col0, &tfrm->col0, scaleVec );
-    vmathSoaV3MulPerElem( &result->col1, &tfrm->col1, scaleVec );
-    vmathSoaV3MulPerElem( &result->col2, &tfrm->col2, scaleVec );
-    vmathSoaV3MulPerElem( &result->col3, &tfrm->col3, scaleVec );
-}
-
-static inline void vmathSoaT3MakeTranslation( VmathSoaTransform3 *result, const VmathSoaVector3 *translateVec )
-{
-    vmathSoaV3MakeXAxis( &result->col0 );
-    vmathSoaV3MakeYAxis( &result->col1 );
-    vmathSoaV3MakeZAxis( &result->col2 );
-    vmathSoaV3Copy( &result->col3, translateVec );
-}
-
-static inline void vmathSoaT3Select( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm0, const VmathSoaTransform3 *tfrm1, vec_uint4 select1 )
-{
-    vmathSoaV3Select( &result->col0, &tfrm0->col0, &tfrm1->col0, select1 );
-    vmathSoaV3Select( &result->col1, &tfrm0->col1, &tfrm1->col1, select1 );
-    vmathSoaV3Select( &result->col2, &tfrm0->col2, &tfrm1->col2, select1 );
-    vmathSoaV3Select( &result->col3, &tfrm0->col3, &tfrm1->col3, select1 );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathSoaT3Print( const VmathSoaTransform3 *tfrm )
-{
-    VmathTransform3 mat0, mat1, mat2, mat3;
-    vmathSoaT3Get4Aos( tfrm, &mat0, &mat1, &mat2, &mat3 );
-    printf("slot 0:\n");
-    vmathT3Print( &mat0 );
-    printf("slot 1:\n");
-    vmathT3Print( &mat1 );
-    printf("slot 2:\n");
-    vmathT3Print( &mat2 );
-    printf("slot 3:\n");
-    vmathT3Print( &mat3 );
-}
-
-static inline void vmathSoaT3Prints( const VmathSoaTransform3 *tfrm, const char *name )
-{
-    printf("%s:\n", name);
-    vmathSoaT3Print( tfrm );
-}
-
-#endif
-
-static inline void vmathSoaQMakeFromM3( VmathSoaQuat *result, const VmathSoaMatrix3 *tfrm )
-{
-    vec_float4 trace, radicand, scale, xx, yx, zx, xy, yy, zy, xz, yz, zz, tmpx, tmpy, tmpz, tmpw, qx, qy, qz, qw;
-    vec_uint4 negTrace, ZgtX, ZgtY, YgtX;
-    vec_uint4 largestXorY, largestYorZ, largestZorX;
-
-    xx = tfrm->col0.x;
-    yx = tfrm->col0.y;
-    zx = tfrm->col0.z;
-    xy = tfrm->col1.x;
-    yy = tfrm->col1.y;
-    zy = tfrm->col1.z;
-    xz = tfrm->col2.x;
-    yz = tfrm->col2.y;
-    zz = tfrm->col2.z;
-
-    trace = spu_add( spu_add( xx, yy ), zz );
-
-    negTrace = spu_cmpgt( spu_splats(0.0f), trace );
-    ZgtX = spu_cmpgt( zz, xx );
-    ZgtY = spu_cmpgt( zz, yy );
-    YgtX = spu_cmpgt( yy, xx );
-    largestXorY = spu_and( negTrace, spu_nand( ZgtX, ZgtY ) );
-    largestYorZ = spu_and( negTrace, spu_or( YgtX, ZgtX ) );
-    largestZorX = spu_and( negTrace, spu_orc( ZgtY, YgtX ) );
-    
-    zz = spu_sel( zz, negatef4(zz), largestXorY );
-    xy = spu_sel( xy, negatef4(xy), largestXorY );
-    xx = spu_sel( xx, negatef4(xx), largestYorZ );
-    yz = spu_sel( yz, negatef4(yz), largestYorZ );
-    yy = spu_sel( yy, negatef4(yy), largestZorX );
-    zx = spu_sel( zx, negatef4(zx), largestZorX );
-
-    radicand = spu_add( spu_add( spu_add( xx, yy ), zz ), spu_splats(1.0f) );
-    scale = spu_mul( spu_splats(0.5f), rsqrtf4( radicand ) );
-
-    tmpx = spu_mul( spu_sub( zy, yz ), scale );
-    tmpy = spu_mul( spu_sub( xz, zx ), scale );
-    tmpz = spu_mul( spu_sub( yx, xy ), scale );
-    tmpw = spu_mul( radicand, scale );
-    qx = tmpx;
-    qy = tmpy;
-    qz = tmpz;
-    qw = tmpw;
-
-    qx = spu_sel( qx, tmpw, largestXorY );
-    qy = spu_sel( qy, tmpz, largestXorY );
-    qz = spu_sel( qz, tmpy, largestXorY );
-    qw = spu_sel( qw, tmpx, largestXorY );
-    tmpx = qx;
-    tmpz = qz;
-    qx = spu_sel( qx, qy, largestYorZ );
-    qy = spu_sel( qy, tmpx, largestYorZ );
-    qz = spu_sel( qz, qw, largestYorZ );
-    qw = spu_sel( qw, tmpz, largestYorZ );
-
-    result->x = qx;
-    result->y = qy;
-    result->z = qz;
-    result->w = qw;
-}
-
-static inline void vmathSoaV3Outer( VmathSoaMatrix3 *result, const VmathSoaVector3 *tfrm0, const VmathSoaVector3 *tfrm1 )
-{
-    vmathSoaV3ScalarMul( &result->col0, tfrm0, vmathSoaV3GetX( tfrm1 ) );
-    vmathSoaV3ScalarMul( &result->col1, tfrm0, vmathSoaV3GetY( tfrm1 ) );
-    vmathSoaV3ScalarMul( &result->col2, tfrm0, vmathSoaV3GetZ( tfrm1 ) );
-}
-
-static inline void vmathSoaV4Outer( VmathSoaMatrix4 *result, const VmathSoaVector4 *tfrm0, const VmathSoaVector4 *tfrm1 )
-{
-    vmathSoaV4ScalarMul( &result->col0, tfrm0, vmathSoaV4GetX( tfrm1 ) );
-    vmathSoaV4ScalarMul( &result->col1, tfrm0, vmathSoaV4GetY( tfrm1 ) );
-    vmathSoaV4ScalarMul( &result->col2, tfrm0, vmathSoaV4GetZ( tfrm1 ) );
-    vmathSoaV4ScalarMul( &result->col3, tfrm0, vmathSoaV4GetW( tfrm1 ) );
-}
-
-static inline void vmathSoaV3RowMul( VmathSoaVector3 *result, const VmathSoaVector3 *vec, const VmathSoaMatrix3 *mat )
-{
-    vec_float4 tmpX, tmpY, tmpZ;
-    tmpX = spu_add( spu_add( spu_mul( vec->x, mat->col0.x ), spu_mul( vec->y, mat->col0.y ) ), spu_mul( vec->z, mat->col0.z ) );
-    tmpY = spu_add( spu_add( spu_mul( vec->x, mat->col1.x ), spu_mul( vec->y, mat->col1.y ) ), spu_mul( vec->z, mat->col1.z ) );
-    tmpZ = spu_add( spu_add( spu_mul( vec->x, mat->col2.x ), spu_mul( vec->y, mat->col2.y ) ), spu_mul( vec->z, mat->col2.z ) );
-    vmathSoaV3MakeFromElems( result, tmpX, tmpY, tmpZ );
-}
-
-static inline void vmathSoaV3CrossMatrix( VmathSoaMatrix3 *result, const VmathSoaVector3 *vec )
-{
-    vmathSoaV3MakeFromElems( &result->col0, spu_splats(0.0f), vec->z, negatef4( vec->y ) );
-    vmathSoaV3MakeFromElems( &result->col1, negatef4( vec->z ), spu_splats(0.0f), vec->x );
-    vmathSoaV3MakeFromElems( &result->col2, vec->y, negatef4( vec->x ), spu_splats(0.0f) );
-}
-
-static inline void vmathSoaV3CrossMatrixMul( VmathSoaMatrix3 *result, const VmathSoaVector3 *vec, const VmathSoaMatrix3 *mat )
-{
-    VmathSoaVector3 tmpV3_0, tmpV3_1, tmpV3_2;
-    vmathSoaV3Cross( &tmpV3_0, vec, &mat->col0 );
-    vmathSoaV3Cross( &tmpV3_1, vec, &mat->col1 );
-    vmathSoaV3Cross( &tmpV3_2, vec, &mat->col2 );
-    vmathSoaM3MakeFromCols( result, &tmpV3_0, &tmpV3_1, &tmpV3_2 );
-}
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_MAT_SOA_C_H
+#define _VECTORMATH_MAT_SOA_C_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*-----------------------------------------------------------------------------
+ * Constants
+ */
+#define _VECTORMATH_PI_OVER_2 1.570796327f
+
+/*-----------------------------------------------------------------------------
+ * Definitions
+ */
+static inline void vmathSoaM3Copy( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat )
+{   /* Copy mat into result one column at a time (col0, col1, col2). */
+    vmathSoaV3Copy( &result->col0, &mat->col0 );
+    vmathSoaV3Copy( &result->col1, &mat->col1 );
+    vmathSoaV3Copy( &result->col2, &mat->col2 );
+}
+
+static inline void vmathSoaM3MakeFromScalar( VmathSoaMatrix3 *result, vec_float4 scalar )
+{   /* Initialise all three columns of result from the same scalar. */
+    vmathSoaV3MakeFromScalar( &result->col0, scalar );
+    vmathSoaV3MakeFromScalar( &result->col1, scalar );
+    vmathSoaV3MakeFromScalar( &result->col2, scalar );
+}
+
+static inline void vmathSoaM3MakeFromQ( VmathSoaMatrix3 *result, const VmathSoaQuat *unitQuat )
+{   /* Build a 3x3 matrix from the quaternion unitQuat; a local named ab2 holds 2*a*b. */
+    vec_float4 x, y, z, w, x2, y2, z2, xx2, yy2, zz2, xy2, yz2, zw2, xz2, yw2, xw2;
+    w = unitQuat->w;
+    z = unitQuat->z;
+    y = unitQuat->y;
+    x = unitQuat->x;
+    z2 = spu_add( z, z );
+    y2 = spu_add( y, y );
+    x2 = spu_add( x, x );
+    zw2 = spu_mul( w, z2 );
+    yw2 = spu_mul( w, y2 );
+    xw2 = spu_mul( w, x2 );
+    zz2 = spu_mul( z, z2 );
+    yz2 = spu_mul( y, z2 );
+    yy2 = spu_mul( y, y2 );
+    xz2 = spu_mul( x, z2 );
+    xy2 = spu_mul( x, y2 );
+    xx2 = spu_mul( x, x2 );
+    vmathSoaV3MakeFromElems( &result->col0, spu_sub( spu_sub( spu_splats(1.0f), yy2 ), zz2 ), spu_add( xy2, zw2 ), spu_sub( xz2, yw2 ) );
+    vmathSoaV3MakeFromElems( &result->col1, spu_sub( xy2, zw2 ), spu_sub( spu_sub( spu_splats(1.0f), xx2 ), zz2 ), spu_add( yz2, xw2 ) );
+    vmathSoaV3MakeFromElems( &result->col2, spu_add( xz2, yw2 ), spu_sub( yz2, xw2 ), spu_sub( spu_sub( spu_splats(1.0f), xx2 ), yy2 ) );
+}
+
+static inline void vmathSoaM3MakeFromCols( VmathSoaMatrix3 *result, const VmathSoaVector3 *_col0, const VmathSoaVector3 *_col1, const VmathSoaVector3 *_col2 )
+{   /* Assemble result from three column vectors, copied in order col0, col1, col2. */
+    vmathSoaV3Copy( &result->col0, _col0 );
+    vmathSoaV3Copy( &result->col1, _col1 );
+    vmathSoaV3Copy( &result->col2, _col2 );
+}
+
+static inline void vmathSoaM3MakeFromAos( VmathSoaMatrix3 *result, const VmathMatrix3 *mat )
+{   /* Convert one AoS matrix to SoA form by passing each column to vmathSoaV3MakeFromAos. */
+    vmathSoaV3MakeFromAos( &result->col0, &mat->col0 );
+    vmathSoaV3MakeFromAos( &result->col1, &mat->col1 );
+    vmathSoaV3MakeFromAos( &result->col2, &mat->col2 );
+}
+
+static inline void vmathSoaM3MakeFrom4Aos( VmathSoaMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1, const VmathMatrix3 *mat2, const VmathMatrix3 *mat3 )
+{   /* Build each SoA column from the matching column of the four AoS matrices mat0..mat3. */
+    vmathSoaV3MakeFrom4Aos( &result->col0, &mat0->col0, &mat1->col0, &mat2->col0, &mat3->col0 );
+    vmathSoaV3MakeFrom4Aos( &result->col1, &mat0->col1, &mat1->col1, &mat2->col1, &mat3->col1 );
+    vmathSoaV3MakeFrom4Aos( &result->col2, &mat0->col2, &mat1->col2, &mat2->col2, &mat3->col2 );
+}
+
+static inline void vmathSoaM3Get4Aos( const VmathSoaMatrix3 *mat, VmathMatrix3 *result0, VmathMatrix3 *result1, VmathMatrix3 *result2, VmathMatrix3 *result3 )
+{   /* Split the SoA matrix into four AoS matrices, one column at a time via vmathSoaV3Get4Aos. */
+    vmathSoaV3Get4Aos( &mat->col0, &result0->col0, &result1->col0, &result2->col0, &result3->col0 );
+    vmathSoaV3Get4Aos( &mat->col1, &result0->col1, &result1->col1, &result2->col1, &result3->col1 );
+    vmathSoaV3Get4Aos( &mat->col2, &result0->col2, &result1->col2, &result2->col2, &result3->col2 );
+}
+
+static inline void vmathSoaM3SetCol0( VmathSoaMatrix3 *result, const VmathSoaVector3 *_col0 )
+{   /* Overwrite column 0 of result with _col0; other columns are untouched. */
+    vmathSoaV3Copy( &result->col0, _col0 );
+}
+
+static inline void vmathSoaM3SetCol1( VmathSoaMatrix3 *result, const VmathSoaVector3 *_col1 )
+{   /* Overwrite column 1 of result with _col1; other columns are untouched. */
+    vmathSoaV3Copy( &result->col1, _col1 );
+}
+
+static inline void vmathSoaM3SetCol2( VmathSoaMatrix3 *result, const VmathSoaVector3 *_col2 )
+{   /* Overwrite column 2 of result with _col2; other columns are untouched. */
+    vmathSoaV3Copy( &result->col2, _col2 );
+}
+
+static inline void vmathSoaM3SetCol( VmathSoaMatrix3 *result, int col, const VmathSoaVector3 *vec )
+{   /* Overwrite column `col`, addressed as an offset from col0; col is not range-checked here. */
+    vmathSoaV3Copy( (&result->col0 + col), vec );   /* NOTE(review): relies on col0..col2 being contiguous in the struct - confirm */
+}
+
+static inline void vmathSoaM3SetRow( VmathSoaMatrix3 *result, int row, const VmathSoaVector3 *vec )
+{   /* Write row `row`: element i of vec goes into element `row` of column i. */
+    vmathSoaV3SetElem( &result->col0, row, vmathSoaV3GetElem( vec, 0 ) );
+    vmathSoaV3SetElem( &result->col1, row, vmathSoaV3GetElem( vec, 1 ) );
+    vmathSoaV3SetElem( &result->col2, row, vmathSoaV3GetElem( vec, 2 ) );
+}
+
+static inline void vmathSoaM3SetElem( VmathSoaMatrix3 *result, int col, int row, vec_float4 val )
+{   /* Read-modify-write: fetch column `col`, replace its element `row` with val, store it back. */
+    VmathSoaVector3 column;
+    vmathSoaM3GetCol( &column, result, col );
+    vmathSoaV3SetElem( &column, row, val );
+    vmathSoaM3SetCol( result, col, &column );
+}
+
+static inline vec_float4 vmathSoaM3GetElem( const VmathSoaMatrix3 *mat, int col, int row )
+{   /* Return element `row` of column `col`, read through a local copy of that column. */
+    VmathSoaVector3 column;
+    vmathSoaM3GetCol( &column, mat, col );
+    return vmathSoaV3GetElem( &column, row );
+}
+
+static inline void vmathSoaM3GetCol0( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat )
+{   /* Copy column 0 of mat into result. */
+    vmathSoaV3Copy( result, &mat->col0 );
+}
+
+static inline void vmathSoaM3GetCol1( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat )
+{   /* Copy column 1 of mat into result. */
+    vmathSoaV3Copy( result, &mat->col1 );
+}
+
+static inline void vmathSoaM3GetCol2( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat )
+{   /* Copy column 2 of mat into result. */
+    vmathSoaV3Copy( result, &mat->col2 );
+}
+
+static inline void vmathSoaM3GetCol( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat, int col )
+{   /* Copy column `col` of mat into result; col is used as an offset from col0 and is not range-checked here. */
+    vmathSoaV3Copy( result, (&mat->col0 + col) );   /* NOTE(review): relies on col0..col2 being contiguous in the struct - confirm */
+}
+
+static inline void vmathSoaM3GetRow( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat, int row )
+{   /* Gather row `row`: element `row` of col0, col1 and col2 become x, y and z of result. */
+    vmathSoaV3MakeFromElems( result, vmathSoaV3GetElem( &mat->col0, row ), vmathSoaV3GetElem( &mat->col1, row ), vmathSoaV3GetElem( &mat->col2, row ) );
+}
+
+static inline void vmathSoaM3Transpose( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat )
+{   /* Transpose mat; rows are staged in locals first so that result may be the same object as mat. */
+    VmathSoaVector3 row0, row1, row2;
+    vmathSoaV3MakeFromElems( &row0, mat->col0.x, mat->col1.x, mat->col2.x );
+    vmathSoaV3MakeFromElems( &row1, mat->col0.y, mat->col1.y, mat->col2.y );
+    vmathSoaV3MakeFromElems( &row2, mat->col0.z, mat->col1.z, mat->col2.z );
+    vmathSoaM3MakeFromCols( result, &row0, &row1, &row2 );
+}
+
+static inline void vmathSoaM3Inverse( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat )
+{   /* Inverse built from column cross products scaled by 1/det; a zero determinant is not checked. */
+    VmathSoaVector3 c12, c20, c01;
+    vec_float4 invDet;
+    vmathSoaV3Cross( &c12, &mat->col1, &mat->col2 );
+    vmathSoaV3Cross( &c20, &mat->col2, &mat->col0 );
+    vmathSoaV3Cross( &c01, &mat->col0, &mat->col1 );
+    invDet = recipf4( vmathSoaV3Dot( &mat->col2, &c01 ) );   /* det = col2 . (col0 x col1) */
+    vmathSoaV3MakeFromElems( &result->col0, spu_mul( c12.x, invDet ), spu_mul( c20.x, invDet ), spu_mul( c01.x, invDet ) );
+    vmathSoaV3MakeFromElems( &result->col1, spu_mul( c12.y, invDet ), spu_mul( c20.y, invDet ), spu_mul( c01.y, invDet ) );
+    vmathSoaV3MakeFromElems( &result->col2, spu_mul( c12.z, invDet ), spu_mul( c20.z, invDet ), spu_mul( c01.z, invDet ) );
+}
+
+static inline vec_float4 vmathSoaM3Determinant( const VmathSoaMatrix3 *mat )
+{   /* Determinant as the scalar triple product col2 . (col0 x col1). */
+    VmathSoaVector3 c01;
+    vmathSoaV3Cross( &c01, &mat->col0, &mat->col1 );
+    return vmathSoaV3Dot( &mat->col2, &c01 );
+}
+
+static inline void vmathSoaM3Add( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1 )
+{   /* result = mat0 + mat1, computed column by column. */
+    vmathSoaV3Add( &result->col0, &mat0->col0, &mat1->col0 );
+    vmathSoaV3Add( &result->col1, &mat0->col1, &mat1->col1 );
+    vmathSoaV3Add( &result->col2, &mat0->col2, &mat1->col2 );
+}
+
+static inline void vmathSoaM3Sub( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1 )
+{   /* result = mat0 - mat1, computed column by column. */
+    vmathSoaV3Sub( &result->col0, &mat0->col0, &mat1->col0 );
+    vmathSoaV3Sub( &result->col1, &mat0->col1, &mat1->col1 );
+    vmathSoaV3Sub( &result->col2, &mat0->col2, &mat1->col2 );
+}
+
+static inline void vmathSoaM3Neg( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat )
+{   /* result = -mat, computed column by column. */
+    vmathSoaV3Neg( &result->col0, &mat->col0 );
+    vmathSoaV3Neg( &result->col1, &mat->col1 );
+    vmathSoaV3Neg( &result->col2, &mat->col2 );
+}
+
+static inline void vmathSoaM3AbsPerElem( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat )
+{   /* Apply vmathSoaV3AbsPerElem to each column of mat and store the results in result. */
+    vmathSoaV3AbsPerElem( &result->col0, &mat->col0 );
+    vmathSoaV3AbsPerElem( &result->col1, &mat->col1 );
+    vmathSoaV3AbsPerElem( &result->col2, &mat->col2 );
+}
+
+static inline void vmathSoaM3ScalarMul( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat, vec_float4 scalar )
+{   /* result = mat * scalar; every column is scaled by the same scalar. */
+    vmathSoaV3ScalarMul( &result->col0, &mat->col0, scalar );
+    vmathSoaV3ScalarMul( &result->col1, &mat->col1, scalar );
+    vmathSoaV3ScalarMul( &result->col2, &mat->col2, scalar );
+}
+
+static inline void vmathSoaM3MulV3( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat, const VmathSoaVector3 *vec )
+{   /* result = mat * vec; all three sums are formed before result is written, so result may alias vec. */
+    vec_float4 outX, outY, outZ;
+    outX = spu_add( spu_add( spu_mul( mat->col0.x, vec->x ), spu_mul( mat->col1.x, vec->y ) ), spu_mul( mat->col2.x, vec->z ) );
+    outY = spu_add( spu_add( spu_mul( mat->col0.y, vec->x ), spu_mul( mat->col1.y, vec->y ) ), spu_mul( mat->col2.y, vec->z ) );
+    outZ = spu_add( spu_add( spu_mul( mat->col0.z, vec->x ), spu_mul( mat->col1.z, vec->y ) ), spu_mul( mat->col2.z, vec->z ) );
+    vmathSoaV3MakeFromElems( result, outX, outY, outZ );
+}
+
+static inline void vmathSoaM3Mul( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1 )
+{   /* result = mat0 * mat1; product columns are staged in locals so result may alias an input. */
+    VmathSoaVector3 prodCol0, prodCol1, prodCol2;
+    vmathSoaM3MulV3( &prodCol0, mat0, &mat1->col0 );
+    vmathSoaM3MulV3( &prodCol1, mat0, &mat1->col1 );
+    vmathSoaM3MulV3( &prodCol2, mat0, &mat1->col2 );
+    vmathSoaM3MakeFromCols( result, &prodCol0, &prodCol1, &prodCol2 );
+}
+
+static inline void vmathSoaM3MulPerElem( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1 )
+{   /* Element-wise product of mat0 and mat1 (not a matrix product), column by column. */
+    vmathSoaV3MulPerElem( &result->col0, &mat0->col0, &mat1->col0 );
+    vmathSoaV3MulPerElem( &result->col1, &mat0->col1, &mat1->col1 );
+    vmathSoaV3MulPerElem( &result->col2, &mat0->col2, &mat1->col2 );
+}
+
+static inline void vmathSoaM3MakeIdentity( VmathSoaMatrix3 *result )
+{   /* Set the columns of result to the X, Y and Z axis vectors respectively. */
+    vmathSoaV3MakeXAxis( &result->col0 );
+    vmathSoaV3MakeYAxis( &result->col1 );
+    vmathSoaV3MakeZAxis( &result->col2 );
+}
+
+static inline void vmathSoaM3MakeRotationX( VmathSoaMatrix3 *result, vec_float4 radians )
+{   /* Columns: X axis, (0, cos, sin), (0, -sin, cos), with sin/cos taken of `radians`. */
+    vec_float4 sinA, cosA;
+    sincosf4( radians, &sinA, &cosA );
+    vmathSoaV3MakeXAxis( &result->col0 );
+    vmathSoaV3MakeFromElems( &result->col1, spu_splats(0.0f), cosA, sinA );
+    vmathSoaV3MakeFromElems( &result->col2, spu_splats(0.0f), negatef4( sinA ), cosA );
+}
+
+static inline void vmathSoaM3MakeRotationY( VmathSoaMatrix3 *result, vec_float4 radians )
+{   /* Columns: (cos, 0, -sin), Y axis, (sin, 0, cos), with sin/cos taken of `radians`. */
+    vec_float4 sinA, cosA;
+    sincosf4( radians, &sinA, &cosA );
+    vmathSoaV3MakeFromElems( &result->col0, cosA, spu_splats(0.0f), negatef4( sinA ) );
+    vmathSoaV3MakeYAxis( &result->col1 );
+    vmathSoaV3MakeFromElems( &result->col2, sinA, spu_splats(0.0f), cosA );
+}
+
+static inline void vmathSoaM3MakeRotationZ( VmathSoaMatrix3 *result, vec_float4 radians )
+{   /* Columns: (cos, sin, 0), (-sin, cos, 0), Z axis, with sin/cos taken of `radians`. */
+    vec_float4 sinA, cosA;
+    sincosf4( radians, &sinA, &cosA );
+    vmathSoaV3MakeFromElems( &result->col0, cosA, sinA, spu_splats(0.0f) );
+    vmathSoaV3MakeFromElems( &result->col1, negatef4( sinA ), cosA, spu_splats(0.0f) );
+    vmathSoaV3MakeZAxis( &result->col2 );
+}
+
+static inline void vmathSoaM3MakeRotationZYX( VmathSoaMatrix3 *result, const VmathSoaVector3 *radiansXYZ )
+{   /* Rotation matrix from the three angles in radiansXYZ (x, y, z components), written out in closed form. */
+    vec_float4 sinX, cosX, sinY, cosY, sinZ, cosZ, cosZsinY, sinZsinY;
+    sincosf4( radiansXYZ->x, &sinX, &cosX );
+    sincosf4( radiansXYZ->y, &sinY, &cosY );
+    sincosf4( radiansXYZ->z, &sinZ, &cosZ );
+    cosZsinY = spu_mul( cosZ, sinY );   /* shared by col1.x and col2.x */
+    sinZsinY = spu_mul( sinZ, sinY );   /* shared by col1.y and col2.y */
+    vmathSoaV3MakeFromElems( &result->col0, spu_mul( cosZ, cosY ), spu_mul( sinZ, cosY ), negatef4( sinY ) );
+    vmathSoaV3MakeFromElems( &result->col1, spu_sub( spu_mul( cosZsinY, sinX ), spu_mul( sinZ, cosX ) ), spu_add( spu_mul( sinZsinY, sinX ), spu_mul( cosZ, cosX ) ), spu_mul( cosY, sinX ) );
+    vmathSoaV3MakeFromElems( &result->col2, spu_add( spu_mul( cosZsinY, cosX ), spu_mul( sinZ, sinX ) ), spu_sub( spu_mul( sinZsinY, cosX ), spu_mul( cosZ, sinX ) ), spu_mul( cosY, cosX ) );
+}
+
+static inline void vmathSoaM3MakeRotationAxis( VmathSoaMatrix3 *result, vec_float4 radians, const VmathSoaVector3 *unitVec )
+{   /* Axis-angle rotation about unitVec by `radians`; unitVec is used as given, it is not normalized here. */
+    vec_float4 ax, ay, az, sinA, cosA, oneMinusCos, axay, ayaz, azax;
+    ax = unitVec->x;
+    ay = unitVec->y;
+    az = unitVec->z;
+    sincosf4( radians, &sinA, &cosA );
+    oneMinusCos = spu_sub( spu_splats(1.0f), cosA );
+    axay = spu_mul( ax, ay );
+    ayaz = spu_mul( ay, az );
+    azax = spu_mul( az, ax );
+    vmathSoaV3MakeFromElems( &result->col0, spu_add( spu_mul( spu_mul( ax, ax ), oneMinusCos ), cosA ), spu_add( spu_mul( axay, oneMinusCos ), spu_mul( az, sinA ) ), spu_sub( spu_mul( azax, oneMinusCos ), spu_mul( ay, sinA ) ) );
+    vmathSoaV3MakeFromElems( &result->col1, spu_sub( spu_mul( axay, oneMinusCos ), spu_mul( az, sinA ) ), spu_add( spu_mul( spu_mul( ay, ay ), oneMinusCos ), cosA ), spu_add( spu_mul( ayaz, oneMinusCos ), spu_mul( ax, sinA ) ) );
+    vmathSoaV3MakeFromElems( &result->col2, spu_add( spu_mul( azax, oneMinusCos ), spu_mul( ay, sinA ) ), spu_sub( spu_mul( ayaz, oneMinusCos ), spu_mul( ax, sinA ) ), spu_add( spu_mul( spu_mul( az, az ), oneMinusCos ), cosA ) );
+}
+
+static inline void vmathSoaM3MakeRotationQ( VmathSoaMatrix3 *result, const VmathSoaQuat *unitQuat )
+{   /* Alias for vmathSoaM3MakeFromQ: builds the matrix from unitQuat. */
+    vmathSoaM3MakeFromQ( result, unitQuat );
+}
+
+static inline void vmathSoaM3MakeScale( VmathSoaMatrix3 *result, const VmathSoaVector3 *scaleVec )
+{   /* Diagonal matrix: scaleVec's x, y, z on the diagonal and 0.0f everywhere else. */
+    vmathSoaV3MakeFromElems( &result->col0, scaleVec->x, spu_splats(0.0f), spu_splats(0.0f) );
+    vmathSoaV3MakeFromElems( &result->col1, spu_splats(0.0f), scaleVec->y, spu_splats(0.0f) );
+    vmathSoaV3MakeFromElems( &result->col2, spu_splats(0.0f), spu_splats(0.0f), scaleVec->z );
+}
+
+static inline void vmathSoaM3AppendScale( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat, const VmathSoaVector3 *scaleVec )
+{   /* Scale column i of mat by component i of scaleVec (col0 by x, col1 by y, col2 by z). */
+    vmathSoaV3ScalarMul( &result->col0, &mat->col0, vmathSoaV3GetX( scaleVec ) );
+    vmathSoaV3ScalarMul( &result->col1, &mat->col1, vmathSoaV3GetY( scaleVec ) );
+    vmathSoaV3ScalarMul( &result->col2, &mat->col2, vmathSoaV3GetZ( scaleVec ) );
+}
+
+static inline void vmathSoaM3PrependScale( VmathSoaMatrix3 *result, const VmathSoaVector3 *scaleVec, const VmathSoaMatrix3 *mat )
+{   /* Multiply every column of mat element-wise by scaleVec, i.e. scale row i by component i. */
+    vmathSoaV3MulPerElem( &result->col0, &mat->col0, scaleVec );
+    vmathSoaV3MulPerElem( &result->col1, &mat->col1, scaleVec );
+    vmathSoaV3MulPerElem( &result->col2, &mat->col2, scaleVec );
+}
+
+static inline void vmathSoaM3Select( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1, vec_uint4 select1 )
+{   /* Column-wise vmathSoaV3Select between mat0 and mat1 using one mask; presumably set mask bits pick mat1 - confirm in vmathSoaV3Select. */
+    vmathSoaV3Select( &result->col0, &mat0->col0, &mat1->col0, select1 );
+    vmathSoaV3Select( &result->col1, &mat0->col1, &mat1->col1, select1 );
+    vmathSoaV3Select( &result->col2, &mat0->col2, &mat1->col2, select1 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathSoaM3Print( const VmathSoaMatrix3 *mat )
+{   /* Debug aid: unpack the SoA matrix into its four AoS slots and print each under a "slot N:" heading. */
+    VmathMatrix3 slot0, slot1, slot2, slot3;
+    vmathSoaM3Get4Aos( mat, &slot0, &slot1, &slot2, &slot3 );
+    printf("slot 0:\n");
+    vmathM3Print( &slot0 );
+    printf("slot 1:\n");
+    vmathM3Print( &slot1 );
+    printf("slot 2:\n");
+    vmathM3Print( &slot2 );
+    printf("slot 3:\n");
+    vmathM3Print( &slot3 );
+}
+
+static inline void vmathSoaM3Prints( const VmathSoaMatrix3 *mat, const char *name )
+{   /* Debug aid: print "name:" on its own line, then the matrix via vmathSoaM3Print. */
+    printf("%s:\n", name);
+    vmathSoaM3Print( mat );
+}
+
+#endif
+
+static inline void vmathSoaM4Copy( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat )
+{   /* Copy mat into result one column at a time (col0..col3). */
+    vmathSoaV4Copy( &result->col0, &mat->col0 );
+    vmathSoaV4Copy( &result->col1, &mat->col1 );
+    vmathSoaV4Copy( &result->col2, &mat->col2 );
+    vmathSoaV4Copy( &result->col3, &mat->col3 );
+}
+
+static inline void vmathSoaM4MakeFromScalar( VmathSoaMatrix4 *result, vec_float4 scalar )
+{   /* Initialise all four columns of result from the same scalar. */
+    vmathSoaV4MakeFromScalar( &result->col0, scalar );
+    vmathSoaV4MakeFromScalar( &result->col1, scalar );
+    vmathSoaV4MakeFromScalar( &result->col2, scalar );
+    vmathSoaV4MakeFromScalar( &result->col3, scalar );
+}
+
+static inline void vmathSoaM4MakeFromT3( VmathSoaMatrix4 *result, const VmathSoaTransform3 *mat )
+{   /* Widen a 3x4 transform to 4x4: each column gains a fourth element, 0 for col0..col2 and 1 for col3. */
+    vmathSoaV4MakeFromV3Scalar( &result->col0, &mat->col0, spu_splats(0.0f) );
+    vmathSoaV4MakeFromV3Scalar( &result->col1, &mat->col1, spu_splats(0.0f) );
+    vmathSoaV4MakeFromV3Scalar( &result->col2, &mat->col2, spu_splats(0.0f) );
+    vmathSoaV4MakeFromV3Scalar( &result->col3, &mat->col3, spu_splats(1.0f) );
+}
+
+static inline void vmathSoaM4MakeFromCols( VmathSoaMatrix4 *result, const VmathSoaVector4 *_col0, const VmathSoaVector4 *_col1, const VmathSoaVector4 *_col2, const VmathSoaVector4 *_col3 )
+{   /* Assemble result from four column vectors, copied in order col0..col3. */
+    vmathSoaV4Copy( &result->col0, _col0 );
+    vmathSoaV4Copy( &result->col1, _col1 );
+    vmathSoaV4Copy( &result->col2, _col2 );
+    vmathSoaV4Copy( &result->col3, _col3 );
+}
+
+static inline void vmathSoaM4MakeFromM3V3( VmathSoaMatrix4 *result, const VmathSoaMatrix3 *mat, const VmathSoaVector3 *translateVec )
+{   /* 4x4 from a 3x3 part and a translation: mat's columns get a fourth element of 0, translateVec becomes col3 with 1. */
+    vmathSoaV4MakeFromV3Scalar( &result->col0, &mat->col0, spu_splats(0.0f) );
+    vmathSoaV4MakeFromV3Scalar( &result->col1, &mat->col1, spu_splats(0.0f) );
+    vmathSoaV4MakeFromV3Scalar( &result->col2, &mat->col2, spu_splats(0.0f) );
+    vmathSoaV4MakeFromV3Scalar( &result->col3, translateVec, spu_splats(1.0f) );
+}
+
+static inline void vmathSoaM4MakeFromQV3( VmathSoaMatrix4 *result, const VmathSoaQuat *unitQuat, const VmathSoaVector3 *translateVec )
+{   /* 4x4 from a quaternion and a translation: the quaternion is first expanded to a 3x3 matrix. */
+    VmathSoaMatrix3 rot3x3;
+    vmathSoaM3MakeFromQ( &rot3x3, unitQuat );
+    vmathSoaV4MakeFromV3Scalar( &result->col0, &rot3x3.col0, spu_splats(0.0f) );
+    vmathSoaV4MakeFromV3Scalar( &result->col1, &rot3x3.col1, spu_splats(0.0f) );
+    vmathSoaV4MakeFromV3Scalar( &result->col2, &rot3x3.col2, spu_splats(0.0f) );
+    vmathSoaV4MakeFromV3Scalar( &result->col3, translateVec, spu_splats(1.0f) );
+}
+
+static inline void vmathSoaM4MakeFromAos( VmathSoaMatrix4 *result, const VmathMatrix4 *mat )
+{   /* Convert one AoS matrix to SoA form by passing each column to vmathSoaV4MakeFromAos. */
+    vmathSoaV4MakeFromAos( &result->col0, &mat->col0 );
+    vmathSoaV4MakeFromAos( &result->col1, &mat->col1 );
+    vmathSoaV4MakeFromAos( &result->col2, &mat->col2 );
+    vmathSoaV4MakeFromAos( &result->col3, &mat->col3 );
+}
+
+static inline void vmathSoaM4MakeFrom4Aos( VmathSoaMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1, const VmathMatrix4 *mat2, const VmathMatrix4 *mat3 )
+{   /* Build each SoA column from the matching column of the four AoS matrices mat0..mat3. */
+    vmathSoaV4MakeFrom4Aos( &result->col0, &mat0->col0, &mat1->col0, &mat2->col0, &mat3->col0 );
+    vmathSoaV4MakeFrom4Aos( &result->col1, &mat0->col1, &mat1->col1, &mat2->col1, &mat3->col1 );
+    vmathSoaV4MakeFrom4Aos( &result->col2, &mat0->col2, &mat1->col2, &mat2->col2, &mat3->col2 );
+    vmathSoaV4MakeFrom4Aos( &result->col3, &mat0->col3, &mat1->col3, &mat2->col3, &mat3->col3 );
+}
+
+static inline void vmathSoaM4Get4Aos( const VmathSoaMatrix4 *mat, VmathMatrix4 *result0, VmathMatrix4 *result1, VmathMatrix4 *result2, VmathMatrix4 *result3 )
+{   /* Split the SoA matrix into four AoS matrices, one column at a time via vmathSoaV4Get4Aos. */
+    vmathSoaV4Get4Aos( &mat->col0, &result0->col0, &result1->col0, &result2->col0, &result3->col0 );
+    vmathSoaV4Get4Aos( &mat->col1, &result0->col1, &result1->col1, &result2->col1, &result3->col1 );
+    vmathSoaV4Get4Aos( &mat->col2, &result0->col2, &result1->col2, &result2->col2, &result3->col2 );
+    vmathSoaV4Get4Aos( &mat->col3, &result0->col3, &result1->col3, &result2->col3, &result3->col3 );
+}
+
+static inline void vmathSoaM4SetCol0( VmathSoaMatrix4 *result, const VmathSoaVector4 *_col0 )
+{   /* Overwrite column 0 of result with _col0; other columns are untouched. */
+    vmathSoaV4Copy( &result->col0, _col0 );
+}
+
+static inline void vmathSoaM4SetCol1( VmathSoaMatrix4 *result, const VmathSoaVector4 *_col1 )
+{   /* Overwrite column 1 of result with _col1; other columns are untouched. */
+    vmathSoaV4Copy( &result->col1, _col1 );
+}
+
+static inline void vmathSoaM4SetCol2( VmathSoaMatrix4 *result, const VmathSoaVector4 *_col2 )
+{   /* Overwrite column 2 of result with _col2; other columns are untouched. */
+    vmathSoaV4Copy( &result->col2, _col2 );
+}
+
+static inline void vmathSoaM4SetCol3( VmathSoaMatrix4 *result, const VmathSoaVector4 *_col3 )
+{   /* Overwrite column 3 of result with _col3; other columns are untouched. */
+    vmathSoaV4Copy( &result->col3, _col3 );
+}
+
+static inline void vmathSoaM4SetCol( VmathSoaMatrix4 *result, int col, const VmathSoaVector4 *vec )
+{   /* Overwrite column `col`, addressed as an offset from col0; col is not range-checked here. */
+    vmathSoaV4Copy( (&result->col0 + col), vec );   /* NOTE(review): relies on col0..col3 being contiguous in the struct - confirm */
+}
+
+static inline void vmathSoaM4SetRow( VmathSoaMatrix4 *result, int row, const VmathSoaVector4 *vec )
+{   /* Write row `row`: element i of vec goes into element `row` of column i. */
+    vmathSoaV4SetElem( &result->col0, row, vmathSoaV4GetElem( vec, 0 ) );
+    vmathSoaV4SetElem( &result->col1, row, vmathSoaV4GetElem( vec, 1 ) );
+    vmathSoaV4SetElem( &result->col2, row, vmathSoaV4GetElem( vec, 2 ) );
+    vmathSoaV4SetElem( &result->col3, row, vmathSoaV4GetElem( vec, 3 ) );
+}
+
+static inline void vmathSoaM4SetElem( VmathSoaMatrix4 *result, int col, int row, vec_float4 val )
+{   /* Read-modify-write: fetch column `col`, replace its element `row` with val, store it back. */
+    VmathSoaVector4 column;
+    vmathSoaM4GetCol( &column, result, col );
+    vmathSoaV4SetElem( &column, row, val );
+    vmathSoaM4SetCol( result, col, &column );
+}
+
+static inline vec_float4 vmathSoaM4GetElem( const VmathSoaMatrix4 *mat, int col, int row )
+{   /* Return element `row` of column `col`, read through a local copy of that column. */
+    VmathSoaVector4 column;
+    vmathSoaM4GetCol( &column, mat, col );
+    return vmathSoaV4GetElem( &column, row );
+}
+
+static inline void vmathSoaM4GetCol0( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat )
+{   /* Copy column 0 of mat into result. */
+    vmathSoaV4Copy( result, &mat->col0 );
+}
+
+static inline void vmathSoaM4GetCol1( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat )
+{   /* Copy column 1 of mat into result. */
+    vmathSoaV4Copy( result, &mat->col1 );
+}
+
+static inline void vmathSoaM4GetCol2( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat )
+{   /* Copy column 2 of mat into result. */
+    vmathSoaV4Copy( result, &mat->col2 );
+}
+
+static inline void vmathSoaM4GetCol3( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat )
+{   /* Copy column 3 of mat into result. */
+    vmathSoaV4Copy( result, &mat->col3 );
+}
+
+static inline void vmathSoaM4GetCol( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, int col )
+{   /* Copy column `col` of mat into result; col is used as an offset from col0 and is not range-checked here. */
+    vmathSoaV4Copy( result, (&mat->col0 + col) );   /* NOTE(review): relies on col0..col3 being contiguous in the struct - confirm */
+}
+
+static inline void vmathSoaM4GetRow( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, int row )
+{   /* Gather row `row`: element `row` of col0..col3 become x, y, z and w of result. */
+    vmathSoaV4MakeFromElems( result, vmathSoaV4GetElem( &mat->col0, row ), vmathSoaV4GetElem( &mat->col1, row ), vmathSoaV4GetElem( &mat->col2, row ), vmathSoaV4GetElem( &mat->col3, row ) );
+}
+
+static inline void vmathSoaM4Transpose( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat )
+{   /* Transpose mat; rows are staged in locals first so that result may be the same object as mat. */
+    VmathSoaVector4 row0, row1, row2, row3;
+    vmathSoaV4MakeFromElems( &row0, mat->col0.x, mat->col1.x, mat->col2.x, mat->col3.x );
+    vmathSoaV4MakeFromElems( &row1, mat->col0.y, mat->col1.y, mat->col2.y, mat->col3.y );
+    vmathSoaV4MakeFromElems( &row2, mat->col0.z, mat->col1.z, mat->col2.z, mat->col3.z );
+    vmathSoaV4MakeFromElems( &row3, mat->col0.w, mat->col1.w, mat->col2.w, mat->col3.w );
+    vmathSoaM4MakeFromCols( result, &row0, &row1, &row2, &row3 );
+}
+
+static inline void vmathSoaM4Inverse( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat )
+{   /* General 4x4 inverse: builds four unscaled columns res0..res3, then scales each by the reciprocal determinant. NOTE(review): no zero-determinant check is made before recipf4. */
+    VmathSoaVector4 res0, res1, res2, res3;
+    vec_float4 mA, mB, mC, mD, mE, mF, mG, mH, mI, mJ, mK, mL, mM, mN, mO, mP, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, detInv;
+    mA = mat->col0.x; /* mA..mP: the 16 elements in storage order, col0 = (A,B,C,D) ... col3 = (M,N,O,P) */
+    mB = mat->col0.y;
+    mC = mat->col0.z;
+    mD = mat->col0.w;
+    mE = mat->col1.x;
+    mF = mat->col1.y;
+    mG = mat->col1.z;
+    mH = mat->col1.w;
+    mI = mat->col2.x;
+    mJ = mat->col2.y;
+    mK = mat->col2.z;
+    mL = mat->col2.w;
+    mM = mat->col3.x;
+    mN = mat->col3.y;
+    mO = mat->col3.z;
+    mP = mat->col3.w;
+    tmp0 = spu_sub( spu_mul( mK, mD ), spu_mul( mC, mL ) ); /* tmp0..tmp5: differences of 2-element products, shared by the res0 terms and the first res1..res3 products */
+    tmp1 = spu_sub( spu_mul( mO, mH ), spu_mul( mG, mP ) );
+    tmp2 = spu_sub( spu_mul( mB, mK ), spu_mul( mJ, mC ) );
+    tmp3 = spu_sub( spu_mul( mF, mO ), spu_mul( mN, mG ) );
+    tmp4 = spu_sub( spu_mul( mJ, mD ), spu_mul( mB, mL ) );
+    tmp5 = spu_sub( spu_mul( mN, mH ), spu_mul( mF, mP ) );
+    vmathSoaV4SetX( &res0, spu_sub( spu_sub( spu_mul( mJ, tmp1 ), spu_mul( mL, tmp3 ) ), spu_mul( mK, tmp5 ) ) ); /* res0 is complete after these four lines */
+    vmathSoaV4SetY( &res0, spu_sub( spu_sub( spu_mul( mN, tmp0 ), spu_mul( mP, tmp2 ) ), spu_mul( mO, tmp4 ) ) );
+    vmathSoaV4SetZ( &res0, spu_sub( spu_add( spu_mul( mD, tmp3 ), spu_mul( mC, tmp5 ) ), spu_mul( mB, tmp1 ) ) );
+    vmathSoaV4SetW( &res0, spu_sub( spu_add( spu_mul( mH, tmp2 ), spu_mul( mG, tmp4 ) ), spu_mul( mF, tmp0 ) ) );
+    detInv = recipf4( spu_add( spu_add( spu_add( spu_mul( mA, res0.x ), spu_mul( mE, res0.y ) ), spu_mul( mI, res0.z ) ), spu_mul( mM, res0.w ) ) ); /* 1 / (A*res0.x + E*res0.y + I*res0.z + M*res0.w) */
+    vmathSoaV4SetX( &res1, spu_mul( mI, tmp1 ) ); /* res1..res3 first receive single products; they are folded into the final values further down */
+    vmathSoaV4SetY( &res1, spu_mul( mM, tmp0 ) );
+    vmathSoaV4SetZ( &res1, spu_mul( mA, tmp1 ) );
+    vmathSoaV4SetW( &res1, spu_mul( mE, tmp0 ) );
+    vmathSoaV4SetX( &res3, spu_mul( mI, tmp3 ) );
+    vmathSoaV4SetY( &res3, spu_mul( mM, tmp2 ) );
+    vmathSoaV4SetZ( &res3, spu_mul( mA, tmp3 ) );
+    vmathSoaV4SetW( &res3, spu_mul( mE, tmp2 ) );
+    vmathSoaV4SetX( &res2, spu_mul( mI, tmp5 ) );
+    vmathSoaV4SetY( &res2, spu_mul( mM, tmp4 ) );
+    vmathSoaV4SetZ( &res2, spu_mul( mA, tmp5 ) );
+    vmathSoaV4SetW( &res2, spu_mul( mE, tmp4 ) );
+    tmp0 = spu_sub( spu_mul( mI, mB ), spu_mul( mA, mJ ) ); /* tmp0..tmp5 are reused from here on with different element pairs */
+    tmp1 = spu_sub( spu_mul( mM, mF ), spu_mul( mE, mN ) );
+    tmp2 = spu_sub( spu_mul( mI, mD ), spu_mul( mA, mL ) );
+    tmp3 = spu_sub( spu_mul( mM, mH ), spu_mul( mE, mP ) );
+    tmp4 = spu_sub( spu_mul( mI, mC ), spu_mul( mA, mK ) );
+    tmp5 = spu_sub( spu_mul( mM, mG ), spu_mul( mE, mO ) );
+    vmathSoaV4SetX( &res2, spu_add( spu_sub( spu_mul( mL, tmp1 ), spu_mul( mJ, tmp3 ) ), res2.x ) ); /* finish res2, res3, res1 by combining new terms with the stored products */
+    vmathSoaV4SetY( &res2, spu_add( spu_sub( spu_mul( mP, tmp0 ), spu_mul( mN, tmp2 ) ), res2.y ) );
+    vmathSoaV4SetZ( &res2, spu_sub( spu_sub( spu_mul( mB, tmp3 ), spu_mul( mD, tmp1 ) ), res2.z ) );
+    vmathSoaV4SetW( &res2, spu_sub( spu_sub( spu_mul( mF, tmp2 ), spu_mul( mH, tmp0 ) ), res2.w ) );
+    vmathSoaV4SetX( &res3, spu_add( spu_sub( spu_mul( mJ, tmp5 ), spu_mul( mK, tmp1 ) ), res3.x ) );
+    vmathSoaV4SetY( &res3, spu_add( spu_sub( spu_mul( mN, tmp4 ), spu_mul( mO, tmp0 ) ), res3.y ) );
+    vmathSoaV4SetZ( &res3, spu_sub( spu_sub( spu_mul( mC, tmp1 ), spu_mul( mB, tmp5 ) ), res3.z ) );
+    vmathSoaV4SetW( &res3, spu_sub( spu_sub( spu_mul( mG, tmp0 ), spu_mul( mF, tmp4 ) ), res3.w ) );
+    vmathSoaV4SetX( &res1, spu_sub( spu_sub( spu_mul( mK, tmp3 ), spu_mul( mL, tmp5 ) ), res1.x ) );
+    vmathSoaV4SetY( &res1, spu_sub( spu_sub( spu_mul( mO, tmp2 ), spu_mul( mP, tmp4 ) ), res1.y ) );
+    vmathSoaV4SetZ( &res1, spu_add( spu_sub( spu_mul( mD, tmp5 ), spu_mul( mC, tmp3 ) ), res1.z ) );
+    vmathSoaV4SetW( &res1, spu_add( spu_sub( spu_mul( mH, tmp4 ), spu_mul( mG, tmp2 ) ), res1.w ) );
+    vmathSoaV4ScalarMul( &result->col0, &res0, detInv ); /* result is written only here, after every read of mat */
+    vmathSoaV4ScalarMul( &result->col1, &res1, detInv );
+    vmathSoaV4ScalarMul( &result->col2, &res2, detInv );
+    vmathSoaV4ScalarMul( &result->col3, &res3, detInv );
+}
+
+static inline void vmathSoaM4AffineInverse( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat )
+{
+    /* Reduces each of the four columns with vmathSoaV4GetXYZ, assembles them into a
+     * VmathSoaTransform3, inverts that with vmathSoaT3Inverse, and converts the
+     * inverted transform back to a 4x4 matrix with vmathSoaM4MakeFromT3. */
+    VmathSoaTransform3 upper3x4, inverted3x4;
+    VmathSoaVector3 xyz0, xyz1, xyz2, xyz3;
+    vmathSoaV4GetXYZ( &xyz0, &mat->col0 );
+    vmathSoaV4GetXYZ( &xyz1, &mat->col1 );
+    vmathSoaV4GetXYZ( &xyz2, &mat->col2 );
+    vmathSoaV4GetXYZ( &xyz3, &mat->col3 );
+    vmathSoaT3MakeFromCols( &upper3x4, &xyz0, &xyz1, &xyz2, &xyz3 );
+    vmathSoaT3Inverse( &inverted3x4, &upper3x4 );
+    vmathSoaM4MakeFromT3( result, &inverted3x4 );
+}
+
+static inline void vmathSoaM4OrthoInverse( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat )
+{
+    /* Same pipeline as the affine inverse, but the 3x4 transform is inverted with
+     * vmathSoaT3OrthoInverse (transpose of the 3x3 part instead of a general
+     * inverse).  The result is converted back with vmathSoaM4MakeFromT3. */
+    VmathSoaTransform3 upper3x4, inverted3x4;
+    VmathSoaVector3 xyz0, xyz1, xyz2, xyz3;
+    vmathSoaV4GetXYZ( &xyz0, &mat->col0 );
+    vmathSoaV4GetXYZ( &xyz1, &mat->col1 );
+    vmathSoaV4GetXYZ( &xyz2, &mat->col2 );
+    vmathSoaV4GetXYZ( &xyz3, &mat->col3 );
+    vmathSoaT3MakeFromCols( &upper3x4, &xyz0, &xyz1, &xyz2, &xyz3 );
+    vmathSoaT3OrthoInverse( &inverted3x4, &upper3x4 );
+    vmathSoaM4MakeFromT3( result, &inverted3x4 );
+}
+
+static inline vec_float4 vmathSoaM4Determinant( const VmathSoaMatrix4 *mat )
+{
+    /*
+     * Returns A*termA + E*termE + I*termI + M*termM, i.e. an expansion along
+     * the first row of the matrix.
+     *
+     * Elements are named A..P in storage order:
+     *   col0 = (A,B,C,D), col1 = (E,F,G,H), col2 = (I,J,K,L), col3 = (M,N,O,P).
+     */
+    const vec_float4 mA = mat->col0.x, mB = mat->col0.y, mC = mat->col0.z, mD = mat->col0.w;
+    const vec_float4 mE = mat->col1.x, mF = mat->col1.y, mG = mat->col1.z, mH = mat->col1.w;
+    const vec_float4 mI = mat->col2.x, mJ = mat->col2.y, mK = mat->col2.z, mL = mat->col2.w;
+    const vec_float4 mM = mat->col3.x, mN = mat->col3.y, mO = mat->col3.z, mP = mat->col3.w;
+
+    /* Six differences of 2-element products; each name spells the two products it subtracts. */
+    const vec_float4 kdcl = spu_sub( spu_mul( mK, mD ), spu_mul( mC, mL ) );
+    const vec_float4 ohgp = spu_sub( spu_mul( mO, mH ), spu_mul( mG, mP ) );
+    const vec_float4 bkjc = spu_sub( spu_mul( mB, mK ), spu_mul( mJ, mC ) );
+    const vec_float4 fong = spu_sub( spu_mul( mF, mO ), spu_mul( mN, mG ) );
+    const vec_float4 jdbl = spu_sub( spu_mul( mJ, mD ), spu_mul( mB, mL ) );
+    const vec_float4 nhfp = spu_sub( spu_mul( mN, mH ), spu_mul( mF, mP ) );
+
+    /* One term per first-row element; operand order is kept exactly as in the expansion. */
+    const vec_float4 termA = spu_sub( spu_sub( spu_mul( mJ, ohgp ), spu_mul( mL, fong ) ), spu_mul( mK, nhfp ) );
+    const vec_float4 termE = spu_sub( spu_sub( spu_mul( mN, kdcl ), spu_mul( mP, bkjc ) ), spu_mul( mO, jdbl ) );
+    const vec_float4 termI = spu_sub( spu_add( spu_mul( mD, fong ), spu_mul( mC, nhfp ) ), spu_mul( mB, ohgp ) );
+    const vec_float4 termM = spu_sub( spu_add( spu_mul( mH, bkjc ), spu_mul( mG, jdbl ) ), spu_mul( mF, kdcl ) );
+
+    /* Accumulated left to right: ((A*termA + E*termE) + I*termI) + M*termM. */
+    return spu_add( spu_add( spu_add( spu_mul( mA, termA ), spu_mul( mE, termE ) ), spu_mul( mI, termI ) ), spu_mul( mM, termM ) );
+}
+
+static inline void vmathSoaM4Add( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1 )
+{   /* Matrix sum, formed column by column with vmathSoaV4Add; each output column depends only on the matching input columns. */
+    vmathSoaV4Add( &result->col3, &mat0->col3, &mat1->col3 );
+    vmathSoaV4Add( &result->col2, &mat0->col2, &mat1->col2 );
+    vmathSoaV4Add( &result->col1, &mat0->col1, &mat1->col1 );
+    vmathSoaV4Add( &result->col0, &mat0->col0, &mat1->col0 );
+}
+
+static inline void vmathSoaM4Sub( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1 )
+{   /* Matrix difference (mat0 columns as first operand, mat1 columns as second), formed column by column with vmathSoaV4Sub. */
+    vmathSoaV4Sub( &result->col3, &mat0->col3, &mat1->col3 );
+    vmathSoaV4Sub( &result->col2, &mat0->col2, &mat1->col2 );
+    vmathSoaV4Sub( &result->col1, &mat0->col1, &mat1->col1 );
+    vmathSoaV4Sub( &result->col0, &mat0->col0, &mat1->col0 );
+}
+
+static inline void vmathSoaM4Neg( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat )
+{   /* Applies vmathSoaV4Neg to each of the four columns of mat and stores the outcome in the matching column of result. */
+    vmathSoaV4Neg( &result->col3, &mat->col3 );
+    vmathSoaV4Neg( &result->col2, &mat->col2 );
+    vmathSoaV4Neg( &result->col1, &mat->col1 );
+    vmathSoaV4Neg( &result->col0, &mat->col0 );
+}
+
+static inline void vmathSoaM4AbsPerElem( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat )
+{   /* Applies vmathSoaV4AbsPerElem to each of the four columns of mat and stores the outcome in the matching column of result. */
+    vmathSoaV4AbsPerElem( &result->col3, &mat->col3 );
+    vmathSoaV4AbsPerElem( &result->col2, &mat->col2 );
+    vmathSoaV4AbsPerElem( &result->col1, &mat->col1 );
+    vmathSoaV4AbsPerElem( &result->col0, &mat->col0 );
+}
+
+static inline void vmathSoaM4ScalarMul( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat, vec_float4 scalar )
+{   /* Scales every column of mat by the same scalar via vmathSoaV4ScalarMul. */
+    vmathSoaV4ScalarMul( &result->col3, &mat->col3, scalar );
+    vmathSoaV4ScalarMul( &result->col2, &mat->col2, scalar );
+    vmathSoaV4ScalarMul( &result->col1, &mat->col1, scalar );
+    vmathSoaV4ScalarMul( &result->col0, &mat->col0, scalar );
+}
+
+static inline void vmathSoaM4MulV4( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, const VmathSoaVector4 *vec )
+{
+    /* Matrix * vector: each output component sums col0*x, col1*y, col2*z, col3*w in that order; all four are computed before result is written. */
+    const vec_float4 outX = spu_add( spu_add( spu_add( spu_mul( mat->col0.x, vec->x ), spu_mul( mat->col1.x, vec->y ) ), spu_mul( mat->col2.x, vec->z ) ), spu_mul( mat->col3.x, vec->w ) );
+    const vec_float4 outY = spu_add( spu_add( spu_add( spu_mul( mat->col0.y, vec->x ), spu_mul( mat->col1.y, vec->y ) ), spu_mul( mat->col2.y, vec->z ) ), spu_mul( mat->col3.y, vec->w ) );
+    const vec_float4 outZ = spu_add( spu_add( spu_add( spu_mul( mat->col0.z, vec->x ), spu_mul( mat->col1.z, vec->y ) ), spu_mul( mat->col2.z, vec->z ) ), spu_mul( mat->col3.z, vec->w ) );
+    const vec_float4 outW = spu_add( spu_add( spu_add( spu_mul( mat->col0.w, vec->x ), spu_mul( mat->col1.w, vec->y ) ), spu_mul( mat->col2.w, vec->z ) ), spu_mul( mat->col3.w, vec->w ) );
+    vmathSoaV4MakeFromElems( result, outX, outY, outZ, outW );
+}
+
+static inline void vmathSoaM4MulV3( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, const VmathSoaVector3 *vec )
+{   /* Matrix * 3-vector: only col0..col2 of mat contribute (col3 is never read); arguments below are the x, y, z, w components of the result. */
+    vmathSoaV4MakeFromElems( result, spu_add( spu_add( spu_mul( mat->col0.x, vec->x ), spu_mul( mat->col1.x, vec->y ) ), spu_mul( mat->col2.x, vec->z ) ),
+        spu_add( spu_add( spu_mul( mat->col0.y, vec->x ), spu_mul( mat->col1.y, vec->y ) ), spu_mul( mat->col2.y, vec->z ) ),
+        spu_add( spu_add( spu_mul( mat->col0.z, vec->x ), spu_mul( mat->col1.z, vec->y ) ), spu_mul( mat->col2.z, vec->z ) ),
+        spu_add( spu_add( spu_mul( mat->col0.w, vec->x ), spu_mul( mat->col1.w, vec->y ) ), spu_mul( mat->col2.w, vec->z ) ) );
+}
+
+static inline void vmathSoaM4MulP3( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, const VmathSoaPoint3 *pnt )
+{   /* Matrix * point: col0..col2 are weighted by pnt x/y/z and col3 is added unscaled; arguments below are the x, y, z, w components of the result. */
+    vmathSoaV4MakeFromElems( result, spu_add( spu_add( spu_add( spu_mul( mat->col0.x, pnt->x ), spu_mul( mat->col1.x, pnt->y ) ), spu_mul( mat->col2.x, pnt->z ) ), mat->col3.x ),
+        spu_add( spu_add( spu_add( spu_mul( mat->col0.y, pnt->x ), spu_mul( mat->col1.y, pnt->y ) ), spu_mul( mat->col2.y, pnt->z ) ), mat->col3.y ),
+        spu_add( spu_add( spu_add( spu_mul( mat->col0.z, pnt->x ), spu_mul( mat->col1.z, pnt->y ) ), spu_mul( mat->col2.z, pnt->z ) ), mat->col3.z ),
+        spu_add( spu_add( spu_add( spu_mul( mat->col0.w, pnt->x ), spu_mul( mat->col1.w, pnt->y ) ), spu_mul( mat->col2.w, pnt->z ) ), mat->col3.w ) );
+}
+
+static inline void vmathSoaM4Mul( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1 )
+{   /* Matrix product: each output column is mat0 applied to the matching column of mat1; a local holds the product until all four columns are done. */
+    VmathSoaMatrix4 product;
+    vmathSoaM4MulV4( &product.col3, mat0, &mat1->col3 );
+    vmathSoaM4MulV4( &product.col2, mat0, &mat1->col2 );
+    vmathSoaM4MulV4( &product.col1, mat0, &mat1->col1 );
+    vmathSoaM4MulV4( &product.col0, mat0, &mat1->col0 );
+    vmathSoaM4Copy( result, &product );
+}
+
+static inline void vmathSoaM4MulT3( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat, const VmathSoaTransform3 *tfrm1 )
+{   /* mat * tfrm1: col0..col2 of tfrm1 go through vmathSoaM4MulV3, col3 is converted to a point and goes through vmathSoaM4MulP3. */
+    VmathSoaMatrix4 product;
+    VmathSoaPoint3 translationPnt;
+    vmathSoaP3MakeFromV3( &translationPnt, &tfrm1->col3 );
+    vmathSoaM4MulP3( &product.col3, mat, &translationPnt );
+    vmathSoaM4MulV3( &product.col2, mat, &tfrm1->col2 );
+    vmathSoaM4MulV3( &product.col1, mat, &tfrm1->col1 );
+    vmathSoaM4MulV3( &product.col0, mat, &tfrm1->col0 );
+    vmathSoaM4Copy( result, &product );   /* result is written only after every read of mat and tfrm1 */
+}
+
+static inline void vmathSoaM4MulPerElem( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1 )
+{   /* Element-wise (not matrix) product: each column pair goes through vmathSoaV4MulPerElem. */
+    vmathSoaV4MulPerElem( &result->col3, &mat0->col3, &mat1->col3 );
+    vmathSoaV4MulPerElem( &result->col2, &mat0->col2, &mat1->col2 );
+    vmathSoaV4MulPerElem( &result->col1, &mat0->col1, &mat1->col1 );
+    vmathSoaV4MulPerElem( &result->col0, &mat0->col0, &mat1->col0 );
+}
+
+static inline void vmathSoaM4MakeIdentity( VmathSoaMatrix4 *result )
+{   /* Fills col0..col3 with the X, Y, Z and W axis vectors produced by the vmathSoaV4Make?Axis helpers. */
+    vmathSoaV4MakeWAxis( &result->col3 );
+    vmathSoaV4MakeZAxis( &result->col2 );
+    vmathSoaV4MakeYAxis( &result->col1 );
+    vmathSoaV4MakeXAxis( &result->col0 );
+}
+
+static inline void vmathSoaM4SetUpper3x3( VmathSoaMatrix4 *result, const VmathSoaMatrix3 *mat3 )
+{   /* Passes the three columns of mat3 to vmathSoaV4SetXYZ on col0..col2 of result; col3 of result is not referenced. */
+    vmathSoaV4SetXYZ( &result->col2, &mat3->col2 );
+    vmathSoaV4SetXYZ( &result->col1, &mat3->col1 );
+    vmathSoaV4SetXYZ( &result->col0, &mat3->col0 );
+}
+
+static inline void vmathSoaM4GetUpper3x3( VmathSoaMatrix3 *result, const VmathSoaMatrix4 *mat )
+{   /* Extracts col0..col2 of mat through vmathSoaV4GetXYZ into the three columns of result; col3 of mat is not read. */
+    vmathSoaV4GetXYZ( &result->col2, &mat->col2 );
+    vmathSoaV4GetXYZ( &result->col1, &mat->col1 );
+    vmathSoaV4GetXYZ( &result->col0, &mat->col0 );
+}
+
+static inline void vmathSoaM4SetTranslation( VmathSoaMatrix4 *result, const VmathSoaVector3 *translateVec )
+{   /* Stores translateVec into col3 of result via vmathSoaV4SetXYZ; col0..col2 are not touched (presumably col3.w is kept as well - confirm in vmathSoaV4SetXYZ). */
+    vmathSoaV4SetXYZ( &result->col3, translateVec );
+}
+
+static inline void vmathSoaM4GetTranslation( VmathSoaVector3 *result, const VmathSoaMatrix4 *mat )
+{   /* Extracts the translation as the 3-vector that vmathSoaV4GetXYZ produces from col3 of mat. */
+    vmathSoaV4GetXYZ( result, &mat->col3 );
+}
+
+static inline void vmathSoaM4MakeRotationX( VmathSoaMatrix4 *result, vec_float4 radians )
+{   /* With s, c from sincosf4( radians ): col0 = X axis, col1 = (0, c, s, 0), col2 = (0, -s, c, 0), col3 = W axis. */
+    vec_float4 sinA, cosA, zero = spu_splats(0.0f);
+    sincosf4( radians, &sinA, &cosA );
+    vmathSoaV4MakeXAxis( &result->col0 );
+    vmathSoaV4MakeFromElems( &result->col1, zero, cosA, sinA, zero );
+    vmathSoaV4MakeFromElems( &result->col2, zero, negatef4( sinA ), cosA, zero );
+    vmathSoaV4MakeWAxis( &result->col3 );
+}
+
+static inline void vmathSoaM4MakeRotationY( VmathSoaMatrix4 *result, vec_float4 radians )
+{   /* With s, c from sincosf4( radians ): col0 = (c, 0, -s, 0), col1 = Y axis, col2 = (s, 0, c, 0), col3 = W axis. */
+    vec_float4 sinA, cosA, zero = spu_splats(0.0f);
+    sincosf4( radians, &sinA, &cosA );
+    vmathSoaV4MakeFromElems( &result->col0, cosA, zero, negatef4( sinA ), zero );
+    vmathSoaV4MakeYAxis( &result->col1 );
+    vmathSoaV4MakeFromElems( &result->col2, sinA, zero, cosA, zero );
+    vmathSoaV4MakeWAxis( &result->col3 );
+}
+
+static inline void vmathSoaM4MakeRotationZ( VmathSoaMatrix4 *result, vec_float4 radians )
+{   /* With s, c from sincosf4( radians ): col0 = (c, s, 0, 0), col1 = (-s, c, 0, 0), col2 = Z axis, col3 = W axis. */
+    vec_float4 sinA, cosA, zero = spu_splats(0.0f);
+    sincosf4( radians, &sinA, &cosA );
+    vmathSoaV4MakeFromElems( &result->col0, cosA, sinA, zero, zero );
+    vmathSoaV4MakeFromElems( &result->col1, negatef4( sinA ), cosA, zero, zero );
+    vmathSoaV4MakeZAxis( &result->col2 );
+    vmathSoaV4MakeWAxis( &result->col3 );
+}
+
+static inline void vmathSoaM4MakeRotationZYX( VmathSoaMatrix4 *result, const VmathSoaVector3 *radiansXYZ )
+{   /* Builds a rotation from the three angles in radiansXYZ (x, y, z components); w of col0..col2 is 0 and col3 is the W axis. */
+    vec_float4 sinX, cosX, sinY, cosY, sinZ, cosZ, cosZsinY, sinZsinY, zero = spu_splats(0.0f);
+    sincosf4( radiansXYZ->x, &sinX, &cosX );
+    sincosf4( radiansXYZ->y, &sinY, &cosY );
+    sincosf4( radiansXYZ->z, &sinZ, &cosZ );
+    cosZsinY = spu_mul( cosZ, sinY );   /* shared by col1.x and col2.x */
+    sinZsinY = spu_mul( sinZ, sinY );   /* shared by col1.y and col2.y */
+    vmathSoaV4MakeFromElems( &result->col0, spu_mul( cosZ, cosY ), spu_mul( sinZ, cosY ), negatef4( sinY ), zero );
+    vmathSoaV4MakeFromElems( &result->col1, spu_sub( spu_mul( cosZsinY, sinX ), spu_mul( sinZ, cosX ) ), spu_add( spu_mul( sinZsinY, sinX ), spu_mul( cosZ, cosX ) ), spu_mul( cosY, sinX ), zero );
+    vmathSoaV4MakeFromElems( &result->col2, spu_add( spu_mul( cosZsinY, cosX ), spu_mul( sinZ, sinX ) ), spu_sub( spu_mul( sinZsinY, cosX ), spu_mul( cosZ, sinX ) ), spu_mul( cosY, cosX ), zero );
+    vmathSoaV4MakeWAxis( &result->col3 );
+}
+
+static inline void vmathSoaM4MakeRotationAxis( VmathSoaMatrix4 *result, vec_float4 radians, const VmathSoaVector3 *unitVec )
+{
+    /* Axis-angle rotation.  Diagonal entries are axis_i^2 * (1 - c) + c; off-diagonal
+     * entries are axis_i * axis_j * (1 - c) plus or minus axis_k * s.  unitVec is used
+     * as given: nothing here normalizes it.  w of col0..col2 is 0, col3 is the W axis. */
+    vec_float4 s, c, oneMinusC;
+    const vec_float4 x = unitVec->x, y = unitVec->y, z = unitVec->z;
+    const vec_float4 xy = spu_mul( x, y ), yz = spu_mul( y, z ), zx = spu_mul( z, x );
+    const vec_float4 zero = spu_splats(0.0f);
+    sincosf4( radians, &s, &c );
+    oneMinusC = spu_sub( spu_splats(1.0f), c );
+    vmathSoaV4MakeFromElems( &result->col0, spu_add( spu_mul( spu_mul( x, x ), oneMinusC ), c ), spu_add( spu_mul( xy, oneMinusC ), spu_mul( z, s ) ), spu_sub( spu_mul( zx, oneMinusC ), spu_mul( y, s ) ), zero );
+    vmathSoaV4MakeFromElems( &result->col1, spu_sub( spu_mul( xy, oneMinusC ), spu_mul( z, s ) ), spu_add( spu_mul( spu_mul( y, y ), oneMinusC ), c ), spu_add( spu_mul( yz, oneMinusC ), spu_mul( x, s ) ), zero );
+    vmathSoaV4MakeFromElems( &result->col2, spu_add( spu_mul( zx, oneMinusC ), spu_mul( y, s ) ), spu_sub( spu_mul( yz, oneMinusC ), spu_mul( x, s ) ), spu_add( spu_mul( spu_mul( z, z ), oneMinusC ), c ), zero );
+    vmathSoaV4MakeWAxis( &result->col3 );
+}
+
+static inline void vmathSoaM4MakeRotationQ( VmathSoaMatrix4 *result, const VmathSoaQuat *unitQuat )
+{   /* Builds the rotation as a VmathSoaTransform3 from unitQuat, then widens it to a 4x4 matrix with vmathSoaM4MakeFromT3. */
+    VmathSoaTransform3 rotationT3;
+    vmathSoaT3MakeRotationQ( &rotationT3, unitQuat );
+    vmathSoaM4MakeFromT3( result, &rotationT3 );
+}
+
+static inline void vmathSoaM4MakeScale( VmathSoaMatrix4 *result, const VmathSoaVector3 *scaleVec )
+{   /* Scale matrix: scaleVec x/y/z sit on the diagonal of col0..col2 with zeros elsewhere; col3 comes from vmathSoaV4MakeWAxis. */
+    vmathSoaV4MakeWAxis( &result->col3 );
+    vmathSoaV4MakeFromElems( &result->col2, spu_splats(0.0f), spu_splats(0.0f), scaleVec->z, spu_splats(0.0f) );
+    vmathSoaV4MakeFromElems( &result->col1, spu_splats(0.0f), scaleVec->y, spu_splats(0.0f), spu_splats(0.0f) );
+    vmathSoaV4MakeFromElems( &result->col0, scaleVec->x, spu_splats(0.0f), spu_splats(0.0f), spu_splats(0.0f) );
+}
+
+static inline void vmathSoaM4AppendScale( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat, const VmathSoaVector3 *scaleVec )
+{   /* Scales whole columns: col0, col1, col2 of mat are multiplied by the X, Y, Z of scaleVec respectively; col3 is copied unchanged. */
+    vmathSoaV4Copy( &result->col3, &mat->col3 );
+    vmathSoaV4ScalarMul( &result->col2, &mat->col2, vmathSoaV3GetZ( scaleVec ) );
+    vmathSoaV4ScalarMul( &result->col1, &mat->col1, vmathSoaV3GetY( scaleVec ) );
+    vmathSoaV4ScalarMul( &result->col0, &mat->col0, vmathSoaV3GetX( scaleVec ) );
+}
+
+static inline void vmathSoaM4PrependScale( VmathSoaMatrix4 *result, const VmathSoaVector3 *scaleVec, const VmathSoaMatrix4 *mat )
+{   /* Scales rows: every column of mat is multiplied per element by the 4-vector built from scaleVec and a scalar 1.0. */
+    VmathSoaVector4 rowScale;
+    vmathSoaV4MakeFromV3Scalar( &rowScale, scaleVec, spu_splats(1.0f) );
+    vmathSoaV4MulPerElem( &result->col3, &mat->col3, &rowScale );
+    vmathSoaV4MulPerElem( &result->col2, &mat->col2, &rowScale );
+    vmathSoaV4MulPerElem( &result->col1, &mat->col1, &rowScale );
+    vmathSoaV4MulPerElem( &result->col0, &mat->col0, &rowScale );
+}
+
+static inline void vmathSoaM4MakeTranslation( VmathSoaMatrix4 *result, const VmathSoaVector3 *translateVec )
+{   /* col0..col2 are the X, Y, Z axis vectors; col3 is built from translateVec and a scalar 1.0 by vmathSoaV4MakeFromV3Scalar. */
+    vmathSoaV4MakeFromV3Scalar( &result->col3, translateVec, spu_splats(1.0f) );
+    vmathSoaV4MakeZAxis( &result->col2 );
+    vmathSoaV4MakeYAxis( &result->col1 );
+    vmathSoaV4MakeXAxis( &result->col0 );
+}
+
+static inline void vmathSoaM4MakeLookAt( VmathSoaMatrix4 *result, const VmathSoaPoint3 *eyePos, const VmathSoaPoint3 *lookAtPos, const VmathSoaVector3 *upVec )
+{   /* Builds the eye frame (columns X, Y, Z axes and eye position) and returns its inverse via vmathSoaM4OrthoInverse. */
+    VmathSoaMatrix4 eyeFrame;
+    VmathSoaVector3 axisX, axisY, axisZ, eyeMinusTarget, rawX;
+    VmathSoaVector4 colX, colY, colZ, colEye;
+    vmathSoaP3Sub( &eyeMinusTarget, eyePos, lookAtPos );
+    vmathSoaV3Normalize( &axisZ, &eyeMinusTarget );   /* Z points from the look-at position towards the eye */
+    vmathSoaV3Normalize( &axisY, upVec );             /* provisional Y, replaced two lines below */
+    vmathSoaV3Cross( &rawX, &axisY, &axisZ );
+    vmathSoaV3Normalize( &axisX, &rawX );
+    vmathSoaV3Cross( &axisY, &axisZ, &axisX );        /* final Y = Z x X */
+    vmathSoaV4MakeFromP3( &colEye, eyePos );
+    vmathSoaV4MakeFromV3( &colZ, &axisZ );
+    vmathSoaV4MakeFromV3( &colY, &axisY );
+    vmathSoaV4MakeFromV3( &colX, &axisX );
+    vmathSoaM4MakeFromCols( &eyeFrame, &colX, &colY, &colZ, &colEye );
+    vmathSoaM4OrthoInverse( result, &eyeFrame );
+}
+
+static inline void vmathSoaM4MakePerspective( VmathSoaMatrix4 *result, vec_float4 fovyRadians, vec_float4 aspect, vec_float4 zNear, vec_float4 zFar )
+{   /* focal = tanf4( PI_OVER_2 - fovy/2 ); x is scaled by focal/aspect, y by focal; depth terms use 1/(zNear - zFar). Inputs are not validated. */
+    vec_float4 focal, invNearMinusFar, zero = spu_splats(0.0f);
+    focal = tanf4( spu_sub( spu_splats( _VECTORMATH_PI_OVER_2 ), spu_mul( spu_splats(0.5f), fovyRadians ) ) );
+    invNearMinusFar = recipf4( spu_sub( zNear, zFar ) );
+    vmathSoaV4MakeFromElems( &result->col0, divf4( focal, aspect ), zero, zero, zero );
+    vmathSoaV4MakeFromElems( &result->col1, zero, focal, zero, zero );
+    vmathSoaV4MakeFromElems( &result->col2, zero, zero, spu_mul( spu_add( zNear, zFar ), invNearMinusFar ), spu_splats(-1.0f) );
+    vmathSoaV4MakeFromElems( &result->col3, zero, zero, spu_mul( spu_mul( spu_mul( zNear, zFar ), invNearMinusFar ), spu_splats(2.0f) ), zero );
+}
+
+static inline void vmathSoaM4MakeFrustum( VmathSoaMatrix4 *result, vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar )
+{   /* Frustum projection from the six plane values; reciprocals of (right-left), (top-bottom), (zNear-zFar) are taken without any zero check. */
+    vec_float4 rPlusL, tPlusB, nPlusF, invRL, invTB, invNF, twoNear, zero = spu_splats(0.0f);
+    invRL = recipf4( spu_sub( right, left ) );
+    invTB = recipf4( spu_sub( top, bottom ) );
+    invNF = recipf4( spu_sub( zNear, zFar ) );
+    rPlusL = spu_add( right, left );
+    tPlusB = spu_add( top, bottom );
+    nPlusF = spu_add( zNear, zFar );
+    twoNear = spu_add( zNear, zNear );
+    vmathSoaV4MakeFromElems( &result->col0, spu_mul( twoNear, invRL ), zero, zero, zero );
+    vmathSoaV4MakeFromElems( &result->col1, zero, spu_mul( twoNear, invTB ), zero, zero );
+    vmathSoaV4MakeFromElems( &result->col2, spu_mul( rPlusL, invRL ), spu_mul( tPlusB, invTB ), spu_mul( nPlusF, invNF ), spu_splats(-1.0f) );
+    vmathSoaV4MakeFromElems( &result->col3, zero, zero, spu_mul( spu_mul( twoNear, invNF ), zFar ), zero );
+}
+
+static inline void vmathSoaM4MakeOrthographic( VmathSoaMatrix4 *result, vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar )
+{   /* Orthographic projection: the diagonal holds twice each reciprocal extent, col3 holds the offsets; no zero check on the extents. */
+    vec_float4 rPlusL, tPlusB, nPlusF, invRL, invTB, invNF, zero = spu_splats(0.0f);
+    invRL = recipf4( spu_sub( right, left ) );
+    invTB = recipf4( spu_sub( top, bottom ) );
+    invNF = recipf4( spu_sub( zNear, zFar ) );
+    rPlusL = spu_add( right, left );
+    tPlusB = spu_add( top, bottom );
+    nPlusF = spu_add( zNear, zFar );
+    vmathSoaV4MakeFromElems( &result->col0, spu_add( invRL, invRL ), zero, zero, zero );
+    vmathSoaV4MakeFromElems( &result->col1, zero, spu_add( invTB, invTB ), zero, zero );
+    vmathSoaV4MakeFromElems( &result->col2, zero, zero, spu_add( invNF, invNF ), zero );
+    vmathSoaV4MakeFromElems( &result->col3, spu_mul( negatef4( rPlusL ), invRL ), spu_mul( negatef4( tPlusB ), invTB ), spu_mul( nPlusF, invNF ), spu_splats(1.0f) );
+}
+
+static inline void vmathSoaM4Select( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1, vec_uint4 select1 )
+{   /* Forwards the same select1 mask to vmathSoaV4Select for each of the four column pairs. */
+    vmathSoaV4Select( &result->col3, &mat0->col3, &mat1->col3, select1 );
+    vmathSoaV4Select( &result->col2, &mat0->col2, &mat1->col2, select1 );
+    vmathSoaV4Select( &result->col1, &mat0->col1, &mat1->col1, select1 );
+    vmathSoaV4Select( &result->col0, &mat0->col0, &mat1->col0, select1 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathSoaM4Print( const VmathSoaMatrix4 *mat )
+{   /* Debug helper: splits mat into four AoS matrices with vmathSoaM4Get4Aos and prints each under a "slot N:" heading. */
+    VmathMatrix4 slot0, slot1, slot2, slot3;
+    vmathSoaM4Get4Aos( mat, &slot0, &slot1, &slot2, &slot3 );
+    printf("slot 0:\n");
+    vmathM4Print( &slot0 );
+    printf("slot 1:\n");
+    vmathM4Print( &slot1 );
+    printf("slot 2:\n");
+    vmathM4Print( &slot2 );
+    printf("slot 3:\n");
+    vmathM4Print( &slot3 );
+}
+
+static inline void vmathSoaM4Prints( const VmathSoaMatrix4 *mat, const char *name )
+{   /* Debug helper: prints name followed by a colon and newline, then the matrix via vmathSoaM4Print. */
+    printf("%s:\n", name);
+    vmathSoaM4Print( mat );
+}
+
+#endif
+
+static inline void vmathSoaT3Copy( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm )
+{
+    /* Copies the four columns of tfrm into result in the order col0..col3.
+     * Delegates to vmathSoaT3MakeFromCols, which stores its four column
+     * arguments into result->col0..col3 in that same order. */
+    vmathSoaT3MakeFromCols( result, &tfrm->col0, &tfrm->col1, &tfrm->col2, &tfrm->col3 );
+}
+
+static inline void vmathSoaT3MakeFromScalar( VmathSoaTransform3 *result, vec_float4 scalar )
+{   /* Initialises every column, including col3, from the same scalar via vmathSoaV3MakeFromScalar. */
+    vmathSoaV3MakeFromScalar( &result->col3, scalar );
+    vmathSoaV3MakeFromScalar( &result->col2, scalar );
+    vmathSoaV3MakeFromScalar( &result->col1, scalar );
+    vmathSoaV3MakeFromScalar( &result->col0, scalar );
+}
+
+static inline void vmathSoaT3MakeFromCols( VmathSoaTransform3 *result, const VmathSoaVector3 *_col0, const VmathSoaVector3 *_col1, const VmathSoaVector3 *_col2, const VmathSoaVector3 *_col3 )
+{   /* Stores the four given columns into result, strictly in the order col0, col1, col2, col3. */
+    vmathSoaT3SetCol0( result, _col0 );
+    vmathSoaT3SetCol1( result, _col1 );
+    vmathSoaT3SetCol2( result, _col2 );
+    vmathSoaT3SetCol3( result, _col3 );
+}
+
+static inline void vmathSoaT3MakeFromM3V3( VmathSoaTransform3 *result, const VmathSoaMatrix3 *tfrm, const VmathSoaVector3 *translateVec )
+{   /* Builds a transform from a 3x3 matrix (tfrm) and a translation: first vmathSoaT3SetUpper3x3, then vmathSoaT3SetTranslation. */
+    vmathSoaT3SetUpper3x3( result, tfrm );
+    vmathSoaT3SetTranslation( result, translateVec );
+}
+
+static inline void vmathSoaT3MakeFromQV3( VmathSoaTransform3 *result, const VmathSoaQuat *unitQuat, const VmathSoaVector3 *translateVec )
+{   /* Converts unitQuat to a 3x3 matrix with vmathSoaM3MakeFromQ, installs it as the upper 3x3 of result, then sets the translation. */
+    VmathSoaMatrix3 rotation;
+    vmathSoaM3MakeFromQ( &rotation, unitQuat );
+    vmathSoaT3SetUpper3x3( result, &rotation );
+    vmathSoaT3SetTranslation( result, translateVec );
+}
+
+static inline void vmathSoaT3MakeFromAos( VmathSoaTransform3 *result, const VmathTransform3 *tfrm )
+{   /* Converts a single AoS transform to SoA form, one column at a time, with vmathSoaV3MakeFromAos. */
+    vmathSoaV3MakeFromAos( &result->col3, &tfrm->col3 );
+    vmathSoaV3MakeFromAos( &result->col2, &tfrm->col2 );
+    vmathSoaV3MakeFromAos( &result->col1, &tfrm->col1 );
+    vmathSoaV3MakeFromAos( &result->col0, &tfrm->col0 );
+}
+
+static inline void vmathSoaT3MakeFrom4Aos( VmathSoaTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1, const VmathTransform3 *tfrm2, const VmathTransform3 *tfrm3 )
+{   /* Packs four AoS transforms into one SoA transform: column N of result is built from column N of tfrm0..tfrm3. */
+    vmathSoaV3MakeFrom4Aos( &result->col3, &tfrm0->col3, &tfrm1->col3, &tfrm2->col3, &tfrm3->col3 );
+    vmathSoaV3MakeFrom4Aos( &result->col2, &tfrm0->col2, &tfrm1->col2, &tfrm2->col2, &tfrm3->col2 );
+    vmathSoaV3MakeFrom4Aos( &result->col1, &tfrm0->col1, &tfrm1->col1, &tfrm2->col1, &tfrm3->col1 );
+    vmathSoaV3MakeFrom4Aos( &result->col0, &tfrm0->col0, &tfrm1->col0, &tfrm2->col0, &tfrm3->col0 );
+}
+
+static inline void vmathSoaT3Get4Aos( const VmathSoaTransform3 *tfrm, VmathTransform3 *result0, VmathTransform3 *result1, VmathTransform3 *result2, VmathTransform3 *result3 )
+{   /* Unpacks one SoA transform into four AoS transforms: column N of tfrm is spread over column N of result0..result3. */
+    vmathSoaV3Get4Aos( &tfrm->col3, &result0->col3, &result1->col3, &result2->col3, &result3->col3 );
+    vmathSoaV3Get4Aos( &tfrm->col2, &result0->col2, &result1->col2, &result2->col2, &result3->col2 );
+    vmathSoaV3Get4Aos( &tfrm->col1, &result0->col1, &result1->col1, &result2->col1, &result3->col1 );
+    vmathSoaV3Get4Aos( &tfrm->col0, &result0->col0, &result1->col0, &result2->col0, &result3->col0 );
+}
+
+static inline void vmathSoaT3SetCol0( VmathSoaTransform3 *result, const VmathSoaVector3 *_col0 )
+{   /* Overwrites column 0 of result with _col0; expressed through the indexed setter with col = 0. */
+    vmathSoaT3SetCol( result, 0, _col0 );
+}
+
+static inline void vmathSoaT3SetCol1( VmathSoaTransform3 *result, const VmathSoaVector3 *_col1 )
+{   /* Overwrites column 1 of result with _col1; expressed through the indexed setter with col = 1. */
+    vmathSoaT3SetCol( result, 1, _col1 );
+}
+
+static inline void vmathSoaT3SetCol2( VmathSoaTransform3 *result, const VmathSoaVector3 *_col2 )
+{   /* Overwrites column 2 of result with _col2; expressed through the indexed setter with col = 2. */
+    vmathSoaT3SetCol( result, 2, _col2 );
+}
+
+static inline void vmathSoaT3SetCol3( VmathSoaTransform3 *result, const VmathSoaVector3 *_col3 )
+{   /* Overwrites column 3 (the translation column) of result with _col3; expressed through the indexed setter with col = 3. */
+    vmathSoaT3SetCol( result, 3, _col3 );
+}
+
+static inline void vmathSoaT3SetCol( VmathSoaTransform3 *result, int col, const VmathSoaVector3 *vec )
+{   /* Overwrites column number col, addressed by indexing from col0 (relies on col0..col3 being laid out contiguously); col is not range-checked. */
+    vmathSoaV3Copy( &(&result->col0)[col], vec );
+}
+
+static inline void vmathSoaT3SetRow( VmathSoaTransform3 *result, int row, const VmathSoaVector4 *vec )
+{   /* Writes one row: element N of vec (N = 0..3) becomes element number row of column N. */
+    vmathSoaV3SetElem( &result->col3, row, vmathSoaV4GetElem( vec, 3 ) );
+    vmathSoaV3SetElem( &result->col2, row, vmathSoaV4GetElem( vec, 2 ) );
+    vmathSoaV3SetElem( &result->col1, row, vmathSoaV4GetElem( vec, 1 ) );
+    vmathSoaV3SetElem( &result->col0, row, vmathSoaV4GetElem( vec, 0 ) );
+}
+
+static inline void vmathSoaT3SetElem( VmathSoaTransform3 *result, int col, int row, vec_float4 val )
+{   /* Read-modify-write of a single element: fetch column col, replace its element number row with val, store the column back. */
+    VmathSoaVector3 column;
+    vmathSoaT3GetCol( &column, result, col );
+    vmathSoaV3SetElem( &column, row, val );
+    vmathSoaT3SetCol( result, col, &column );
+}
+
+static inline vec_float4 vmathSoaT3GetElem( const VmathSoaTransform3 *tfrm, int col, int row )
+{   /* Returns element number row of column col, going through a local copy of that column. */
+    VmathSoaVector3 column;
+    vmathSoaT3GetCol( &column, tfrm, col );
+    return vmathSoaV3GetElem( &column, row );
+}
+
+static inline void vmathSoaT3GetCol0( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm )
+{   /* Copies column 0 of tfrm into result; expressed through the indexed getter with col = 0. */
+    vmathSoaT3GetCol( result, tfrm, 0 );
+}
+
+static inline void vmathSoaT3GetCol1( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm )
+{   /* Copies column 1 of tfrm into result; expressed through the indexed getter with col = 1. */
+    vmathSoaT3GetCol( result, tfrm, 1 );
+}
+
+static inline void vmathSoaT3GetCol2( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm )
+{   /* Copies column 2 of tfrm into result; expressed through the indexed getter with col = 2. */
+    vmathSoaT3GetCol( result, tfrm, 2 );
+}
+
+static inline void vmathSoaT3GetCol3( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm )
+{   /* Copies column 3 (the translation column) of tfrm into result; expressed through the indexed getter with col = 3. */
+    vmathSoaT3GetCol( result, tfrm, 3 );
+}
+
+static inline void vmathSoaT3GetCol( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm, int col )
+{   /* Copies column number col, addressed by indexing from col0 (relies on col0..col3 being laid out contiguously); col is not range-checked. */
+    vmathSoaV3Copy( result, &(&tfrm->col0)[col] );
+}
+
+static inline void vmathSoaT3GetRow( VmathSoaVector4 *result, const VmathSoaTransform3 *tfrm, int row )
+{   /* Gathers element number row from col0, col1, col2, col3 (in that order) into a 4-vector. */
+    vmathSoaV4MakeFromElems( result, vmathSoaV3GetElem( &tfrm->col0, row ), vmathSoaV3GetElem( &tfrm->col1, row ), vmathSoaV3GetElem( &tfrm->col2, row ), vmathSoaV3GetElem( &tfrm->col3, row ) );
+}
+
+static inline void vmathSoaT3Inverse( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm )
+{   /* Inverts the 3x3 part from cross products scaled by 1/(col2 . (col0 x col1)); col3 becomes minus the inverted 3x3 applied to tfrm->col3. No zero-determinant check. */
+    VmathSoaVector3 c1xc2, c2xc0, c0xc1, invCol0, invCol1, invCol2, partX, partY, partZ, partYZ, partXYZ;
+    vec_float4 invDet;
+    vmathSoaV3Cross( &c1xc2, &tfrm->col1, &tfrm->col2 );
+    vmathSoaV3Cross( &c2xc0, &tfrm->col2, &tfrm->col0 );
+    vmathSoaV3Cross( &c0xc1, &tfrm->col0, &tfrm->col1 );
+    invDet = recipf4( vmathSoaV3Dot( &tfrm->col2, &c0xc1 ) );
+    vmathSoaV3MakeFromElems( &invCol0, spu_mul( c1xc2.x, invDet ), spu_mul( c2xc0.x, invDet ), spu_mul( c0xc1.x, invDet ) );
+    vmathSoaV3MakeFromElems( &invCol1, spu_mul( c1xc2.y, invDet ), spu_mul( c2xc0.y, invDet ), spu_mul( c0xc1.y, invDet ) );
+    vmathSoaV3MakeFromElems( &invCol2, spu_mul( c1xc2.z, invDet ), spu_mul( c2xc0.z, invDet ), spu_mul( c0xc1.z, invDet ) );
+    vmathSoaV3ScalarMul( &partX, &invCol0, tfrm->col3.x );
+    vmathSoaV3ScalarMul( &partY, &invCol1, tfrm->col3.y );
+    vmathSoaV3ScalarMul( &partZ, &invCol2, tfrm->col3.z );
+    vmathSoaV3Add( &partYZ, &partY, &partZ );
+    vmathSoaV3Add( &partXYZ, &partX, &partYZ );
+    /* Every read of tfrm is done; only now are the four columns of result stored. */
+    vmathSoaV3Copy( &result->col0, &invCol0 );
+    vmathSoaV3Copy( &result->col1, &invCol1 );
+    vmathSoaV3Copy( &result->col2, &invCol2 );
+    vmathSoaV3Neg( &result->col3, &partXYZ );
+}
+
+static inline void vmathSoaT3OrthoInverse( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm )
+{   /* Uses the transpose of the 3x3 part as its inverse (no check that it is actually orthonormal); col3 becomes minus that transpose applied to tfrm->col3. */
+    VmathSoaVector3 invCol0, invCol1, invCol2, partX, partY, partZ, partYZ, partXYZ;
+    vmathSoaV3MakeFromElems( &invCol0, tfrm->col0.x, tfrm->col1.x, tfrm->col2.x );
+    vmathSoaV3MakeFromElems( &invCol1, tfrm->col0.y, tfrm->col1.y, tfrm->col2.y );
+    vmathSoaV3MakeFromElems( &invCol2, tfrm->col0.z, tfrm->col1.z, tfrm->col2.z );
+    vmathSoaV3ScalarMul( &partX, &invCol0, tfrm->col3.x );
+    vmathSoaV3ScalarMul( &partY, &invCol1, tfrm->col3.y );
+    vmathSoaV3ScalarMul( &partZ, &invCol2, tfrm->col3.z );
+    vmathSoaV3Add( &partYZ, &partY, &partZ );
+    vmathSoaV3Add( &partXYZ, &partX, &partYZ );
+    /* Every read of tfrm is done; only now are the four columns of result stored. */
+    vmathSoaV3Copy( &result->col0, &invCol0 );
+    vmathSoaV3Copy( &result->col1, &invCol1 );
+    vmathSoaV3Copy( &result->col2, &invCol2 );
+    vmathSoaV3Neg( &result->col3, &partXYZ );
+}
+
+static inline void vmathSoaT3AbsPerElem( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm )
+{   /* Applies vmathSoaV3AbsPerElem to each of the four columns of tfrm, translation column included. */
+    vmathSoaV3AbsPerElem( &result->col3, &tfrm->col3 );
+    vmathSoaV3AbsPerElem( &result->col2, &tfrm->col2 );
+    vmathSoaV3AbsPerElem( &result->col1, &tfrm->col1 );
+    vmathSoaV3AbsPerElem( &result->col0, &tfrm->col0 );
+}
+
+static inline void vmathSoaT3MulV3( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm, const VmathSoaVector3 *vec )
+{
+    /* Transforms a direction: only col0..col2 take part (col3 is not read); all components are computed before result is written. */
+    const vec_float4 outX = spu_add( spu_add( spu_mul( tfrm->col0.x, vec->x ), spu_mul( tfrm->col1.x, vec->y ) ), spu_mul( tfrm->col2.x, vec->z ) );
+    const vec_float4 outY = spu_add( spu_add( spu_mul( tfrm->col0.y, vec->x ), spu_mul( tfrm->col1.y, vec->y ) ), spu_mul( tfrm->col2.y, vec->z ) );
+    const vec_float4 outZ = spu_add( spu_add( spu_mul( tfrm->col0.z, vec->x ), spu_mul( tfrm->col1.z, vec->y ) ), spu_mul( tfrm->col2.z, vec->z ) );
+    vmathSoaV3MakeFromElems( result, outX, outY, outZ );
+}
+
+static inline void vmathSoaT3MulP3( VmathSoaPoint3 *result, const VmathSoaTransform3 *tfrm, const VmathSoaPoint3 *pnt )
+{ /* Transforms a point: upper 3x3 (col0..col2) times pnt, plus col3. Components are computed before result is written. */
+    vec_float4 outX, outY, outZ;
+    outX = spu_add( spu_add( spu_add( spu_mul( tfrm->col0.x, pnt->x ), spu_mul( tfrm->col1.x, pnt->y ) ), spu_mul( tfrm->col2.x, pnt->z ) ), tfrm->col3.x );
+    outY = spu_add( spu_add( spu_add( spu_mul( tfrm->col0.y, pnt->x ), spu_mul( tfrm->col1.y, pnt->y ) ), spu_mul( tfrm->col2.y, pnt->z ) ), tfrm->col3.y );
+    outZ = spu_add( spu_add( spu_add( spu_mul( tfrm->col0.z, pnt->x ), spu_mul( tfrm->col1.z, pnt->y ) ), spu_mul( tfrm->col2.z, pnt->z ) ), tfrm->col3.z );
+    vmathSoaP3MakeFromElems( result, outX, outY, outZ );
+}
+
+static inline void vmathSoaT3Mul( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm0, const VmathSoaTransform3 *tfrm1 )
+{ /* Composes tfrm0 * tfrm1: columns 0..2 of tfrm1 are transformed as vectors, col3 as a point. */
+    VmathSoaTransform3 product; /* built in a local and copied out last, so result is only written after all inputs are read */
+    VmathSoaPoint3 originIn, originOut;
+    vmathSoaT3MulV3( &product.col0, tfrm0, &tfrm1->col0 );
+    vmathSoaT3MulV3( &product.col1, tfrm0, &tfrm1->col1 );
+    vmathSoaT3MulV3( &product.col2, tfrm0, &tfrm1->col2 );
+    vmathSoaP3MakeFromV3( &originIn, &tfrm1->col3 );
+    vmathSoaT3MulP3( &originOut, tfrm0, &originIn ); /* point multiply adds tfrm0->col3 */
+    vmathSoaV3MakeFromP3( &product.col3, &originOut );
+    vmathSoaT3Copy( result, &product );
+}
+
+static inline void vmathSoaT3MulPerElem( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm0, const VmathSoaTransform3 *tfrm1 )
+{ /* Column-by-column vmathSoaV3MulPerElem of tfrm0 and tfrm1 (all four columns); this is not the composition done by vmathSoaT3Mul. */
+    vmathSoaV3MulPerElem( &result->col0, &tfrm0->col0, &tfrm1->col0 );
+    vmathSoaV3MulPerElem( &result->col1, &tfrm0->col1, &tfrm1->col1 );
+    vmathSoaV3MulPerElem( &result->col2, &tfrm0->col2, &tfrm1->col2 );
+    vmathSoaV3MulPerElem( &result->col3, &tfrm0->col3, &tfrm1->col3 );
+}
+
+static inline void vmathSoaT3MakeIdentity( VmathSoaTransform3 *result )
+{ /* Fills result with the identity transform: axis columns from the V3 Make*Axis helpers and an all-zero col3. */
+    vmathSoaV3MakeXAxis( &result->col0 );
+    vmathSoaV3MakeYAxis( &result->col1 );
+    vmathSoaV3MakeZAxis( &result->col2 );
+    vmathSoaV3MakeFromScalar( &result->col3, spu_splats(0.0f) ); /* col3 built from a 0.0f splat */
+}
+
+static inline void vmathSoaT3SetUpper3x3( VmathSoaTransform3 *result, const VmathSoaMatrix3 *tfrm )
+{ /* Overwrites col0..col2 of result with the columns of the 3x3 matrix tfrm; result->col3 is left unchanged. */
+    vmathSoaV3Copy( &result->col0, &tfrm->col0 );
+    vmathSoaV3Copy( &result->col1, &tfrm->col1 );
+    vmathSoaV3Copy( &result->col2, &tfrm->col2 );
+}
+
+static inline void vmathSoaT3GetUpper3x3( VmathSoaMatrix3 *result, const VmathSoaTransform3 *tfrm )
+{ /* Builds a 3x3 matrix from col0..col2 of tfrm; col3 is not read. */
+    vmathSoaM3MakeFromCols( result, &tfrm->col0, &tfrm->col1, &tfrm->col2 );
+}
+
+static inline void vmathSoaT3SetTranslation( VmathSoaTransform3 *result, const VmathSoaVector3 *translateVec )
+{ /* Overwrites result->col3 with translateVec; col0..col2 are left unchanged. */
+    vmathSoaV3Copy( &result->col3, translateVec );
+}
+
+static inline void vmathSoaT3GetTranslation( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm )
+{ /* Copies tfrm->col3 into result. */
+    vmathSoaV3Copy( result, &tfrm->col3 );
+}
+
+static inline void vmathSoaT3MakeRotationX( VmathSoaTransform3 *result, vec_float4 radians )
+{ /* Builds a rotation about the x axis from the per-slot angles in radians; col3 is all zero. */
+    vec_float4 sinA, cosA, zero = spu_splats(0.0f);
+    sincosf4( radians, &sinA, &cosA );
+    vmathSoaV3MakeXAxis( &result->col0 );
+    vmathSoaV3MakeFromElems( &result->col1, zero, cosA, sinA );
+    vmathSoaV3MakeFromElems( &result->col2, zero, negatef4( sinA ), cosA );
+    vmathSoaV3MakeFromScalar( &result->col3, zero );
+}
+
+static inline void vmathSoaT3MakeRotationY( VmathSoaTransform3 *result, vec_float4 radians )
+{ /* Builds a rotation about the y axis from the per-slot angles in radians; col3 is all zero. */
+    vec_float4 sinA, cosA, zero = spu_splats(0.0f);
+    sincosf4( radians, &sinA, &cosA );
+    vmathSoaV3MakeFromElems( &result->col0, cosA, zero, negatef4( sinA ) );
+    vmathSoaV3MakeYAxis( &result->col1 );
+    vmathSoaV3MakeFromElems( &result->col2, sinA, zero, cosA );
+    vmathSoaV3MakeFromScalar( &result->col3, zero );
+}
+
+static inline void vmathSoaT3MakeRotationZ( VmathSoaTransform3 *result, vec_float4 radians )
+{ /* Builds a rotation about the z axis from the per-slot angles in radians; col3 is all zero. */
+    vec_float4 sinA, cosA, zero = spu_splats(0.0f);
+    sincosf4( radians, &sinA, &cosA );
+    vmathSoaV3MakeFromElems( &result->col0, cosA, sinA, zero );
+    vmathSoaV3MakeFromElems( &result->col1, negatef4( sinA ), cosA, zero );
+    vmathSoaV3MakeZAxis( &result->col2 );
+    vmathSoaV3MakeFromScalar( &result->col3, zero );
+}
+
+static inline void vmathSoaT3MakeRotationZYX( VmathSoaTransform3 *result, const VmathSoaVector3 *radiansXYZ )
+{ /* Builds a rotation from the three angles in radiansXYZ (x, y, z components); the columns match the product Rz * Ry * Rx. col3 is zero. */
+    vec_float4 sinX, cosX, sinY, cosY, sinZ, cosZ, cosZsinY, sinZsinY;
+    sincosf4( radiansXYZ->x, &sinX, &cosX );
+    sincosf4( radiansXYZ->y, &sinY, &cosY );
+    sincosf4( radiansXYZ->z, &sinZ, &cosZ );
+    cosZsinY = spu_mul( cosZ, sinY ); /* shared by col1.x and col2.x */
+    sinZsinY = spu_mul( sinZ, sinY ); /* shared by col1.y and col2.y */
+    vmathSoaV3MakeFromElems( &result->col0, spu_mul( cosZ, cosY ), spu_mul( sinZ, cosY ), negatef4( sinY ) );
+    vmathSoaV3MakeFromElems( &result->col1, spu_sub( spu_mul( cosZsinY, sinX ), spu_mul( sinZ, cosX ) ), spu_add( spu_mul( sinZsinY, sinX ), spu_mul( cosZ, cosX ) ), spu_mul( cosY, sinX ) );
+    vmathSoaV3MakeFromElems( &result->col2, spu_add( spu_mul( cosZsinY, cosX ), spu_mul( sinZ, sinX ) ), spu_sub( spu_mul( sinZsinY, cosX ), spu_mul( cosZ, sinX ) ), spu_mul( cosY, cosX ) );
+    vmathSoaV3MakeFromScalar( &result->col3, spu_splats(0.0f) );
+}
+
+static inline void vmathSoaT3MakeRotationAxis( VmathSoaTransform3 *result, vec_float4 radians, const VmathSoaVector3 *unitVec )
+{ /* Builds the 3x3 rotation with vmathSoaM3MakeRotationAxis and pairs it with an all-zero translation. */
+    VmathSoaMatrix3 rotation;
+    VmathSoaVector3 noTranslation;
+    vmathSoaM3MakeRotationAxis( &rotation, radians, unitVec );
+    vmathSoaV3MakeFromScalar( &noTranslation, spu_splats(0.0f) );
+    vmathSoaT3MakeFromM3V3( result, &rotation, &noTranslation );
+}
+
+static inline void vmathSoaT3MakeRotationQ( VmathSoaTransform3 *result, const VmathSoaQuat *unitQuat )
+{ /* Builds the 3x3 rotation with vmathSoaM3MakeFromQ and pairs it with an all-zero translation. */
+    VmathSoaMatrix3 rotation;
+    VmathSoaVector3 noTranslation;
+    vmathSoaM3MakeFromQ( &rotation, unitQuat );
+    vmathSoaV3MakeFromScalar( &noTranslation, spu_splats(0.0f) );
+    vmathSoaT3MakeFromM3V3( result, &rotation, &noTranslation );
+}
+
+static inline void vmathSoaT3MakeScale( VmathSoaTransform3 *result, const VmathSoaVector3 *scaleVec )
+{ /* Builds a scale transform: scaleVec's x, y, z go on the diagonal of col0..col2, everything else (including col3) is zero. */
+    vmathSoaV3MakeFromElems( &result->col0, scaleVec->x, spu_splats(0.0f), spu_splats(0.0f) );
+    vmathSoaV3MakeFromElems( &result->col1, spu_splats(0.0f), scaleVec->y, spu_splats(0.0f) );
+    vmathSoaV3MakeFromElems( &result->col2, spu_splats(0.0f), spu_splats(0.0f), scaleVec->z );
+    vmathSoaV3MakeFromScalar( &result->col3, spu_splats(0.0f) );
+}
+
+static inline void vmathSoaT3AppendScale( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm, const VmathSoaVector3 *scaleVec )
+{ /* Scales each of col0..col2 of tfrm by the matching component of scaleVec (same as tfrm times a scale matrix); col3 is copied unchanged. */
+    vmathSoaV3ScalarMul( &result->col0, &tfrm->col0, vmathSoaV3GetX( scaleVec ) );
+    vmathSoaV3ScalarMul( &result->col1, &tfrm->col1, vmathSoaV3GetY( scaleVec ) );
+    vmathSoaV3ScalarMul( &result->col2, &tfrm->col2, vmathSoaV3GetZ( scaleVec ) );
+    vmathSoaV3Copy( &result->col3, &tfrm->col3 );
+}
+
+static inline void vmathSoaT3PrependScale( VmathSoaTransform3 *result, const VmathSoaVector3 *scaleVec, const VmathSoaTransform3 *tfrm )
+{ /* Multiplies every column of tfrm per-element by scaleVec (same as a scale matrix times tfrm); note col3 is scaled too. */
+    vmathSoaV3MulPerElem( &result->col0, &tfrm->col0, scaleVec );
+    vmathSoaV3MulPerElem( &result->col1, &tfrm->col1, scaleVec );
+    vmathSoaV3MulPerElem( &result->col2, &tfrm->col2, scaleVec );
+    vmathSoaV3MulPerElem( &result->col3, &tfrm->col3, scaleVec );
+}
+
+static inline void vmathSoaT3MakeTranslation( VmathSoaTransform3 *result, const VmathSoaVector3 *translateVec )
+{ /* Builds a pure translation: axis columns from the V3 Make*Axis helpers, col3 copied from translateVec. */
+    vmathSoaV3MakeXAxis( &result->col0 );
+    vmathSoaV3MakeYAxis( &result->col1 );
+    vmathSoaV3MakeZAxis( &result->col2 );
+    vmathSoaV3Copy( &result->col3, translateVec );
+}
+
+static inline void vmathSoaT3Select( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm0, const VmathSoaTransform3 *tfrm1, vec_uint4 select1 )
+{ /* Column-by-column vmathSoaV3Select with the same mask for all four columns. NOTE(review): presumably tfrm1 is taken where select1 is set - confirm against vmathSoaV3Select. */
+    vmathSoaV3Select( &result->col0, &tfrm0->col0, &tfrm1->col0, select1 );
+    vmathSoaV3Select( &result->col1, &tfrm0->col1, &tfrm1->col1, select1 );
+    vmathSoaV3Select( &result->col2, &tfrm0->col2, &tfrm1->col2, select1 );
+    vmathSoaV3Select( &result->col3, &tfrm0->col3, &tfrm1->col3, select1 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathSoaT3Print( const VmathSoaTransform3 *tfrm )
+{ /* Debug helper: unpacks tfrm into four VmathTransform3 values and prints each under a "slot N:" heading. */
+    VmathTransform3 slot0, slot1, slot2, slot3;
+    vmathSoaT3Get4Aos( tfrm, &slot0, &slot1, &slot2, &slot3 );
+    printf("slot 0:\n");
+    vmathT3Print( &slot0 );
+    printf("slot 1:\n");
+    vmathT3Print( &slot1 );
+    printf("slot 2:\n");
+    vmathT3Print( &slot2 );
+    printf("slot 3:\n");
+    vmathT3Print( &slot3 );
+}
+
+static inline void vmathSoaT3Prints( const VmathSoaTransform3 *tfrm, const char *name )
+{ /* Debug helper: prints name followed by a colon and newline, then the transform via vmathSoaT3Print. */
+    printf("%s:\n", name);
+    vmathSoaT3Print( tfrm );
+}
+
+#endif
+
+static inline void vmathSoaQMakeFromM3( VmathSoaQuat *result, const VmathSoaMatrix3 *tfrm )
+{ /* Converts a 3x3 matrix to a quaternion per slot, without branches: every case is computed with masks and spu_sel. NOTE(review): presumably tfrm must be a pure rotation - confirm with callers. */
+    vec_float4 trace, radicand, scale, xx, yx, zx, xy, yy, zy, xz, yz, zz, tmpx, tmpy, tmpz, tmpw, qx, qy, qz, qw;
+    vec_uint4 negTrace, ZgtX, ZgtY, YgtX;
+    vec_uint4 largestXorY, largestYorZ, largestZorX;
+
+    xx = tfrm->col0.x; /* naming: first letter is the component (row), second letter is the column */
+    yx = tfrm->col0.y;
+    zx = tfrm->col0.z;
+    xy = tfrm->col1.x;
+    yy = tfrm->col1.y;
+    zy = tfrm->col1.z;
+    xz = tfrm->col2.x;
+    yz = tfrm->col2.y;
+    zz = tfrm->col2.z;
+
+    trace = spu_add( spu_add( xx, yy ), zz ); /* sum of the diagonal */
+
+    negTrace = spu_cmpgt( spu_splats(0.0f), trace ); /* mask: 0 > trace */
+    ZgtX = spu_cmpgt( zz, xx );
+    ZgtY = spu_cmpgt( zz, yy );
+    YgtX = spu_cmpgt( yy, xx );
+    largestXorY = spu_and( negTrace, spu_nand( ZgtX, ZgtY ) ); /* negative trace and NOT (zz > xx and zz > yy) */
+    largestYorZ = spu_and( negTrace, spu_or( YgtX, ZgtX ) ); /* negative trace and (yy > xx or zz > xx) */
+    largestZorX = spu_and( negTrace, spu_orc( ZgtY, YgtX ) ); /* negative trace and (zz > yy or NOT yy > xx) */
+    
+    zz = spu_sel( zz, negatef4(zz), largestXorY ); /* spu_sel takes the second operand where the mask is set: flip signs per case */
+    xy = spu_sel( xy, negatef4(xy), largestXorY );
+    xx = spu_sel( xx, negatef4(xx), largestYorZ );
+    yz = spu_sel( yz, negatef4(yz), largestYorZ );
+    yy = spu_sel( yy, negatef4(yy), largestZorX );
+    zx = spu_sel( zx, negatef4(zx), largestZorX );
+
+    radicand = spu_add( spu_add( spu_add( xx, yy ), zz ), spu_splats(1.0f) ); /* uses the sign-adjusted diagonal from above */
+    scale = spu_mul( spu_splats(0.5f), rsqrtf4( radicand ) ); /* 0.5 * rsqrtf4(radicand); presumably a reciprocal square root */
+
+    tmpx = spu_mul( spu_sub( zy, yz ), scale );
+    tmpy = spu_mul( spu_sub( xz, zx ), scale );
+    tmpz = spu_mul( spu_sub( yx, xy ), scale );
+    tmpw = spu_mul( radicand, scale );
+    qx = tmpx; /* default ordering, kept where neither permutation mask below is set */
+    qy = tmpy;
+    qz = tmpz;
+    qw = tmpw;
+
+    qx = spu_sel( qx, tmpw, largestXorY ); /* where largestXorY: reverse the component order (x<->w, y<->z) */
+    qy = spu_sel( qy, tmpz, largestXorY );
+    qz = spu_sel( qz, tmpy, largestXorY );
+    qw = spu_sel( qw, tmpx, largestXorY );
+    tmpx = qx; /* save qx and qz before they are overwritten by the next swap */
+    tmpz = qz;
+    qx = spu_sel( qx, qy, largestYorZ ); /* where largestYorZ: swap x<->y and z<->w */
+    qy = spu_sel( qy, tmpx, largestYorZ );
+    qz = spu_sel( qz, qw, largestYorZ );
+    qw = spu_sel( qw, tmpz, largestYorZ );
+
+    result->x = qx;
+    result->y = qy;
+    result->z = qz;
+    result->w = qw;
+}
+
+static inline void vmathSoaV3Outer( VmathSoaMatrix3 *result, const VmathSoaVector3 *tfrm0, const VmathSoaVector3 *tfrm1 )
+{ /* Outer product of two vectors: column j of result is tfrm0 scaled by component j of tfrm1. */
+    vmathSoaV3ScalarMul( &result->col0, tfrm0, vmathSoaV3GetX( tfrm1 ) );
+    vmathSoaV3ScalarMul( &result->col1, tfrm0, vmathSoaV3GetY( tfrm1 ) );
+    vmathSoaV3ScalarMul( &result->col2, tfrm0, vmathSoaV3GetZ( tfrm1 ) );
+}
+
+static inline void vmathSoaV4Outer( VmathSoaMatrix4 *result, const VmathSoaVector4 *tfrm0, const VmathSoaVector4 *tfrm1 )
+{ /* Outer product of two 4-vectors: column j of result is tfrm0 scaled by component j of tfrm1. */
+    vmathSoaV4ScalarMul( &result->col0, tfrm0, vmathSoaV4GetX( tfrm1 ) );
+    vmathSoaV4ScalarMul( &result->col1, tfrm0, vmathSoaV4GetY( tfrm1 ) );
+    vmathSoaV4ScalarMul( &result->col2, tfrm0, vmathSoaV4GetZ( tfrm1 ) );
+    vmathSoaV4ScalarMul( &result->col3, tfrm0, vmathSoaV4GetW( tfrm1 ) );
+}
+
+static inline void vmathSoaV3RowMul( VmathSoaVector3 *result, const VmathSoaVector3 *vec, const VmathSoaMatrix3 *mat )
+{ /* Row-vector times matrix: component j of result is the dot product of vec with column j of mat. Computed before result is written. */
+    vec_float4 dotCol0, dotCol1, dotCol2;
+    dotCol0 = spu_add( spu_add( spu_mul( vec->x, mat->col0.x ), spu_mul( vec->y, mat->col0.y ) ), spu_mul( vec->z, mat->col0.z ) );
+    dotCol1 = spu_add( spu_add( spu_mul( vec->x, mat->col1.x ), spu_mul( vec->y, mat->col1.y ) ), spu_mul( vec->z, mat->col1.z ) );
+    dotCol2 = spu_add( spu_add( spu_mul( vec->x, mat->col2.x ), spu_mul( vec->y, mat->col2.y ) ), spu_mul( vec->z, mat->col2.z ) );
+    vmathSoaV3MakeFromElems( result, dotCol0, dotCol1, dotCol2 );
+}
+
+static inline void vmathSoaV3CrossMatrix( VmathSoaMatrix3 *result, const VmathSoaVector3 *vec )
+{ /* Builds the skew-symmetric matrix of vec: multiplying it by a vector w gives cross( vec, w ). */
+    vmathSoaV3MakeFromElems( &result->col0, spu_splats(0.0f), vec->z, negatef4( vec->y ) );
+    vmathSoaV3MakeFromElems( &result->col1, negatef4( vec->z ), spu_splats(0.0f), vec->x );
+    vmathSoaV3MakeFromElems( &result->col2, vec->y, negatef4( vec->x ), spu_splats(0.0f) );
+}
+
+static inline void vmathSoaV3CrossMatrixMul( VmathSoaMatrix3 *result, const VmathSoaVector3 *vec, const VmathSoaMatrix3 *mat )
+{ /* Column j of result is cross( vec, column j of mat ); all three are computed before result is written. */
+    VmathSoaVector3 crossCol0, crossCol1, crossCol2;
+    vmathSoaV3Cross( &crossCol0, vec, &mat->col0 );
+    vmathSoaV3Cross( &crossCol1, vec, &mat->col1 );
+    vmathSoaV3Cross( &crossCol2, vec, &mat->col2 );
+    vmathSoaM3MakeFromCols( result, &crossCol0, &crossCol1, &crossCol2 );
+}
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/spu/c/mat_soa_v.h b/Extras/vectormathlibrary/include/vectormath/spu/c/mat_soa_v.h
index c8401e3d0..0b16a9553 100644
--- a/Extras/vectormathlibrary/include/vectormath/spu/c/mat_soa_v.h
+++ b/Extras/vectormathlibrary/include/vectormath/spu/c/mat_soa_v.h
@@ -1,1063 +1,1063 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_MAT_SOA_V_C_H
-#define _VECTORMATH_MAT_SOA_V_C_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/*-----------------------------------------------------------------------------
- * Constants
- */
-#define _VECTORMATH_PI_OVER_2 1.570796327f
-
-/*-----------------------------------------------------------------------------
- * Definitions
- */
-static inline VmathSoaMatrix3 vmathSoaM3MakeFromScalar_V( vec_float4 scalar )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3MakeFromScalar(&result, scalar);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3MakeFromQ_V( VmathSoaQuat unitQuat )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3MakeFromQ(&result, &unitQuat);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3MakeFromCols_V( VmathSoaVector3 _col0, VmathSoaVector3 _col1, VmathSoaVector3 _col2 )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3MakeFromCols(&result, &_col0, &_col1, &_col2);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3MakeFromAos_V( VmathMatrix3 mat )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3MakeFromAos(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3MakeFrom4Aos_V( VmathMatrix3 mat0, VmathMatrix3 mat1, VmathMatrix3 mat2, VmathMatrix3 mat3 )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3MakeFrom4Aos(&result, &mat0, &mat1, &mat2, &mat3);
-    return result;
-}
-
-static inline void vmathSoaM3Get4Aos_V( VmathSoaMatrix3 mat, VmathMatrix3 *result0, VmathMatrix3 *result1, VmathMatrix3 *result2, VmathMatrix3 *result3 )
-{
-    vmathSoaM3Get4Aos(&mat, result0, result1, result2, result3);
-}
-
-static inline void vmathSoaM3SetCol0_V( VmathSoaMatrix3 *result, VmathSoaVector3 _col0 )
-{
-    vmathSoaM3SetCol0(result, &_col0);
-}
-
-static inline void vmathSoaM3SetCol1_V( VmathSoaMatrix3 *result, VmathSoaVector3 _col1 )
-{
-    vmathSoaM3SetCol1(result, &_col1);
-}
-
-static inline void vmathSoaM3SetCol2_V( VmathSoaMatrix3 *result, VmathSoaVector3 _col2 )
-{
-    vmathSoaM3SetCol2(result, &_col2);
-}
-
-static inline void vmathSoaM3SetCol_V( VmathSoaMatrix3 *result, int col, VmathSoaVector3 vec )
-{
-    vmathSoaM3SetCol(result, col, &vec);
-}
-
-static inline void vmathSoaM3SetRow_V( VmathSoaMatrix3 *result, int row, VmathSoaVector3 vec )
-{
-    vmathSoaM3SetRow(result, row, &vec);
-}
-
-static inline void vmathSoaM3SetElem_V( VmathSoaMatrix3 *result, int col, int row, vec_float4 val )
-{
-    vmathSoaM3SetElem(result, col, row, val);
-}
-
-static inline vec_float4 vmathSoaM3GetElem_V( VmathSoaMatrix3 mat, int col, int row )
-{
-    return vmathSoaM3GetElem(&mat, col, row);
-}
-
-static inline VmathSoaVector3 vmathSoaM3GetCol0_V( VmathSoaMatrix3 mat )
-{
-    VmathSoaVector3 result;
-    vmathSoaM3GetCol0(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaM3GetCol1_V( VmathSoaMatrix3 mat )
-{
-    VmathSoaVector3 result;
-    vmathSoaM3GetCol1(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaM3GetCol2_V( VmathSoaMatrix3 mat )
-{
-    VmathSoaVector3 result;
-    vmathSoaM3GetCol2(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaM3GetCol_V( VmathSoaMatrix3 mat, int col )
-{
-    VmathSoaVector3 result;
-    vmathSoaM3GetCol(&result, &mat, col);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaM3GetRow_V( VmathSoaMatrix3 mat, int row )
-{
-    VmathSoaVector3 result;
-    vmathSoaM3GetRow(&result, &mat, row);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3Transpose_V( VmathSoaMatrix3 mat )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3Transpose(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3Inverse_V( VmathSoaMatrix3 mat )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3Inverse(&result, &mat);
-    return result;
-}
-
-static inline vec_float4 vmathSoaM3Determinant_V( VmathSoaMatrix3 mat )
-{
-    return vmathSoaM3Determinant(&mat);
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3Add_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1 )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3Add(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3Sub_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1 )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3Sub(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3Neg_V( VmathSoaMatrix3 mat )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3Neg(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3AbsPerElem_V( VmathSoaMatrix3 mat )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3AbsPerElem(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3ScalarMul_V( VmathSoaMatrix3 mat, vec_float4 scalar )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3ScalarMul(&result, &mat, scalar);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaM3MulV3_V( VmathSoaMatrix3 mat, VmathSoaVector3 vec )
-{
-    VmathSoaVector3 result;
-    vmathSoaM3MulV3(&result, &mat, &vec);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3Mul_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1 )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3Mul(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3MulPerElem_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1 )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3MulPerElem(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3MakeIdentity_V( )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3MakeIdentity(&result);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3MakeRotationX_V( vec_float4 radians )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3MakeRotationX(&result, radians);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3MakeRotationY_V( vec_float4 radians )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3MakeRotationY(&result, radians);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3MakeRotationZ_V( vec_float4 radians )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3MakeRotationZ(&result, radians);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3MakeRotationZYX_V( VmathSoaVector3 radiansXYZ )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3MakeRotationZYX(&result, &radiansXYZ);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3MakeRotationAxis_V( vec_float4 radians, VmathSoaVector3 unitVec )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3MakeRotationAxis(&result, radians, &unitVec);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3MakeRotationQ_V( VmathSoaQuat unitQuat )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3MakeRotationQ(&result, &unitQuat);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3MakeScale_V( VmathSoaVector3 scaleVec )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3MakeScale(&result, &scaleVec);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3AppendScale_V( VmathSoaMatrix3 mat, VmathSoaVector3 scaleVec )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3AppendScale(&result, &mat, &scaleVec);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3PrependScale_V( VmathSoaVector3 scaleVec, VmathSoaMatrix3 mat )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3PrependScale(&result, &scaleVec, &mat);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaM3Select_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1, vec_uint4 select1 )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM3Select(&result, &mat0, &mat1, select1);
-    return result;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathSoaM3Print_V( VmathSoaMatrix3 mat )
-{
-    vmathSoaM3Print(&mat);
-}
-
-static inline void vmathSoaM3Prints_V( VmathSoaMatrix3 mat, const char *name )
-{
-    vmathSoaM3Prints(&mat, name);
-}
-
-#endif
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeFromScalar_V( vec_float4 scalar )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeFromScalar(&result, scalar);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeFromT3_V( VmathSoaTransform3 mat )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeFromT3(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeFromCols_V( VmathSoaVector4 _col0, VmathSoaVector4 _col1, VmathSoaVector4 _col2, VmathSoaVector4 _col3 )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeFromCols(&result, &_col0, &_col1, &_col2, &_col3);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeFromM3V3_V( VmathSoaMatrix3 mat, VmathSoaVector3 translateVec )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeFromM3V3(&result, &mat, &translateVec);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeFromQV3_V( VmathSoaQuat unitQuat, VmathSoaVector3 translateVec )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeFromQV3(&result, &unitQuat, &translateVec);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeFromAos_V( VmathMatrix4 mat )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeFromAos(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeFrom4Aos_V( VmathMatrix4 mat0, VmathMatrix4 mat1, VmathMatrix4 mat2, VmathMatrix4 mat3 )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeFrom4Aos(&result, &mat0, &mat1, &mat2, &mat3);
-    return result;
-}
-
-static inline void vmathSoaM4Get4Aos_V( VmathSoaMatrix4 mat, VmathMatrix4 *result0, VmathMatrix4 *result1, VmathMatrix4 *result2, VmathMatrix4 *result3 )
-{
-    vmathSoaM4Get4Aos(&mat, result0, result1, result2, result3);
-}
-
-static inline void vmathSoaM4SetCol0_V( VmathSoaMatrix4 *result, VmathSoaVector4 _col0 )
-{
-    vmathSoaM4SetCol0(result, &_col0);
-}
-
-static inline void vmathSoaM4SetCol1_V( VmathSoaMatrix4 *result, VmathSoaVector4 _col1 )
-{
-    vmathSoaM4SetCol1(result, &_col1);
-}
-
-static inline void vmathSoaM4SetCol2_V( VmathSoaMatrix4 *result, VmathSoaVector4 _col2 )
-{
-    vmathSoaM4SetCol2(result, &_col2);
-}
-
-static inline void vmathSoaM4SetCol3_V( VmathSoaMatrix4 *result, VmathSoaVector4 _col3 )
-{
-    vmathSoaM4SetCol3(result, &_col3);
-}
-
-static inline void vmathSoaM4SetCol_V( VmathSoaMatrix4 *result, int col, VmathSoaVector4 vec )
-{
-    vmathSoaM4SetCol(result, col, &vec);
-}
-
-static inline void vmathSoaM4SetRow_V( VmathSoaMatrix4 *result, int row, VmathSoaVector4 vec )
-{
-    vmathSoaM4SetRow(result, row, &vec);
-}
-
-static inline void vmathSoaM4SetElem_V( VmathSoaMatrix4 *result, int col, int row, vec_float4 val )
-{
-    vmathSoaM4SetElem(result, col, row, val);
-}
-
-static inline vec_float4 vmathSoaM4GetElem_V( VmathSoaMatrix4 mat, int col, int row )
-{
-    return vmathSoaM4GetElem(&mat, col, row);
-}
-
-static inline VmathSoaVector4 vmathSoaM4GetCol0_V( VmathSoaMatrix4 mat )
-{
-    VmathSoaVector4 result;
-    vmathSoaM4GetCol0(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaM4GetCol1_V( VmathSoaMatrix4 mat )
-{
-    VmathSoaVector4 result;
-    vmathSoaM4GetCol1(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaM4GetCol2_V( VmathSoaMatrix4 mat )
-{
-    VmathSoaVector4 result;
-    vmathSoaM4GetCol2(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaM4GetCol3_V( VmathSoaMatrix4 mat )
-{
-    VmathSoaVector4 result;
-    vmathSoaM4GetCol3(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaM4GetCol_V( VmathSoaMatrix4 mat, int col )
-{
-    VmathSoaVector4 result;
-    vmathSoaM4GetCol(&result, &mat, col);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaM4GetRow_V( VmathSoaMatrix4 mat, int row )
-{
-    VmathSoaVector4 result;
-    vmathSoaM4GetRow(&result, &mat, row);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4Transpose_V( VmathSoaMatrix4 mat )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4Transpose(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4Inverse_V( VmathSoaMatrix4 mat )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4Inverse(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4AffineInverse_V( VmathSoaMatrix4 mat )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4AffineInverse(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4OrthoInverse_V( VmathSoaMatrix4 mat )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4OrthoInverse(&result, &mat);
-    return result;
-}
-
-static inline vec_float4 vmathSoaM4Determinant_V( VmathSoaMatrix4 mat )
-{
-    return vmathSoaM4Determinant(&mat);
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4Add_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1 )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4Add(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4Sub_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1 )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4Sub(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4Neg_V( VmathSoaMatrix4 mat )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4Neg(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4AbsPerElem_V( VmathSoaMatrix4 mat )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4AbsPerElem(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4ScalarMul_V( VmathSoaMatrix4 mat, vec_float4 scalar )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4ScalarMul(&result, &mat, scalar);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaM4MulV4_V( VmathSoaMatrix4 mat, VmathSoaVector4 vec )
-{
-    VmathSoaVector4 result;
-    vmathSoaM4MulV4(&result, &mat, &vec);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaM4MulV3_V( VmathSoaMatrix4 mat, VmathSoaVector3 vec )
-{
-    VmathSoaVector4 result;
-    vmathSoaM4MulV3(&result, &mat, &vec);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaM4MulP3_V( VmathSoaMatrix4 mat, VmathSoaPoint3 pnt )
-{
-    VmathSoaVector4 result;
-    vmathSoaM4MulP3(&result, &mat, &pnt);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4Mul_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1 )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4Mul(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MulT3_V( VmathSoaMatrix4 mat, VmathSoaTransform3 tfrm1 )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MulT3(&result, &mat, &tfrm1);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MulPerElem_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1 )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MulPerElem(&result, &mat0, &mat1);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeIdentity_V( )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeIdentity(&result);
-    return result;
-}
-
-static inline void vmathSoaM4SetUpper3x3_V( VmathSoaMatrix4 *result, VmathSoaMatrix3 mat3 )
-{
-    vmathSoaM4SetUpper3x3(result, &mat3);
-}
-
-static inline VmathSoaMatrix3 vmathSoaM4GetUpper3x3_V( VmathSoaMatrix4 mat )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaM4GetUpper3x3(&result, &mat);
-    return result;
-}
-
-static inline void vmathSoaM4SetTranslation_V( VmathSoaMatrix4 *result, VmathSoaVector3 translateVec )
-{
-    vmathSoaM4SetTranslation(result, &translateVec);
-}
-
-static inline VmathSoaVector3 vmathSoaM4GetTranslation_V( VmathSoaMatrix4 mat )
-{
-    VmathSoaVector3 result;
-    vmathSoaM4GetTranslation(&result, &mat);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeRotationX_V( vec_float4 radians )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeRotationX(&result, radians);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeRotationY_V( vec_float4 radians )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeRotationY(&result, radians);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeRotationZ_V( vec_float4 radians )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeRotationZ(&result, radians);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeRotationZYX_V( VmathSoaVector3 radiansXYZ )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeRotationZYX(&result, &radiansXYZ);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeRotationAxis_V( vec_float4 radians, VmathSoaVector3 unitVec )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeRotationAxis(&result, radians, &unitVec);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeRotationQ_V( VmathSoaQuat unitQuat )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeRotationQ(&result, &unitQuat);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeScale_V( VmathSoaVector3 scaleVec )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeScale(&result, &scaleVec);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4AppendScale_V( VmathSoaMatrix4 mat, VmathSoaVector3 scaleVec )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4AppendScale(&result, &mat, &scaleVec);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4PrependScale_V( VmathSoaVector3 scaleVec, VmathSoaMatrix4 mat )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4PrependScale(&result, &scaleVec, &mat);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeTranslation_V( VmathSoaVector3 translateVec )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeTranslation(&result, &translateVec);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeLookAt_V( VmathSoaPoint3 eyePos, VmathSoaPoint3 lookAtPos, VmathSoaVector3 upVec )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeLookAt(&result, &eyePos, &lookAtPos, &upVec);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakePerspective_V( vec_float4 fovyRadians, vec_float4 aspect, vec_float4 zNear, vec_float4 zFar )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakePerspective(&result, fovyRadians, aspect, zNear, zFar);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeFrustum_V( vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeFrustum(&result, left, right, bottom, top, zNear, zFar);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4MakeOrthographic_V( vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4MakeOrthographic(&result, left, right, bottom, top, zNear, zFar);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaM4Select_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1, vec_uint4 select1 )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaM4Select(&result, &mat0, &mat1, select1);
-    return result;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathSoaM4Print_V( VmathSoaMatrix4 mat )
-{
-    vmathSoaM4Print(&mat);
-}
-
-static inline void vmathSoaM4Prints_V( VmathSoaMatrix4 mat, const char *name )
-{
-    vmathSoaM4Prints(&mat, name);
-}
-
-#endif
-
-static inline VmathSoaTransform3 vmathSoaT3MakeFromScalar_V( vec_float4 scalar )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3MakeFromScalar(&result, scalar);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3MakeFromCols_V( VmathSoaVector3 _col0, VmathSoaVector3 _col1, VmathSoaVector3 _col2, VmathSoaVector3 _col3 )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3MakeFromCols(&result, &_col0, &_col1, &_col2, &_col3);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3MakeFromM3V3_V( VmathSoaMatrix3 tfrm, VmathSoaVector3 translateVec )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3MakeFromM3V3(&result, &tfrm, &translateVec);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3MakeFromQV3_V( VmathSoaQuat unitQuat, VmathSoaVector3 translateVec )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3MakeFromQV3(&result, &unitQuat, &translateVec);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3MakeFromAos_V( VmathTransform3 tfrm )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3MakeFromAos(&result, &tfrm);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3MakeFrom4Aos_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1, VmathTransform3 tfrm2, VmathTransform3 tfrm3 )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3MakeFrom4Aos(&result, &tfrm0, &tfrm1, &tfrm2, &tfrm3);
-    return result;
-}
-
-static inline void vmathSoaT3Get4Aos_V( VmathSoaTransform3 tfrm, VmathTransform3 *result0, VmathTransform3 *result1, VmathTransform3 *result2, VmathTransform3 *result3 )
-{
-    vmathSoaT3Get4Aos(&tfrm, result0, result1, result2, result3);
-}
-
-static inline void vmathSoaT3SetCol0_V( VmathSoaTransform3 *result, VmathSoaVector3 _col0 )
-{
-    vmathSoaT3SetCol0(result, &_col0);
-}
-
-static inline void vmathSoaT3SetCol1_V( VmathSoaTransform3 *result, VmathSoaVector3 _col1 )
-{
-    vmathSoaT3SetCol1(result, &_col1);
-}
-
-static inline void vmathSoaT3SetCol2_V( VmathSoaTransform3 *result, VmathSoaVector3 _col2 )
-{
-    vmathSoaT3SetCol2(result, &_col2);
-}
-
-static inline void vmathSoaT3SetCol3_V( VmathSoaTransform3 *result, VmathSoaVector3 _col3 )
-{
-    vmathSoaT3SetCol3(result, &_col3);
-}
-
-static inline void vmathSoaT3SetCol_V( VmathSoaTransform3 *result, int col, VmathSoaVector3 vec )
-{
-    vmathSoaT3SetCol(result, col, &vec);
-}
-
-static inline void vmathSoaT3SetRow_V( VmathSoaTransform3 *result, int row, VmathSoaVector4 vec )
-{
-    vmathSoaT3SetRow(result, row, &vec);
-}
-
-static inline void vmathSoaT3SetElem_V( VmathSoaTransform3 *result, int col, int row, vec_float4 val )
-{
-    vmathSoaT3SetElem(result, col, row, val);
-}
-
-static inline vec_float4 vmathSoaT3GetElem_V( VmathSoaTransform3 tfrm, int col, int row )
-{
-    return vmathSoaT3GetElem(&tfrm, col, row);
-}
-
-static inline VmathSoaVector3 vmathSoaT3GetCol0_V( VmathSoaTransform3 tfrm )
-{
-    VmathSoaVector3 result;
-    vmathSoaT3GetCol0(&result, &tfrm);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaT3GetCol1_V( VmathSoaTransform3 tfrm )
-{
-    VmathSoaVector3 result;
-    vmathSoaT3GetCol1(&result, &tfrm);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaT3GetCol2_V( VmathSoaTransform3 tfrm )
-{
-    VmathSoaVector3 result;
-    vmathSoaT3GetCol2(&result, &tfrm);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaT3GetCol3_V( VmathSoaTransform3 tfrm )
-{
-    VmathSoaVector3 result;
-    vmathSoaT3GetCol3(&result, &tfrm);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaT3GetCol_V( VmathSoaTransform3 tfrm, int col )
-{
-    VmathSoaVector3 result;
-    vmathSoaT3GetCol(&result, &tfrm, col);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaT3GetRow_V( VmathSoaTransform3 tfrm, int row )
-{
-    VmathSoaVector4 result;
-    vmathSoaT3GetRow(&result, &tfrm, row);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3Inverse_V( VmathSoaTransform3 tfrm )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3Inverse(&result, &tfrm);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3OrthoInverse_V( VmathSoaTransform3 tfrm )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3OrthoInverse(&result, &tfrm);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3AbsPerElem_V( VmathSoaTransform3 tfrm )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3AbsPerElem(&result, &tfrm);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaT3MulV3_V( VmathSoaTransform3 tfrm, VmathSoaVector3 vec )
-{
-    VmathSoaVector3 result;
-    vmathSoaT3MulV3(&result, &tfrm, &vec);
-    return result;
-}
-
-static inline VmathSoaPoint3 vmathSoaT3MulP3_V( VmathSoaTransform3 tfrm, VmathSoaPoint3 pnt )
-{
-    VmathSoaPoint3 result;
-    vmathSoaT3MulP3(&result, &tfrm, &pnt);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3Mul_V( VmathSoaTransform3 tfrm0, VmathSoaTransform3 tfrm1 )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3Mul(&result, &tfrm0, &tfrm1);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3MulPerElem_V( VmathSoaTransform3 tfrm0, VmathSoaTransform3 tfrm1 )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3MulPerElem(&result, &tfrm0, &tfrm1);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3MakeIdentity_V( )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3MakeIdentity(&result);
-    return result;
-}
-
-static inline void vmathSoaT3SetUpper3x3_V( VmathSoaTransform3 *result, VmathSoaMatrix3 tfrm )
-{
-    vmathSoaT3SetUpper3x3(result, &tfrm);
-}
-
-static inline VmathSoaMatrix3 vmathSoaT3GetUpper3x3_V( VmathSoaTransform3 tfrm )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaT3GetUpper3x3(&result, &tfrm);
-    return result;
-}
-
-static inline void vmathSoaT3SetTranslation_V( VmathSoaTransform3 *result, VmathSoaVector3 translateVec )
-{
-    vmathSoaT3SetTranslation(result, &translateVec);
-}
-
-static inline VmathSoaVector3 vmathSoaT3GetTranslation_V( VmathSoaTransform3 tfrm )
-{
-    VmathSoaVector3 result;
-    vmathSoaT3GetTranslation(&result, &tfrm);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3MakeRotationX_V( vec_float4 radians )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3MakeRotationX(&result, radians);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3MakeRotationY_V( vec_float4 radians )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3MakeRotationY(&result, radians);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3MakeRotationZ_V( vec_float4 radians )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3MakeRotationZ(&result, radians);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3MakeRotationZYX_V( VmathSoaVector3 radiansXYZ )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3MakeRotationZYX(&result, &radiansXYZ);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3MakeRotationAxis_V( vec_float4 radians, VmathSoaVector3 unitVec )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3MakeRotationAxis(&result, radians, &unitVec);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3MakeRotationQ_V( VmathSoaQuat unitQuat )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3MakeRotationQ(&result, &unitQuat);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3MakeScale_V( VmathSoaVector3 scaleVec )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3MakeScale(&result, &scaleVec);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3AppendScale_V( VmathSoaTransform3 tfrm, VmathSoaVector3 scaleVec )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3AppendScale(&result, &tfrm, &scaleVec);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3PrependScale_V( VmathSoaVector3 scaleVec, VmathSoaTransform3 tfrm )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3PrependScale(&result, &scaleVec, &tfrm);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3MakeTranslation_V( VmathSoaVector3 translateVec )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3MakeTranslation(&result, &translateVec);
-    return result;
-}
-
-static inline VmathSoaTransform3 vmathSoaT3Select_V( VmathSoaTransform3 tfrm0, VmathSoaTransform3 tfrm1, vec_uint4 select1 )
-{
-    VmathSoaTransform3 result;
-    vmathSoaT3Select(&result, &tfrm0, &tfrm1, select1);
-    return result;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathSoaT3Print_V( VmathSoaTransform3 tfrm )
-{
-    vmathSoaT3Print(&tfrm);
-}
-
-static inline void vmathSoaT3Prints_V( VmathSoaTransform3 tfrm, const char *name )
-{
-    vmathSoaT3Prints(&tfrm, name);
-}
-
-#endif
-
-static inline VmathSoaQuat vmathSoaQMakeFromM3_V( VmathSoaMatrix3 tfrm )
-{
-    VmathSoaQuat result;
-    vmathSoaQMakeFromM3(&result, &tfrm);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaV3Outer_V( VmathSoaVector3 tfrm0, VmathSoaVector3 tfrm1 )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaV3Outer(&result, &tfrm0, &tfrm1);
-    return result;
-}
-
-static inline VmathSoaMatrix4 vmathSoaV4Outer_V( VmathSoaVector4 tfrm0, VmathSoaVector4 tfrm1 )
-{
-    VmathSoaMatrix4 result;
-    vmathSoaV4Outer(&result, &tfrm0, &tfrm1);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3RowMul_V( VmathSoaVector3 vec, VmathSoaMatrix3 mat )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3RowMul(&result, &vec, &mat);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaV3CrossMatrix_V( VmathSoaVector3 vec )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaV3CrossMatrix(&result, &vec);
-    return result;
-}
-
-static inline VmathSoaMatrix3 vmathSoaV3CrossMatrixMul_V( VmathSoaVector3 vec, VmathSoaMatrix3 mat )
-{
-    VmathSoaMatrix3 result;
-    vmathSoaV3CrossMatrixMul(&result, &vec, &mat);
-    return result;
-}
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_MAT_SOA_V_C_H
+#define _VECTORMATH_MAT_SOA_V_C_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*-----------------------------------------------------------------------------
+ * Constants
+ */
+#define _VECTORMATH_PI_OVER_2 1.570796327f
+
+/*-----------------------------------------------------------------------------
+ * Definitions
+ */
+static inline VmathSoaMatrix3 vmathSoaM3MakeFromScalar_V( vec_float4 scalar )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3MakeFromScalar(&result, scalar);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3MakeFromQ_V( VmathSoaQuat unitQuat )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3MakeFromQ(&result, &unitQuat);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3MakeFromCols_V( VmathSoaVector3 _col0, VmathSoaVector3 _col1, VmathSoaVector3 _col2 )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3MakeFromCols(&result, &_col0, &_col1, &_col2);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3MakeFromAos_V( VmathMatrix3 mat )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3MakeFromAos(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3MakeFrom4Aos_V( VmathMatrix3 mat0, VmathMatrix3 mat1, VmathMatrix3 mat2, VmathMatrix3 mat3 )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3MakeFrom4Aos(&result, &mat0, &mat1, &mat2, &mat3);
+    return result;
+}
+
+static inline void vmathSoaM3Get4Aos_V( VmathSoaMatrix3 mat, VmathMatrix3 *result0, VmathMatrix3 *result1, VmathMatrix3 *result2, VmathMatrix3 *result3 )
+{
+    vmathSoaM3Get4Aos(&mat, result0, result1, result2, result3);
+}
+
+static inline void vmathSoaM3SetCol0_V( VmathSoaMatrix3 *result, VmathSoaVector3 _col0 )
+{
+    vmathSoaM3SetCol0(result, &_col0);
+}
+
+static inline void vmathSoaM3SetCol1_V( VmathSoaMatrix3 *result, VmathSoaVector3 _col1 )
+{
+    vmathSoaM3SetCol1(result, &_col1);
+}
+
+static inline void vmathSoaM3SetCol2_V( VmathSoaMatrix3 *result, VmathSoaVector3 _col2 )
+{
+    vmathSoaM3SetCol2(result, &_col2);
+}
+
+static inline void vmathSoaM3SetCol_V( VmathSoaMatrix3 *result, int col, VmathSoaVector3 vec )
+{
+    vmathSoaM3SetCol(result, col, &vec);
+}
+
+static inline void vmathSoaM3SetRow_V( VmathSoaMatrix3 *result, int row, VmathSoaVector3 vec )
+{
+    vmathSoaM3SetRow(result, row, &vec);
+}
+
+static inline void vmathSoaM3SetElem_V( VmathSoaMatrix3 *result, int col, int row, vec_float4 val )
+{
+    vmathSoaM3SetElem(result, col, row, val);
+}
+
+static inline vec_float4 vmathSoaM3GetElem_V( VmathSoaMatrix3 mat, int col, int row )
+{
+    return vmathSoaM3GetElem(&mat, col, row);
+}
+
+static inline VmathSoaVector3 vmathSoaM3GetCol0_V( VmathSoaMatrix3 mat )
+{
+    VmathSoaVector3 result;
+    vmathSoaM3GetCol0(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaM3GetCol1_V( VmathSoaMatrix3 mat )
+{
+    VmathSoaVector3 result;
+    vmathSoaM3GetCol1(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaM3GetCol2_V( VmathSoaMatrix3 mat )
+{
+    VmathSoaVector3 result;
+    vmathSoaM3GetCol2(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaM3GetCol_V( VmathSoaMatrix3 mat, int col )
+{
+    VmathSoaVector3 result;
+    vmathSoaM3GetCol(&result, &mat, col);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaM3GetRow_V( VmathSoaMatrix3 mat, int row )
+{
+    VmathSoaVector3 result;
+    vmathSoaM3GetRow(&result, &mat, row);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3Transpose_V( VmathSoaMatrix3 mat )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3Transpose(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3Inverse_V( VmathSoaMatrix3 mat )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3Inverse(&result, &mat);
+    return result;
+}
+
+static inline vec_float4 vmathSoaM3Determinant_V( VmathSoaMatrix3 mat )
+{
+    return vmathSoaM3Determinant(&mat);
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3Add_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1 )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3Add(&result, &mat0, &mat1);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3Sub_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1 )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3Sub(&result, &mat0, &mat1);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3Neg_V( VmathSoaMatrix3 mat )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3Neg(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3AbsPerElem_V( VmathSoaMatrix3 mat )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3AbsPerElem(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3ScalarMul_V( VmathSoaMatrix3 mat, vec_float4 scalar )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3ScalarMul(&result, &mat, scalar);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaM3MulV3_V( VmathSoaMatrix3 mat, VmathSoaVector3 vec )
+{
+    VmathSoaVector3 result;
+    vmathSoaM3MulV3(&result, &mat, &vec);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3Mul_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1 )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3Mul(&result, &mat0, &mat1);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3MulPerElem_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1 )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3MulPerElem(&result, &mat0, &mat1);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3MakeIdentity_V( )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3MakeIdentity(&result);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3MakeRotationX_V( vec_float4 radians )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3MakeRotationX(&result, radians);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3MakeRotationY_V( vec_float4 radians )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3MakeRotationY(&result, radians);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3MakeRotationZ_V( vec_float4 radians )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3MakeRotationZ(&result, radians);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3MakeRotationZYX_V( VmathSoaVector3 radiansXYZ )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3MakeRotationZYX(&result, &radiansXYZ);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3MakeRotationAxis_V( vec_float4 radians, VmathSoaVector3 unitVec )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3MakeRotationAxis(&result, radians, &unitVec);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3MakeRotationQ_V( VmathSoaQuat unitQuat )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3MakeRotationQ(&result, &unitQuat);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3MakeScale_V( VmathSoaVector3 scaleVec )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3MakeScale(&result, &scaleVec);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3AppendScale_V( VmathSoaMatrix3 mat, VmathSoaVector3 scaleVec )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3AppendScale(&result, &mat, &scaleVec);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3PrependScale_V( VmathSoaVector3 scaleVec, VmathSoaMatrix3 mat )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3PrependScale(&result, &scaleVec, &mat);
+    return result;
+}
+
+static inline VmathSoaMatrix3 vmathSoaM3Select_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1, vec_uint4 select1 )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM3Select(&result, &mat0, &mat1, select1);
+    return result;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathSoaM3Print_V( VmathSoaMatrix3 mat )
+{
+    vmathSoaM3Print(&mat);
+}
+
+static inline void vmathSoaM3Prints_V( VmathSoaMatrix3 mat, const char *name )
+{
+    vmathSoaM3Prints(&mat, name);
+}
+
+#endif
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeFromScalar_V( vec_float4 scalar )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MakeFromScalar(&result, scalar);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeFromT3_V( VmathSoaTransform3 mat )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MakeFromT3(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeFromCols_V( VmathSoaVector4 _col0, VmathSoaVector4 _col1, VmathSoaVector4 _col2, VmathSoaVector4 _col3 )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MakeFromCols(&result, &_col0, &_col1, &_col2, &_col3);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeFromM3V3_V( VmathSoaMatrix3 mat, VmathSoaVector3 translateVec )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MakeFromM3V3(&result, &mat, &translateVec);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeFromQV3_V( VmathSoaQuat unitQuat, VmathSoaVector3 translateVec )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MakeFromQV3(&result, &unitQuat, &translateVec);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeFromAos_V( VmathMatrix4 mat )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MakeFromAos(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeFrom4Aos_V( VmathMatrix4 mat0, VmathMatrix4 mat1, VmathMatrix4 mat2, VmathMatrix4 mat3 )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MakeFrom4Aos(&result, &mat0, &mat1, &mat2, &mat3);
+    return result;
+}
+
+static inline void vmathSoaM4Get4Aos_V( VmathSoaMatrix4 mat, VmathMatrix4 *result0, VmathMatrix4 *result1, VmathMatrix4 *result2, VmathMatrix4 *result3 )
+{
+    vmathSoaM4Get4Aos(&mat, result0, result1, result2, result3);
+}
+
+static inline void vmathSoaM4SetCol0_V( VmathSoaMatrix4 *result, VmathSoaVector4 _col0 )
+{
+    vmathSoaM4SetCol0(result, &_col0);
+}
+
+static inline void vmathSoaM4SetCol1_V( VmathSoaMatrix4 *result, VmathSoaVector4 _col1 )
+{
+    vmathSoaM4SetCol1(result, &_col1);
+}
+
+static inline void vmathSoaM4SetCol2_V( VmathSoaMatrix4 *result, VmathSoaVector4 _col2 )
+{
+    vmathSoaM4SetCol2(result, &_col2);
+}
+
+static inline void vmathSoaM4SetCol3_V( VmathSoaMatrix4 *result, VmathSoaVector4 _col3 )
+{
+    vmathSoaM4SetCol3(result, &_col3);
+}
+
+static inline void vmathSoaM4SetCol_V( VmathSoaMatrix4 *result, int col, VmathSoaVector4 vec )
+{
+    vmathSoaM4SetCol(result, col, &vec);
+}
+
+static inline void vmathSoaM4SetRow_V( VmathSoaMatrix4 *result, int row, VmathSoaVector4 vec )
+{
+    vmathSoaM4SetRow(result, row, &vec);
+}
+
+static inline void vmathSoaM4SetElem_V( VmathSoaMatrix4 *result, int col, int row, vec_float4 val )
+{
+    vmathSoaM4SetElem(result, col, row, val);
+}
+
+static inline vec_float4 vmathSoaM4GetElem_V( VmathSoaMatrix4 mat, int col, int row )
+{
+    return vmathSoaM4GetElem(&mat, col, row);
+}
+
+static inline VmathSoaVector4 vmathSoaM4GetCol0_V( VmathSoaMatrix4 mat )
+{
+    VmathSoaVector4 result;
+    vmathSoaM4GetCol0(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaM4GetCol1_V( VmathSoaMatrix4 mat )
+{
+    VmathSoaVector4 result;
+    vmathSoaM4GetCol1(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaM4GetCol2_V( VmathSoaMatrix4 mat )
+{
+    VmathSoaVector4 result;
+    vmathSoaM4GetCol2(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaM4GetCol3_V( VmathSoaMatrix4 mat )
+{
+    VmathSoaVector4 result;
+    vmathSoaM4GetCol3(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaM4GetCol_V( VmathSoaMatrix4 mat, int col )
+{
+    VmathSoaVector4 result;
+    vmathSoaM4GetCol(&result, &mat, col);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaM4GetRow_V( VmathSoaMatrix4 mat, int row )
+{
+    VmathSoaVector4 result;
+    vmathSoaM4GetRow(&result, &mat, row);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4Transpose_V( VmathSoaMatrix4 mat )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4Transpose(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4Inverse_V( VmathSoaMatrix4 mat )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4Inverse(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4AffineInverse_V( VmathSoaMatrix4 mat )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4AffineInverse(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4OrthoInverse_V( VmathSoaMatrix4 mat )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4OrthoInverse(&result, &mat);
+    return result;
+}
+
+static inline vec_float4 vmathSoaM4Determinant_V( VmathSoaMatrix4 mat )
+{
+    return vmathSoaM4Determinant(&mat);
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4Add_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1 )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4Add(&result, &mat0, &mat1);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4Sub_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1 )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4Sub(&result, &mat0, &mat1);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4Neg_V( VmathSoaMatrix4 mat )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4Neg(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4AbsPerElem_V( VmathSoaMatrix4 mat )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4AbsPerElem(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4ScalarMul_V( VmathSoaMatrix4 mat, vec_float4 scalar )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4ScalarMul(&result, &mat, scalar);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaM4MulV4_V( VmathSoaMatrix4 mat, VmathSoaVector4 vec )
+{
+    VmathSoaVector4 result;
+    vmathSoaM4MulV4(&result, &mat, &vec);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaM4MulV3_V( VmathSoaMatrix4 mat, VmathSoaVector3 vec )
+{
+    VmathSoaVector4 result;
+    vmathSoaM4MulV3(&result, &mat, &vec);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaM4MulP3_V( VmathSoaMatrix4 mat, VmathSoaPoint3 pnt )
+{
+    VmathSoaVector4 result;
+    vmathSoaM4MulP3(&result, &mat, &pnt);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4Mul_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1 )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4Mul(&result, &mat0, &mat1);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MulT3_V( VmathSoaMatrix4 mat, VmathSoaTransform3 tfrm1 )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MulT3(&result, &mat, &tfrm1);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MulPerElem_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1 )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MulPerElem(&result, &mat0, &mat1);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeIdentity_V( )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MakeIdentity(&result);
+    return result;
+}
+
+static inline void vmathSoaM4SetUpper3x3_V( VmathSoaMatrix4 *result, VmathSoaMatrix3 mat3 )
+{
+    vmathSoaM4SetUpper3x3(result, &mat3);
+}
+
+static inline VmathSoaMatrix3 vmathSoaM4GetUpper3x3_V( VmathSoaMatrix4 mat )
+{
+    VmathSoaMatrix3 result;
+    vmathSoaM4GetUpper3x3(&result, &mat);
+    return result;
+}
+
+static inline void vmathSoaM4SetTranslation_V( VmathSoaMatrix4 *result, VmathSoaVector3 translateVec )
+{
+    vmathSoaM4SetTranslation(result, &translateVec);
+}
+
+static inline VmathSoaVector3 vmathSoaM4GetTranslation_V( VmathSoaMatrix4 mat )
+{
+    VmathSoaVector3 result;
+    vmathSoaM4GetTranslation(&result, &mat);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeRotationX_V( vec_float4 radians )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MakeRotationX(&result, radians);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeRotationY_V( vec_float4 radians )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MakeRotationY(&result, radians);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeRotationZ_V( vec_float4 radians )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MakeRotationZ(&result, radians);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeRotationZYX_V( VmathSoaVector3 radiansXYZ )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MakeRotationZYX(&result, &radiansXYZ);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeRotationAxis_V( vec_float4 radians, VmathSoaVector3 unitVec )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MakeRotationAxis(&result, radians, &unitVec);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeRotationQ_V( VmathSoaQuat unitQuat )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MakeRotationQ(&result, &unitQuat);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeScale_V( VmathSoaVector3 scaleVec )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MakeScale(&result, &scaleVec);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4AppendScale_V( VmathSoaMatrix4 mat, VmathSoaVector3 scaleVec )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4AppendScale(&result, &mat, &scaleVec);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4PrependScale_V( VmathSoaVector3 scaleVec, VmathSoaMatrix4 mat )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4PrependScale(&result, &scaleVec, &mat);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeTranslation_V( VmathSoaVector3 translateVec )
+{
+    VmathSoaMatrix4 result;
+    vmathSoaM4MakeTranslation(&result, &translateVec);
+    return result;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeLookAt_V( VmathSoaPoint3 eyePos, VmathSoaPoint3 lookAtPos, VmathSoaVector3 upVec )
+{   /* By-value form of vmathSoaM4MakeLookAt: the three argument copies are passed by address. */
+    VmathSoaMatrix4 view;
+    vmathSoaM4MakeLookAt( &view, &eyePos, &lookAtPos, &upVec );
+    return view;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakePerspective_V( vec_float4 fovyRadians, vec_float4 aspect, vec_float4 zNear, vec_float4 zFar )
+{   /* By-value form of vmathSoaM4MakePerspective: all four vec_float4 arguments are forwarded unchanged. */
+    VmathSoaMatrix4 proj;
+    vmathSoaM4MakePerspective( &proj, fovyRadians, aspect, zNear, zFar );
+    return proj;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeFrustum_V( vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar )
+{   /* By-value form of vmathSoaM4MakeFrustum: all six bounds are forwarded unchanged. */
+    VmathSoaMatrix4 proj;
+    vmathSoaM4MakeFrustum( &proj, left, right, bottom, top, zNear, zFar );
+    return proj;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4MakeOrthographic_V( vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar )
+{   /* By-value form of vmathSoaM4MakeOrthographic: all six bounds are forwarded unchanged. */
+    VmathSoaMatrix4 proj;
+    vmathSoaM4MakeOrthographic( &proj, left, right, bottom, top, zNear, zFar );
+    return proj;
+}
+
+static inline VmathSoaMatrix4 vmathSoaM4Select_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1, vec_uint4 select1 )
+{   /* By-value form of vmathSoaM4Select: select1 is handed to vmathSoaM4Select untouched. */
+    VmathSoaMatrix4 chosen;
+    vmathSoaM4Select( &chosen, &mat0, &mat1, select1 );
+    return chosen;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathSoaM4Print_V( VmathSoaMatrix4 mat )
+{   /* By-value form of vmathSoaM4Print (built only when _VECTORMATH_DEBUG is defined). */
+    vmathSoaM4Print(&mat);
+}
+
+static inline void vmathSoaM4Prints_V( VmathSoaMatrix4 mat, const char *name )
+{   /* By-value form of vmathSoaM4Prints; name is forwarded unchanged (debug builds only). */
+    vmathSoaM4Prints(&mat, name);
+}
+
+#endif
+
+static inline VmathSoaTransform3 vmathSoaT3MakeFromScalar_V( vec_float4 scalar )
+{   /* By-value form of vmathSoaT3MakeFromScalar: fills a local transform and returns it. */
+    VmathSoaTransform3 built;
+    vmathSoaT3MakeFromScalar( &built, scalar );
+    return built;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3MakeFromCols_V( VmathSoaVector3 _col0, VmathSoaVector3 _col1, VmathSoaVector3 _col2, VmathSoaVector3 _col3 )
+{   /* By-value form of vmathSoaT3MakeFromCols: the four column copies are passed by address, in order. */
+    VmathSoaTransform3 built;
+    vmathSoaT3MakeFromCols( &built, &_col0, &_col1, &_col2, &_col3 );
+    return built;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3MakeFromM3V3_V( VmathSoaMatrix3 tfrm, VmathSoaVector3 translateVec )
+{   /* By-value form of vmathSoaT3MakeFromM3V3: matrix and translation copies are passed by address. */
+    VmathSoaTransform3 built;
+    vmathSoaT3MakeFromM3V3( &built, &tfrm, &translateVec );
+    return built;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3MakeFromQV3_V( VmathSoaQuat unitQuat, VmathSoaVector3 translateVec )
+{   /* By-value form of vmathSoaT3MakeFromQV3: quaternion and translation copies are passed by address. */
+    VmathSoaTransform3 built;
+    vmathSoaT3MakeFromQV3( &built, &unitQuat, &translateVec );
+    return built;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3MakeFromAos_V( VmathTransform3 tfrm )
+{   /* By-value form of vmathSoaT3MakeFromAos: takes one AoS transform, returns an SoA transform. */
+    VmathSoaTransform3 soaTfrm;
+    vmathSoaT3MakeFromAos( &soaTfrm, &tfrm );
+    return soaTfrm;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3MakeFrom4Aos_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1, VmathTransform3 tfrm2, VmathTransform3 tfrm3 )
+{   /* By-value form of vmathSoaT3MakeFrom4Aos: the four AoS transforms are passed by address, in order. */
+    VmathSoaTransform3 soaTfrm;
+    vmathSoaT3MakeFrom4Aos( &soaTfrm, &tfrm0, &tfrm1, &tfrm2, &tfrm3 );
+    return soaTfrm;
+}
+
+static inline void vmathSoaT3Get4Aos_V( VmathSoaTransform3 tfrm, VmathTransform3 *result0, VmathTransform3 *result1, VmathTransform3 *result2, VmathTransform3 *result3 )
+{   /* By-value input form of vmathSoaT3Get4Aos; the four output pointers are forwarded as given. */
+    vmathSoaT3Get4Aos(&tfrm, result0, result1, result2, result3);
+}
+
+static inline void vmathSoaT3SetCol0_V( VmathSoaTransform3 *result, VmathSoaVector3 _col0 )
+{   /* Forwards to vmathSoaT3SetCol0; *result is modified in place, _col0 is taken by value. */
+    vmathSoaT3SetCol0(result, &_col0);
+}
+
+static inline void vmathSoaT3SetCol1_V( VmathSoaTransform3 *result, VmathSoaVector3 _col1 )
+{   /* Forwards to vmathSoaT3SetCol1; *result is modified in place, _col1 is taken by value. */
+    vmathSoaT3SetCol1(result, &_col1);
+}
+
+static inline void vmathSoaT3SetCol2_V( VmathSoaTransform3 *result, VmathSoaVector3 _col2 )
+{   /* Forwards to vmathSoaT3SetCol2; *result is modified in place, _col2 is taken by value. */
+    vmathSoaT3SetCol2(result, &_col2);
+}
+
+static inline void vmathSoaT3SetCol3_V( VmathSoaTransform3 *result, VmathSoaVector3 _col3 )
+{   /* Forwards to vmathSoaT3SetCol3; *result is modified in place, _col3 is taken by value. */
+    vmathSoaT3SetCol3(result, &_col3);
+}
+
+static inline void vmathSoaT3SetCol_V( VmathSoaTransform3 *result, int col, VmathSoaVector3 vec )
+{   /* Forwards to vmathSoaT3SetCol; col is passed through with no range check in this wrapper. */
+    vmathSoaT3SetCol(result, col, &vec);
+}
+
+static inline void vmathSoaT3SetRow_V( VmathSoaTransform3 *result, int row, VmathSoaVector4 vec )
+{   /* Forwards to vmathSoaT3SetRow; note the row is a VmathSoaVector4. row is not range-checked here. */
+    vmathSoaT3SetRow(result, row, &vec);
+}
+
+static inline void vmathSoaT3SetElem_V( VmathSoaTransform3 *result, int col, int row, vec_float4 val )
+{   /* Pure pass-through to vmathSoaT3SetElem; the signature already matches the by-value convention. */
+    vmathSoaT3SetElem(result, col, row, val);
+}
+
+static inline vec_float4 vmathSoaT3GetElem_V( VmathSoaTransform3 tfrm, int col, int row )
+{   /* By-value form of vmathSoaT3GetElem; returns its vec_float4 result directly. */
+    return vmathSoaT3GetElem(&tfrm, col, row);
+}
+
+static inline VmathSoaVector3 vmathSoaT3GetCol0_V( VmathSoaTransform3 tfrm )
+{   /* By-value form of vmathSoaT3GetCol0: the column is written to a local and returned. */
+    VmathSoaVector3 column;
+    vmathSoaT3GetCol0( &column, &tfrm );
+    return column;
+}
+
+static inline VmathSoaVector3 vmathSoaT3GetCol1_V( VmathSoaTransform3 tfrm )
+{   /* By-value form of vmathSoaT3GetCol1: the column is written to a local and returned. */
+    VmathSoaVector3 column;
+    vmathSoaT3GetCol1( &column, &tfrm );
+    return column;
+}
+
+static inline VmathSoaVector3 vmathSoaT3GetCol2_V( VmathSoaTransform3 tfrm )
+{   /* By-value form of vmathSoaT3GetCol2: the column is written to a local and returned. */
+    VmathSoaVector3 column;
+    vmathSoaT3GetCol2( &column, &tfrm );
+    return column;
+}
+
+static inline VmathSoaVector3 vmathSoaT3GetCol3_V( VmathSoaTransform3 tfrm )
+{   /* By-value form of vmathSoaT3GetCol3: the column is written to a local and returned. */
+    VmathSoaVector3 column;
+    vmathSoaT3GetCol3( &column, &tfrm );
+    return column;
+}
+
+static inline VmathSoaVector3 vmathSoaT3GetCol_V( VmathSoaTransform3 tfrm, int col )
+{   /* By-value form of vmathSoaT3GetCol: col is forwarded without a range check in this wrapper. */
+    VmathSoaVector3 column;
+    vmathSoaT3GetCol( &column, &tfrm, col );
+    return column;
+}
+
+static inline VmathSoaVector4 vmathSoaT3GetRow_V( VmathSoaTransform3 tfrm, int row )
+{   /* By-value form of vmathSoaT3GetRow: returns a VmathSoaVector4; row is not range-checked here. */
+    VmathSoaVector4 rowVec;
+    vmathSoaT3GetRow( &rowVec, &tfrm, row );
+    return rowVec;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3Inverse_V( VmathSoaTransform3 tfrm )
+{   /* By-value form of vmathSoaT3Inverse: the result is built in a local and returned. */
+    VmathSoaTransform3 inverse;
+    vmathSoaT3Inverse( &inverse, &tfrm );
+    return inverse;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3OrthoInverse_V( VmathSoaTransform3 tfrm )
+{   /* By-value form of vmathSoaT3OrthoInverse: the result is built in a local and returned. */
+    VmathSoaTransform3 inverse;
+    vmathSoaT3OrthoInverse( &inverse, &tfrm );
+    return inverse;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3AbsPerElem_V( VmathSoaTransform3 tfrm )
+{   /* By-value form of vmathSoaT3AbsPerElem: the result is built in a local and returned. */
+    VmathSoaTransform3 absTfrm;
+    vmathSoaT3AbsPerElem( &absTfrm, &tfrm );
+    return absTfrm;
+}
+
+static inline VmathSoaVector3 vmathSoaT3MulV3_V( VmathSoaTransform3 tfrm, VmathSoaVector3 vec )
+{   /* By-value form of vmathSoaT3MulV3 (transform applied to a vector). */
+    VmathSoaVector3 product;
+    vmathSoaT3MulV3( &product, &tfrm, &vec );
+    return product;
+}
+
+static inline VmathSoaPoint3 vmathSoaT3MulP3_V( VmathSoaTransform3 tfrm, VmathSoaPoint3 pnt )
+{   /* By-value form of vmathSoaT3MulP3 (transform applied to a point). */
+    VmathSoaPoint3 product;
+    vmathSoaT3MulP3( &product, &tfrm, &pnt );
+    return product;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3Mul_V( VmathSoaTransform3 tfrm0, VmathSoaTransform3 tfrm1 )
+{   /* By-value form of vmathSoaT3Mul: operand order (tfrm0, tfrm1) is preserved. */
+    VmathSoaTransform3 product;
+    vmathSoaT3Mul( &product, &tfrm0, &tfrm1 );
+    return product;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3MulPerElem_V( VmathSoaTransform3 tfrm0, VmathSoaTransform3 tfrm1 )
+{   /* By-value form of vmathSoaT3MulPerElem: operand order (tfrm0, tfrm1) is preserved. */
+    VmathSoaTransform3 product;
+    vmathSoaT3MulPerElem( &product, &tfrm0, &tfrm1 );
+    return product;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3MakeIdentity_V( void )
+{   /* By-value form of vmathSoaT3MakeIdentity. "( void )" makes this a real C prototype so stray arguments are diagnosed. */
+    VmathSoaTransform3 result;
+    vmathSoaT3MakeIdentity(&result);
+    return result;
+}
+
+static inline void vmathSoaT3SetUpper3x3_V( VmathSoaTransform3 *result, VmathSoaMatrix3 tfrm )
+{   /* Forwards to vmathSoaT3SetUpper3x3; *result is modified in place, the matrix is taken by value. */
+    vmathSoaT3SetUpper3x3(result, &tfrm);
+}
+
+static inline VmathSoaMatrix3 vmathSoaT3GetUpper3x3_V( VmathSoaTransform3 tfrm )
+{   /* By-value form of vmathSoaT3GetUpper3x3: the matrix is written to a local and returned. */
+    VmathSoaMatrix3 upper;
+    vmathSoaT3GetUpper3x3( &upper, &tfrm );
+    return upper;
+}
+
+static inline void vmathSoaT3SetTranslation_V( VmathSoaTransform3 *result, VmathSoaVector3 translateVec )
+{   /* Forwards to vmathSoaT3SetTranslation; *result is modified in place, translateVec is taken by value. */
+    vmathSoaT3SetTranslation(result, &translateVec);
+}
+
+static inline VmathSoaVector3 vmathSoaT3GetTranslation_V( VmathSoaTransform3 tfrm )
+{   /* By-value form of vmathSoaT3GetTranslation: the vector is written to a local and returned. */
+    VmathSoaVector3 translation;
+    vmathSoaT3GetTranslation( &translation, &tfrm );
+    return translation;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3MakeRotationX_V( vec_float4 radians )
+{   /* By-value form of vmathSoaT3MakeRotationX: fills a local transform and returns it. */
+    VmathSoaTransform3 rotX;
+    vmathSoaT3MakeRotationX( &rotX, radians );
+    return rotX;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3MakeRotationY_V( vec_float4 radians )
+{   /* By-value form of vmathSoaT3MakeRotationY: fills a local transform and returns it. */
+    VmathSoaTransform3 rotY;
+    vmathSoaT3MakeRotationY( &rotY, radians );
+    return rotY;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3MakeRotationZ_V( vec_float4 radians )
+{   /* By-value form of vmathSoaT3MakeRotationZ: fills a local transform and returns it. */
+    VmathSoaTransform3 rotZ;
+    vmathSoaT3MakeRotationZ( &rotZ, radians );
+    return rotZ;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3MakeRotationZYX_V( VmathSoaVector3 radiansXYZ )
+{   /* By-value form of vmathSoaT3MakeRotationZYX: the argument copy is passed by address. */
+    VmathSoaTransform3 rotZYX;
+    vmathSoaT3MakeRotationZYX( &rotZYX, &radiansXYZ );
+    return rotZYX;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3MakeRotationAxis_V( vec_float4 radians, VmathSoaVector3 unitVec )
+{   /* By-value form of vmathSoaT3MakeRotationAxis: the unitVec copy is passed by address. */
+    VmathSoaTransform3 rotAxis;
+    vmathSoaT3MakeRotationAxis( &rotAxis, radians, &unitVec );
+    return rotAxis;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3MakeRotationQ_V( VmathSoaQuat unitQuat )
+{   /* By-value form of vmathSoaT3MakeRotationQ: the unitQuat copy is passed by address. */
+    VmathSoaTransform3 rotQ;
+    vmathSoaT3MakeRotationQ( &rotQ, &unitQuat );
+    return rotQ;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3MakeScale_V( VmathSoaVector3 scaleVec )
+{   /* By-value form of vmathSoaT3MakeScale: the scaleVec copy is passed by address. */
+    VmathSoaTransform3 scaleTfrm;
+    vmathSoaT3MakeScale( &scaleTfrm, &scaleVec );
+    return scaleTfrm;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3AppendScale_V( VmathSoaTransform3 tfrm, VmathSoaVector3 scaleVec )
+{   /* By-value form of vmathSoaT3AppendScale: argument order (tfrm, scaleVec) is forwarded as is. */
+    VmathSoaTransform3 scaled;
+    vmathSoaT3AppendScale( &scaled, &tfrm, &scaleVec );
+    return scaled;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3PrependScale_V( VmathSoaVector3 scaleVec, VmathSoaTransform3 tfrm )
+{   /* By-value form of vmathSoaT3PrependScale: argument order (scaleVec, tfrm) is forwarded as is. */
+    VmathSoaTransform3 scaled;
+    vmathSoaT3PrependScale( &scaled, &scaleVec, &tfrm );
+    return scaled;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3MakeTranslation_V( VmathSoaVector3 translateVec )
+{   /* By-value form of vmathSoaT3MakeTranslation: the translateVec copy is passed by address. */
+    VmathSoaTransform3 translation;
+    vmathSoaT3MakeTranslation( &translation, &translateVec );
+    return translation;
+}
+
+static inline VmathSoaTransform3 vmathSoaT3Select_V( VmathSoaTransform3 tfrm0, VmathSoaTransform3 tfrm1, vec_uint4 select1 )
+{   /* By-value form of vmathSoaT3Select: select1 is handed to vmathSoaT3Select untouched. */
+    VmathSoaTransform3 chosen;
+    vmathSoaT3Select( &chosen, &tfrm0, &tfrm1, select1 );
+    return chosen;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathSoaT3Print_V( VmathSoaTransform3 tfrm )
+{   /* By-value form of vmathSoaT3Print (built only when _VECTORMATH_DEBUG is defined). */
+    vmathSoaT3Print(&tfrm);
+}
+
+static inline void vmathSoaT3Prints_V( VmathSoaTransform3 tfrm, const char *name )
+{   /* By-value form of vmathSoaT3Prints; name is forwarded unchanged (debug builds only). */
+    vmathSoaT3Prints(&tfrm, name);
+}
+
+#endif
+
+static inline VmathSoaQuat vmathSoaQMakeFromM3_V( VmathSoaMatrix3 tfrm )
+{   /* By-value form of vmathSoaQMakeFromM3: the quaternion is built in a local and returned. */
+    VmathSoaQuat quat;
+    vmathSoaQMakeFromM3( &quat, &tfrm );
+    return quat;
+}
+
+static inline VmathSoaMatrix3 vmathSoaV3Outer_V( VmathSoaVector3 tfrm0, VmathSoaVector3 tfrm1 )
+{   /* By-value form of vmathSoaV3Outer; despite their names, tfrm0 and tfrm1 are vectors. */
+    VmathSoaMatrix3 outer;
+    vmathSoaV3Outer( &outer, &tfrm0, &tfrm1 );
+    return outer;
+}
+
+static inline VmathSoaMatrix4 vmathSoaV4Outer_V( VmathSoaVector4 tfrm0, VmathSoaVector4 tfrm1 )
+{   /* By-value form of vmathSoaV4Outer; despite their names, tfrm0 and tfrm1 are vectors. */
+    VmathSoaMatrix4 outer;
+    vmathSoaV4Outer( &outer, &tfrm0, &tfrm1 );
+    return outer;
+}
+
+static inline VmathSoaVector3 vmathSoaV3RowMul_V( VmathSoaVector3 vec, VmathSoaMatrix3 mat )
+{   /* By-value form of vmathSoaV3RowMul: argument order (vec, mat) is forwarded as is. */
+    VmathSoaVector3 product;
+    vmathSoaV3RowMul( &product, &vec, &mat );
+    return product;
+}
+
+static inline VmathSoaMatrix3 vmathSoaV3CrossMatrix_V( VmathSoaVector3 vec )
+{   /* By-value form of vmathSoaV3CrossMatrix: the matrix is built in a local and returned. */
+    VmathSoaMatrix3 crossMat;
+    vmathSoaV3CrossMatrix( &crossMat, &vec );
+    return crossMat;
+}
+
+static inline VmathSoaMatrix3 vmathSoaV3CrossMatrixMul_V( VmathSoaVector3 vec, VmathSoaMatrix3 mat )
+{   /* By-value form of vmathSoaV3CrossMatrixMul: argument order (vec, mat) is forwarded as is. */
+    VmathSoaMatrix3 product;
+    vmathSoaV3CrossMatrixMul( &product, &vec, &mat );
+    return product;
+}
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/spu/c/quat_aos.h b/Extras/vectormathlibrary/include/vectormath/spu/c/quat_aos.h
index 0f25d654b..57ff6ecb1 100644
--- a/Extras/vectormathlibrary/include/vectormath/spu/c/quat_aos.h
+++ b/Extras/vectormathlibrary/include/vectormath/spu/c/quat_aos.h
@@ -1,371 +1,371 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_QUAT_AOS_C_H
-#define _VECTORMATH_QUAT_AOS_C_H
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/*-----------------------------------------------------------------------------
- * Definitions
- */
-#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
-#define _VECTORMATH_INTERNAL_FUNCTIONS
-
-#endif
-
-static inline void vmathQCopy( VmathQuat *result, const VmathQuat *quat )
-{
-    result->vec128 = quat->vec128;
-}
-
-static inline void vmathQMakeFromElems( VmathQuat *result, float _x, float _y, float _z, float _w )
-{
-    result->vec128 = (vec_float4){ _x, _y, _z, _w };
-}
-
-static inline void vmathQMakeFromV3Scalar( VmathQuat *result, const VmathVector3 *xyz, float _w )
-{
-    result->vec128 = spu_shuffle( xyz->vec128, spu_promote( _w, 0 ), _VECTORMATH_SHUF_XYZA );
-}
-
-static inline void vmathQMakeFromV4( VmathQuat *result, const VmathVector4 *vec )
-{
-    result->vec128 = vec->vec128;
-}
-
-static inline void vmathQMakeFromScalar( VmathQuat *result, float scalar )
-{
-    result->vec128 = spu_splats( scalar );
-}
-
-static inline void vmathQMakeFrom128( VmathQuat *result, vec_float4 vf4 )
-{
-    result->vec128 = vf4;
-}
-
-static inline void vmathQMakeIdentity( VmathQuat *result )
-{
-    result->vec128 = _VECTORMATH_UNIT_0001;
-}
-
-static inline void vmathQLerp( VmathQuat *result, float t, const VmathQuat *quat0, const VmathQuat *quat1 )
-{
-    VmathQuat tmpQ_0, tmpQ_1;
-    vmathQSub( &tmpQ_0, quat1, quat0 );
-    vmathQScalarMul( &tmpQ_1, &tmpQ_0, t );
-    vmathQAdd( result, quat0, &tmpQ_1 );
-}
-
-static inline void vmathQSlerp( VmathQuat *result, float t, const VmathQuat *unitQuat0, const VmathQuat *unitQuat1 )
-{
-    VmathQuat start;
-    vec_float4 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
-    vec_uint4 selectMask;
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
-    cosAngle = _vmathVfDot4( unitQuat0->vec128, unitQuat1->vec128 );
-    cosAngle = spu_shuffle( cosAngle, cosAngle, shuffle_xxxx );
-    selectMask = (vec_uint4)spu_cmpgt( spu_splats(0.0f), cosAngle );
-    cosAngle = spu_sel( cosAngle, negatef4( cosAngle ), selectMask );
-    start.vec128 = spu_sel( unitQuat0->vec128, negatef4( unitQuat0->vec128 ), selectMask );
-    selectMask = (vec_uint4)spu_cmpgt( spu_splats(_VECTORMATH_SLERP_TOL), cosAngle );
-    angle = acosf4( cosAngle );
-    tttt = spu_splats(t);
-    oneMinusT = spu_sub( spu_splats(1.0f), tttt );
-    angles = spu_sel( spu_splats(1.0f), oneMinusT, (vec_uint4)spu_maskb(0x0f00) );
-    angles = spu_sel( angles, tttt, (vec_uint4)spu_maskb(0x00f0) );
-    angles = spu_mul( angles, angle );
-    sines = sinf4( angles );
-    scales = divf4( sines, spu_shuffle( sines, sines, shuffle_xxxx ) );
-    scale0 = spu_sel( oneMinusT, spu_shuffle( scales, scales, shuffle_yyyy ), selectMask );
-    scale1 = spu_sel( tttt, spu_shuffle( scales, scales, shuffle_zzzz ), selectMask );
-    result->vec128 = spu_madd( start.vec128, scale0, spu_mul( unitQuat1->vec128, scale1 ) );
-}
-
-static inline void vmathQSquad( VmathQuat *result, float t, const VmathQuat *unitQuat0, const VmathQuat *unitQuat1, const VmathQuat *unitQuat2, const VmathQuat *unitQuat3 )
-{
-    VmathQuat tmp0, tmp1;
-    vmathQSlerp( &tmp0, t, unitQuat0, unitQuat3 );
-    vmathQSlerp( &tmp1, t, unitQuat1, unitQuat2 );
-    vmathQSlerp( result, ( ( 2.0f * t ) * ( 1.0f - t ) ), &tmp0, &tmp1 );
-}
-
-static inline vec_float4 vmathQGet128( const VmathQuat *quat )
-{
-    return quat->vec128;
-}
-
-static inline void vmathQSetXYZ( VmathQuat *result, const VmathVector3 *vec )
-{
-    result->vec128 = spu_sel( vec->vec128, result->vec128, (vec_uint4)spu_maskb(0x000f) );
-}
-
-static inline void vmathQGetXYZ( VmathVector3 *result, const VmathQuat *quat )
-{
-    result->vec128 = quat->vec128;
-}
-
-static inline void vmathQSetX( VmathQuat *result, float _x )
-{
-    result->vec128 = spu_insert( _x, result->vec128, 0 );
-}
-
-static inline float vmathQGetX( const VmathQuat *quat )
-{
-    return spu_extract( quat->vec128, 0 );
-}
-
-static inline void vmathQSetY( VmathQuat *result, float _y )
-{
-    result->vec128 = spu_insert( _y, result->vec128, 1 );
-}
-
-static inline float vmathQGetY( const VmathQuat *quat )
-{
-    return spu_extract( quat->vec128, 1 );
-}
-
-static inline void vmathQSetZ( VmathQuat *result, float _z )
-{
-    result->vec128 = spu_insert( _z, result->vec128, 2 );
-}
-
-static inline float vmathQGetZ( const VmathQuat *quat )
-{
-    return spu_extract( quat->vec128, 2 );
-}
-
-static inline void vmathQSetW( VmathQuat *result, float _w )
-{
-    result->vec128 = spu_insert( _w, result->vec128, 3 );
-}
-
-static inline float vmathQGetW( const VmathQuat *quat )
-{
-    return spu_extract( quat->vec128, 3 );
-}
-
-static inline void vmathQSetElem( VmathQuat *result, int idx, float value )
-{
-    result->vec128 = spu_insert( value, result->vec128, idx );
-}
-
-static inline float vmathQGetElem( const VmathQuat *quat, int idx )
-{
-    return spu_extract( quat->vec128, idx );
-}
-
-static inline void vmathQAdd( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1 )
-{
-    result->vec128 = spu_add( quat0->vec128, quat1->vec128 );
-}
-
-static inline void vmathQSub( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1 )
-{
-    result->vec128 = spu_sub( quat0->vec128, quat1->vec128 );
-}
-
-static inline void vmathQScalarMul( VmathQuat *result, const VmathQuat *quat, float scalar )
-{
-    result->vec128 = spu_mul( quat->vec128, spu_splats(scalar) );
-}
-
-static inline void vmathQScalarDiv( VmathQuat *result, const VmathQuat *quat, float scalar )
-{
-    result->vec128 = divf4( quat->vec128, spu_splats(scalar) );
-}
-
-static inline void vmathQNeg( VmathQuat *result, const VmathQuat *quat )
-{
-    result->vec128 = negatef4( quat->vec128 );
-}
-
-static inline float vmathQDot( const VmathQuat *quat0, const VmathQuat *quat1 )
-{
-    return spu_extract( _vmathVfDot4( quat0->vec128, quat1->vec128 ), 0 );
-}
-
-static inline float vmathQNorm( const VmathQuat *quat )
-{
-    return spu_extract( _vmathVfDot4( quat->vec128, quat->vec128 ), 0 );
-}
-
-static inline float vmathQLength( const VmathQuat *quat )
-{
-    return sqrtf( vmathQNorm( quat ) );
-}
-
-static inline void vmathQNormalize( VmathQuat *result, const VmathQuat *quat )
-{
-    vec_float4 dot = _vmathVfDot4( quat->vec128, quat->vec128 );
-    result->vec128 = spu_mul( quat->vec128, rsqrtf4( dot ) );
-}
-
-static inline void vmathQMakeRotationArc( VmathQuat *result, const VmathVector3 *unitVec0, const VmathVector3 *unitVec1 )
-{
-    VmathVector3 crossVec, tmpV3_0;
-    vec_float4 cosAngle, cosAngleX2Plus2, recipCosHalfAngleX2, cosHalfAngleX2, res;
-    cosAngle = _vmathVfDot3( unitVec0->vec128, unitVec1->vec128 );
-    cosAngle = spu_shuffle( cosAngle, cosAngle, (vec_uchar16)spu_splats(0x00010203) );
-    cosAngleX2Plus2 = spu_madd( cosAngle, spu_splats(2.0f), spu_splats(2.0f) );
-    recipCosHalfAngleX2 = rsqrtf4( cosAngleX2Plus2 );
-    cosHalfAngleX2 = spu_mul( recipCosHalfAngleX2, cosAngleX2Plus2 );
-    vmathV3Cross( &tmpV3_0, unitVec0, unitVec1 );
-    crossVec = tmpV3_0;
-    res = spu_mul( crossVec.vec128, recipCosHalfAngleX2 );
-    res = spu_sel( res, spu_mul( cosHalfAngleX2, spu_splats(0.5f) ), (vec_uint4)spu_maskb(0x000f) );
-    result->vec128 = res;
-}
-
-static inline void vmathQMakeRotationAxis( VmathQuat *result, float radians, const VmathVector3 *unitVec )
-{
-    vec_float4 s, c, angle, res;
-    angle = spu_mul( spu_splats(radians), spu_splats(0.5f) );
-    sincosf4( angle, &s, &c );
-    res = spu_sel( spu_mul( unitVec->vec128, s ), c, (vec_uint4)spu_maskb(0x000f) );
-    result->vec128 = res;
-}
-
-static inline void vmathQMakeRotationX( VmathQuat *result, float radians )
-{
-    vec_float4 s, c, angle, res;
-    angle = spu_mul( spu_splats(radians), spu_splats(0.5f) );
-    sincosf4( angle, &s, &c );
-    res = spu_sel( spu_splats(0.0f), s, (vec_uint4)spu_maskb(0xf000) );
-    res = spu_sel( res, c, (vec_uint4)spu_maskb(0x000f) );
-    result->vec128 = res;
-}
-
-static inline void vmathQMakeRotationY( VmathQuat *result, float radians )
-{
-    vec_float4 s, c, angle, res;
-    angle = spu_mul( spu_splats(radians), spu_splats(0.5f) );
-    sincosf4( angle, &s, &c );
-    res = spu_sel( spu_splats(0.0f), s, (vec_uint4)spu_maskb(0x0f00) );
-    res = spu_sel( res, c, (vec_uint4)spu_maskb(0x000f) );
-    result->vec128 = res;
-}
-
-static inline void vmathQMakeRotationZ( VmathQuat *result, float radians )
-{
-    vec_float4 s, c, angle, res;
-    angle = spu_mul( spu_splats(radians), spu_splats(0.5f) );
-    sincosf4( angle, &s, &c );
-    res = spu_sel( spu_splats(0.0f), s, (vec_uint4)spu_maskb(0x00f0) );
-    res = spu_sel( res, c, (vec_uint4)spu_maskb(0x000f) );
-    result->vec128 = res;
-}
-
-static inline void vmathQMul( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1 )
-{
-    vec_float4 ldata, rdata, qv, tmp0, tmp1, tmp2, tmp3;
-    vec_float4 product, l_wxyz, r_wxyz, xy, qw;
-    ldata = quat0->vec128;
-    rdata = quat1->vec128;
-    vec_uchar16 shuffle_wwww = (vec_uchar16)spu_splats((int)0x0c0d0e0f);
-    tmp0 = spu_shuffle( ldata, ldata, _VECTORMATH_SHUF_YZXW );
-    tmp1 = spu_shuffle( rdata, rdata, _VECTORMATH_SHUF_ZXYW );
-    tmp2 = spu_shuffle( ldata, ldata, _VECTORMATH_SHUF_ZXYW );
-    tmp3 = spu_shuffle( rdata, rdata, _VECTORMATH_SHUF_YZXW );
-    qv = spu_mul( spu_shuffle( ldata, ldata, shuffle_wwww ), rdata );
-    qv = spu_madd( spu_shuffle( rdata, rdata, shuffle_wwww ), ldata, qv );
-    qv = spu_madd( tmp0, tmp1, qv );
-    qv = spu_nmsub( tmp2, tmp3, qv );
-    product = spu_mul( ldata, rdata );
-    l_wxyz = spu_rlqwbyte( ldata, 12 );
-    r_wxyz = spu_rlqwbyte( rdata, 12 );
-    qw = spu_nmsub( l_wxyz, r_wxyz, product );
-    xy = spu_madd( l_wxyz, r_wxyz, product );
-    qw = spu_sub( qw, spu_rlqwbyte( xy, 8 ) );
-    result->vec128 = spu_sel( qv, qw, (vec_uint4)spu_maskb( 0x000f ) );
-}
-
-static inline void vmathQRotate( VmathVector3 *result, const VmathQuat *quat, const VmathVector3 *vec )
-{
-    vec_float4 qdata, vdata, product, tmp0, tmp1, tmp2, tmp3, wwww, qv, qw, res;
-    qdata = quat->vec128;
-    vdata = vec->vec128;
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_wwww = (vec_uchar16)spu_splats((int)0x0c0d0e0f);
-    tmp0 = spu_shuffle( qdata, qdata, _VECTORMATH_SHUF_YZXW );
-    tmp1 = spu_shuffle( vdata, vdata, _VECTORMATH_SHUF_ZXYW );
-    tmp2 = spu_shuffle( qdata, qdata, _VECTORMATH_SHUF_ZXYW );
-    tmp3 = spu_shuffle( vdata, vdata, _VECTORMATH_SHUF_YZXW );
-    wwww = spu_shuffle( qdata, qdata, shuffle_wwww );
-    qv = spu_mul( wwww, vdata );
-    qv = spu_madd( tmp0, tmp1, qv );
-    qv = spu_nmsub( tmp2, tmp3, qv );
-    product = spu_mul( qdata, vdata );
-    qw = spu_madd( spu_rlqwbyte( qdata, 4 ), spu_rlqwbyte( vdata, 4 ), product );
-    qw = spu_add( spu_rlqwbyte( product, 8 ), qw );
-    tmp1 = spu_shuffle( qv, qv, _VECTORMATH_SHUF_ZXYW );
-    tmp3 = spu_shuffle( qv, qv, _VECTORMATH_SHUF_YZXW );
-    res = spu_mul( spu_shuffle( qw, qw, shuffle_xxxx ), qdata );
-    res = spu_madd( wwww, qv, res );
-    res = spu_madd( tmp0, tmp1, res );
-    res = spu_nmsub( tmp2, tmp3, res );
-    result->vec128 = res;
-}
-
-static inline void vmathQConj( VmathQuat *result, const VmathQuat *quat )
-{
-    result->vec128 = spu_xor( quat->vec128, ((vec_float4)(vec_int4){0x80000000,0x80000000,0x80000000,0}) );
-}
-
-static inline void vmathQSelect( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1, unsigned int select1 )
-{
-    result->vec128 = spu_sel( quat0->vec128, quat1->vec128, spu_splats( (unsigned int)-(select1 > 0) ) );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathQPrint( const VmathQuat *quat )
-{
-    union { vec_float4 v; float s[4]; } tmp;
-    tmp.v = quat->vec128;
-    printf( "( %f %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] );
-}
-
-static inline void vmathQPrints( const VmathQuat *quat, const char *name )
-{
-    union { vec_float4 v; float s[4]; } tmp;
-    tmp.v = quat->vec128;
-    printf( "%s: ( %f %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] );
-}
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_QUAT_AOS_C_H
+#define _VECTORMATH_QUAT_AOS_C_H
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*-----------------------------------------------------------------------------
+ * Definitions
+ */
+#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
+#define _VECTORMATH_INTERNAL_FUNCTIONS
+
+#endif
+
+static inline void vmathQCopy( VmathQuat *result, const VmathQuat *quat )
+{   /* Copies the whole 128-bit vector of *quat into *result. */
+    result->vec128 = quat->vec128;
+}
+
+static inline void vmathQMakeFromElems( VmathQuat *result, float _x, float _y, float _z, float _w )
+{   /* Packs the four scalars into the vector in the order _x, _y, _z, _w. */
+    result->vec128 = (vec_float4){ _x, _y, _z, _w };
+}
+
+static inline void vmathQMakeFromV3Scalar( VmathQuat *result, const VmathVector3 *xyz, float _w )
+{   /* Shuffles *xyz with _w (promoted into element 0); presumably yields (x, y, z, _w) - confirm against _VECTORMATH_SHUF_XYZA. */
+    result->vec128 = spu_shuffle( xyz->vec128, spu_promote( _w, 0 ), _VECTORMATH_SHUF_XYZA );
+}
+
+static inline void vmathQMakeFromV4( VmathQuat *result, const VmathVector4 *vec )
+{   /* Reuses the 4-vector's 128-bit value unchanged as the quaternion. */
+    result->vec128 = vec->vec128;
+}
+
+static inline void vmathQMakeFromScalar( VmathQuat *result, float scalar )
+{   /* Replicates scalar into all four elements via spu_splats. */
+    result->vec128 = spu_splats( scalar );
+}
+
+static inline void vmathQMakeFrom128( VmathQuat *result, vec_float4 vf4 )
+{   /* Stores the raw vector vf4 as the quaternion without modification. */
+    result->vec128 = vf4;
+}
+
+static inline void vmathQMakeIdentity( VmathQuat *result )
+{   /* Assigns the constant _VECTORMATH_UNIT_0001 - presumably (0, 0, 0, 1); confirm at its definition. */
+    result->vec128 = _VECTORMATH_UNIT_0001;
+}
+
+static inline void vmathQLerp( VmathQuat *result, float t, const VmathQuat *quat0, const VmathQuat *quat1 )
+{   /* Linear blend: *result = *quat0 + t * ( *quat1 - *quat0 ); no normalization is done here. */
+    VmathQuat delta, scaledDelta;
+    vmathQSub( &delta, quat1, quat0 );
+    vmathQScalarMul( &scaledDelta, &delta, t );
+    vmathQAdd( result, quat0, &scaledDelta );
+}
+
+static inline void vmathQSlerp( VmathQuat *result, float t, const VmathQuat *unitQuat0, const VmathQuat *unitQuat1 )
+{   /* Spherical interpolation between two quaternions; switches to linear weights unless cosAngle < _VECTORMATH_SLERP_TOL. */
+    VmathQuat start;
+    vec_float4 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
+    vec_uint4 selectMask;
+    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);   /* byte pattern: replicate word 0 */
+    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);   /* byte pattern: replicate word 1 */
+    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);   /* byte pattern: replicate word 2 */
+    cosAngle = _vmathVfDot4( unitQuat0->vec128, unitQuat1->vec128 );
+    cosAngle = spu_shuffle( cosAngle, cosAngle, shuffle_xxxx );   /* broadcast the dot product to every element */
+    selectMask = (vec_uint4)spu_cmpgt( spu_splats(0.0f), cosAngle );   /* all-ones where the dot product is negative */
+    cosAngle = spu_sel( cosAngle, negatef4( cosAngle ), selectMask );   /* cosAngle becomes non-negative */
+    start.vec128 = spu_sel( unitQuat0->vec128, negatef4( unitQuat0->vec128 ), selectMask );   /* negate unitQuat0 in that case */
+    selectMask = (vec_uint4)spu_cmpgt( spu_splats(_VECTORMATH_SLERP_TOL), cosAngle );   /* set => use the sine-based weights */
+    angle = acosf4( cosAngle );
+    tttt = spu_splats(t);
+    oneMinusT = spu_sub( spu_splats(1.0f), tttt );
+    angles = spu_sel( spu_splats(1.0f), oneMinusT, (vec_uint4)spu_maskb(0x0f00) );   /* word 1 = 1 - t */
+    angles = spu_sel( angles, tttt, (vec_uint4)spu_maskb(0x00f0) );   /* word 2 = t; words 0 and 3 stay 1 */
+    angles = spu_mul( angles, angle );   /* ( angle, (1-t)*angle, t*angle, angle ) */
+    sines = sinf4( angles );
+    scales = divf4( sines, spu_shuffle( sines, sines, shuffle_xxxx ) );   /* divide every sine by sin(angle) */
+    scale0 = spu_sel( oneMinusT, spu_shuffle( scales, scales, shuffle_yyyy ), selectMask );   /* weight applied to start */
+    scale1 = spu_sel( tttt, spu_shuffle( scales, scales, shuffle_zzzz ), selectMask );   /* weight applied to unitQuat1 */
+    result->vec128 = spu_madd( start.vec128, scale0, spu_mul( unitQuat1->vec128, scale1 ) );   /* start*scale0 + unitQuat1*scale1 */
+}
+
+static inline void vmathQSquad( VmathQuat *result, float t, const VmathQuat *unitQuat0, const VmathQuat *unitQuat1, const VmathQuat *unitQuat2, const VmathQuat *unitQuat3 )
+{   /* Slerps 0->3 and 1->2 at t, then slerps between those two results at 2t(1-t). */
+    VmathQuat outerQ, innerQ;
+    vmathQSlerp( &outerQ, t, unitQuat0, unitQuat3 );
+    vmathQSlerp( &innerQ, t, unitQuat1, unitQuat2 );
+    vmathQSlerp( result, ( ( 2.0f * t ) * ( 1.0f - t ) ), &outerQ, &innerQ );
+}
+
+static inline vec_float4 vmathQGet128( const VmathQuat *quat )
+{   /* Returns the quaternion's raw 128-bit vector. */
+    return quat->vec128;
+}
+
+static inline void vmathQSetXYZ( VmathQuat *result, const VmathVector3 *vec )
+{   /* Mask 0x000f covers the last word: that word is kept from *result, the first three come from *vec. */
+    result->vec128 = spu_sel( vec->vec128, result->vec128, (vec_uint4)spu_maskb(0x000f) );
+}
+
+static inline void vmathQGetXYZ( VmathVector3 *result, const VmathQuat *quat )
+{   /* Copies the full vector, so the quaternion's fourth element also lands in result->vec128. */
+    result->vec128 = quat->vec128;
+}
+
+static inline void vmathQSetX( VmathQuat *result, float _x )
+{   /* Overwrites element 0 with _x; the other elements are unchanged. */
+    result->vec128 = spu_insert( _x, result->vec128, 0 );
+}
+
+static inline float vmathQGetX( const VmathQuat *quat )
+{ /* Returns element 0 (x) of the quaternion. */
+    return spu_extract( quat->vec128, 0 );
+}
+
+static inline void vmathQSetY( VmathQuat *result, float _y )
+{ /* Writes _y into element 1 of *result; the other elements are preserved. */
+    result->vec128 = spu_insert( _y, result->vec128, 1 );
+}
+
+static inline float vmathQGetY( const VmathQuat *quat )
+{ /* Returns element 1 (y) of the quaternion. */
+    return spu_extract( quat->vec128, 1 );
+}
+
+static inline void vmathQSetZ( VmathQuat *result, float _z )
+{ /* Writes _z into element 2 of *result; the other elements are preserved. */
+    result->vec128 = spu_insert( _z, result->vec128, 2 );
+}
+
+static inline float vmathQGetZ( const VmathQuat *quat )
+{ /* Returns element 2 (z) of the quaternion. */
+    return spu_extract( quat->vec128, 2 );
+}
+
+static inline void vmathQSetW( VmathQuat *result, float _w )
+{ /* Writes _w into element 3 of *result; the other elements are preserved. */
+    result->vec128 = spu_insert( _w, result->vec128, 3 );
+}
+
+static inline float vmathQGetW( const VmathQuat *quat )
+{ /* Returns element 3 (w) of the quaternion. */
+    return spu_extract( quat->vec128, 3 );
+}
+
+static inline void vmathQSetElem( VmathQuat *result, int idx, float value )
+{ /* Writes value into element idx of *result; idx is passed to spu_insert without a range check here. */
+    result->vec128 = spu_insert( value, result->vec128, idx );
+}
+
+static inline float vmathQGetElem( const VmathQuat *quat, int idx )
+{ /* Returns element idx of the quaternion; idx is passed to spu_extract without a range check here. */
+    return spu_extract( quat->vec128, idx );
+}
+
+static inline void vmathQAdd( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1 )
+{ /* Element-wise sum: *result = quat0 + quat1. */
+    result->vec128 = spu_add( quat0->vec128, quat1->vec128 );
+}
+
+static inline void vmathQSub( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1 )
+{ /* Element-wise difference: *result = quat0 - quat1. */
+    result->vec128 = spu_sub( quat0->vec128, quat1->vec128 );
+}
+
+static inline void vmathQScalarMul( VmathQuat *result, const VmathQuat *quat, float scalar )
+{ /* Multiplies every element of quat by scalar (scalar is splatted across all lanes). */
+    result->vec128 = spu_mul( quat->vec128, spu_splats(scalar) );
+}
+
+static inline void vmathQScalarDiv( VmathQuat *result, const VmathQuat *quat, float scalar )
+{ /* Divides every element of quat by scalar via divf4; a zero scalar is not checked for here. */
+    result->vec128 = divf4( quat->vec128, spu_splats(scalar) );
+}
+
+static inline void vmathQNeg( VmathQuat *result, const VmathQuat *quat )
+{ /* Stores negatef4 of quat's vec128 (all four elements) into *result. */
+    result->vec128 = negatef4( quat->vec128 );
+}
+
+static inline float vmathQDot( const VmathQuat *quat0, const VmathQuat *quat1 )
+{ /* Returns element 0 of _vmathVfDot4( quat0, quat1 ), the dot product helper's output. */
+    return spu_extract( _vmathVfDot4( quat0->vec128, quat1->vec128 ), 0 );
+}
+
+static inline float vmathQNorm( const VmathQuat *quat )
+{ /* Dot product of quat with itself, i.e. the squared length (vmathQLength takes its square root). */
+    return spu_extract( _vmathVfDot4( quat->vec128, quat->vec128 ), 0 );
+}
+
+static inline float vmathQLength( const VmathQuat *quat )
+{ /* Returns sqrtf of vmathQNorm( quat ). */
+    return sqrtf( vmathQNorm( quat ) );
+}
+
+static inline void vmathQNormalize( VmathQuat *result, const VmathQuat *quat )
+{ /* Scales quat by rsqrtf4 of its self dot product; a zero quaternion is not special-cased here. */
+    vec_float4 lengthSq = _vmathVfDot4( quat->vec128, quat->vec128 );
+    result->vec128 = spu_mul( quat->vec128, rsqrtf4( lengthSq ) );
+}
+
+static inline void vmathQMakeRotationArc( VmathQuat *result, const VmathVector3 *unitVec0, const VmathVector3 *unitVec1 )
+{ /* Builds a quaternion from the dot and cross products of unitVec0 and unitVec1 (presumably unit length, per their names). */
+    VmathVector3 crossVec;
+    vec_float4 cosAngle, cosAngleX2Plus2, recipCosHalfAngleX2, cosHalfAngleX2, res;
+    cosAngle = _vmathVfDot3( unitVec0->vec128, unitVec1->vec128 );
+    cosAngle = spu_shuffle( cosAngle, cosAngle, (vec_uchar16)spu_splats((int)0x00010203) ); /* broadcast element 0 */
+    cosAngleX2Plus2 = spu_madd( cosAngle, spu_splats(2.0f), spu_splats(2.0f) ); /* 2*cos + 2 == (2*cos(angle/2))^2 */
+    recipCosHalfAngleX2 = rsqrtf4( cosAngleX2Plus2 ); /* NOTE: the argument is 0 when the dot product is -1 (opposite inputs) */
+    cosHalfAngleX2 = spu_mul( recipCosHalfAngleX2, cosAngleX2Plus2 );
+    /* Cross product is written straight into crossVec; the former temporary copy was redundant. */
+    vmathV3Cross( &crossVec, unitVec0, unitVec1 );
+    res = spu_mul( crossVec.vec128, recipCosHalfAngleX2 ); /* x, y, z = cross / (2*cos(angle/2)) */
+    res = spu_sel( res, spu_mul( cosHalfAngleX2, spu_splats(0.5f) ), (vec_uint4)spu_maskb(0x000f) ); /* w = cos(angle/2) */
+    result->vec128 = res;
+}
+
+static inline void vmathQMakeRotationAxis( VmathQuat *result, float radians, const VmathVector3 *unitVec )
+{ /* Result is ( unitVec * sin(radians/2), cos(radians/2) ); the cosine goes into the last lane (w). */
+    vec_float4 sinHalf, cosHalf, halfAngle, quatBits;
+    halfAngle = spu_mul( spu_splats(radians), spu_splats(0.5f) );
+    sincosf4( halfAngle, &sinHalf, &cosHalf );
+    quatBits = spu_sel( spu_mul( unitVec->vec128, sinHalf ), cosHalf, (vec_uint4)spu_maskb(0x000f) );
+    result->vec128 = quatBits;
+}
+
+static inline void vmathQMakeRotationX( VmathQuat *result, float radians )
+{ /* Result is ( sin(radians/2), 0, 0, cos(radians/2) ): sine in the first lane, cosine in the last. */
+    vec_float4 sinHalf, cosHalf, halfAngle, quatBits;
+    halfAngle = spu_mul( spu_splats(radians), spu_splats(0.5f) );
+    sincosf4( halfAngle, &sinHalf, &cosHalf );
+    quatBits = spu_sel( spu_splats(0.0f), sinHalf, (vec_uint4)spu_maskb(0xf000) );
+    quatBits = spu_sel( quatBits, cosHalf, (vec_uint4)spu_maskb(0x000f) );
+    result->vec128 = quatBits;
+}
+
+static inline void vmathQMakeRotationY( VmathQuat *result, float radians )
+{ /* Result is ( 0, sin(radians/2), 0, cos(radians/2) ): sine in the second lane, cosine in the last. */
+    vec_float4 sinHalf, cosHalf, halfAngle, quatBits;
+    halfAngle = spu_mul( spu_splats(radians), spu_splats(0.5f) );
+    sincosf4( halfAngle, &sinHalf, &cosHalf );
+    quatBits = spu_sel( spu_splats(0.0f), sinHalf, (vec_uint4)spu_maskb(0x0f00) );
+    quatBits = spu_sel( quatBits, cosHalf, (vec_uint4)spu_maskb(0x000f) );
+    result->vec128 = quatBits;
+}
+
+static inline void vmathQMakeRotationZ( VmathQuat *result, float radians )
+{ /* Result is ( 0, 0, sin(radians/2), cos(radians/2) ): sine in the third lane, cosine in the last. */
+    vec_float4 sinHalf, cosHalf, halfAngle, quatBits;
+    halfAngle = spu_mul( spu_splats(radians), spu_splats(0.5f) );
+    sincosf4( halfAngle, &sinHalf, &cosHalf );
+    quatBits = spu_sel( spu_splats(0.0f), sinHalf, (vec_uint4)spu_maskb(0x00f0) );
+    quatBits = spu_sel( quatBits, cosHalf, (vec_uint4)spu_maskb(0x000f) );
+    result->vec128 = quatBits;
+}
+
+static inline void vmathQMul( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1 )
+{ /* Quaternion product quat0 * quat1: xyz = w0*v1 + w1*v0 + v0 x v1, w = w0*w1 - dot(v0, v1). */
+    vec_float4 ldata, rdata, qv, tmp0, tmp1, tmp2, tmp3;
+    vec_float4 product, l_wxyz, r_wxyz, xy, qw;
+    ldata = quat0->vec128;
+    rdata = quat1->vec128;
+    vec_uchar16 shuffle_wwww = (vec_uchar16)spu_splats((int)0x0c0d0e0f); /* pattern that copies bytes 12..15 into every lane */
+    tmp0 = spu_shuffle( ldata, ldata, _VECTORMATH_SHUF_YZXW );
+    tmp1 = spu_shuffle( rdata, rdata, _VECTORMATH_SHUF_ZXYW );
+    tmp2 = spu_shuffle( ldata, ldata, _VECTORMATH_SHUF_ZXYW );
+    tmp3 = spu_shuffle( rdata, rdata, _VECTORMATH_SHUF_YZXW );
+    qv = spu_mul( spu_shuffle( ldata, ldata, shuffle_wwww ), rdata ); /* w0 * q1 */
+    qv = spu_madd( spu_shuffle( rdata, rdata, shuffle_wwww ), ldata, qv ); /* + w1 * q0 */
+    qv = spu_madd( tmp0, tmp1, qv ); /* + l.yzx * r.zxy */
+    qv = spu_nmsub( tmp2, tmp3, qv ); /* - l.zxy * r.yzx; with the line above this adds the cross product */
+    product = spu_mul( ldata, rdata ); /* ( x0*x1, y0*y1, z0*z1, w0*w1 ) */
+    l_wxyz = spu_rlqwbyte( ldata, 12 ); /* rotate by 12 bytes: lanes become ( w, x, y, z ) */
+    r_wxyz = spu_rlqwbyte( rdata, 12 );
+    qw = spu_nmsub( l_wxyz, r_wxyz, product ); /* last lane: w0*w1 - z0*z1 */
+    xy = spu_madd( l_wxyz, r_wxyz, product ); /* second lane: x0*x1 + y0*y1 */
+    qw = spu_sub( qw, spu_rlqwbyte( xy, 8 ) ); /* last lane: w0*w1 - z0*z1 - ( x0*x1 + y0*y1 ) */
+    result->vec128 = spu_sel( qv, qw, (vec_uint4)spu_maskb( 0x000f ) ); /* xyz from qv, w from qw */
+}
+
+static inline void vmathQRotate( VmathVector3 *result, const VmathQuat *quat, const VmathVector3 *vec )
+{ /* Rotates vec by quat. With q = quat.xyz, w = quat.w: t = w*vec + q x vec; result = dot(q,vec)*q + w*t + q x t. */
+    vec_float4 qdata, vdata, product, tmp0, tmp1, tmp2, tmp3, wwww, qv, qw, res;
+    qdata = quat->vec128;
+    vdata = vec->vec128;
+    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203); /* pattern that copies bytes 0..3 into every lane */
+    vec_uchar16 shuffle_wwww = (vec_uchar16)spu_splats((int)0x0c0d0e0f); /* pattern that copies bytes 12..15 into every lane */
+    tmp0 = spu_shuffle( qdata, qdata, _VECTORMATH_SHUF_YZXW );
+    tmp1 = spu_shuffle( vdata, vdata, _VECTORMATH_SHUF_ZXYW );
+    tmp2 = spu_shuffle( qdata, qdata, _VECTORMATH_SHUF_ZXYW );
+    tmp3 = spu_shuffle( vdata, vdata, _VECTORMATH_SHUF_YZXW );
+    wwww = spu_shuffle( qdata, qdata, shuffle_wwww );
+    qv = spu_mul( wwww, vdata ); /* w * vec */
+    qv = spu_madd( tmp0, tmp1, qv ); /* + q.yzx * v.zxy */
+    qv = spu_nmsub( tmp2, tmp3, qv ); /* - q.zxy * v.yzx; qv.xyz is now t = w*vec + q x vec */
+    product = spu_mul( qdata, vdata );
+    qw = spu_madd( spu_rlqwbyte( qdata, 4 ), spu_rlqwbyte( vdata, 4 ), product ); /* first lane: qy*vy + qx*vx */
+    qw = spu_add( spu_rlqwbyte( product, 8 ), qw ); /* first lane: + qz*vz, i.e. dot(q, vec) */
+    tmp1 = spu_shuffle( qv, qv, _VECTORMATH_SHUF_ZXYW ); /* tmp1/tmp3 reused for the shuffles of t */
+    tmp3 = spu_shuffle( qv, qv, _VECTORMATH_SHUF_YZXW );
+    res = spu_mul( spu_shuffle( qw, qw, shuffle_xxxx ), qdata ); /* dot(q, vec) * q */
+    res = spu_madd( wwww, qv, res ); /* + w * t */
+    res = spu_madd( tmp0, tmp1, res ); /* + q.yzx * t.zxy */
+    res = spu_nmsub( tmp2, tmp3, res ); /* - q.zxy * t.yzx; completes + q x t */
+    result->vec128 = res; /* all four lanes are stored; the fourth is a by-product, not masked */
+}
+
+static inline void vmathQConj( VmathQuat *result, const VmathQuat *quat )
+{ /* Conjugate: flips the sign bit of x, y, z and leaves w alone. Unsigned literal: 0x80000000 does not fit in int. */
+    result->vec128 = spu_xor( quat->vec128, ((vec_float4)(vec_uint4){0x80000000u,0x80000000u,0x80000000u,0u}) );
+}
+
+static inline void vmathQSelect( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1, unsigned int select1 )
+{ /* -(select1 > 0) is an all-ones word when select1 is nonzero, else 0, so *result is quat1 or quat0 respectively. */
+    result->vec128 = spu_sel( quat0->vec128, quat1->vec128, spu_splats( (unsigned int)-(select1 > 0) ) );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathQPrint( const VmathQuat *quat )
+{ /* Debug-only: prints the four elements of quat with printf, in element order 0..3. */
+    union { vec_float4 v; float s[4]; } lanes;
+    lanes.v = quat->vec128;
+    printf( "( %f %f %f %f )\n", lanes.s[0], lanes.s[1], lanes.s[2], lanes.s[3] );
+}
+
+static inline void vmathQPrints( const VmathQuat *quat, const char *name )
+{ /* Debug-only: prints name followed by the four elements of quat, in element order 0..3. */
+    union { vec_float4 v; float s[4]; } lanes;
+    lanes.v = quat->vec128;
+    printf( "%s: ( %f %f %f %f )\n", name, lanes.s[0], lanes.s[1], lanes.s[2], lanes.s[3] );
+}
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/spu/c/quat_aos_v.h b/Extras/vectormathlibrary/include/vectormath/spu/c/quat_aos_v.h
index 04cf6ccf9..cc519d805 100644
--- a/Extras/vectormathlibrary/include/vectormath/spu/c/quat_aos_v.h
+++ b/Extras/vectormathlibrary/include/vectormath/spu/c/quat_aos_v.h
@@ -1,312 +1,312 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_QUAT_AOS_V_C_H
-#define _VECTORMATH_QUAT_AOS_V_C_H
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/*-----------------------------------------------------------------------------
- * Definitions
- */
-#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
-#define _VECTORMATH_INTERNAL_FUNCTIONS
-
-#endif
-
-static inline VmathQuat vmathQMakeFromElems_V( float _x, float _y, float _z, float _w )
-{
-    VmathQuat result;
-    vmathQMakeFromElems(&result, _x, _y, _z, _w);
-    return result;
-}
-
-static inline VmathQuat vmathQMakeFromV3Scalar_V( VmathVector3 xyz, float _w )
-{
-    VmathQuat result;
-    vmathQMakeFromV3Scalar(&result, &xyz, _w);
-    return result;
-}
-
-static inline VmathQuat vmathQMakeFromV4_V( VmathVector4 vec )
-{
-    VmathQuat result;
-    vmathQMakeFromV4(&result, &vec);
-    return result;
-}
-
-static inline VmathQuat vmathQMakeFromScalar_V( float scalar )
-{
-    VmathQuat result;
-    vmathQMakeFromScalar(&result, scalar);
-    return result;
-}
-
-static inline VmathQuat vmathQMakeFrom128_V( vec_float4 vf4 )
-{
-    VmathQuat result;
-    vmathQMakeFrom128(&result, vf4);
-    return result;
-}
-
-static inline VmathQuat vmathQMakeIdentity_V( )
-{
-    VmathQuat result;
-    vmathQMakeIdentity(&result);
-    return result;
-}
-
-static inline VmathQuat vmathQLerp_V( float t, VmathQuat quat0, VmathQuat quat1 )
-{
-    VmathQuat result;
-    vmathQLerp(&result, t, &quat0, &quat1);
-    return result;
-}
-
-static inline VmathQuat vmathQSlerp_V( float t, VmathQuat unitQuat0, VmathQuat unitQuat1 )
-{
-    VmathQuat result;
-    vmathQSlerp(&result, t, &unitQuat0, &unitQuat1);
-    return result;
-}
-
-static inline VmathQuat vmathQSquad_V( float t, VmathQuat unitQuat0, VmathQuat unitQuat1, VmathQuat unitQuat2, VmathQuat unitQuat3 )
-{
-    VmathQuat result;
-    vmathQSquad(&result, t, &unitQuat0, &unitQuat1, &unitQuat2, &unitQuat3);
-    return result;
-}
-
-static inline vec_float4 vmathQGet128_V( VmathQuat quat )
-{
-    return vmathQGet128(&quat);
-}
-
-static inline void vmathQSetXYZ_V( VmathQuat *result, VmathVector3 vec )
-{
-    vmathQSetXYZ(result, &vec);
-}
-
-static inline VmathVector3 vmathQGetXYZ_V( VmathQuat quat )
-{
-    VmathVector3 result;
-    vmathQGetXYZ(&result, &quat);
-    return result;
-}
-
-static inline void vmathQSetX_V( VmathQuat *result, float _x )
-{
-    vmathQSetX(result, _x);
-}
-
-static inline float vmathQGetX_V( VmathQuat quat )
-{
-    return vmathQGetX(&quat);
-}
-
-static inline void vmathQSetY_V( VmathQuat *result, float _y )
-{
-    vmathQSetY(result, _y);
-}
-
-static inline float vmathQGetY_V( VmathQuat quat )
-{
-    return vmathQGetY(&quat);
-}
-
-static inline void vmathQSetZ_V( VmathQuat *result, float _z )
-{
-    vmathQSetZ(result, _z);
-}
-
-static inline float vmathQGetZ_V( VmathQuat quat )
-{
-    return vmathQGetZ(&quat);
-}
-
-static inline void vmathQSetW_V( VmathQuat *result, float _w )
-{
-    vmathQSetW(result, _w);
-}
-
-static inline float vmathQGetW_V( VmathQuat quat )
-{
-    return vmathQGetW(&quat);
-}
-
-static inline void vmathQSetElem_V( VmathQuat *result, int idx, float value )
-{
-    vmathQSetElem(result, idx, value);
-}
-
-static inline float vmathQGetElem_V( VmathQuat quat, int idx )
-{
-    return vmathQGetElem(&quat, idx);
-}
-
-static inline VmathQuat vmathQAdd_V( VmathQuat quat0, VmathQuat quat1 )
-{
-    VmathQuat result;
-    vmathQAdd(&result, &quat0, &quat1);
-    return result;
-}
-
-static inline VmathQuat vmathQSub_V( VmathQuat quat0, VmathQuat quat1 )
-{
-    VmathQuat result;
-    vmathQSub(&result, &quat0, &quat1);
-    return result;
-}
-
-static inline VmathQuat vmathQScalarMul_V( VmathQuat quat, float scalar )
-{
-    VmathQuat result;
-    vmathQScalarMul(&result, &quat, scalar);
-    return result;
-}
-
-static inline VmathQuat vmathQScalarDiv_V( VmathQuat quat, float scalar )
-{
-    VmathQuat result;
-    vmathQScalarDiv(&result, &quat, scalar);
-    return result;
-}
-
-static inline VmathQuat vmathQNeg_V( VmathQuat quat )
-{
-    VmathQuat result;
-    vmathQNeg(&result, &quat);
-    return result;
-}
-
-static inline float vmathQDot_V( VmathQuat quat0, VmathQuat quat1 )
-{
-    return vmathQDot(&quat0, &quat1);
-}
-
-static inline float vmathQNorm_V( VmathQuat quat )
-{
-    return vmathQNorm(&quat);
-}
-
-static inline float vmathQLength_V( VmathQuat quat )
-{
-    return vmathQLength(&quat);
-}
-
-static inline VmathQuat vmathQNormalize_V( VmathQuat quat )
-{
-    VmathQuat result;
-    vmathQNormalize(&result, &quat);
-    return result;
-}
-
-static inline VmathQuat vmathQMakeRotationArc_V( VmathVector3 unitVec0, VmathVector3 unitVec1 )
-{
-    VmathQuat result;
-    vmathQMakeRotationArc(&result, &unitVec0, &unitVec1);
-    return result;
-}
-
-static inline VmathQuat vmathQMakeRotationAxis_V( float radians, VmathVector3 unitVec )
-{
-    VmathQuat result;
-    vmathQMakeRotationAxis(&result, radians, &unitVec);
-    return result;
-}
-
-static inline VmathQuat vmathQMakeRotationX_V( float radians )
-{
-    VmathQuat result;
-    vmathQMakeRotationX(&result, radians);
-    return result;
-}
-
-static inline VmathQuat vmathQMakeRotationY_V( float radians )
-{
-    VmathQuat result;
-    vmathQMakeRotationY(&result, radians);
-    return result;
-}
-
-static inline VmathQuat vmathQMakeRotationZ_V( float radians )
-{
-    VmathQuat result;
-    vmathQMakeRotationZ(&result, radians);
-    return result;
-}
-
-static inline VmathQuat vmathQMul_V( VmathQuat quat0, VmathQuat quat1 )
-{
-    VmathQuat result;
-    vmathQMul(&result, &quat0, &quat1);
-    return result;
-}
-
-static inline VmathVector3 vmathQRotate_V( VmathQuat quat, VmathVector3 vec )
-{
-    VmathVector3 result;
-    vmathQRotate(&result, &quat, &vec);
-    return result;
-}
-
-static inline VmathQuat vmathQConj_V( VmathQuat quat )
-{
-    VmathQuat result;
-    vmathQConj(&result, &quat);
-    return result;
-}
-
-static inline VmathQuat vmathQSelect_V( VmathQuat quat0, VmathQuat quat1, unsigned int select1 )
-{
-    VmathQuat result;
-    vmathQSelect(&result, &quat0, &quat1, select1);
-    return result;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathQPrint_V( VmathQuat quat )
-{
-    vmathQPrint(&quat);
-}
-
-static inline void vmathQPrints_V( VmathQuat quat, const char *name )
-{
-    vmathQPrints(&quat, name);
-}
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_QUAT_AOS_V_C_H
+#define _VECTORMATH_QUAT_AOS_V_C_H
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*-----------------------------------------------------------------------------
+ * Definitions
+ */
+#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
+#define _VECTORMATH_INTERNAL_FUNCTIONS
+
+#endif
+
+static inline VmathQuat vmathQMakeFromElems_V( float _x, float _y, float _z, float _w )
+{ /* By-value variant: forwards to vmathQMakeFromElems and returns the quaternion it fills in. */
+    VmathQuat out;
+    vmathQMakeFromElems( &out, _x, _y, _z, _w );
+    return out;
+}
+
+static inline VmathQuat vmathQMakeFromV3Scalar_V( VmathVector3 xyz, float _w )
+{ /* By-value variant: forwards to vmathQMakeFromV3Scalar and returns the quaternion it fills in. */
+    VmathQuat out;
+    vmathQMakeFromV3Scalar( &out, &xyz, _w );
+    return out;
+}
+
+static inline VmathQuat vmathQMakeFromV4_V( VmathVector4 vec )
+{ /* By-value variant: forwards to vmathQMakeFromV4 and returns the quaternion it fills in. */
+    VmathQuat out;
+    vmathQMakeFromV4( &out, &vec );
+    return out;
+}
+
+static inline VmathQuat vmathQMakeFromScalar_V( float scalar )
+{ /* By-value variant: forwards to vmathQMakeFromScalar and returns the quaternion it fills in. */
+    VmathQuat out;
+    vmathQMakeFromScalar( &out, scalar );
+    return out;
+}
+
+static inline VmathQuat vmathQMakeFrom128_V( vec_float4 vf4 )
+{ /* By-value variant: forwards to vmathQMakeFrom128 and returns the quaternion it fills in. */
+    VmathQuat out;
+    vmathQMakeFrom128( &out, vf4 );
+    return out;
+}
+
+static inline VmathQuat vmathQMakeIdentity_V( void )
+{ /* By-value variant of vmathQMakeIdentity. `void` makes this a real prototype in C, so stray arguments are rejected. */
+    VmathQuat result;
+    vmathQMakeIdentity(&result);
+    return result;
+}
+
+static inline VmathQuat vmathQLerp_V( float t, VmathQuat quat0, VmathQuat quat1 )
+{ /* By-value variant: forwards to vmathQLerp and returns the quaternion it fills in. */
+    VmathQuat out;
+    vmathQLerp( &out, t, &quat0, &quat1 );
+    return out;
+}
+
+static inline VmathQuat vmathQSlerp_V( float t, VmathQuat unitQuat0, VmathQuat unitQuat1 )
+{ /* By-value variant: forwards to vmathQSlerp and returns the quaternion it fills in. */
+    VmathQuat out;
+    vmathQSlerp( &out, t, &unitQuat0, &unitQuat1 );
+    return out;
+}
+
+static inline VmathQuat vmathQSquad_V( float t, VmathQuat unitQuat0, VmathQuat unitQuat1, VmathQuat unitQuat2, VmathQuat unitQuat3 )
+{ /* By-value variant: forwards to vmathQSquad and returns the quaternion it fills in. */
+    VmathQuat out;
+    vmathQSquad( &out, t, &unitQuat0, &unitQuat1, &unitQuat2, &unitQuat3 );
+    return out;
+}
+
+static inline vec_float4 vmathQGet128_V( VmathQuat quat )
+{ /* By-value variant: passes the address of the local copy to vmathQGet128. */
+    return vmathQGet128(&quat);
+}
+
+static inline void vmathQSetXYZ_V( VmathQuat *result, VmathVector3 vec )
+{ /* Takes vec by value and forwards to vmathQSetXYZ; *result is still modified in place. */
+    vmathQSetXYZ(result, &vec);
+}
+
+static inline VmathVector3 vmathQGetXYZ_V( VmathQuat quat )
+{ /* By-value variant: forwards to vmathQGetXYZ and returns the vector it fills in. */
+    VmathVector3 out;
+    vmathQGetXYZ( &out, &quat );
+    return out;
+}
+
+static inline void vmathQSetX_V( VmathQuat *result, float _x )
+{ /* Same signature as vmathQSetX; forwards to it unchanged. */
+    vmathQSetX(result, _x);
+}
+
+static inline float vmathQGetX_V( VmathQuat quat )
+{ /* By-value variant: passes the address of the local copy to vmathQGetX. */
+    return vmathQGetX(&quat);
+}
+
+static inline void vmathQSetY_V( VmathQuat *result, float _y )
+{ /* Same signature as vmathQSetY; forwards to it unchanged. */
+    vmathQSetY(result, _y);
+}
+
+static inline float vmathQGetY_V( VmathQuat quat )
+{ /* By-value variant: passes the address of the local copy to vmathQGetY. */
+    return vmathQGetY(&quat);
+}
+
+static inline void vmathQSetZ_V( VmathQuat *result, float _z )
+{ /* Same signature as vmathQSetZ; forwards to it unchanged. */
+    vmathQSetZ(result, _z);
+}
+
+static inline float vmathQGetZ_V( VmathQuat quat )
+{ /* By-value variant: passes the address of the local copy to vmathQGetZ. */
+    return vmathQGetZ(&quat);
+}
+
+static inline void vmathQSetW_V( VmathQuat *result, float _w )
+{ /* Same signature as vmathQSetW; forwards to it unchanged. */
+    vmathQSetW(result, _w);
+}
+
+static inline float vmathQGetW_V( VmathQuat quat )
+{ /* By-value variant: passes the address of the local copy to vmathQGetW. */
+    return vmathQGetW(&quat);
+}
+
+static inline void vmathQSetElem_V( VmathQuat *result, int idx, float value )
+{ /* Same signature as vmathQSetElem; forwards to it unchanged. */
+    vmathQSetElem(result, idx, value);
+}
+
+static inline float vmathQGetElem_V( VmathQuat quat, int idx )
+{ /* By-value variant: passes the address of the local copy and idx to vmathQGetElem. */
+    return vmathQGetElem(&quat, idx);
+}
+
+static inline VmathQuat vmathQAdd_V( VmathQuat quat0, VmathQuat quat1 )
+{ /* By-value variant: forwards to vmathQAdd and returns the quaternion it fills in. */
+    VmathQuat out;
+    vmathQAdd( &out, &quat0, &quat1 );
+    return out;
+}
+
+static inline VmathQuat vmathQSub_V( VmathQuat quat0, VmathQuat quat1 )
+{ /* By-value variant: forwards to vmathQSub and returns the quaternion it fills in. */
+    VmathQuat out;
+    vmathQSub( &out, &quat0, &quat1 );
+    return out;
+}
+
+static inline VmathQuat vmathQScalarMul_V( VmathQuat quat, float scalar )
+{ /* By-value variant: forwards to vmathQScalarMul and returns the quaternion it fills in. */
+    VmathQuat out;
+    vmathQScalarMul( &out, &quat, scalar );
+    return out;
+}
+
+static inline VmathQuat vmathQScalarDiv_V( VmathQuat quat, float scalar )
+{ /* By-value variant: forwards to vmathQScalarDiv and returns the quaternion it fills in. */
+    VmathQuat out;
+    vmathQScalarDiv( &out, &quat, scalar );
+    return out;
+}
+
+static inline VmathQuat vmathQNeg_V( VmathQuat quat )
+{ /* By-value variant: forwards to vmathQNeg and returns the quaternion it fills in. */
+    VmathQuat out;
+    vmathQNeg( &out, &quat );
+    return out;
+}
+
+static inline float vmathQDot_V( VmathQuat quat0, VmathQuat quat1 )
+{ /* By-value variant: passes the addresses of the local copies to vmathQDot. */
+    return vmathQDot(&quat0, &quat1);
+}
+
+static inline float vmathQNorm_V( VmathQuat quat )
+{ /* By-value variant: passes the address of the local copy to vmathQNorm. */
+    return vmathQNorm(&quat);
+}
+
+static inline float vmathQLength_V( VmathQuat quat )
+{ /* By-value variant: passes the address of the local copy to vmathQLength. */
+    return vmathQLength(&quat);
+}
+
+static inline VmathQuat vmathQNormalize_V( VmathQuat quat )
+{ /* By-value variant: forwards to vmathQNormalize and returns the quaternion it fills in. */
+    VmathQuat out;
+    vmathQNormalize( &out, &quat );
+    return out;
+}
+
+static inline VmathQuat vmathQMakeRotationArc_V( VmathVector3 unitVec0, VmathVector3 unitVec1 )
+{ /* By-value variant: forwards to vmathQMakeRotationArc and returns the quaternion it fills in. */
+    VmathQuat out;
+    vmathQMakeRotationArc( &out, &unitVec0, &unitVec1 );
+    return out;
+}
+
+static inline VmathQuat vmathQMakeRotationAxis_V( float radians, VmathVector3 unitVec )
+{ /* By-value variant: forwards to vmathQMakeRotationAxis and returns the quaternion it fills in. */
+    VmathQuat out;
+    vmathQMakeRotationAxis( &out, radians, &unitVec );
+    return out;
+}
+
+static inline VmathQuat vmathQMakeRotationX_V( float radians )
+{ /* By-value variant: forwards to vmathQMakeRotationX and returns the quaternion it fills in. */
+    VmathQuat out;
+    vmathQMakeRotationX( &out, radians );
+    return out;
+}
+
+static inline VmathQuat vmathQMakeRotationY_V( float radians )
+{ /* By-value variant: forwards to vmathQMakeRotationY and returns the quaternion it fills in. */
+    VmathQuat out;
+    vmathQMakeRotationY( &out, radians );
+    return out;
+}
+
+static inline VmathQuat vmathQMakeRotationZ_V( float radians )
+{ /* By-value variant: forwards to vmathQMakeRotationZ and returns the quaternion it fills in. */
+    VmathQuat out;
+    vmathQMakeRotationZ( &out, radians );
+    return out;
+}
+
+static inline VmathQuat vmathQMul_V( VmathQuat quat0, VmathQuat quat1 )
+{ /* By-value variant: forwards to vmathQMul and returns the quaternion it fills in. */
+    VmathQuat out;
+    vmathQMul( &out, &quat0, &quat1 );
+    return out;
+}
+
+static inline VmathVector3 vmathQRotate_V( VmathQuat quat, VmathVector3 vec )
+{ /* By-value variant: forwards to vmathQRotate and returns the vector it fills in. */
+    VmathVector3 out;
+    vmathQRotate( &out, &quat, &vec );
+    return out;
+}
+
+static inline VmathQuat vmathQConj_V( VmathQuat quat )
+{ /* By-value variant: forwards to vmathQConj and returns the quaternion it fills in. */
+    VmathQuat out;
+    vmathQConj( &out, &quat );
+    return out;
+}
+
+static inline VmathQuat vmathQSelect_V( VmathQuat quat0, VmathQuat quat1, unsigned int select1 )
+{ /* By-value variant: forwards to vmathQSelect and returns the quaternion it fills in. */
+    VmathQuat out;
+    vmathQSelect( &out, &quat0, &quat1, select1 );
+    return out;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathQPrint_V( VmathQuat quat )
+{ /* Debug-only, by-value variant: passes the address of the local copy to vmathQPrint. */
+    vmathQPrint(&quat);
+}
+
+static inline void vmathQPrints_V( VmathQuat quat, const char *name )
+{ /* Debug-only, by-value variant: passes the address of the local copy and name to vmathQPrints. */
+    vmathQPrints(&quat, name);
+}
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/spu/c/quat_soa.h b/Extras/vectormathlibrary/include/vectormath/spu/c/quat_soa.h
index cd79e9d8d..d133092f3 100644
--- a/Extras/vectormathlibrary/include/vectormath/spu/c/quat_soa.h
+++ b/Extras/vectormathlibrary/include/vectormath/spu/c/quat_soa.h
@@ -1,419 +1,419 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_QUAT_SOA_C_H
-#define _VECTORMATH_QUAT_SOA_C_H
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/*-----------------------------------------------------------------------------
- * Definitions
- */
-#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
-#define _VECTORMATH_INTERNAL_FUNCTIONS
-
-#endif
-
-static inline void vmathSoaQCopy( VmathSoaQuat *result, const VmathSoaQuat *quat )
-{
-    result->x = quat->x;
-    result->y = quat->y;
-    result->z = quat->z;
-    result->w = quat->w;
-}
-
-static inline void vmathSoaQMakeFromElems( VmathSoaQuat *result, vec_float4 _x, vec_float4 _y, vec_float4 _z, vec_float4 _w )
-{
-    result->x = _x;
-    result->y = _y;
-    result->z = _z;
-    result->w = _w;
-}
-
-static inline void vmathSoaQMakeFromV3Scalar( VmathSoaQuat *result, const VmathSoaVector3 *xyz, vec_float4 _w )
-{
-    vmathSoaQSetXYZ( result, xyz );
-    vmathSoaQSetW( result, _w );
-}
-
-static inline void vmathSoaQMakeFromV4( VmathSoaQuat *result, const VmathSoaVector4 *vec )
-{
-    result->x = vec->x;
-    result->y = vec->y;
-    result->z = vec->z;
-    result->w = vec->w;
-}
-
-static inline void vmathSoaQMakeFromScalar( VmathSoaQuat *result, vec_float4 scalar )
-{
-    result->x = scalar;
-    result->y = scalar;
-    result->z = scalar;
-    result->w = scalar;
-}
-
-static inline void vmathSoaQMakeFromAos( VmathSoaQuat *result, const VmathQuat *quat )
-{
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
-    vec_uchar16 shuffle_wwww = (vec_uchar16)spu_splats((int)0x0c0d0e0f);
-    vec_float4 vec128 = quat->vec128;
-    result->x = spu_shuffle( vec128, vec128, shuffle_xxxx );
-    result->y = spu_shuffle( vec128, vec128, shuffle_yyyy );
-    result->z = spu_shuffle( vec128, vec128, shuffle_zzzz );
-    result->w = spu_shuffle( vec128, vec128, shuffle_wwww );
-}
-
-static inline void vmathSoaQMakeFrom4Aos( VmathSoaQuat *result, const VmathQuat *quat0, const VmathQuat *quat1, const VmathQuat *quat2, const VmathQuat *quat3 )
-{
-    vec_float4 tmp0, tmp1, tmp2, tmp3;
-    tmp0 = spu_shuffle( quat0->vec128, quat2->vec128, _VECTORMATH_SHUF_XAYB );
-    tmp1 = spu_shuffle( quat1->vec128, quat3->vec128, _VECTORMATH_SHUF_XAYB );
-    tmp2 = spu_shuffle( quat0->vec128, quat2->vec128, _VECTORMATH_SHUF_ZCWD );
-    tmp3 = spu_shuffle( quat1->vec128, quat3->vec128, _VECTORMATH_SHUF_ZCWD );
-    result->x = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_XAYB );
-    result->y = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_ZCWD );
-    result->z = spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_XAYB );
-    result->w = spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_ZCWD );
-}
-
-static inline void vmathSoaQMakeIdentity( VmathSoaQuat *result )
-{
-    vmathSoaQMakeFromElems( result, spu_splats(0.0f), spu_splats(0.0f), spu_splats(0.0f), spu_splats(1.0f) );
-}
-
-static inline void vmathSoaQLerp( VmathSoaQuat *result, vec_float4 t, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 )
-{
-    VmathSoaQuat tmpQ_0, tmpQ_1;
-    vmathSoaQSub( &tmpQ_0, quat1, quat0 );
-    vmathSoaQScalarMul( &tmpQ_1, &tmpQ_0, t );
-    vmathSoaQAdd( result, quat0, &tmpQ_1 );
-}
-
-static inline void vmathSoaQSlerp( VmathSoaQuat *result, vec_float4 t, const VmathSoaQuat *unitQuat0, const VmathSoaQuat *unitQuat1 )
-{
-    VmathSoaQuat start, tmpQ_0, tmpQ_1;
-    vec_float4 recipSinAngle, scale0, scale1, cosAngle, angle;
-    vec_uint4 selectMask;
-    cosAngle = vmathSoaQDot( unitQuat0, unitQuat1 );
-    selectMask = (vec_uint4)spu_cmpgt( spu_splats(0.0f), cosAngle );
-    cosAngle = spu_sel( cosAngle, negatef4( cosAngle ), selectMask );
-    vmathSoaQSetX( &start, spu_sel( unitQuat0->x, negatef4( unitQuat0->x ), selectMask ) );
-    vmathSoaQSetY( &start, spu_sel( unitQuat0->y, negatef4( unitQuat0->y ), selectMask ) );
-    vmathSoaQSetZ( &start, spu_sel( unitQuat0->z, negatef4( unitQuat0->z ), selectMask ) );
-    vmathSoaQSetW( &start, spu_sel( unitQuat0->w, negatef4( unitQuat0->w ), selectMask ) );
-    selectMask = (vec_uint4)spu_cmpgt( spu_splats(_VECTORMATH_SLERP_TOL), cosAngle );
-    angle = acosf4( cosAngle );
-    recipSinAngle = recipf4( sinf4( angle ) );
-    scale0 = spu_sel( spu_sub( spu_splats(1.0f), t ), spu_mul( sinf4( spu_mul( spu_sub( spu_splats(1.0f), t ), angle ) ), recipSinAngle ), selectMask );
-    scale1 = spu_sel( t, spu_mul( sinf4( spu_mul( t, angle ) ), recipSinAngle ), selectMask );
-    vmathSoaQScalarMul( &tmpQ_0, &start, scale0 );
-    vmathSoaQScalarMul( &tmpQ_1, unitQuat1, scale1 );
-    vmathSoaQAdd( result, &tmpQ_0, &tmpQ_1 );
-}
-
-static inline void vmathSoaQSquad( VmathSoaQuat *result, vec_float4 t, const VmathSoaQuat *unitQuat0, const VmathSoaQuat *unitQuat1, const VmathSoaQuat *unitQuat2, const VmathSoaQuat *unitQuat3 )
-{
-    VmathSoaQuat tmp0, tmp1;
-    vmathSoaQSlerp( &tmp0, t, unitQuat0, unitQuat3 );
-    vmathSoaQSlerp( &tmp1, t, unitQuat1, unitQuat2 );
-    vmathSoaQSlerp( result, spu_mul( spu_mul( spu_splats(2.0f), t ), spu_sub( spu_splats(1.0f), t ) ), &tmp0, &tmp1 );
-}
-
-static inline void vmathSoaQGet4Aos( const VmathSoaQuat *quat, VmathQuat *result0, VmathQuat *result1, VmathQuat *result2, VmathQuat *result3 )
-{
-    vec_float4 tmp0, tmp1, tmp2, tmp3;
-    tmp0 = spu_shuffle( quat->x, quat->z, _VECTORMATH_SHUF_XAYB );
-    tmp1 = spu_shuffle( quat->y, quat->w, _VECTORMATH_SHUF_XAYB );
-    tmp2 = spu_shuffle( quat->x, quat->z, _VECTORMATH_SHUF_ZCWD );
-    tmp3 = spu_shuffle( quat->y, quat->w, _VECTORMATH_SHUF_ZCWD );
-    vmathQMakeFrom128( result0, spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_XAYB ) );
-    vmathQMakeFrom128( result1, spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_ZCWD ) );
-    vmathQMakeFrom128( result2, spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_XAYB ) );
-    vmathQMakeFrom128( result3, spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_ZCWD ) );
-}
-
-static inline void vmathSoaQSetXYZ( VmathSoaQuat *result, const VmathSoaVector3 *vec )
-{
-    result->x = vec->x;
-    result->y = vec->y;
-    result->z = vec->z;
-}
-
-static inline void vmathSoaQGetXYZ( VmathSoaVector3 *result, const VmathSoaQuat *quat )
-{
-    vmathSoaV3MakeFromElems( result, quat->x, quat->y, quat->z );
-}
-
-static inline void vmathSoaQSetX( VmathSoaQuat *result, vec_float4 _x )
-{
-    result->x = _x;
-}
-
-static inline vec_float4 vmathSoaQGetX( const VmathSoaQuat *quat )
-{
-    return quat->x;
-}
-
-static inline void vmathSoaQSetY( VmathSoaQuat *result, vec_float4 _y )
-{
-    result->y = _y;
-}
-
-static inline vec_float4 vmathSoaQGetY( const VmathSoaQuat *quat )
-{
-    return quat->y;
-}
-
-static inline void vmathSoaQSetZ( VmathSoaQuat *result, vec_float4 _z )
-{
-    result->z = _z;
-}
-
-static inline vec_float4 vmathSoaQGetZ( const VmathSoaQuat *quat )
-{
-    return quat->z;
-}
-
-static inline void vmathSoaQSetW( VmathSoaQuat *result, vec_float4 _w )
-{
-    result->w = _w;
-}
-
-static inline vec_float4 vmathSoaQGetW( const VmathSoaQuat *quat )
-{
-    return quat->w;
-}
-
-static inline void vmathSoaQSetElem( VmathSoaQuat *result, int idx, vec_float4 value )
-{
-    *(&result->x + idx) = value;
-}
-
-static inline vec_float4 vmathSoaQGetElem( const VmathSoaQuat *quat, int idx )
-{
-    return *(&quat->x + idx);
-}
-
-static inline void vmathSoaQAdd( VmathSoaQuat *result, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 )
-{
-    result->x = spu_add( quat0->x, quat1->x );
-    result->y = spu_add( quat0->y, quat1->y );
-    result->z = spu_add( quat0->z, quat1->z );
-    result->w = spu_add( quat0->w, quat1->w );
-}
-
-static inline void vmathSoaQSub( VmathSoaQuat *result, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 )
-{
-    result->x = spu_sub( quat0->x, quat1->x );
-    result->y = spu_sub( quat0->y, quat1->y );
-    result->z = spu_sub( quat0->z, quat1->z );
-    result->w = spu_sub( quat0->w, quat1->w );
-}
-
-static inline void vmathSoaQScalarMul( VmathSoaQuat *result, const VmathSoaQuat *quat, vec_float4 scalar )
-{
-    result->x = spu_mul( quat->x, scalar );
-    result->y = spu_mul( quat->y, scalar );
-    result->z = spu_mul( quat->z, scalar );
-    result->w = spu_mul( quat->w, scalar );
-}
-
-static inline void vmathSoaQScalarDiv( VmathSoaQuat *result, const VmathSoaQuat *quat, vec_float4 scalar )
-{
-    result->x = divf4( quat->x, scalar );
-    result->y = divf4( quat->y, scalar );
-    result->z = divf4( quat->z, scalar );
-    result->w = divf4( quat->w, scalar );
-}
-
-static inline void vmathSoaQNeg( VmathSoaQuat *result, const VmathSoaQuat *quat )
-{
-    result->x = negatef4( quat->x );
-    result->y = negatef4( quat->y );
-    result->z = negatef4( quat->z );
-    result->w = negatef4( quat->w );
-}
-
-static inline vec_float4 vmathSoaQDot( const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 )
-{
-    vec_float4 result;
-    result = spu_mul( quat0->x, quat1->x );
-    result = spu_add( result, spu_mul( quat0->y, quat1->y ) );
-    result = spu_add( result, spu_mul( quat0->z, quat1->z ) );
-    result = spu_add( result, spu_mul( quat0->w, quat1->w ) );
-    return result;
-}
-
-static inline vec_float4 vmathSoaQNorm( const VmathSoaQuat *quat )
-{
-    vec_float4 result;
-    result = spu_mul( quat->x, quat->x );
-    result = spu_add( result, spu_mul( quat->y, quat->y ) );
-    result = spu_add( result, spu_mul( quat->z, quat->z ) );
-    result = spu_add( result, spu_mul( quat->w, quat->w ) );
-    return result;
-}
-
-static inline vec_float4 vmathSoaQLength( const VmathSoaQuat *quat )
-{
-    return sqrtf4( vmathSoaQNorm( quat ) );
-}
-
-static inline void vmathSoaQNormalize( VmathSoaQuat *result, const VmathSoaQuat *quat )
-{
-    vec_float4 lenSqr, lenInv;
-    lenSqr = vmathSoaQNorm( quat );
-    lenInv = rsqrtf4( lenSqr );
-    result->x = spu_mul( quat->x, lenInv );
-    result->y = spu_mul( quat->y, lenInv );
-    result->z = spu_mul( quat->z, lenInv );
-    result->w = spu_mul( quat->w, lenInv );
-}
-
-static inline void vmathSoaQMakeRotationArc( VmathSoaQuat *result, const VmathSoaVector3 *unitVec0, const VmathSoaVector3 *unitVec1 )
-{
-    VmathSoaVector3 tmpV3_0, tmpV3_1;
-    vec_float4 cosHalfAngleX2, recipCosHalfAngleX2;
-    cosHalfAngleX2 = sqrtf4( spu_mul( spu_splats(2.0f), spu_add( spu_splats(1.0f), vmathSoaV3Dot( unitVec0, unitVec1 ) ) ) );
-    recipCosHalfAngleX2 = recipf4( cosHalfAngleX2 );
-    vmathSoaV3Cross( &tmpV3_0, unitVec0, unitVec1 );
-    vmathSoaV3ScalarMul( &tmpV3_1, &tmpV3_0, recipCosHalfAngleX2 );
-    vmathSoaQMakeFromV3Scalar( result, &tmpV3_1, spu_mul( cosHalfAngleX2, spu_splats(0.5f) ) );
-}
-
-static inline void vmathSoaQMakeRotationAxis( VmathSoaQuat *result, vec_float4 radians, const VmathSoaVector3 *unitVec )
-{
-    VmathSoaVector3 tmpV3_0;
-    vec_float4 s, c, angle;
-    angle = spu_mul( radians, spu_splats(0.5f) );
-    sincosf4( angle, &s, &c );
-    vmathSoaV3ScalarMul( &tmpV3_0, unitVec, s );
-    vmathSoaQMakeFromV3Scalar( result, &tmpV3_0, c );
-}
-
-static inline void vmathSoaQMakeRotationX( VmathSoaQuat *result, vec_float4 radians )
-{
-    vec_float4 s, c, angle;
-    angle = spu_mul( radians, spu_splats(0.5f) );
-    sincosf4( angle, &s, &c );
-    vmathSoaQMakeFromElems( result, s, spu_splats(0.0f), spu_splats(0.0f), c );
-}
-
-static inline void vmathSoaQMakeRotationY( VmathSoaQuat *result, vec_float4 radians )
-{
-    vec_float4 s, c, angle;
-    angle = spu_mul( radians, spu_splats(0.5f) );
-    sincosf4( angle, &s, &c );
-    vmathSoaQMakeFromElems( result, spu_splats(0.0f), s, spu_splats(0.0f), c );
-}
-
-static inline void vmathSoaQMakeRotationZ( VmathSoaQuat *result, vec_float4 radians )
-{
-    vec_float4 s, c, angle;
-    angle = spu_mul( radians, spu_splats(0.5f) );
-    sincosf4( angle, &s, &c );
-    vmathSoaQMakeFromElems( result, spu_splats(0.0f), spu_splats(0.0f), s, c );
-}
-
-static inline void vmathSoaQMul( VmathSoaQuat *result, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 )
-{
-    vec_float4 tmpX, tmpY, tmpZ, tmpW;
-    tmpX = spu_sub( spu_add( spu_add( spu_mul( quat0->w, quat1->x ), spu_mul( quat0->x, quat1->w ) ), spu_mul( quat0->y, quat1->z ) ), spu_mul( quat0->z, quat1->y ) );
-    tmpY = spu_sub( spu_add( spu_add( spu_mul( quat0->w, quat1->y ), spu_mul( quat0->y, quat1->w ) ), spu_mul( quat0->z, quat1->x ) ), spu_mul( quat0->x, quat1->z ) );
-    tmpZ = spu_sub( spu_add( spu_add( spu_mul( quat0->w, quat1->z ), spu_mul( quat0->z, quat1->w ) ), spu_mul( quat0->x, quat1->y ) ), spu_mul( quat0->y, quat1->x ) );
-    tmpW = spu_sub( spu_sub( spu_sub( spu_mul( quat0->w, quat1->w ), spu_mul( quat0->x, quat1->x ) ), spu_mul( quat0->y, quat1->y ) ), spu_mul( quat0->z, quat1->z ) );
-    vmathSoaQMakeFromElems( result, tmpX, tmpY, tmpZ, tmpW );
-}
-
-static inline void vmathSoaQRotate( VmathSoaVector3 *result, const VmathSoaQuat *quat, const VmathSoaVector3 *vec )
-{
-    vec_float4 tmpX, tmpY, tmpZ, tmpW;
-    tmpX = spu_sub( spu_add( spu_mul( quat->w, vec->x ), spu_mul( quat->y, vec->z ) ), spu_mul( quat->z, vec->y ) );
-    tmpY = spu_sub( spu_add( spu_mul( quat->w, vec->y ), spu_mul( quat->z, vec->x ) ), spu_mul( quat->x, vec->z ) );
-    tmpZ = spu_sub( spu_add( spu_mul( quat->w, vec->z ), spu_mul( quat->x, vec->y ) ), spu_mul( quat->y, vec->x ) );
-    tmpW = spu_add( spu_add( spu_mul( quat->x, vec->x ), spu_mul( quat->y, vec->y ) ), spu_mul( quat->z, vec->z ) );
-    result->x = spu_add( spu_sub( spu_add( spu_mul( tmpW, quat->x ), spu_mul( tmpX, quat->w ) ), spu_mul( tmpY, quat->z ) ), spu_mul( tmpZ, quat->y ) );
-    result->y = spu_add( spu_sub( spu_add( spu_mul( tmpW, quat->y ), spu_mul( tmpY, quat->w ) ), spu_mul( tmpZ, quat->x ) ), spu_mul( tmpX, quat->z ) );
-    result->z = spu_add( spu_sub( spu_add( spu_mul( tmpW, quat->z ), spu_mul( tmpZ, quat->w ) ), spu_mul( tmpX, quat->y ) ), spu_mul( tmpY, quat->x ) );
-}
-
-static inline void vmathSoaQConj( VmathSoaQuat *result, const VmathSoaQuat *quat )
-{
-    vmathSoaQMakeFromElems( result, negatef4( quat->x ), negatef4( quat->y ), negatef4( quat->z ), quat->w );
-}
-
-static inline void vmathSoaQSelect( VmathSoaQuat *result, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1, vec_uint4 select1 )
-{
-    result->x = spu_sel( quat0->x, quat1->x, select1 );
-    result->y = spu_sel( quat0->y, quat1->y, select1 );
-    result->z = spu_sel( quat0->z, quat1->z, select1 );
-    result->w = spu_sel( quat0->w, quat1->w, select1 );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathSoaQPrint( const VmathSoaQuat *quat )
-{
-    VmathQuat vec0, vec1, vec2, vec3;
-    vmathSoaQGet4Aos( quat, &vec0, &vec1, &vec2, &vec3 );
-    printf("slot 0:\n");
-    vmathQPrint( &vec0 );
-    printf("slot 1:\n");
-    vmathQPrint( &vec1 );
-    printf("slot 2:\n");
-    vmathQPrint( &vec2 );
-    printf("slot 3:\n");
-    vmathQPrint( &vec3 );
-}
-
-static inline void vmathSoaQPrints( const VmathSoaQuat *quat, const char *name )
-{
-    VmathQuat vec0, vec1, vec2, vec3;
-    printf( "%s:\n", name );
-    vmathSoaQGet4Aos( quat, &vec0, &vec1, &vec2, &vec3 );
-    printf("slot 0:\n");
-    vmathQPrint( &vec0 );
-    printf("slot 1:\n");
-    vmathQPrint( &vec1 );
-    printf("slot 2:\n");
-    vmathQPrint( &vec2 );
-    printf("slot 3:\n");
-    vmathQPrint( &vec3 );
-}
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_QUAT_SOA_C_H
+#define _VECTORMATH_QUAT_SOA_C_H
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*-----------------------------------------------------------------------------
+ * Definitions
+ */
+#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
+#define _VECTORMATH_INTERNAL_FUNCTIONS
+
+#endif
+
+static inline void vmathSoaQCopy( VmathSoaQuat *result, const VmathSoaQuat *quat )
+{   /* Copies the x, y, z and w members of *quat into *result, in that order. */
+    result->x = quat->x;
+    result->y = quat->y;
+    result->z = quat->z;
+    result->w = quat->w;
+}
+
+static inline void vmathSoaQMakeFromElems( VmathSoaQuat *result, vec_float4 _x, vec_float4 _y, vec_float4 _z, vec_float4 _w )
+{   /* Stores the four by-value arguments into the matching members of *result. */
+    result->w = _w;
+    result->z = _z;
+    result->y = _y;
+    result->x = _x;
+}
+
+static inline void vmathSoaQMakeFromV3Scalar( VmathSoaQuat *result, const VmathSoaVector3 *xyz, vec_float4 _w )
+{   /* Fills x, y, z from *xyz via vmathSoaQSetXYZ, then w from _w via vmathSoaQSetW. */
+    vmathSoaQSetXYZ( result, xyz );
+    vmathSoaQSetW( result, _w );
+}
+
+static inline void vmathSoaQMakeFromV4( VmathSoaQuat *result, const VmathSoaVector4 *vec )
+{   /* Copies the x, y, z and w members of the 4-vector *vec into *result unchanged. */
+    result->x = vec->x;
+    result->y = vec->y;
+    result->z = vec->z;
+    result->w = vec->w;
+}
+
+static inline void vmathSoaQMakeFromScalar( VmathSoaQuat *result, vec_float4 scalar )
+{   /* Writes the same by-value vector into all four members of *result. */
+    result->w = scalar;
+    result->z = scalar;
+    result->y = scalar;
+    result->x = scalar;
+}
+
+static inline void vmathSoaQMakeFromAos( VmathSoaQuat *result, const VmathQuat *quat )
+{   /* Builds each member of *result by shuffling quat->vec128 with a per-word byte pattern. */
+    vec_float4 packed = quat->vec128;
+    vec_uchar16 pickX = (vec_uchar16)spu_splats((int)0x00010203); /* bytes 0-3 in every word */
+    vec_uchar16 pickY = (vec_uchar16)spu_splats((int)0x04050607); /* bytes 4-7 in every word */
+    vec_uchar16 pickZ = (vec_uchar16)spu_splats((int)0x08090a0b); /* bytes 8-11 in every word */
+    vec_uchar16 pickW = (vec_uchar16)spu_splats((int)0x0c0d0e0f); /* bytes 12-15 in every word */
+    result->x = spu_shuffle( packed, packed, pickX );
+    result->y = spu_shuffle( packed, packed, pickY );
+    result->z = spu_shuffle( packed, packed, pickZ );
+    result->w = spu_shuffle( packed, packed, pickW );
+}
+
+static inline void vmathSoaQMakeFrom4Aos( VmathSoaQuat *result, const VmathQuat *quat0, const VmathQuat *quat1, const VmathQuat *quat2, const VmathQuat *quat3 )
+{   /* Two shuffle stages over the four vec128 inputs; _VECTORMATH_SHUF_* patterns are defined elsewhere. */
+    vec_float4 q02Lo, q13Lo, q02Hi, q13Hi;
+    q02Lo = spu_shuffle( quat0->vec128, quat2->vec128, _VECTORMATH_SHUF_XAYB );
+    q13Lo = spu_shuffle( quat1->vec128, quat3->vec128, _VECTORMATH_SHUF_XAYB );
+    q02Hi = spu_shuffle( quat0->vec128, quat2->vec128, _VECTORMATH_SHUF_ZCWD );
+    q13Hi = spu_shuffle( quat1->vec128, quat3->vec128, _VECTORMATH_SHUF_ZCWD );
+    result->x = spu_shuffle( q02Lo, q13Lo, _VECTORMATH_SHUF_XAYB );
+    result->y = spu_shuffle( q02Lo, q13Lo, _VECTORMATH_SHUF_ZCWD );
+    result->z = spu_shuffle( q02Hi, q13Hi, _VECTORMATH_SHUF_XAYB );
+    result->w = spu_shuffle( q02Hi, q13Hi, _VECTORMATH_SHUF_ZCWD );
+}
+
+static inline void vmathSoaQMakeIdentity( VmathSoaQuat *result )
+{   /* Sets x, y, z to splatted 0.0f and w to splatted 1.0f. */
+    vmathSoaQMakeFromElems( result, spu_splats(0.0f), spu_splats(0.0f), spu_splats(0.0f), spu_splats(1.0f) );
+}
+
+static inline void vmathSoaQLerp( VmathSoaQuat *result, vec_float4 t, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 )
+{   /* result = quat0 + (quat1 - quat0) * t, evaluated member by member. */
+    VmathSoaQuat delta, step;
+    vmathSoaQSub( &delta, quat1, quat0 );
+    vmathSoaQScalarMul( &step, &delta, t );
+    vmathSoaQAdd( result, quat0, &step );
+}
+
+static inline void vmathSoaQSlerp( VmathSoaQuat *result, vec_float4 t, const VmathSoaQuat *unitQuat0, const VmathSoaQuat *unitQuat1 )
+{   /* Blends unitQuat0 toward unitQuat1 by t; each slot uses sine-based or plain linear weights. */
+    VmathSoaQuat start, tmpQ_0, tmpQ_1;
+    vec_float4 recipSinAngle, scale0, scale1, cosAngle, angle;
+    vec_uint4 selectMask;
+    cosAngle = vmathSoaQDot( unitQuat0, unitQuat1 );
+    selectMask = (vec_uint4)spu_cmpgt( spu_splats(0.0f), cosAngle ); /* slots where cosAngle < 0 */
+    cosAngle = spu_sel( cosAngle, negatef4( cosAngle ), selectMask ); /* negate cosAngle in those slots */
+    vmathSoaQSetX( &start, spu_sel( unitQuat0->x, negatef4( unitQuat0->x ), selectMask ) ); /* start = unitQuat0, negated in the same slots */
+    vmathSoaQSetY( &start, spu_sel( unitQuat0->y, negatef4( unitQuat0->y ), selectMask ) );
+    vmathSoaQSetZ( &start, spu_sel( unitQuat0->z, negatef4( unitQuat0->z ), selectMask ) );
+    vmathSoaQSetW( &start, spu_sel( unitQuat0->w, negatef4( unitQuat0->w ), selectMask ) );
+    selectMask = (vec_uint4)spu_cmpgt( spu_splats(_VECTORMATH_SLERP_TOL), cosAngle ); /* slots where cosAngle < _VECTORMATH_SLERP_TOL */
+    angle = acosf4( cosAngle );
+    recipSinAngle = recipf4( sinf4( angle ) );
+    scale0 = spu_sel( spu_sub( spu_splats(1.0f), t ), spu_mul( sinf4( spu_mul( spu_sub( spu_splats(1.0f), t ), angle ) ), recipSinAngle ), selectMask ); /* masked: sin((1-t)*angle)/sin(angle); else 1-t */
+    scale1 = spu_sel( t, spu_mul( sinf4( spu_mul( t, angle ) ), recipSinAngle ), selectMask ); /* masked: sin(t*angle)/sin(angle); else t */
+    vmathSoaQScalarMul( &tmpQ_0, &start, scale0 );
+    vmathSoaQScalarMul( &tmpQ_1, unitQuat1, scale1 );
+    vmathSoaQAdd( result, &tmpQ_0, &tmpQ_1 ); /* result = start*scale0 + unitQuat1*scale1 */
+}
+
+static inline void vmathSoaQSquad( VmathSoaQuat *result, vec_float4 t, const VmathSoaQuat *unitQuat0, const VmathSoaQuat *unitQuat1, const VmathSoaQuat *unitQuat2, const VmathSoaQuat *unitQuat3 )
+{   /* Slerps 0->3 and 1->2 by t, then slerps between those two results by 2*t*(1-t). */
+    VmathSoaQuat outer, inner;
+    vmathSoaQSlerp( &outer, t, unitQuat0, unitQuat3 );
+    vmathSoaQSlerp( &inner, t, unitQuat1, unitQuat2 );
+    vmathSoaQSlerp( result, spu_mul( spu_mul( spu_splats(2.0f), t ), spu_sub( spu_splats(1.0f), t ) ), &outer, &inner );
+}
+
+static inline void vmathSoaQGet4Aos( const VmathSoaQuat *quat, VmathQuat *result0, VmathQuat *result1, VmathQuat *result2, VmathQuat *result3 )
+{   /* Two shuffle stages over x, y, z, w yield four vec128 values, one per output quaternion. */
+    vec_float4 xzLo, ywLo, xzHi, ywHi;
+    xzLo = spu_shuffle( quat->x, quat->z, _VECTORMATH_SHUF_XAYB );
+    ywLo = spu_shuffle( quat->y, quat->w, _VECTORMATH_SHUF_XAYB );
+    xzHi = spu_shuffle( quat->x, quat->z, _VECTORMATH_SHUF_ZCWD );
+    ywHi = spu_shuffle( quat->y, quat->w, _VECTORMATH_SHUF_ZCWD );
+    vmathQMakeFrom128( result0, spu_shuffle( xzLo, ywLo, _VECTORMATH_SHUF_XAYB ) );
+    vmathQMakeFrom128( result1, spu_shuffle( xzLo, ywLo, _VECTORMATH_SHUF_ZCWD ) );
+    vmathQMakeFrom128( result2, spu_shuffle( xzHi, ywHi, _VECTORMATH_SHUF_XAYB ) );
+    vmathQMakeFrom128( result3, spu_shuffle( xzHi, ywHi, _VECTORMATH_SHUF_ZCWD ) );
+}
+
+static inline void vmathSoaQSetXYZ( VmathSoaQuat *result, const VmathSoaVector3 *vec )
+{   /* Overwrites x, y, z of *result from *vec; the w member is not touched. */
+    result->x = vec->x;
+    result->y = vec->y;
+    result->z = vec->z;
+}
+
+static inline void vmathSoaQGetXYZ( VmathSoaVector3 *result, const VmathSoaQuat *quat )
+{   /* Passes quat's x, y, z members to vmathSoaV3MakeFromElems to fill *result; w is not read. */
+    vmathSoaV3MakeFromElems( result, quat->x, quat->y, quat->z );
+}
+
+static inline void vmathSoaQSetX( VmathSoaQuat *result, vec_float4 _x )
+{   /* Overwrites only the x member of *result. */
+    result->x = _x;
+}
+
+static inline vec_float4 vmathSoaQGetX( const VmathSoaQuat *quat )
+{   /* Returns the x member of *quat by value. */
+    return quat->x;
+}
+
+static inline void vmathSoaQSetY( VmathSoaQuat *result, vec_float4 _y )
+{   /* Overwrites only the y member of *result. */
+    result->y = _y;
+}
+
+static inline vec_float4 vmathSoaQGetY( const VmathSoaQuat *quat )
+{   /* Returns the y member of *quat by value. */
+    return quat->y;
+}
+
+static inline void vmathSoaQSetZ( VmathSoaQuat *result, vec_float4 _z )
+{   /* Overwrites only the z member of *result. */
+    result->z = _z;
+}
+
+static inline vec_float4 vmathSoaQGetZ( const VmathSoaQuat *quat )
+{   /* Returns the z member of *quat by value. */
+    return quat->z;
+}
+
+static inline void vmathSoaQSetW( VmathSoaQuat *result, vec_float4 _w )
+{   /* Overwrites only the w member of *result. */
+    result->w = _w;
+}
+
+static inline vec_float4 vmathSoaQGetW( const VmathSoaQuat *quat )
+{   /* Returns the w member of *quat by value. */
+    return quat->w;
+}
+
+static inline void vmathSoaQSetElem( VmathSoaQuat *result, int idx, vec_float4 value )
+{   /* Writes the idx-th vec_float4 counting from member x; idx is not range-checked. Assumes x..w are contiguous - TODO confirm. */
+    (&result->x)[idx] = value;
+}
+
+static inline vec_float4 vmathSoaQGetElem( const VmathSoaQuat *quat, int idx )
+{   /* Reads the idx-th vec_float4 counting from member x; idx is not range-checked. Assumes x..w are contiguous - TODO confirm. */
+    return (&quat->x)[idx];
+}
+
+static inline void vmathSoaQAdd( VmathSoaQuat *result, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 )
+{   /* Member-wise spu_add of quat0 and quat1; each store reads only its own member. */
+    result->x = spu_add( quat0->x, quat1->x );
+    result->y = spu_add( quat0->y, quat1->y );
+    result->z = spu_add( quat0->z, quat1->z );
+    result->w = spu_add( quat0->w, quat1->w );
+}
+
+static inline void vmathSoaQSub( VmathSoaQuat *result, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 )
+{   /* Member-wise spu_sub with quat0 as first operand and quat1 as second. */
+    result->x = spu_sub( quat0->x, quat1->x );
+    result->y = spu_sub( quat0->y, quat1->y );
+    result->z = spu_sub( quat0->z, quat1->z );
+    result->w = spu_sub( quat0->w, quat1->w );
+}
+
+static inline void vmathSoaQScalarMul( VmathSoaQuat *result, const VmathSoaQuat *quat, vec_float4 scalar )
+{   /* Multiplies every member of *quat by the same vec_float4 scalar (spu_mul). */
+    result->x = spu_mul( quat->x, scalar );
+    result->y = spu_mul( quat->y, scalar );
+    result->z = spu_mul( quat->z, scalar );
+    result->w = spu_mul( quat->w, scalar );
+}
+
+static inline void vmathSoaQScalarDiv( VmathSoaQuat *result, const VmathSoaQuat *quat, vec_float4 scalar )
+{   /* Applies divf4( member, scalar ) to every member of *quat; scalar is not checked for zero. */
+    result->x = divf4( quat->x, scalar );
+    result->y = divf4( quat->y, scalar );
+    result->z = divf4( quat->z, scalar );
+    result->w = divf4( quat->w, scalar );
+}
+
+static inline void vmathSoaQNeg( VmathSoaQuat *result, const VmathSoaQuat *quat )
+{   /* Applies negatef4 to all four members of *quat, w included. */
+    result->x = negatef4( quat->x );
+    result->y = negatef4( quat->y );
+    result->z = negatef4( quat->z );
+    result->w = negatef4( quat->w );
+}
+
+static inline vec_float4 vmathSoaQDot( const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 )
+{   /* Sum of the member products x*x + y*y + z*z + w*w, accumulated in that order. */
+    vec_float4 acc;
+    acc = spu_mul( quat0->x, quat1->x );
+    acc = spu_add( acc, spu_mul( quat0->y, quat1->y ) );
+    acc = spu_add( acc, spu_mul( quat0->z, quat1->z ) );
+    acc = spu_add( acc, spu_mul( quat0->w, quat1->w ) );
+    return acc;
+}
+
+static inline vec_float4 vmathSoaQNorm( const VmathSoaQuat *quat )
+{   /* Sum of the squared members x, y, z, w; no square root is taken here. */
+    vec_float4 sumSq;
+    sumSq = spu_mul( quat->x, quat->x );
+    sumSq = spu_add( sumSq, spu_mul( quat->y, quat->y ) );
+    sumSq = spu_add( sumSq, spu_mul( quat->z, quat->z ) );
+    sumSq = spu_add( sumSq, spu_mul( quat->w, quat->w ) );
+    return sumSq;
+}
+
+static inline vec_float4 vmathSoaQLength( const VmathSoaQuat *quat )
+{   /* Returns sqrtf4 of vmathSoaQNorm( quat ), i.e. of the sum of squared members. */
+    return sqrtf4( vmathSoaQNorm( quat ) );
+}
+
+static inline void vmathSoaQNormalize( VmathSoaQuat *result, const VmathSoaQuat *quat )
+{   /* Scales every member by rsqrtf4 of the squared norm; a zero norm is not guarded against. */
+    vec_float4 normSq, invLen;
+    normSq = vmathSoaQNorm( quat );
+    invLen = rsqrtf4( normSq );
+    result->x = spu_mul( quat->x, invLen );
+    result->y = spu_mul( quat->y, invLen );
+    result->z = spu_mul( quat->z, invLen );
+    result->w = spu_mul( quat->w, invLen );
+}
+
+static inline void vmathSoaQMakeRotationArc( VmathSoaQuat *result, const VmathSoaVector3 *unitVec0, const VmathSoaVector3 *unitVec1 )
+{   /* With d, c the vmathSoaV3Dot / vmathSoaV3Cross results: w = sqrt(2*(1+d))/2, xyz = c / sqrt(2*(1+d)). */
+    VmathSoaVector3 crossProd, scaledAxis;
+    vec_float4 twoCosHalf, invTwoCosHalf;
+    twoCosHalf = sqrtf4( spu_mul( spu_splats(2.0f), spu_add( spu_splats(1.0f), vmathSoaV3Dot( unitVec0, unitVec1 ) ) ) );
+    invTwoCosHalf = recipf4( twoCosHalf ); /* no guard for twoCosHalf == 0 (d == -1) */
+    vmathSoaV3Cross( &crossProd, unitVec0, unitVec1 );
+    vmathSoaV3ScalarMul( &scaledAxis, &crossProd, invTwoCosHalf );
+    vmathSoaQMakeFromV3Scalar( result, &scaledAxis, spu_mul( twoCosHalf, spu_splats(0.5f) ) );
+}
+
+static inline void vmathSoaQMakeRotationAxis( VmathSoaQuat *result, vec_float4 radians, const VmathSoaVector3 *unitVec )
+{   /* xyz = unitVec scaled by the first sincosf4 output of radians*0.5; w = the second output. */
+    VmathSoaVector3 axisPart;
+    vec_float4 sinHalf, cosHalf, halfAngle;
+    halfAngle = spu_mul( radians, spu_splats(0.5f) );
+    sincosf4( halfAngle, &sinHalf, &cosHalf );
+    vmathSoaV3ScalarMul( &axisPart, unitVec, sinHalf );
+    vmathSoaQMakeFromV3Scalar( result, &axisPart, cosHalf );
+}
+
+static inline void vmathSoaQMakeRotationX( VmathSoaQuat *result, vec_float4 radians )
+{   /* result = ( s, 0, 0, c ) where s, c are the sincosf4 outputs for radians * 0.5. */
+    vec_float4 sinHalf, cosHalf, halfAngle;
+    halfAngle = spu_mul( radians, spu_splats(0.5f) );
+    sincosf4( halfAngle, &sinHalf, &cosHalf );
+    vmathSoaQMakeFromElems( result, sinHalf, spu_splats(0.0f), spu_splats(0.0f), cosHalf );
+}
+
+static inline void vmathSoaQMakeRotationY( VmathSoaQuat *result, vec_float4 radians )
+{   /* result = ( 0, s, 0, c ) where s, c are the sincosf4 outputs for radians * 0.5. */
+    vec_float4 sinHalf, cosHalf, halfAngle;
+    halfAngle = spu_mul( radians, spu_splats(0.5f) );
+    sincosf4( halfAngle, &sinHalf, &cosHalf );
+    vmathSoaQMakeFromElems( result, spu_splats(0.0f), sinHalf, spu_splats(0.0f), cosHalf );
+}
+
+static inline void vmathSoaQMakeRotationZ( VmathSoaQuat *result, vec_float4 radians )
+{   /* result = ( 0, 0, s, c ) where s, c are the sincosf4 outputs for radians * 0.5. */
+    vec_float4 sinHalf, cosHalf, halfAngle;
+    halfAngle = spu_mul( radians, spu_splats(0.5f) );
+    sincosf4( halfAngle, &sinHalf, &cosHalf );
+    vmathSoaQMakeFromElems( result, spu_splats(0.0f), spu_splats(0.0f), sinHalf, cosHalf );
+}
+
+static inline void vmathSoaQMul( VmathSoaQuat *result, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 )
+{   /* Quaternion product quat0 * quat1; all four parts go to temporaries before *result is written. */
+    vec_float4 tmpX, tmpY, tmpZ, tmpW;
+    tmpX = spu_sub( spu_add( spu_add( spu_mul( quat0->w, quat1->x ), spu_mul( quat0->x, quat1->w ) ), spu_mul( quat0->y, quat1->z ) ), spu_mul( quat0->z, quat1->y ) ); /* w0*x1 + x0*w1 + y0*z1 - z0*y1 */
+    tmpY = spu_sub( spu_add( spu_add( spu_mul( quat0->w, quat1->y ), spu_mul( quat0->y, quat1->w ) ), spu_mul( quat0->z, quat1->x ) ), spu_mul( quat0->x, quat1->z ) ); /* w0*y1 + y0*w1 + z0*x1 - x0*z1 */
+    tmpZ = spu_sub( spu_add( spu_add( spu_mul( quat0->w, quat1->z ), spu_mul( quat0->z, quat1->w ) ), spu_mul( quat0->x, quat1->y ) ), spu_mul( quat0->y, quat1->x ) ); /* w0*z1 + z0*w1 + x0*y1 - y0*x1 */
+    tmpW = spu_sub( spu_sub( spu_sub( spu_mul( quat0->w, quat1->w ), spu_mul( quat0->x, quat1->x ) ), spu_mul( quat0->y, quat1->y ) ), spu_mul( quat0->z, quat1->z ) ); /* w0*w1 - x0*x1 - y0*y1 - z0*z1 */
+    vmathSoaQMakeFromElems( result, tmpX, tmpY, tmpZ, tmpW );
+}
+
+static inline void vmathSoaQRotate( VmathSoaVector3 *result, const VmathSoaQuat *quat, const VmathSoaVector3 *vec )
+{   /* Two-stage evaluation: intermediates tmpX..tmpW from quat and vec, then combined with quat again. */
+    vec_float4 tmpX, tmpY, tmpZ, tmpW;
+    tmpX = spu_sub( spu_add( spu_mul( quat->w, vec->x ), spu_mul( quat->y, vec->z ) ), spu_mul( quat->z, vec->y ) ); /* w*vx + qy*vz - qz*vy */
+    tmpY = spu_sub( spu_add( spu_mul( quat->w, vec->y ), spu_mul( quat->z, vec->x ) ), spu_mul( quat->x, vec->z ) ); /* w*vy + qz*vx - qx*vz */
+    tmpZ = spu_sub( spu_add( spu_mul( quat->w, vec->z ), spu_mul( quat->x, vec->y ) ), spu_mul( quat->y, vec->x ) ); /* w*vz + qx*vy - qy*vx */
+    tmpW = spu_add( spu_add( spu_mul( quat->x, vec->x ), spu_mul( quat->y, vec->y ) ), spu_mul( quat->z, vec->z ) ); /* qx*vx + qy*vy + qz*vz */
+    result->x = spu_add( spu_sub( spu_add( spu_mul( tmpW, quat->x ), spu_mul( tmpX, quat->w ) ), spu_mul( tmpY, quat->z ) ), spu_mul( tmpZ, quat->y ) ); /* vec is not read from here on */
+    result->y = spu_add( spu_sub( spu_add( spu_mul( tmpW, quat->y ), spu_mul( tmpY, quat->w ) ), spu_mul( tmpZ, quat->x ) ), spu_mul( tmpX, quat->z ) );
+    result->z = spu_add( spu_sub( spu_add( spu_mul( tmpW, quat->z ), spu_mul( tmpZ, quat->w ) ), spu_mul( tmpX, quat->y ) ), spu_mul( tmpY, quat->x ) );
+}
+
+static inline void vmathSoaQConj( VmathSoaQuat *result, const VmathSoaQuat *quat )
+{   /* Applies negatef4 to x, y and z and passes w through unchanged. */
+    vmathSoaQMakeFromElems( result, negatef4( quat->x ), negatef4( quat->y ), negatef4( quat->z ), quat->w );
+}
+
+static inline void vmathSoaQSelect( VmathSoaQuat *result, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1, vec_uint4 select1 )
+{   /* Per member, spu_sel( quat0, quat1, select1 ); the same mask is applied to x, y, z and w. */
+    result->x = spu_sel( quat0->x, quat1->x, select1 );
+    result->y = spu_sel( quat0->y, quat1->y, select1 );
+    result->z = spu_sel( quat0->z, quat1->z, select1 );
+    result->w = spu_sel( quat0->w, quat1->w, select1 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathSoaQPrint( const VmathSoaQuat *quat )
+{   /* Unpacks *quat with vmathSoaQGet4Aos and prints each of the four results under a "slot N:" heading. */
+    VmathQuat slot0, slot1, slot2, slot3;
+    vmathSoaQGet4Aos( quat, &slot0, &slot1, &slot2, &slot3 );
+    printf("slot 0:\n");
+    vmathQPrint( &slot0 );
+    printf("slot 1:\n");
+    vmathQPrint( &slot1 );
+    printf("slot 2:\n");
+    vmathQPrint( &slot2 );
+    printf("slot 3:\n");
+    vmathQPrint( &slot3 );
+}
+
+static inline void vmathSoaQPrints( const VmathSoaQuat *quat, const char *name )
+{   /* Prints "name:" first, then the four unpacked quaternions under "slot N:" headings. */
+    VmathQuat slot0, slot1, slot2, slot3;
+    printf( "%s:\n", name );
+    vmathSoaQGet4Aos( quat, &slot0, &slot1, &slot2, &slot3 );
+    printf("slot 0:\n");
+    vmathQPrint( &slot0 );
+    printf("slot 1:\n");
+    vmathQPrint( &slot1 );
+    printf("slot 2:\n");
+    vmathQPrint( &slot2 );
+    printf("slot 3:\n");
+    vmathQPrint( &slot3 );
+}
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/spu/c/quat_soa_v.h b/Extras/vectormathlibrary/include/vectormath/spu/c/quat_soa_v.h
index 601d9da92..f51b43809 100644
--- a/Extras/vectormathlibrary/include/vectormath/spu/c/quat_soa_v.h
+++ b/Extras/vectormathlibrary/include/vectormath/spu/c/quat_soa_v.h
@@ -1,319 +1,319 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_QUAT_SOA_V_C_H
-#define _VECTORMATH_QUAT_SOA_V_C_H
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/*-----------------------------------------------------------------------------
- * Definitions
- */
-#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
-#define _VECTORMATH_INTERNAL_FUNCTIONS
-
-#endif
-
-static inline VmathSoaQuat vmathSoaQMakeFromElems_V( vec_float4 _x, vec_float4 _y, vec_float4 _z, vec_float4 _w )
-{
-    VmathSoaQuat result;
-    vmathSoaQMakeFromElems(&result, _x, _y, _z, _w);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQMakeFromV3Scalar_V( VmathSoaVector3 xyz, vec_float4 _w )
-{
-    VmathSoaQuat result;
-    vmathSoaQMakeFromV3Scalar(&result, &xyz, _w);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQMakeFromV4_V( VmathSoaVector4 vec )
-{
-    VmathSoaQuat result;
-    vmathSoaQMakeFromV4(&result, &vec);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQMakeFromScalar_V( vec_float4 scalar )
-{
-    VmathSoaQuat result;
-    vmathSoaQMakeFromScalar(&result, scalar);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQMakeFromAos_V( VmathQuat quat )
-{
-    VmathSoaQuat result;
-    vmathSoaQMakeFromAos(&result, &quat);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQMakeFrom4Aos_V( VmathQuat quat0, VmathQuat quat1, VmathQuat quat2, VmathQuat quat3 )
-{
-    VmathSoaQuat result;
-    vmathSoaQMakeFrom4Aos(&result, &quat0, &quat1, &quat2, &quat3);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQMakeIdentity_V( )
-{
-    VmathSoaQuat result;
-    vmathSoaQMakeIdentity(&result);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQLerp_V( vec_float4 t, VmathSoaQuat quat0, VmathSoaQuat quat1 )
-{
-    VmathSoaQuat result;
-    vmathSoaQLerp(&result, t, &quat0, &quat1);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQSlerp_V( vec_float4 t, VmathSoaQuat unitQuat0, VmathSoaQuat unitQuat1 )
-{
-    VmathSoaQuat result;
-    vmathSoaQSlerp(&result, t, &unitQuat0, &unitQuat1);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQSquad_V( vec_float4 t, VmathSoaQuat unitQuat0, VmathSoaQuat unitQuat1, VmathSoaQuat unitQuat2, VmathSoaQuat unitQuat3 )
-{
-    VmathSoaQuat result;
-    vmathSoaQSquad(&result, t, &unitQuat0, &unitQuat1, &unitQuat2, &unitQuat3);
-    return result;
-}
-
-static inline void vmathSoaQGet4Aos_V( VmathSoaQuat quat, VmathQuat *result0, VmathQuat *result1, VmathQuat *result2, VmathQuat *result3 )
-{
-    vmathSoaQGet4Aos(&quat, result0, result1, result2, result3);
-}
-
-static inline void vmathSoaQSetXYZ_V( VmathSoaQuat *result, VmathSoaVector3 vec )
-{
-    vmathSoaQSetXYZ(result, &vec);
-}
-
-static inline VmathSoaVector3 vmathSoaQGetXYZ_V( VmathSoaQuat quat )
-{
-    VmathSoaVector3 result;
-    vmathSoaQGetXYZ(&result, &quat);
-    return result;
-}
-
-static inline void vmathSoaQSetX_V( VmathSoaQuat *result, vec_float4 _x )
-{
-    vmathSoaQSetX(result, _x);
-}
-
-static inline vec_float4 vmathSoaQGetX_V( VmathSoaQuat quat )
-{
-    return vmathSoaQGetX(&quat);
-}
-
-static inline void vmathSoaQSetY_V( VmathSoaQuat *result, vec_float4 _y )
-{
-    vmathSoaQSetY(result, _y);
-}
-
-static inline vec_float4 vmathSoaQGetY_V( VmathSoaQuat quat )
-{
-    return vmathSoaQGetY(&quat);
-}
-
-static inline void vmathSoaQSetZ_V( VmathSoaQuat *result, vec_float4 _z )
-{
-    vmathSoaQSetZ(result, _z);
-}
-
-static inline vec_float4 vmathSoaQGetZ_V( VmathSoaQuat quat )
-{
-    return vmathSoaQGetZ(&quat);
-}
-
-static inline void vmathSoaQSetW_V( VmathSoaQuat *result, vec_float4 _w )
-{
-    vmathSoaQSetW(result, _w);
-}
-
-static inline vec_float4 vmathSoaQGetW_V( VmathSoaQuat quat )
-{
-    return vmathSoaQGetW(&quat);
-}
-
-static inline void vmathSoaQSetElem_V( VmathSoaQuat *result, int idx, vec_float4 value )
-{
-    vmathSoaQSetElem(result, idx, value);
-}
-
-static inline vec_float4 vmathSoaQGetElem_V( VmathSoaQuat quat, int idx )
-{
-    return vmathSoaQGetElem(&quat, idx);
-}
-
-static inline VmathSoaQuat vmathSoaQAdd_V( VmathSoaQuat quat0, VmathSoaQuat quat1 )
-{
-    VmathSoaQuat result;
-    vmathSoaQAdd(&result, &quat0, &quat1);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQSub_V( VmathSoaQuat quat0, VmathSoaQuat quat1 )
-{
-    VmathSoaQuat result;
-    vmathSoaQSub(&result, &quat0, &quat1);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQScalarMul_V( VmathSoaQuat quat, vec_float4 scalar )
-{
-    VmathSoaQuat result;
-    vmathSoaQScalarMul(&result, &quat, scalar);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQScalarDiv_V( VmathSoaQuat quat, vec_float4 scalar )
-{
-    VmathSoaQuat result;
-    vmathSoaQScalarDiv(&result, &quat, scalar);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQNeg_V( VmathSoaQuat quat )
-{
-    VmathSoaQuat result;
-    vmathSoaQNeg(&result, &quat);
-    return result;
-}
-
-static inline vec_float4 vmathSoaQDot_V( VmathSoaQuat quat0, VmathSoaQuat quat1 )
-{
-    return vmathSoaQDot(&quat0, &quat1);
-}
-
-static inline vec_float4 vmathSoaQNorm_V( VmathSoaQuat quat )
-{
-    return vmathSoaQNorm(&quat);
-}
-
-static inline vec_float4 vmathSoaQLength_V( VmathSoaQuat quat )
-{
-    return vmathSoaQLength(&quat);
-}
-
-static inline VmathSoaQuat vmathSoaQNormalize_V( VmathSoaQuat quat )
-{
-    VmathSoaQuat result;
-    vmathSoaQNormalize(&result, &quat);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQMakeRotationArc_V( VmathSoaVector3 unitVec0, VmathSoaVector3 unitVec1 )
-{
-    VmathSoaQuat result;
-    vmathSoaQMakeRotationArc(&result, &unitVec0, &unitVec1);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQMakeRotationAxis_V( vec_float4 radians, VmathSoaVector3 unitVec )
-{
-    VmathSoaQuat result;
-    vmathSoaQMakeRotationAxis(&result, radians, &unitVec);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQMakeRotationX_V( vec_float4 radians )
-{
-    VmathSoaQuat result;
-    vmathSoaQMakeRotationX(&result, radians);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQMakeRotationY_V( vec_float4 radians )
-{
-    VmathSoaQuat result;
-    vmathSoaQMakeRotationY(&result, radians);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQMakeRotationZ_V( vec_float4 radians )
-{
-    VmathSoaQuat result;
-    vmathSoaQMakeRotationZ(&result, radians);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQMul_V( VmathSoaQuat quat0, VmathSoaQuat quat1 )
-{
-    VmathSoaQuat result;
-    vmathSoaQMul(&result, &quat0, &quat1);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaQRotate_V( VmathSoaQuat quat, VmathSoaVector3 vec )
-{
-    VmathSoaVector3 result;
-    vmathSoaQRotate(&result, &quat, &vec);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQConj_V( VmathSoaQuat quat )
-{
-    VmathSoaQuat result;
-    vmathSoaQConj(&result, &quat);
-    return result;
-}
-
-static inline VmathSoaQuat vmathSoaQSelect_V( VmathSoaQuat quat0, VmathSoaQuat quat1, vec_uint4 select1 )
-{
-    VmathSoaQuat result;
-    vmathSoaQSelect(&result, &quat0, &quat1, select1);
-    return result;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathSoaQPrint_V( VmathSoaQuat quat )
-{
-    vmathSoaQPrint(&quat);
-}
-
-static inline void vmathSoaQPrints_V( VmathSoaQuat quat, const char *name )
-{
-    vmathSoaQPrints(&quat, name);
-}
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_QUAT_SOA_V_C_H
+#define _VECTORMATH_QUAT_SOA_V_C_H
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*-----------------------------------------------------------------------------
+ * Definitions
+ */
+#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
+#define _VECTORMATH_INTERNAL_FUNCTIONS
+
+#endif
+
+static inline VmathSoaQuat vmathSoaQMakeFromElems_V( vec_float4 _x, vec_float4 _y, vec_float4 _z, vec_float4 _w )
+{   /* By-value wrapper around vmathSoaQMakeFromElems: returns the quaternion it builds from _x, _y, _z, _w. */
+    VmathSoaQuat result;
+    vmathSoaQMakeFromElems(&result, _x, _y, _z, _w); /* pointer-based API fills the local */
+    return result;
+}
+
+static inline VmathSoaQuat vmathSoaQMakeFromV3Scalar_V( VmathSoaVector3 xyz, vec_float4 _w )
+{   /* By-value wrapper around vmathSoaQMakeFromV3Scalar: builds a quaternion from xyz and _w. */
+    VmathSoaQuat result;
+    vmathSoaQMakeFromV3Scalar(&result, &xyz, _w); /* address of the by-value copy of xyz is forwarded */
+    return result;
+}
+
+static inline VmathSoaQuat vmathSoaQMakeFromV4_V( VmathSoaVector4 vec )
+{   /* By-value wrapper around vmathSoaQMakeFromV4: builds a quaternion from a VmathSoaVector4. */
+    VmathSoaQuat result;
+    vmathSoaQMakeFromV4(&result, &vec); /* address of the by-value copy is forwarded */
+    return result;
+}
+
+static inline VmathSoaQuat vmathSoaQMakeFromScalar_V( vec_float4 scalar )
+{   /* By-value wrapper around vmathSoaQMakeFromScalar: returns the quaternion it builds from scalar. */
+    VmathSoaQuat result;
+    vmathSoaQMakeFromScalar(&result, scalar); /* pointer-based API fills the local */
+    return result;
+}
+
+static inline VmathSoaQuat vmathSoaQMakeFromAos_V( VmathQuat quat )
+{   /* By-value wrapper around vmathSoaQMakeFromAos: builds a VmathSoaQuat from a single VmathQuat. */
+    VmathSoaQuat result;
+    vmathSoaQMakeFromAos(&result, &quat); /* address of the by-value copy is forwarded */
+    return result;
+}
+
+static inline VmathSoaQuat vmathSoaQMakeFrom4Aos_V( VmathQuat quat0, VmathQuat quat1, VmathQuat quat2, VmathQuat quat3 )
+{   /* By-value wrapper around vmathSoaQMakeFrom4Aos: builds one VmathSoaQuat from four VmathQuat inputs. */
+    VmathSoaQuat result;
+    vmathSoaQMakeFrom4Aos(&result, &quat0, &quat1, &quat2, &quat3); /* inputs are forwarded in argument order */
+    return result;
+}
+
+static inline VmathSoaQuat vmathSoaQMakeIdentity_V( void ) /* explicit void: an empty list is not a prototype in C before C23 */
+{   /* By-value wrapper around vmathSoaQMakeIdentity: takes no arguments and returns the quaternion it produces. */
+    VmathSoaQuat result;
+    vmathSoaQMakeIdentity(&result); /* pointer-based API fills the local */
+    return result;
+}
+
+static inline VmathSoaQuat vmathSoaQLerp_V( vec_float4 t, VmathSoaQuat quat0, VmathSoaQuat quat1 )
+{   /* By-value wrapper around vmathSoaQLerp: returns its result for parameter t and the two quaternions. */
+    VmathSoaQuat result;
+    vmathSoaQLerp(&result, t, &quat0, &quat1); /* addresses of the by-value copies are forwarded */
+    return result;
+}
+
+static inline VmathSoaQuat vmathSoaQSlerp_V( vec_float4 t, VmathSoaQuat unitQuat0, VmathSoaQuat unitQuat1 )
+{   /* By-value wrapper around vmathSoaQSlerp. Parameter names suggest unit-length inputs; nothing is checked here. */
+    VmathSoaQuat result;
+    vmathSoaQSlerp(&result, t, &unitQuat0, &unitQuat1); /* addresses of the by-value copies are forwarded */
+    return result;
+}
+
+static inline VmathSoaQuat vmathSoaQSquad_V( vec_float4 t, VmathSoaQuat unitQuat0, VmathSoaQuat unitQuat1, VmathSoaQuat unitQuat2, VmathSoaQuat unitQuat3 )
+{   /* By-value wrapper around vmathSoaQSquad. Parameter names suggest unit-length inputs; nothing is checked here. */
+    VmathSoaQuat result;
+    vmathSoaQSquad(&result, t, &unitQuat0, &unitQuat1, &unitQuat2, &unitQuat3); /* inputs are forwarded in argument order */
+    return result;
+}
+
+static inline void vmathSoaQGet4Aos_V( VmathSoaQuat quat, VmathQuat *result0, VmathQuat *result1, VmathQuat *result2, VmathQuat *result3 )
+{   /* By-value wrapper around vmathSoaQGet4Aos: only quat is taken by value; the four result pointers pass through. */
+    vmathSoaQGet4Aos(&quat, result0, result1, result2, result3);
+}
+
+static inline void vmathSoaQSetXYZ_V( VmathSoaQuat *result, VmathSoaVector3 vec )
+{   /* By-value wrapper around vmathSoaQSetXYZ: result stays a pointer, vec is taken by value. */
+    vmathSoaQSetXYZ(result, &vec); /* address of the by-value copy is forwarded */
+}
+
+static inline VmathSoaVector3 vmathSoaQGetXYZ_V( VmathSoaQuat quat )
+{   /* By-value wrapper around vmathSoaQGetXYZ: returns the VmathSoaVector3 it produces for quat. */
+    VmathSoaVector3 result;
+    vmathSoaQGetXYZ(&result, &quat); /* address of the by-value copy is forwarded */
+    return result;
+}
+
+static inline void vmathSoaQSetX_V( VmathSoaQuat *result, vec_float4 _x )
+{   /* Forwards both arguments unchanged to vmathSoaQSetX; exists so the _V API has a matching setter. */
+    vmathSoaQSetX(result, _x);
+}
+
+static inline vec_float4 vmathSoaQGetX_V( VmathSoaQuat quat )
+{   /* By-value wrapper: returns whatever vmathSoaQGetX yields for the local copy of quat. */
+    return vmathSoaQGetX(&quat);
+}
+
+static inline void vmathSoaQSetY_V( VmathSoaQuat *result, vec_float4 _y )
+{   /* Forwards both arguments unchanged to vmathSoaQSetY; exists so the _V API has a matching setter. */
+    vmathSoaQSetY(result, _y);
+}
+
+static inline vec_float4 vmathSoaQGetY_V( VmathSoaQuat quat )
+{   /* By-value wrapper: returns whatever vmathSoaQGetY yields for the local copy of quat. */
+    return vmathSoaQGetY(&quat);
+}
+
+static inline void vmathSoaQSetZ_V( VmathSoaQuat *result, vec_float4 _z )
+{   /* Forwards both arguments unchanged to vmathSoaQSetZ; exists so the _V API has a matching setter. */
+    vmathSoaQSetZ(result, _z);
+}
+
+static inline vec_float4 vmathSoaQGetZ_V( VmathSoaQuat quat )
+{   /* By-value wrapper: returns whatever vmathSoaQGetZ yields for the local copy of quat. */
+    return vmathSoaQGetZ(&quat);
+}
+
+static inline void vmathSoaQSetW_V( VmathSoaQuat *result, vec_float4 _w )
+{   /* Forwards both arguments unchanged to vmathSoaQSetW; exists so the _V API has a matching setter. */
+    vmathSoaQSetW(result, _w);
+}
+
+static inline vec_float4 vmathSoaQGetW_V( VmathSoaQuat quat )
+{   /* By-value wrapper: returns whatever vmathSoaQGetW yields for the local copy of quat. */
+    return vmathSoaQGetW(&quat);
+}
+
+static inline void vmathSoaQSetElem_V( VmathSoaQuat *result, int idx, vec_float4 value )
+{   /* Forwards result, idx and value unchanged to vmathSoaQSetElem; idx is not validated here. */
+    vmathSoaQSetElem(result, idx, value);
+}
+
+static inline vec_float4 vmathSoaQGetElem_V( VmathSoaQuat quat, int idx )
+{   /* By-value wrapper: returns whatever vmathSoaQGetElem yields for the local copy of quat; idx is not validated here. */
+    return vmathSoaQGetElem(&quat, idx);
+}
+
+static inline VmathSoaQuat vmathSoaQAdd_V( VmathSoaQuat quat0, VmathSoaQuat quat1 )
+{   /* By-value wrapper around vmathSoaQAdd: returns its result for ( quat0, quat1 ). */
+    VmathSoaQuat result;
+    vmathSoaQAdd(&result, &quat0, &quat1); /* operand order is preserved */
+    return result;
+}
+
+static inline VmathSoaQuat vmathSoaQSub_V( VmathSoaQuat quat0, VmathSoaQuat quat1 )
+{   /* By-value wrapper around vmathSoaQSub: returns its result for ( quat0, quat1 ). */
+    VmathSoaQuat result;
+    vmathSoaQSub(&result, &quat0, &quat1); /* operand order is preserved */
+    return result;
+}
+
+static inline VmathSoaQuat vmathSoaQScalarMul_V( VmathSoaQuat quat, vec_float4 scalar )
+{   /* By-value wrapper around vmathSoaQScalarMul: returns its result for quat and scalar. */
+    VmathSoaQuat result;
+    vmathSoaQScalarMul(&result, &quat, scalar); /* scalar is a vec_float4 and is forwarded as-is */
+    return result;
+}
+
+static inline VmathSoaQuat vmathSoaQScalarDiv_V( VmathSoaQuat quat, vec_float4 scalar )
+{   /* By-value wrapper around vmathSoaQScalarDiv: returns its result for quat and scalar; no zero check is made here. */
+    VmathSoaQuat result;
+    vmathSoaQScalarDiv(&result, &quat, scalar); /* scalar is a vec_float4 and is forwarded as-is */
+    return result;
+}
+
+static inline VmathSoaQuat vmathSoaQNeg_V( VmathSoaQuat quat )
+{   /* By-value wrapper around vmathSoaQNeg: returns its result for quat. */
+    VmathSoaQuat result;
+    vmathSoaQNeg(&result, &quat); /* address of the by-value copy is forwarded */
+    return result;
+}
+
+static inline vec_float4 vmathSoaQDot_V( VmathSoaQuat quat0, VmathSoaQuat quat1 )
+{   /* By-value wrapper: returns the vec_float4 that vmathSoaQDot yields for ( quat0, quat1 ). */
+    return vmathSoaQDot(&quat0, &quat1);
+}
+
+static inline vec_float4 vmathSoaQNorm_V( VmathSoaQuat quat )
+{   /* By-value wrapper: returns the vec_float4 that vmathSoaQNorm yields for quat. */
+    return vmathSoaQNorm(&quat);
+}
+
+static inline vec_float4 vmathSoaQLength_V( VmathSoaQuat quat )
+{   /* By-value wrapper: returns the vec_float4 that vmathSoaQLength yields for quat. */
+    return vmathSoaQLength(&quat);
+}
+
+static inline VmathSoaQuat vmathSoaQNormalize_V( VmathSoaQuat quat )
+{   /* By-value wrapper around vmathSoaQNormalize: returns its result for quat. */
+    VmathSoaQuat result;
+    vmathSoaQNormalize(&result, &quat); /* address of the by-value copy is forwarded */
+    return result;
+}
+
+static inline VmathSoaQuat vmathSoaQMakeRotationArc_V( VmathSoaVector3 unitVec0, VmathSoaVector3 unitVec1 )
+{   /* By-value wrapper around vmathSoaQMakeRotationArc. Parameter names suggest unit-length inputs; nothing is checked here. */
+    VmathSoaQuat result;
+    vmathSoaQMakeRotationArc(&result, &unitVec0, &unitVec1); /* operand order is preserved */
+    return result;
+}
+
+static inline VmathSoaQuat vmathSoaQMakeRotationAxis_V( vec_float4 radians, VmathSoaVector3 unitVec )
+{   /* By-value wrapper around vmathSoaQMakeRotationAxis. The name unitVec suggests a unit-length axis; nothing is checked here. */
+    VmathSoaQuat result;
+    vmathSoaQMakeRotationAxis(&result, radians, &unitVec); /* radians is forwarded as-is */
+    return result;
+}
+
+static inline VmathSoaQuat vmathSoaQMakeRotationX_V( vec_float4 radians )
+{   /* By-value wrapper around vmathSoaQMakeRotationX: returns the quaternion it builds for radians. */
+    VmathSoaQuat result;
+    vmathSoaQMakeRotationX(&result, radians); /* pointer-based API fills the local */
+    return result;
+}
+
+static inline VmathSoaQuat vmathSoaQMakeRotationY_V( vec_float4 radians )
+{   /* By-value wrapper around vmathSoaQMakeRotationY: returns the quaternion it builds for radians. */
+    VmathSoaQuat result;
+    vmathSoaQMakeRotationY(&result, radians); /* pointer-based API fills the local */
+    return result;
+}
+
+static inline VmathSoaQuat vmathSoaQMakeRotationZ_V( vec_float4 radians )
+{   /* By-value wrapper around vmathSoaQMakeRotationZ: returns the quaternion it builds for radians. */
+    VmathSoaQuat result;
+    vmathSoaQMakeRotationZ(&result, radians); /* pointer-based API fills the local */
+    return result;
+}
+
+static inline VmathSoaQuat vmathSoaQMul_V( VmathSoaQuat quat0, VmathSoaQuat quat1 )
+{   /* By-value wrapper around vmathSoaQMul: returns its result for ( quat0, quat1 ). */
+    VmathSoaQuat result;
+    vmathSoaQMul(&result, &quat0, &quat1); /* operand order is preserved */
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaQRotate_V( VmathSoaQuat quat, VmathSoaVector3 vec )
+{   /* By-value wrapper around vmathSoaQRotate: returns the VmathSoaVector3 it produces for quat and vec. */
+    VmathSoaVector3 result;
+    vmathSoaQRotate(&result, &quat, &vec); /* quaternion first, vector second, as in the pointer API */
+    return result;
+}
+
+static inline VmathSoaQuat vmathSoaQConj_V( VmathSoaQuat quat )
+{   /* By-value wrapper around vmathSoaQConj: returns its result for quat. */
+    VmathSoaQuat result;
+    vmathSoaQConj(&result, &quat); /* address of the by-value copy is forwarded */
+    return result;
+}
+
+static inline VmathSoaQuat vmathSoaQSelect_V( VmathSoaQuat quat0, VmathSoaQuat quat1, vec_uint4 select1 )
+{   /* By-value wrapper around vmathSoaQSelect: returns its result for quat0, quat1 and the mask select1. */
+    VmathSoaQuat result;
+    vmathSoaQSelect(&result, &quat0, &quat1, select1); /* mask is forwarded as-is */
+    return result;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathSoaQPrint_V( VmathSoaQuat quat )
+{   /* By-value wrapper around vmathSoaQPrint; sits inside the _VECTORMATH_DEBUG guard. */
+    vmathSoaQPrint(&quat);
+}
+
+static inline void vmathSoaQPrints_V( VmathSoaQuat quat, const char *name )
+{   /* By-value wrapper around vmathSoaQPrints; name is forwarded unchanged. Sits inside the _VECTORMATH_DEBUG guard. */
+    vmathSoaQPrints(&quat, name);
+}
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/spu/c/vec_aos.h b/Extras/vectormathlibrary/include/vectormath/spu/c/vec_aos.h
index 715f27df7..332e0db95 100644
--- a/Extras/vectormathlibrary/include/vectormath/spu/c/vec_aos.h
+++ b/Extras/vectormathlibrary/include/vectormath/spu/c/vec_aos.h
@@ -1,1029 +1,1029 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_VEC_AOS_C_H
-#define _VECTORMATH_VEC_AOS_C_H
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/*-----------------------------------------------------------------------------
- * Constants
- * for shuffles, words are labeled [x,y,z,w] [a,b,c,d]
- */
-#define _VECTORMATH_SHUF_X 0x00010203
-#define _VECTORMATH_SHUF_Y 0x04050607
-#define _VECTORMATH_SHUF_Z 0x08090a0b
-#define _VECTORMATH_SHUF_W 0x0c0d0e0f
-#define _VECTORMATH_SHUF_A 0x10111213
-#define _VECTORMATH_SHUF_B 0x14151617
-#define _VECTORMATH_SHUF_C 0x18191a1b
-#define _VECTORMATH_SHUF_D 0x1c1d1e1f
-#define _VECTORMATH_SHUF_0 0x80808080
-#define _VECTORMATH_SHUF_XYZA (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_A }
-#define _VECTORMATH_SHUF_ZXYW (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_W }
-#define _VECTORMATH_SHUF_YZXW (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_W }
-#define _VECTORMATH_SHUF_WABC (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_C }
-#define _VECTORMATH_SHUF_ZWAB (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B }
-#define _VECTORMATH_SHUF_XYZA (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_A }
-#define _VECTORMATH_SHUF_YZAB (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B }
-#define _VECTORMATH_SHUF_ZABC (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_C }
-#define _VECTORMATH_UNIT_1000 (vec_float4){ 1.0f, 0.0f, 0.0f, 0.0f }
-#define _VECTORMATH_UNIT_0100 (vec_float4){ 0.0f, 1.0f, 0.0f, 0.0f }
-#define _VECTORMATH_UNIT_0010 (vec_float4){ 0.0f, 0.0f, 1.0f, 0.0f }
-#define _VECTORMATH_UNIT_0001 (vec_float4){ 0.0f, 0.0f, 0.0f, 1.0f }
-#define _VECTORMATH_SLERP_TOL 0.999f
-
-/*-----------------------------------------------------------------------------
- * Definitions
- */
-#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
-#define _VECTORMATH_INTERNAL_FUNCTIONS
-
-static inline vec_float4 _vmathVfDot3( vec_float4 vec0, vec_float4 vec1 )
-{
-    vec_float4 result;
-    result = spu_mul( vec0, vec1 );
-    result = spu_madd( spu_rlqwbyte( vec0, 4 ), spu_rlqwbyte( vec1, 4 ), result );
-    return spu_madd( spu_rlqwbyte( vec0, 8 ), spu_rlqwbyte( vec1, 8 ), result );
-}
-
-static inline vec_float4 _vmathVfDot4( vec_float4 vec0, vec_float4 vec1 )
-{
-    vec_float4 result;
-    result = spu_mul( vec0, vec1 );
-    result = spu_madd( spu_rlqwbyte( vec0, 4 ), spu_rlqwbyte( vec1, 4 ), result );
-    return spu_add( spu_rlqwbyte( result, 8 ), result );
-}
-
-static inline vec_float4 _vmathVfCross( vec_float4 vec0, vec_float4 vec1 )
-{
-    vec_float4 tmp0, tmp1, tmp2, tmp3, result;
-    tmp0 = spu_shuffle( vec0, vec0, _VECTORMATH_SHUF_YZXW );
-    tmp1 = spu_shuffle( vec1, vec1, _VECTORMATH_SHUF_ZXYW );
-    tmp2 = spu_shuffle( vec0, vec0, _VECTORMATH_SHUF_ZXYW );
-    tmp3 = spu_shuffle( vec1, vec1, _VECTORMATH_SHUF_YZXW );
-    result = spu_mul( tmp0, tmp1 );
-    result = spu_nmsub( tmp2, tmp3, result );
-    return result;
-}
-
-static inline vec_uint4 _vmathVfToHalfFloatsUnpacked(vec_float4 v)
-{
-    vec_int4 bexp;
-    vec_uint4 mant, sign, hfloat;
-    vec_uint4 notZero, isInf;
-    const vec_uint4 hfloatInf = spu_splats(0x00007c00u);
-    const vec_uint4 mergeMant = spu_splats(0x000003ffu);
-    const vec_uint4 mergeSign = spu_splats(0x00008000u);
-
-    sign = spu_rlmask((vec_uint4)v, -16);
-    mant = spu_rlmask((vec_uint4)v, -13);
-    bexp = spu_and(spu_rlmask((vec_int4)v, -23), 0xff);
-
-    notZero = spu_cmpgt(bexp, 112);
-    isInf = spu_cmpgt(bexp, 142);
-
-    bexp = spu_add(bexp, -112);
-    bexp = spu_sl(bexp, 10);
-
-    hfloat = spu_sel((vec_uint4)bexp, mant, mergeMant);
-    hfloat = spu_sel(spu_splats(0u), hfloat, notZero);
-    hfloat = spu_sel(hfloat, hfloatInf, isInf);
-    hfloat = spu_sel(hfloat, sign, mergeSign);
-
-    return hfloat;
-}
-
-static inline vec_ushort8 _vmath2VfToHalfFloats(vec_float4 u, vec_float4 v)
-{
-    vec_uint4 hfloat_u, hfloat_v;
-    const vec_uchar16 pack = (vec_uchar16){2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31};
-    hfloat_u = _vmathVfToHalfFloatsUnpacked(u);
-    hfloat_v = _vmathVfToHalfFloatsUnpacked(v);
-    return (vec_ushort8)spu_shuffle(hfloat_u, hfloat_v, pack);
-}
-
-#endif
-
-static inline void vmathV3Copy( VmathVector3 *result, const VmathVector3 *vec )
-{
-    result->vec128 = vec->vec128;
-}
-
-static inline void vmathV3MakeFromElems( VmathVector3 *result, float _x, float _y, float _z )
-{
-    result->vec128 = (vec_float4){ _x, _y, _z, 0.0f  };
-}
-
-static inline void vmathV3MakeFromP3( VmathVector3 *result, const VmathPoint3 *pnt )
-{
-    result->vec128 = pnt->vec128;
-}
-
-static inline void vmathV3MakeFromScalar( VmathVector3 *result, float scalar )
-{
-    result->vec128 = spu_splats( scalar );
-}
-
-static inline void vmathV3MakeFrom128( VmathVector3 *result, vec_float4 vf4 )
-{
-    result->vec128 = vf4;
-}
-
-static inline void vmathV3MakeXAxis( VmathVector3 *result )
-{
-    result->vec128 = _VECTORMATH_UNIT_1000;
-}
-
-static inline void vmathV3MakeYAxis( VmathVector3 *result )
-{
-    result->vec128 = _VECTORMATH_UNIT_0100;
-}
-
-static inline void vmathV3MakeZAxis( VmathVector3 *result )
-{
-    result->vec128 = _VECTORMATH_UNIT_0010;
-}
-
-static inline void vmathV3Lerp( VmathVector3 *result, float t, const VmathVector3 *vec0, const VmathVector3 *vec1 )
-{
-    VmathVector3 tmpV3_0, tmpV3_1;
-    vmathV3Sub( &tmpV3_0, vec1, vec0 );
-    vmathV3ScalarMul( &tmpV3_1, &tmpV3_0, t );
-    vmathV3Add( result, vec0, &tmpV3_1 );
-}
-
-static inline void vmathV3Slerp( VmathVector3 *result, float t, const VmathVector3 *unitVec0, const VmathVector3 *unitVec1 )
-{
-    vec_float4 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
-    vec_uint4 selectMask;
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
-    cosAngle = _vmathVfDot3( unitVec0->vec128, unitVec1->vec128 );
-    cosAngle = spu_shuffle( cosAngle, cosAngle, shuffle_xxxx );
-    selectMask = (vec_uint4)spu_cmpgt( spu_splats(_VECTORMATH_SLERP_TOL), cosAngle );
-    angle = acosf4( cosAngle );
-    tttt = spu_splats(t);
-    oneMinusT = spu_sub( spu_splats(1.0f), tttt );
-    angles = spu_sel( spu_splats(1.0f), oneMinusT, (vec_uint4)spu_maskb(0x0f00) );
-    angles = spu_sel( angles, tttt, (vec_uint4)spu_maskb(0x00f0) );
-    angles = spu_mul( angles, angle );
-    sines = sinf4( angles );
-    scales = divf4( sines, spu_shuffle( sines, sines, shuffle_xxxx ) );
-    scale0 = spu_sel( oneMinusT, spu_shuffle( scales, scales, shuffle_yyyy ), selectMask );
-    scale1 = spu_sel( tttt, spu_shuffle( scales, scales, shuffle_zzzz ), selectMask );
-    result->vec128 = spu_madd( unitVec0->vec128, scale0, spu_mul( unitVec1->vec128, scale1 ) );
-}
-
-static inline vec_float4 vmathV3Get128( const VmathVector3 *vec )
-{
-    return vec->vec128;
-}
-
-static inline void vmathV3StoreXYZ( const VmathVector3 *vec, vec_float4 *quad )
-{
-    vec_float4 dstVec = *quad;
-    vec_uint4 mask = (vec_uint4)spu_maskb(0x000f);
-    dstVec = spu_sel(vec->vec128, dstVec, mask);
-    *quad = dstVec;
-}
-
-static inline void vmathV3LoadXYZArray( VmathVector3 *vec0, VmathVector3 *vec1, VmathVector3 *vec2, VmathVector3 *vec3, const vec_float4 *threeQuads )
-{
-    vec_float4 xyzx, yzxy, zxyz, xyz1, xyz2, xyz3;
-    xyzx = threeQuads[0];
-    yzxy = threeQuads[1];
-    zxyz = threeQuads[2];
-    xyz1 = spu_shuffle( xyzx, yzxy, _VECTORMATH_SHUF_WABC );
-    xyz2 = spu_shuffle( yzxy, zxyz, _VECTORMATH_SHUF_ZWAB );
-    xyz3 = spu_rlqwbyte( zxyz, 4 );
-    vec0->vec128 = xyzx;
-    vec1->vec128 = xyz1;
-    vec2->vec128 = xyz2;
-    vec3->vec128 = xyz3;
-}
-
-static inline void vmathV3StoreXYZArray( const VmathVector3 *vec0, const VmathVector3 *vec1, const VmathVector3 *vec2, const VmathVector3 *vec3, vec_float4 *threeQuads )
-{
-    vec_float4 xyzx, yzxy, zxyz;
-    xyzx = spu_shuffle( vec0->vec128, vec1->vec128, _VECTORMATH_SHUF_XYZA );
-    yzxy = spu_shuffle( vec1->vec128, vec2->vec128, _VECTORMATH_SHUF_YZAB );
-    zxyz = spu_shuffle( vec2->vec128, vec3->vec128, _VECTORMATH_SHUF_ZABC );
-    threeQuads[0] = xyzx;
-    threeQuads[1] = yzxy;
-    threeQuads[2] = zxyz;
-}
-
-static inline void vmathV3StoreHalfFloats( const VmathVector3 *vec0, const VmathVector3 *vec1, const VmathVector3 *vec2, const VmathVector3 *vec3, const VmathVector3 *vec4, const VmathVector3 *vec5, const VmathVector3 *vec6, const VmathVector3 *vec7, vec_ushort8 *threeQuads )
-{
-    vec_float4 xyz0[3];
-    vec_float4 xyz1[3];
-    vmathV3StoreXYZArray( vec0, vec1, vec2, vec3, xyz0 );
-    vmathV3StoreXYZArray( vec4, vec5, vec6, vec7, xyz1 );
-    threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
-    threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
-    threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
-}
-
-static inline void vmathV3SetX( VmathVector3 *result, float _x )
-{
-    result->vec128 = spu_insert( _x, result->vec128, 0 );
-}
-
-static inline float vmathV3GetX( const VmathVector3 *vec )
-{
-    return spu_extract( vec->vec128, 0 );
-}
-
-static inline void vmathV3SetY( VmathVector3 *result, float _y )
-{
-    result->vec128 = spu_insert( _y, result->vec128, 1 );
-}
-
-static inline float vmathV3GetY( const VmathVector3 *vec )
-{
-    return spu_extract( vec->vec128, 1 );
-}
-
-static inline void vmathV3SetZ( VmathVector3 *result, float _z )
-{
-    result->vec128 = spu_insert( _z, result->vec128, 2 );
-}
-
-static inline float vmathV3GetZ( const VmathVector3 *vec )
-{
-    return spu_extract( vec->vec128, 2 );
-}
-
-static inline void vmathV3SetElem( VmathVector3 *result, int idx, float value )
-{
-    result->vec128 = spu_insert( value, result->vec128, idx );
-}
-
-static inline float vmathV3GetElem( const VmathVector3 *vec, int idx )
-{
-    return spu_extract( vec->vec128, idx );
-}
-
-static inline void vmathV3Add( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
-{
-    result->vec128 = spu_add( vec0->vec128, vec1->vec128 );
-}
-
-static inline void vmathV3Sub( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
-{
-    result->vec128 = spu_sub( vec0->vec128, vec1->vec128 );
-}
-
-static inline void vmathV3AddP3( VmathPoint3 *result, const VmathVector3 *vec, const VmathPoint3 *pnt1 )
-{
-    result->vec128 = spu_add( vec->vec128, pnt1->vec128 );
-}
-
-static inline void vmathV3ScalarMul( VmathVector3 *result, const VmathVector3 *vec, float scalar )
-{
-    result->vec128 = spu_mul( vec->vec128, spu_splats(scalar) );
-}
-
-static inline void vmathV3ScalarDiv( VmathVector3 *result, const VmathVector3 *vec, float scalar )
-{
-    result->vec128 = divf4( vec->vec128, spu_splats(scalar) );
-}
-
-static inline void vmathV3Neg( VmathVector3 *result, const VmathVector3 *vec )
-{
-    result->vec128 = negatef4( vec->vec128 );
-}
-
-static inline void vmathV3MulPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
-{
-    result->vec128 = spu_mul( vec0->vec128, vec1->vec128 );
-}
-
-static inline void vmathV3DivPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
-{
-    result->vec128 = divf4( vec0->vec128, vec1->vec128 );
-}
-
-static inline void vmathV3RecipPerElem( VmathVector3 *result, const VmathVector3 *vec )
-{
-    result->vec128 = recipf4( vec->vec128 );
-}
-
-static inline void vmathV3SqrtPerElem( VmathVector3 *result, const VmathVector3 *vec )
-{
-    result->vec128 = sqrtf4( vec->vec128 );
-}
-
-static inline void vmathV3RsqrtPerElem( VmathVector3 *result, const VmathVector3 *vec )
-{
-    result->vec128 = rsqrtf4( vec->vec128 );
-}
-
-static inline void vmathV3AbsPerElem( VmathVector3 *result, const VmathVector3 *vec )
-{
-    result->vec128 = fabsf4( vec->vec128 );
-}
-
-static inline void vmathV3CopySignPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
-{
-    result->vec128 = copysignf4( vec0->vec128, vec1->vec128 );
-}
-
-static inline void vmathV3MaxPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
-{
-    result->vec128 = fmaxf4( vec0->vec128, vec1->vec128 );
-}
-
-static inline float vmathV3MaxElem( const VmathVector3 *vec )
-{
-    vec_float4 result;
-    result = fmaxf4( spu_promote( spu_extract( vec->vec128, 1 ), 0 ), vec->vec128 );
-    result = fmaxf4( spu_promote( spu_extract( vec->vec128, 2 ), 0 ), result );
-    return spu_extract( result, 0 );
-}
-
-static inline void vmathV3MinPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
-{
-    result->vec128 = fminf4( vec0->vec128, vec1->vec128 );
-}
-
-static inline float vmathV3MinElem( const VmathVector3 *vec )
-{
-    vec_float4 result;
-    result = fminf4( spu_promote( spu_extract( vec->vec128, 1 ), 0 ), vec->vec128 );
-    result = fminf4( spu_promote( spu_extract( vec->vec128, 2 ), 0 ), result );
-    return spu_extract( result, 0 );
-}
-
-static inline float vmathV3Sum( const VmathVector3 *vec )
-{
-    return
-        spu_extract( vec->vec128, 0 ) +
-        spu_extract( vec->vec128, 1 ) +
-        spu_extract( vec->vec128, 2 );
-}
-
-static inline float vmathV3Dot( const VmathVector3 *vec0, const VmathVector3 *vec1 )
-{
-    return spu_extract( _vmathVfDot3( vec0->vec128, vec1->vec128 ), 0 );
-}
-
-static inline float vmathV3LengthSqr( const VmathVector3 *vec )
-{
-    return spu_extract( _vmathVfDot3( vec->vec128, vec->vec128 ), 0 );
-}
-
-static inline float vmathV3Length( const VmathVector3 *vec )
-{
-    return sqrtf( vmathV3LengthSqr( vec ) );
-}
-
-static inline void vmathV3Normalize( VmathVector3 *result, const VmathVector3 *vec )
-{
-    vec_float4 dot = _vmathVfDot3( vec->vec128, vec->vec128 );
-    dot = spu_shuffle( dot, dot, (vec_uchar16)spu_splats(0x00010203) );
-    result->vec128 = spu_mul( vec->vec128, rsqrtf4( dot ) );
-}
-
-static inline void vmathV3Cross( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
-{
-    result->vec128 = _vmathVfCross( vec0->vec128, vec1->vec128 );
-}
-
-static inline void vmathV3Select( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1, unsigned int select1 )
-{
-    result->vec128 = spu_sel( vec0->vec128, vec1->vec128, spu_splats( (unsigned int)-(select1 > 0) ) );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathV3Print( const VmathVector3 *vec )
-{
-    union { vec_float4 v; float s[4]; } tmp;
-    tmp.v = vec->vec128;
-    printf( "( %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2] );
-}
-
-static inline void vmathV3Prints( const VmathVector3 *vec, const char *name )
-{
-    union { vec_float4 v; float s[4]; } tmp;
-    tmp.v = vec->vec128;
-    printf( "%s: ( %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2] );
-}
-
-#endif
-
-static inline void vmathV4Copy( VmathVector4 *result, const VmathVector4 *vec )
-{
-    result->vec128 = vec->vec128;
-}
-
-static inline void vmathV4MakeFromElems( VmathVector4 *result, float _x, float _y, float _z, float _w )
-{
-    result->vec128 = (vec_float4){ _x, _y, _z, _w };
-}
-
-static inline void vmathV4MakeFromV3Scalar( VmathVector4 *result, const VmathVector3 *xyz, float _w )
-{
-    result->vec128 = spu_shuffle( xyz->vec128, spu_promote( _w, 0 ), _VECTORMATH_SHUF_XYZA );
-}
-
-static inline void vmathV4MakeFromV3( VmathVector4 *result, const VmathVector3 *vec )
-{
-    result->vec128 = spu_sel( vec->vec128, spu_splats(0.0f), (vec_uint4)spu_maskb(0x000f) );
-}
-
-static inline void vmathV4MakeFromP3( VmathVector4 *result, const VmathPoint3 *pnt )
-{
-    result->vec128 = spu_sel( pnt->vec128, spu_splats(1.0f), (vec_uint4)spu_maskb(0x000f) );
-}
-
-static inline void vmathV4MakeFromQ( VmathVector4 *result, const VmathQuat *quat )
-{
-    result->vec128 = quat->vec128;
-}
-
-static inline void vmathV4MakeFromScalar( VmathVector4 *result, float scalar )
-{
-    result->vec128 = spu_splats( scalar );
-}
-
-static inline void vmathV4MakeFrom128( VmathVector4 *result, vec_float4 vf4 )
-{
-    result->vec128 = vf4;
-}
-
-static inline void vmathV4MakeXAxis( VmathVector4 *result )
-{
-    result->vec128 = _VECTORMATH_UNIT_1000;
-}
-
-static inline void vmathV4MakeYAxis( VmathVector4 *result )
-{
-    result->vec128 = _VECTORMATH_UNIT_0100;
-}
-
-static inline void vmathV4MakeZAxis( VmathVector4 *result )
-{
-    result->vec128 = _VECTORMATH_UNIT_0010;
-}
-
-static inline void vmathV4MakeWAxis( VmathVector4 *result )
-{
-    result->vec128 = _VECTORMATH_UNIT_0001;
-}
-
-static inline void vmathV4Lerp( VmathVector4 *result, float t, const VmathVector4 *vec0, const VmathVector4 *vec1 )
-{
-    VmathVector4 tmpV4_0, tmpV4_1;
-    vmathV4Sub( &tmpV4_0, vec1, vec0 );
-    vmathV4ScalarMul( &tmpV4_1, &tmpV4_0, t );
-    vmathV4Add( result, vec0, &tmpV4_1 );
-}
-
-static inline void vmathV4Slerp( VmathVector4 *result, float t, const VmathVector4 *unitVec0, const VmathVector4 *unitVec1 )
-{
-    vec_float4 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
-    vec_uint4 selectMask;
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
-    cosAngle = _vmathVfDot4( unitVec0->vec128, unitVec1->vec128 );
-    cosAngle = spu_shuffle( cosAngle, cosAngle, shuffle_xxxx );
-    selectMask = (vec_uint4)spu_cmpgt( spu_splats(_VECTORMATH_SLERP_TOL), cosAngle );
-    angle = acosf4( cosAngle );
-    tttt = spu_splats(t);
-    oneMinusT = spu_sub( spu_splats(1.0f), tttt );
-    angles = spu_sel( spu_splats(1.0f), oneMinusT, (vec_uint4)spu_maskb(0x0f00) );
-    angles = spu_sel( angles, tttt, (vec_uint4)spu_maskb(0x00f0) );
-    angles = spu_mul( angles, angle );
-    sines = sinf4( angles );
-    scales = divf4( sines, spu_shuffle( sines, sines, shuffle_xxxx ) );
-    scale0 = spu_sel( oneMinusT, spu_shuffle( scales, scales, shuffle_yyyy ), selectMask );
-    scale1 = spu_sel( tttt, spu_shuffle( scales, scales, shuffle_zzzz ), selectMask );
-    result->vec128 = spu_madd( unitVec0->vec128, scale0, spu_mul( unitVec1->vec128, scale1 ) );
-}
-
-static inline vec_float4 vmathV4Get128( const VmathVector4 *vec )
-{
-    return vec->vec128;
-}
-
-static inline void vmathV4StoreHalfFloats( const VmathVector4 *vec0, const VmathVector4 *vec1, const VmathVector4 *vec2, const VmathVector4 *vec3, vec_ushort8 *twoQuads )
-{
-    twoQuads[0] = _vmath2VfToHalfFloats(vec0->vec128, vec1->vec128);
-    twoQuads[1] = _vmath2VfToHalfFloats(vec2->vec128, vec3->vec128);
-}
-
-static inline void vmathV4SetXYZ( VmathVector4 *result, const VmathVector3 *vec )
-{
-    result->vec128 = spu_sel( vec->vec128, result->vec128, (vec_uint4)spu_maskb(0x000f) );
-}
-
-static inline void vmathV4GetXYZ( VmathVector3 *result, const VmathVector4 *vec )
-{
-    result->vec128 = vec->vec128;
-}
-
-static inline void vmathV4SetX( VmathVector4 *result, float _x )
-{
-    result->vec128 = spu_insert( _x, result->vec128, 0 );
-}
-
-static inline float vmathV4GetX( const VmathVector4 *vec )
-{
-    return spu_extract( vec->vec128, 0 );
-}
-
-static inline void vmathV4SetY( VmathVector4 *result, float _y )
-{
-    result->vec128 = spu_insert( _y, result->vec128, 1 );
-}
-
-static inline float vmathV4GetY( const VmathVector4 *vec )
-{
-    return spu_extract( vec->vec128, 1 );
-}
-
-static inline void vmathV4SetZ( VmathVector4 *result, float _z )
-{
-    result->vec128 = spu_insert( _z, result->vec128, 2 );
-}
-
-static inline float vmathV4GetZ( const VmathVector4 *vec )
-{
-    return spu_extract( vec->vec128, 2 );
-}
-
-static inline void vmathV4SetW( VmathVector4 *result, float _w )
-{
-    result->vec128 = spu_insert( _w, result->vec128, 3 );
-}
-
-static inline float vmathV4GetW( const VmathVector4 *vec )
-{
-    return spu_extract( vec->vec128, 3 );
-}
-
-static inline void vmathV4SetElem( VmathVector4 *result, int idx, float value )
-{
-    result->vec128 = spu_insert( value, result->vec128, idx );
-}
-
-static inline float vmathV4GetElem( const VmathVector4 *vec, int idx )
-{
-    return spu_extract( vec->vec128, idx );
-}
-
-static inline void vmathV4Add( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
-{
-    result->vec128 = spu_add( vec0->vec128, vec1->vec128 );
-}
-
-static inline void vmathV4Sub( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
-{
-    result->vec128 = spu_sub( vec0->vec128, vec1->vec128 );
-}
-
-static inline void vmathV4ScalarMul( VmathVector4 *result, const VmathVector4 *vec, float scalar )
-{
-    result->vec128 = spu_mul( vec->vec128, spu_splats(scalar) );
-}
-
-static inline void vmathV4ScalarDiv( VmathVector4 *result, const VmathVector4 *vec, float scalar )
-{
-    result->vec128 = divf4( vec->vec128, spu_splats(scalar) );
-}
-
-static inline void vmathV4Neg( VmathVector4 *result, const VmathVector4 *vec )
-{
-    result->vec128 = negatef4( vec->vec128 );
-}
-
-static inline void vmathV4MulPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
-{
-    result->vec128 = spu_mul( vec0->vec128, vec1->vec128 );
-}
-
-static inline void vmathV4DivPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
-{
-    result->vec128 = divf4( vec0->vec128, vec1->vec128 );
-}
-
-static inline void vmathV4RecipPerElem( VmathVector4 *result, const VmathVector4 *vec )
-{
-    result->vec128 = recipf4( vec->vec128 );
-}
-
-static inline void vmathV4SqrtPerElem( VmathVector4 *result, const VmathVector4 *vec )
-{
-    result->vec128 = sqrtf4( vec->vec128 );
-}
-
-static inline void vmathV4RsqrtPerElem( VmathVector4 *result, const VmathVector4 *vec )
-{
-    result->vec128 = rsqrtf4( vec->vec128 );
-}
-
-static inline void vmathV4AbsPerElem( VmathVector4 *result, const VmathVector4 *vec )
-{
-    result->vec128 = fabsf4( vec->vec128 );
-}
-
-static inline void vmathV4CopySignPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
-{
-    result->vec128 = copysignf4( vec0->vec128, vec1->vec128 );
-}
-
-static inline void vmathV4MaxPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
-{
-    result->vec128 = fmaxf4( vec0->vec128, vec1->vec128 );
-}
-
-static inline float vmathV4MaxElem( const VmathVector4 *vec )
-{
-    vec_float4 result;
-    result = fmaxf4( spu_promote( spu_extract( vec->vec128, 1 ), 0 ), vec->vec128 );
-    result = fmaxf4( spu_promote( spu_extract( vec->vec128, 2 ), 0 ), result );
-    result = fmaxf4( spu_promote( spu_extract( vec->vec128, 3 ), 0 ), result );
-    return spu_extract( result, 0 );
-}
-
-static inline void vmathV4MinPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
-{
-    result->vec128 = fminf4( vec0->vec128, vec1->vec128 );
-}
-
-static inline float vmathV4MinElem( const VmathVector4 *vec )
-{
-    vec_float4 result;
-    result = fminf4( spu_promote( spu_extract( vec->vec128, 1 ), 0 ), vec->vec128 );
-    result = fminf4( spu_promote( spu_extract( vec->vec128, 2 ), 0 ), result );
-    result = fminf4( spu_promote( spu_extract( vec->vec128, 3 ), 0 ), result );
-    return spu_extract( result, 0 );
-}
-
-static inline float vmathV4Sum( const VmathVector4 *vec )
-{
-    return
-        spu_extract( vec->vec128, 0 ) +
-        spu_extract( vec->vec128, 1 ) +
-        spu_extract( vec->vec128, 2 ) +
-        spu_extract( vec->vec128, 3 );
-}
-
-static inline float vmathV4Dot( const VmathVector4 *vec0, const VmathVector4 *vec1 )
-{
-    return spu_extract( _vmathVfDot4( vec0->vec128, vec1->vec128 ), 0 );
-}
-
-static inline float vmathV4LengthSqr( const VmathVector4 *vec )
-{
-    return spu_extract( _vmathVfDot4( vec->vec128, vec->vec128 ), 0 );
-}
-
-static inline float vmathV4Length( const VmathVector4 *vec )
-{
-    return sqrtf( vmathV4LengthSqr( vec ) );
-}
-
-static inline void vmathV4Normalize( VmathVector4 *result, const VmathVector4 *vec )
-{
-    vec_float4 dot = _vmathVfDot4( vec->vec128, vec->vec128 );
-    result->vec128 = spu_mul( vec->vec128, rsqrtf4( dot ) );
-}
-
-static inline void vmathV4Select( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1, unsigned int select1 )
-{
-    result->vec128 = spu_sel( vec0->vec128, vec1->vec128, spu_splats( (unsigned int)-(select1 > 0) ) );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathV4Print( const VmathVector4 *vec )
-{
-    union { vec_float4 v; float s[4]; } tmp;
-    tmp.v = vec->vec128;
-    printf( "( %f %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] );
-}
-
-static inline void vmathV4Prints( const VmathVector4 *vec, const char *name )
-{
-    union { vec_float4 v; float s[4]; } tmp;
-    tmp.v = vec->vec128;
-    printf( "%s: ( %f %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] );
-}
-
-#endif
-
-static inline void vmathP3Copy( VmathPoint3 *result, const VmathPoint3 *pnt )
-{
-    result->vec128 = pnt->vec128;
-}
-
-static inline void vmathP3MakeFromElems( VmathPoint3 *result, float _x, float _y, float _z )
-{
-    result->vec128 = (vec_float4){ _x, _y, _z, 0.0f  };
-}
-
-static inline void vmathP3MakeFromV3( VmathPoint3 *result, const VmathVector3 *vec )
-{
-    result->vec128 = vec->vec128;
-}
-
-static inline void vmathP3MakeFromScalar( VmathPoint3 *result, float scalar )
-{
-    result->vec128 = spu_splats( scalar );
-}
-
-static inline void vmathP3MakeFrom128( VmathPoint3 *result, vec_float4 vf4 )
-{
-    result->vec128 = vf4;
-}
-
-static inline void vmathP3Lerp( VmathPoint3 *result, float t, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
-{
-    VmathVector3 tmpV3_0, tmpV3_1;
-    vmathP3Sub( &tmpV3_0, pnt1, pnt0 );
-    vmathV3ScalarMul( &tmpV3_1, &tmpV3_0, t );
-    vmathP3AddV3( result, pnt0, &tmpV3_1 );
-}
-
-static inline vec_float4 vmathP3Get128( const VmathPoint3 *pnt )
-{
-    return pnt->vec128;
-}
-
-static inline void vmathP3StoreXYZ( const VmathPoint3 *pnt, vec_float4 *quad )
-{
-    vec_float4 dstVec = *quad;
-    vec_uint4 mask = (vec_uint4)spu_maskb(0x000f);
-    dstVec = spu_sel(pnt->vec128, dstVec, mask);
-    *quad = dstVec;
-}
-
-static inline void vmathP3LoadXYZArray( VmathPoint3 *pnt0, VmathPoint3 *pnt1, VmathPoint3 *pnt2, VmathPoint3 *pnt3, const vec_float4 *threeQuads )
-{
-    vec_float4 xyzx, yzxy, zxyz, xyz1, xyz2, xyz3;
-    xyzx = threeQuads[0];
-    yzxy = threeQuads[1];
-    zxyz = threeQuads[2];
-    xyz1 = spu_shuffle( xyzx, yzxy, _VECTORMATH_SHUF_WABC );
-    xyz2 = spu_shuffle( yzxy, zxyz, _VECTORMATH_SHUF_ZWAB );
-    xyz3 = spu_rlqwbyte( zxyz, 4 );
-    pnt0->vec128 = xyzx;
-    pnt1->vec128 = xyz1;
-    pnt2->vec128 = xyz2;
-    pnt3->vec128 = xyz3;
-}
-
-static inline void vmathP3StoreXYZArray( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, const VmathPoint3 *pnt2, const VmathPoint3 *pnt3, vec_float4 *threeQuads )
-{
-    vec_float4 xyzx, yzxy, zxyz;
-    xyzx = spu_shuffle( pnt0->vec128, pnt1->vec128, _VECTORMATH_SHUF_XYZA );
-    yzxy = spu_shuffle( pnt1->vec128, pnt2->vec128, _VECTORMATH_SHUF_YZAB );
-    zxyz = spu_shuffle( pnt2->vec128, pnt3->vec128, _VECTORMATH_SHUF_ZABC );
-    threeQuads[0] = xyzx;
-    threeQuads[1] = yzxy;
-    threeQuads[2] = zxyz;
-}
-
-static inline void vmathP3StoreHalfFloats( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, const VmathPoint3 *pnt2, const VmathPoint3 *pnt3, const VmathPoint3 *pnt4, const VmathPoint3 *pnt5, const VmathPoint3 *pnt6, const VmathPoint3 *pnt7, vec_ushort8 *threeQuads )
-{
-    vec_float4 xyz0[3];
-    vec_float4 xyz1[3];
-    vmathP3StoreXYZArray( pnt0, pnt1, pnt2, pnt3, xyz0 );
-    vmathP3StoreXYZArray( pnt4, pnt5, pnt6, pnt7, xyz1 );
-    threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
-    threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
-    threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
-}
-
-static inline void vmathP3SetX( VmathPoint3 *result, float _x )
-{
-    result->vec128 = spu_insert( _x, result->vec128, 0 );
-}
-
-static inline float vmathP3GetX( const VmathPoint3 *pnt )
-{
-    return spu_extract( pnt->vec128, 0 );
-}
-
-static inline void vmathP3SetY( VmathPoint3 *result, float _y )
-{
-    result->vec128 = spu_insert( _y, result->vec128, 1 );
-}
-
-static inline float vmathP3GetY( const VmathPoint3 *pnt )
-{
-    return spu_extract( pnt->vec128, 1 );
-}
-
-static inline void vmathP3SetZ( VmathPoint3 *result, float _z )
-{
-    result->vec128 = spu_insert( _z, result->vec128, 2 );
-}
-
-static inline float vmathP3GetZ( const VmathPoint3 *pnt )
-{
-    return spu_extract( pnt->vec128, 2 );
-}
-
-static inline void vmathP3SetElem( VmathPoint3 *result, int idx, float value )
-{
-    result->vec128 = spu_insert( value, result->vec128, idx );
-}
-
-static inline float vmathP3GetElem( const VmathPoint3 *pnt, int idx )
-{
-    return spu_extract( pnt->vec128, idx );
-}
-
-static inline void vmathP3Sub( VmathVector3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
-{
-    result->vec128 = spu_sub( pnt0->vec128, pnt1->vec128 );
-}
-
-static inline void vmathP3AddV3( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *vec1 )
-{
-    result->vec128 = spu_add( pnt->vec128, vec1->vec128 );
-}
-
-static inline void vmathP3SubV3( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *vec1 )
-{
-    result->vec128 = spu_sub( pnt->vec128, vec1->vec128 );
-}
-
-static inline void vmathP3MulPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
-{
-    result->vec128 = spu_mul( pnt0->vec128, pnt1->vec128 );
-}
-
-static inline void vmathP3DivPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
-{
-    result->vec128 = divf4( pnt0->vec128, pnt1->vec128 );
-}
-
-static inline void vmathP3RecipPerElem( VmathPoint3 *result, const VmathPoint3 *pnt )
-{
-    result->vec128 = recipf4( pnt->vec128 );
-}
-
-static inline void vmathP3SqrtPerElem( VmathPoint3 *result, const VmathPoint3 *pnt )
-{
-    result->vec128 = sqrtf4( pnt->vec128 );
-}
-
-static inline void vmathP3RsqrtPerElem( VmathPoint3 *result, const VmathPoint3 *pnt )
-{
-    result->vec128 = rsqrtf4( pnt->vec128 );
-}
-
-static inline void vmathP3AbsPerElem( VmathPoint3 *result, const VmathPoint3 *pnt )
-{
-    result->vec128 = fabsf4( pnt->vec128 );
-}
-
-static inline void vmathP3CopySignPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
-{
-    result->vec128 = copysignf4( pnt0->vec128, pnt1->vec128 );
-}
-
-static inline void vmathP3MaxPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
-{
-    result->vec128 = fmaxf4( pnt0->vec128, pnt1->vec128 );
-}
-
-static inline float vmathP3MaxElem( const VmathPoint3 *pnt )
-{
-    vec_float4 result;
-    result = fmaxf4( spu_promote( spu_extract( pnt->vec128, 1 ), 0 ), pnt->vec128 );
-    result = fmaxf4( spu_promote( spu_extract( pnt->vec128, 2 ), 0 ), result );
-    return spu_extract( result, 0 );
-}
-
-static inline void vmathP3MinPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
-{
-    result->vec128 = fminf4( pnt0->vec128, pnt1->vec128 );
-}
-
-static inline float vmathP3MinElem( const VmathPoint3 *pnt )
-{
-    vec_float4 result;
-    result = fminf4( spu_promote( spu_extract( pnt->vec128, 1 ), 0 ), pnt->vec128 );
-    result = fminf4( spu_promote( spu_extract( pnt->vec128, 2 ), 0 ), result );
-    return spu_extract( result, 0 );
-}
-
-static inline float vmathP3Sum( const VmathPoint3 *pnt )
-{
-    return
-        spu_extract( pnt->vec128, 0 ) +
-        spu_extract( pnt->vec128, 1 ) +
-        spu_extract( pnt->vec128, 2 );
-}
-
-static inline void vmathP3Scale( VmathPoint3 *result, const VmathPoint3 *pnt, float scaleVal )
-{
-    VmathPoint3 tmpP3_0;
-    vmathP3MakeFromScalar( &tmpP3_0, scaleVal );
-    vmathP3MulPerElem( result, pnt, &tmpP3_0 );
-}
-
-static inline void vmathP3NonUniformScale( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *scaleVec )
-{
-    VmathPoint3 tmpP3_0;
-    vmathP3MakeFromV3( &tmpP3_0, scaleVec );
-    vmathP3MulPerElem( result, pnt, &tmpP3_0 );
-}
-
-static inline float vmathP3Projection( const VmathPoint3 *pnt, const VmathVector3 *unitVec )
-{
-    return spu_extract( _vmathVfDot3( pnt->vec128, unitVec->vec128 ), 0 );
-}
-
-static inline float vmathP3DistSqrFromOrigin( const VmathPoint3 *pnt )
-{
-    VmathVector3 tmpV3_0;
-    vmathV3MakeFromP3( &tmpV3_0, pnt );
-    return vmathV3LengthSqr( &tmpV3_0 );
-}
-
-static inline float vmathP3DistFromOrigin( const VmathPoint3 *pnt )
-{
-    VmathVector3 tmpV3_0;
-    vmathV3MakeFromP3( &tmpV3_0, pnt );
-    return vmathV3Length( &tmpV3_0 );
-}
-
-static inline float vmathP3DistSqr( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
-{
-    VmathVector3 tmpV3_0;
-    vmathP3Sub( &tmpV3_0, pnt1, pnt0 );
-    return vmathV3LengthSqr( &tmpV3_0 );
-}
-
-static inline float vmathP3Dist( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
-{
-    VmathVector3 tmpV3_0;
-    vmathP3Sub( &tmpV3_0, pnt1, pnt0 );
-    return vmathV3Length( &tmpV3_0 );
-}
-
-static inline void vmathP3Select( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, unsigned int select1 )
-{
-    result->vec128 = spu_sel( pnt0->vec128, pnt1->vec128, spu_splats( (unsigned int)-(select1 > 0) ) );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathP3Print( const VmathPoint3 *pnt )
-{
-    union { vec_float4 v; float s[4]; } tmp;
-    tmp.v = pnt->vec128;
-    printf( "( %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2] );
-}
-
-static inline void vmathP3Prints( const VmathPoint3 *pnt, const char *name )
-{
-    union { vec_float4 v; float s[4]; } tmp;
-    tmp.v = pnt->vec128;
-    printf( "%s: ( %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2] );
-}
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_VEC_AOS_C_H
+#define _VECTORMATH_VEC_AOS_C_H
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*-----------------------------------------------------------------------------
+ * Constants
+ * for shuffles, words of the first operand are labeled [x,y,z,w] and words of the second operand [a,b,c,d]
+ */
+#define _VECTORMATH_SHUF_X 0x00010203 /* bytes 0-3: word x of the first shuffle operand */
+#define _VECTORMATH_SHUF_Y 0x04050607 /* bytes 4-7: word y of the first shuffle operand */
+#define _VECTORMATH_SHUF_Z 0x08090a0b /* bytes 8-11: word z of the first shuffle operand */
+#define _VECTORMATH_SHUF_W 0x0c0d0e0f /* bytes 12-15: word w of the first shuffle operand */
+#define _VECTORMATH_SHUF_A 0x10111213 /* bytes 16-19: word a of the second shuffle operand */
+#define _VECTORMATH_SHUF_B 0x14151617 /* bytes 20-23: word b of the second shuffle operand */
+#define _VECTORMATH_SHUF_C 0x18191a1b /* bytes 24-27: word c of the second shuffle operand */
+#define _VECTORMATH_SHUF_D 0x1c1d1e1f /* bytes 28-31: word d of the second shuffle operand */
+#define _VECTORMATH_SHUF_0 0x80808080 /* four 0x80 control bytes; per the spu_shuffle definition these yield zero bytes */
+#define _VECTORMATH_SHUF_XYZA (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_A }
+#define _VECTORMATH_SHUF_ZXYW (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_W }
+#define _VECTORMATH_SHUF_YZXW (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_W }
+#define _VECTORMATH_SHUF_WABC (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_C }
+#define _VECTORMATH_SHUF_ZWAB (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B }
+/* _VECTORMATH_SHUF_XYZA is defined once, at the head of the pattern list above; a second, identical #define used to sit on this line. */
+#define _VECTORMATH_SHUF_YZAB (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B }
+#define _VECTORMATH_SHUF_ZABC (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_C }
+#define _VECTORMATH_UNIT_1000 (vec_float4){ 1.0f, 0.0f, 0.0f, 0.0f } /* 1 in element 0 only */
+#define _VECTORMATH_UNIT_0100 (vec_float4){ 0.0f, 1.0f, 0.0f, 0.0f } /* 1 in element 1 only */
+#define _VECTORMATH_UNIT_0010 (vec_float4){ 0.0f, 0.0f, 1.0f, 0.0f } /* 1 in element 2 only */
+#define _VECTORMATH_UNIT_0001 (vec_float4){ 0.0f, 0.0f, 0.0f, 1.0f } /* 1 in element 3 only */
+#define _VECTORMATH_SLERP_TOL 0.999f /* slerp uses sine-based weights only while the dot product is below this value */
+
+/*-----------------------------------------------------------------------------
+ * Definitions
+ */
+#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
+#define _VECTORMATH_INTERNAL_FUNCTIONS
+
+static inline vec_float4 _vmathVfDot3( vec_float4 vec0, vec_float4 vec1 )
+{   /* Dot product over the first three elements; only element 0 of the return value is x0*x1 + y0*y1 + z0*z1. */
+    vec_float4 result;
+    result = spu_mul( vec0, vec1 );   /* per-element products; element 0 = x0*x1 */
+    result = spu_madd( spu_rlqwbyte( vec0, 4 ), spu_rlqwbyte( vec1, 4 ), result );   /* rotate one word so y sits in element 0, then accumulate y0*y1 */
+    return spu_madd( spu_rlqwbyte( vec0, 8 ), spu_rlqwbyte( vec1, 8 ), result );   /* rotate two words to add z0*z1; elements 1-3 end up with sums that include a w term */
+}
+
+static inline vec_float4 _vmathVfDot4( vec_float4 vec0, vec_float4 vec1 )
+{   /* Four-element dot product; every element of the return value holds a sum of all four products. */
+    vec_float4 result;
+    result = spu_mul( vec0, vec1 );   /* per-element products p0..p3 */
+    result = spu_madd( spu_rlqwbyte( vec0, 4 ), spu_rlqwbyte( vec1, 4 ), result );   /* element i now holds p[i] + p[i+1] (indices wrap) */
+    return spu_add( spu_rlqwbyte( result, 8 ), result );   /* add the pair sum from two words away, completing the four-term sum in each element */
+}
+
+static inline vec_float4 _vmathVfCross( vec_float4 vec0, vec_float4 vec1 )
+{   /* Cross product vec0 x vec1 in elements 0-2, built from two permuted multiplies. */
+    vec_float4 tmp0, tmp1, tmp2, tmp3, result;
+    tmp0 = spu_shuffle( vec0, vec0, _VECTORMATH_SHUF_YZXW );   /* vec0 as (y, z, x, w) */
+    tmp1 = spu_shuffle( vec1, vec1, _VECTORMATH_SHUF_ZXYW );   /* vec1 as (z, x, y, w) */
+    tmp2 = spu_shuffle( vec0, vec0, _VECTORMATH_SHUF_ZXYW );   /* vec0 as (z, x, y, w) */
+    tmp3 = spu_shuffle( vec1, vec1, _VECTORMATH_SHUF_YZXW );   /* vec1 as (y, z, x, w) */
+    result = spu_mul( tmp0, tmp1 );   /* first term, e.g. element 0 = y0*z1 */
+    result = spu_nmsub( tmp2, tmp3, result );   /* result - tmp2*tmp3, e.g. element 0 = y0*z1 - z0*y1; element 3 is w0*w1 - w0*w1 */
+    return result;
+}
+
+static inline vec_uint4 _vmathVfToHalfFloatsUnpacked(vec_float4 v)
+{   /* Converts four 32-bit floats to 16-bit half-float bit patterns, one per 32-bit element (result in the low 16 bits). */
+    vec_int4 bexp;
+    vec_uint4 mant, sign, hfloat;
+    vec_uint4 notZero, isInf;
+    const vec_uint4 hfloatInf = spu_splats(0x00007c00u);   /* half exponent field all ones, mantissa zero */
+    const vec_uint4 mergeMant = spu_splats(0x000003ffu);   /* low 10 bits: half mantissa field */
+    const vec_uint4 mergeSign = spu_splats(0x00008000u);   /* bit 15: half sign bit */
+
+    sign = spu_rlmask((vec_uint4)v, -16);   /* shift right 16: float sign bit 31 lands on bit 15 */
+    mant = spu_rlmask((vec_uint4)v, -13);   /* shift right 13: top 10 mantissa bits land on bits 0-9 (lower bits are dropped, no rounding) */
+    bexp = spu_and(spu_rlmask((vec_int4)v, -23), 0xff);   /* 8-bit biased float exponent */
+
+    notZero = spu_cmpgt(bexp, 112);   /* exponent > 112 (= 127 - 15): anything at or below becomes zero, so no half denormals are produced */
+    isInf = spu_cmpgt(bexp, 142);   /* exponent > 142 (= 112 + 30): forced to the infinity pattern; this also covers float NaN inputs */
+
+    bexp = spu_add(bexp, -112);   /* re-bias from 127 to 15 */
+    bexp = spu_sl(bexp, 10);   /* move exponent into bits 10 and up */
+
+    hfloat = spu_sel((vec_uint4)bexp, mant, mergeMant);   /* exponent bits plus low 10 bits taken from mant */
+    hfloat = spu_sel(spu_splats(0u), hfloat, notZero);   /* keep the value only where notZero is set, otherwise 0 */
+    hfloat = spu_sel(hfloat, hfloatInf, isInf);   /* replace with 0x7c00 where isInf is set */
+    hfloat = spu_sel(hfloat, sign, mergeSign);   /* finally merge in the sign bit */
+
+    return hfloat;
+}
+
+static inline vec_ushort8 _vmath2VfToHalfFloats(vec_float4 u, vec_float4 v)
+{   /* Converts u and v to half floats and packs them into one quadword: four halves from u followed by four from v. */
+    vec_uint4 hfloat_u, hfloat_v;
+    const vec_uchar16 pack = (vec_uchar16){2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31};   /* bytes 2-3 of each word: first operand (0-15), then second operand (16-31) */
+    hfloat_u = _vmathVfToHalfFloatsUnpacked(u);
+    hfloat_v = _vmathVfToHalfFloatsUnpacked(v);
+    return (vec_ushort8)spu_shuffle(hfloat_u, hfloat_v, pack);   /* gather the selected byte pairs into eight 16-bit values */
+}
+
+#endif
+
+static inline void vmathV3Copy( VmathVector3 *result, const VmathVector3 *vec )
+{   /* Copies *vec into *result by assigning the whole vec128 member. */
+    result->vec128 = vec->vec128;
+}
+
+static inline void vmathV3MakeFromElems( VmathVector3 *result, float _x, float _y, float _z )
+{   /* Builds *result from three scalars; the fourth element of vec128 is set to 0.0f. */
+    result->vec128 = (vec_float4){ _x, _y, _z, 0.0f  };
+}
+
+static inline void vmathV3MakeFromP3( VmathVector3 *result, const VmathPoint3 *pnt )
+{   /* Reinterprets a point as a vector: the full vec128 of *pnt is copied unchanged, fourth element included. */
+    result->vec128 = pnt->vec128;
+}
+
+static inline void vmathV3MakeFromScalar( VmathVector3 *result, float scalar )
+{   /* Sets every element of *result to scalar; spu_splats replicates it across the whole quadword, fourth element included. */
+    result->vec128 = spu_splats( scalar );
+}
+
+static inline void vmathV3MakeFrom128( VmathVector3 *result, vec_float4 vf4 )
+{   /* Wraps a raw quadword: vf4 is stored as-is, with no change to its fourth element. */
+    result->vec128 = vf4;
+}
+
+static inline void vmathV3MakeXAxis( VmathVector3 *result )
+{   /* Sets *result to the constant _VECTORMATH_UNIT_1000, i.e. (1, 0, 0, 0). */
+    result->vec128 = _VECTORMATH_UNIT_1000;
+}
+
+static inline void vmathV3MakeYAxis( VmathVector3 *result )
+{   /* Sets *result to the constant _VECTORMATH_UNIT_0100, i.e. (0, 1, 0, 0). */
+    result->vec128 = _VECTORMATH_UNIT_0100;
+}
+
+static inline void vmathV3MakeZAxis( VmathVector3 *result )
+{   /* Sets *result to the constant _VECTORMATH_UNIT_0010, i.e. (0, 0, 1, 0). */
+    result->vec128 = _VECTORMATH_UNIT_0010;
+}
+
+static inline void vmathV3Lerp( VmathVector3 *result, float t, const VmathVector3 *vec0, const VmathVector3 *vec1 )
+{   /* Linear interpolation: *result = vec0 + (vec1 - vec0) * t. t is not clamped here. */
+    VmathVector3 tmpV3_0, tmpV3_1;
+    vmathV3Sub( &tmpV3_0, vec1, vec0 );   /* tmpV3_0 = vec1 - vec0 */
+    vmathV3ScalarMul( &tmpV3_1, &tmpV3_0, t );   /* tmpV3_1 = (vec1 - vec0) * t */
+    vmathV3Add( result, vec0, &tmpV3_1 );   /* result is written only after both inputs have been read */
+}
+
+static inline void vmathV3Slerp( VmathVector3 *result, float t, const VmathVector3 *unitVec0, const VmathVector3 *unitVec1 )
+{   /* Spherical interpolation between unitVec0 and unitVec1 with linear weights as fallback; the inputs are not normalized here. */
+    vec_float4 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
+    vec_uint4 selectMask;
+    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);   /* replicate element 0 into every element */
+    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);   /* replicate element 1 into every element */
+    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);   /* replicate element 2 into every element */
+    cosAngle = _vmathVfDot3( unitVec0->vec128, unitVec1->vec128 );   /* only element 0 is the 3-component dot product */
+    cosAngle = spu_shuffle( cosAngle, cosAngle, shuffle_xxxx );   /* broadcast it to all elements */
+    selectMask = (vec_uint4)spu_cmpgt( spu_splats(_VECTORMATH_SLERP_TOL), cosAngle );   /* set where _VECTORMATH_SLERP_TOL > cosAngle */
+    angle = acosf4( cosAngle );
+    tttt = spu_splats(t);
+    oneMinusT = spu_sub( spu_splats(1.0f), tttt );
+    angles = spu_sel( spu_splats(1.0f), oneMinusT, (vec_uint4)spu_maskb(0x0f00) );   /* (1, 1-t, 1, 1): mask 0x0f00 covers bytes 4-7, i.e. element 1 */
+    angles = spu_sel( angles, tttt, (vec_uint4)spu_maskb(0x00f0) );   /* (1, 1-t, t, 1): mask 0x00f0 covers bytes 8-11, i.e. element 2 */
+    angles = spu_mul( angles, angle );   /* (angle, (1-t)*angle, t*angle, angle) */
+    sines = sinf4( angles );
+    scales = divf4( sines, spu_shuffle( sines, sines, shuffle_xxxx ) );   /* divide every sine by sin(angle), taken from element 0 */
+    scale0 = spu_sel( oneMinusT, spu_shuffle( scales, scales, shuffle_yyyy ), selectMask );   /* sin((1-t)*angle)/sin(angle) where the mask is set, else 1-t */
+    scale1 = spu_sel( tttt, spu_shuffle( scales, scales, shuffle_zzzz ), selectMask );   /* sin(t*angle)/sin(angle) where the mask is set, else t */
+    result->vec128 = spu_madd( unitVec0->vec128, scale0, spu_mul( unitVec1->vec128, scale1 ) );   /* unitVec0*scale0 + unitVec1*scale1, applied to all four elements */
+}
+
+static inline vec_float4 vmathV3Get128( const VmathVector3 *vec )
+{   /* Returns the raw vec128 quadword stored in *vec, fourth element included. */
+    return vec->vec128;
+}
+
+static inline void vmathV3StoreXYZ( const VmathVector3 *vec, vec_float4 *quad )
+{   /* Writes x, y, z of *vec into *quad while keeping the fourth word already stored in *quad (read-modify-write). */
+    vec_float4 dstVec = *quad;   /* current destination contents */
+    vec_uint4 mask = (vec_uint4)spu_maskb(0x000f);   /* mask 0x000f covers bytes 12-15, i.e. the fourth word */
+    dstVec = spu_sel(vec->vec128, dstVec, mask);   /* masked bytes come from the old destination, the rest from vec */
+    *quad = dstVec;
+}
+
+static inline void vmathV3LoadXYZArray( VmathVector3 *vec0, VmathVector3 *vec1, VmathVector3 *vec2, VmathVector3 *vec3, const vec_float4 *threeQuads )
+{
+    vec_float4 xyzx, yzxy, zxyz, xyz1, xyz2, xyz3;
+    xyzx = threeQuads[0];
+    yzxy = threeQuads[1];
+    zxyz = threeQuads[2];
+    xyz1 = spu_shuffle( xyzx, yzxy, _VECTORMATH_SHUF_WABC );
+    xyz2 = spu_shuffle( yzxy, zxyz, _VECTORMATH_SHUF_ZWAB );
+    xyz3 = spu_rlqwbyte( zxyz, 4 );
+    vec0->vec128 = xyzx;
+    vec1->vec128 = xyz1;
+    vec2->vec128 = xyz2;
+    vec3->vec128 = xyz3;
+}
+
+static inline void vmathV3StoreXYZArray( const VmathVector3 *vec0, const VmathVector3 *vec1, const VmathVector3 *vec2, const VmathVector3 *vec3, vec_float4 *threeQuads )
+{
+    vec_float4 xyzx, yzxy, zxyz;
+    xyzx = spu_shuffle( vec0->vec128, vec1->vec128, _VECTORMATH_SHUF_XYZA );
+    yzxy = spu_shuffle( vec1->vec128, vec2->vec128, _VECTORMATH_SHUF_YZAB );
+    zxyz = spu_shuffle( vec2->vec128, vec3->vec128, _VECTORMATH_SHUF_ZABC );
+    threeQuads[0] = xyzx;
+    threeQuads[1] = yzxy;
+    threeQuads[2] = zxyz;
+}
+
+static inline void vmathV3StoreHalfFloats( const VmathVector3 *vec0, const VmathVector3 *vec1, const VmathVector3 *vec2, const VmathVector3 *vec3, const VmathVector3 *vec4, const VmathVector3 *vec5, const VmathVector3 *vec6, const VmathVector3 *vec7, vec_ushort8 *threeQuads )
+{
+    vec_float4 xyz0[3];
+    vec_float4 xyz1[3];
+    vmathV3StoreXYZArray( vec0, vec1, vec2, vec3, xyz0 );
+    vmathV3StoreXYZArray( vec4, vec5, vec6, vec7, xyz1 );
+    threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
+    threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
+    threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
+}
+
+static inline void vmathV3SetX( VmathVector3 *result, float _x )
+{
+    result->vec128 = spu_insert( _x, result->vec128, 0 );
+}
+
+static inline float vmathV3GetX( const VmathVector3 *vec )
+{
+    return spu_extract( vec->vec128, 0 );
+}
+
+static inline void vmathV3SetY( VmathVector3 *result, float _y )
+{
+    result->vec128 = spu_insert( _y, result->vec128, 1 );
+}
+
+static inline float vmathV3GetY( const VmathVector3 *vec )
+{
+    return spu_extract( vec->vec128, 1 );
+}
+
+static inline void vmathV3SetZ( VmathVector3 *result, float _z )
+{
+    result->vec128 = spu_insert( _z, result->vec128, 2 );
+}
+
+static inline float vmathV3GetZ( const VmathVector3 *vec )
+{
+    return spu_extract( vec->vec128, 2 );
+}
+
+static inline void vmathV3SetElem( VmathVector3 *result, int idx, float value )
+{
+    result->vec128 = spu_insert( value, result->vec128, idx );
+}
+
+static inline float vmathV3GetElem( const VmathVector3 *vec, int idx )
+{
+    return spu_extract( vec->vec128, idx );
+}
+
+static inline void vmathV3Add( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
+{
+    result->vec128 = spu_add( vec0->vec128, vec1->vec128 );
+}
+
+static inline void vmathV3Sub( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
+{
+    result->vec128 = spu_sub( vec0->vec128, vec1->vec128 );
+}
+
+static inline void vmathV3AddP3( VmathPoint3 *result, const VmathVector3 *vec, const VmathPoint3 *pnt1 )
+{
+    result->vec128 = spu_add( vec->vec128, pnt1->vec128 );
+}
+
+static inline void vmathV3ScalarMul( VmathVector3 *result, const VmathVector3 *vec, float scalar )
+{
+    result->vec128 = spu_mul( vec->vec128, spu_splats(scalar) );
+}
+
+static inline void vmathV3ScalarDiv( VmathVector3 *result, const VmathVector3 *vec, float scalar )
+{
+    result->vec128 = divf4( vec->vec128, spu_splats(scalar) );
+}
+
+static inline void vmathV3Neg( VmathVector3 *result, const VmathVector3 *vec )
+{
+    result->vec128 = negatef4( vec->vec128 );
+}
+
+static inline void vmathV3MulPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
+{
+    result->vec128 = spu_mul( vec0->vec128, vec1->vec128 );
+}
+
+static inline void vmathV3DivPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
+{
+    result->vec128 = divf4( vec0->vec128, vec1->vec128 );
+}
+
+static inline void vmathV3RecipPerElem( VmathVector3 *result, const VmathVector3 *vec )
+{
+    result->vec128 = recipf4( vec->vec128 );
+}
+
+static inline void vmathV3SqrtPerElem( VmathVector3 *result, const VmathVector3 *vec )
+{
+    result->vec128 = sqrtf4( vec->vec128 );
+}
+
+static inline void vmathV3RsqrtPerElem( VmathVector3 *result, const VmathVector3 *vec )
+{
+    result->vec128 = rsqrtf4( vec->vec128 );
+}
+
+static inline void vmathV3AbsPerElem( VmathVector3 *result, const VmathVector3 *vec )
+{
+    result->vec128 = fabsf4( vec->vec128 );
+}
+
+static inline void vmathV3CopySignPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
+{
+    result->vec128 = copysignf4( vec0->vec128, vec1->vec128 );
+}
+
+static inline void vmathV3MaxPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
+{
+    result->vec128 = fmaxf4( vec0->vec128, vec1->vec128 );
+}
+
+static inline float vmathV3MaxElem( const VmathVector3 *vec )
+{
+    vec_float4 result;
+    result = fmaxf4( spu_promote( spu_extract( vec->vec128, 1 ), 0 ), vec->vec128 );
+    result = fmaxf4( spu_promote( spu_extract( vec->vec128, 2 ), 0 ), result );
+    return spu_extract( result, 0 );
+}
+
+static inline void vmathV3MinPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
+{
+    result->vec128 = fminf4( vec0->vec128, vec1->vec128 );
+}
+
+static inline float vmathV3MinElem( const VmathVector3 *vec )
+{
+    vec_float4 result;
+    result = fminf4( spu_promote( spu_extract( vec->vec128, 1 ), 0 ), vec->vec128 );
+    result = fminf4( spu_promote( spu_extract( vec->vec128, 2 ), 0 ), result );
+    return spu_extract( result, 0 );
+}
+
+static inline float vmathV3Sum( const VmathVector3 *vec )
+{
+    return
+        spu_extract( vec->vec128, 0 ) +
+        spu_extract( vec->vec128, 1 ) +
+        spu_extract( vec->vec128, 2 );
+}
+
+static inline float vmathV3Dot( const VmathVector3 *vec0, const VmathVector3 *vec1 )
+{
+    return spu_extract( _vmathVfDot3( vec0->vec128, vec1->vec128 ), 0 );
+}
+
+static inline float vmathV3LengthSqr( const VmathVector3 *vec )
+{
+    return spu_extract( _vmathVfDot3( vec->vec128, vec->vec128 ), 0 );
+}
+
+static inline float vmathV3Length( const VmathVector3 *vec )
+{
+    return sqrtf( vmathV3LengthSqr( vec ) );
+}
+
+static inline void vmathV3Normalize( VmathVector3 *result, const VmathVector3 *vec )
+{
+    vec_float4 dot = _vmathVfDot3( vec->vec128, vec->vec128 );
+    dot = spu_shuffle( dot, dot, (vec_uchar16)spu_splats(0x00010203) );
+    result->vec128 = spu_mul( vec->vec128, rsqrtf4( dot ) );
+}
+
+static inline void vmathV3Cross( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 )
+{
+    result->vec128 = _vmathVfCross( vec0->vec128, vec1->vec128 );
+}
+
+static inline void vmathV3Select( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1, unsigned int select1 )
+{
+    result->vec128 = spu_sel( vec0->vec128, vec1->vec128, spu_splats( (unsigned int)-(select1 > 0) ) );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathV3Print( const VmathVector3 *vec )
+{
+    union { vec_float4 v; float s[4]; } tmp;
+    tmp.v = vec->vec128;
+    printf( "( %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2] );
+}
+
+static inline void vmathV3Prints( const VmathVector3 *vec, const char *name )
+{
+    union { vec_float4 v; float s[4]; } tmp;
+    tmp.v = vec->vec128;
+    printf( "%s: ( %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2] );
+}
+
+#endif
+
+static inline void vmathV4Copy( VmathVector4 *result, const VmathVector4 *vec )
+{
+    result->vec128 = vec->vec128;
+}
+
+static inline void vmathV4MakeFromElems( VmathVector4 *result, float _x, float _y, float _z, float _w )
+{
+    result->vec128 = (vec_float4){ _x, _y, _z, _w };
+}
+
+static inline void vmathV4MakeFromV3Scalar( VmathVector4 *result, const VmathVector3 *xyz, float _w )
+{
+    result->vec128 = spu_shuffle( xyz->vec128, spu_promote( _w, 0 ), _VECTORMATH_SHUF_XYZA );
+}
+
+static inline void vmathV4MakeFromV3( VmathVector4 *result, const VmathVector3 *vec )
+{
+    result->vec128 = spu_sel( vec->vec128, spu_splats(0.0f), (vec_uint4)spu_maskb(0x000f) );
+}
+
+static inline void vmathV4MakeFromP3( VmathVector4 *result, const VmathPoint3 *pnt )
+{
+    result->vec128 = spu_sel( pnt->vec128, spu_splats(1.0f), (vec_uint4)spu_maskb(0x000f) );
+}
+
+static inline void vmathV4MakeFromQ( VmathVector4 *result, const VmathQuat *quat )
+{
+    result->vec128 = quat->vec128;
+}
+
+static inline void vmathV4MakeFromScalar( VmathVector4 *result, float scalar )
+{
+    result->vec128 = spu_splats( scalar );
+}
+
+static inline void vmathV4MakeFrom128( VmathVector4 *result, vec_float4 vf4 )
+{
+    result->vec128 = vf4;
+}
+
+static inline void vmathV4MakeXAxis( VmathVector4 *result )
+{
+    result->vec128 = _VECTORMATH_UNIT_1000;
+}
+
+static inline void vmathV4MakeYAxis( VmathVector4 *result )
+{
+    result->vec128 = _VECTORMATH_UNIT_0100;
+}
+
+static inline void vmathV4MakeZAxis( VmathVector4 *result )
+{
+    result->vec128 = _VECTORMATH_UNIT_0010;
+}
+
+static inline void vmathV4MakeWAxis( VmathVector4 *result )
+{
+    result->vec128 = _VECTORMATH_UNIT_0001;
+}
+
+static inline void vmathV4Lerp( VmathVector4 *result, float t, const VmathVector4 *vec0, const VmathVector4 *vec1 )
+{
+    VmathVector4 tmpV4_0, tmpV4_1;
+    vmathV4Sub( &tmpV4_0, vec1, vec0 );
+    vmathV4ScalarMul( &tmpV4_1, &tmpV4_0, t );
+    vmathV4Add( result, vec0, &tmpV4_1 );
+}
+
+static inline void vmathV4Slerp( VmathVector4 *result, float t, const VmathVector4 *unitVec0, const VmathVector4 *unitVec1 )
+{
+    vec_float4 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
+    vec_uint4 selectMask;
+    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
+    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
+    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
+    cosAngle = _vmathVfDot4( unitVec0->vec128, unitVec1->vec128 );
+    cosAngle = spu_shuffle( cosAngle, cosAngle, shuffle_xxxx );
+    selectMask = (vec_uint4)spu_cmpgt( spu_splats(_VECTORMATH_SLERP_TOL), cosAngle );
+    angle = acosf4( cosAngle );
+    tttt = spu_splats(t);
+    oneMinusT = spu_sub( spu_splats(1.0f), tttt );
+    angles = spu_sel( spu_splats(1.0f), oneMinusT, (vec_uint4)spu_maskb(0x0f00) );
+    angles = spu_sel( angles, tttt, (vec_uint4)spu_maskb(0x00f0) );
+    angles = spu_mul( angles, angle );
+    sines = sinf4( angles );
+    scales = divf4( sines, spu_shuffle( sines, sines, shuffle_xxxx ) );
+    scale0 = spu_sel( oneMinusT, spu_shuffle( scales, scales, shuffle_yyyy ), selectMask );
+    scale1 = spu_sel( tttt, spu_shuffle( scales, scales, shuffle_zzzz ), selectMask );
+    result->vec128 = spu_madd( unitVec0->vec128, scale0, spu_mul( unitVec1->vec128, scale1 ) );
+}
+
+static inline vec_float4 vmathV4Get128( const VmathVector4 *vec )
+{
+    return vec->vec128;
+}
+
+static inline void vmathV4StoreHalfFloats( const VmathVector4 *vec0, const VmathVector4 *vec1, const VmathVector4 *vec2, const VmathVector4 *vec3, vec_ushort8 *twoQuads )
+{
+    twoQuads[0] = _vmath2VfToHalfFloats(vec0->vec128, vec1->vec128);
+    twoQuads[1] = _vmath2VfToHalfFloats(vec2->vec128, vec3->vec128);
+}
+
+static inline void vmathV4SetXYZ( VmathVector4 *result, const VmathVector3 *vec )
+{
+    result->vec128 = spu_sel( vec->vec128, result->vec128, (vec_uint4)spu_maskb(0x000f) );
+}
+
+static inline void vmathV4GetXYZ( VmathVector3 *result, const VmathVector4 *vec )
+{
+    result->vec128 = vec->vec128;
+}
+
+static inline void vmathV4SetX( VmathVector4 *result, float _x )
+{
+    result->vec128 = spu_insert( _x, result->vec128, 0 );
+}
+
+static inline float vmathV4GetX( const VmathVector4 *vec )
+{
+    return spu_extract( vec->vec128, 0 );
+}
+
+static inline void vmathV4SetY( VmathVector4 *result, float _y )
+{
+    result->vec128 = spu_insert( _y, result->vec128, 1 );
+}
+
+static inline float vmathV4GetY( const VmathVector4 *vec )
+{
+    return spu_extract( vec->vec128, 1 );
+}
+
+static inline void vmathV4SetZ( VmathVector4 *result, float _z )
+{
+    result->vec128 = spu_insert( _z, result->vec128, 2 );
+}
+
+static inline float vmathV4GetZ( const VmathVector4 *vec )
+{
+    return spu_extract( vec->vec128, 2 );
+}
+
+static inline void vmathV4SetW( VmathVector4 *result, float _w )
+{
+    result->vec128 = spu_insert( _w, result->vec128, 3 );
+}
+
+static inline float vmathV4GetW( const VmathVector4 *vec )
+{
+    return spu_extract( vec->vec128, 3 );
+}
+
+static inline void vmathV4SetElem( VmathVector4 *result, int idx, float value )
+{
+    result->vec128 = spu_insert( value, result->vec128, idx );
+}
+
+static inline float vmathV4GetElem( const VmathVector4 *vec, int idx )
+{
+    return spu_extract( vec->vec128, idx );
+}
+
+static inline void vmathV4Add( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
+{
+    result->vec128 = spu_add( vec0->vec128, vec1->vec128 );
+}
+
+static inline void vmathV4Sub( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
+{
+    result->vec128 = spu_sub( vec0->vec128, vec1->vec128 );
+}
+
+static inline void vmathV4ScalarMul( VmathVector4 *result, const VmathVector4 *vec, float scalar )
+{
+    result->vec128 = spu_mul( vec->vec128, spu_splats(scalar) );
+}
+
+static inline void vmathV4ScalarDiv( VmathVector4 *result, const VmathVector4 *vec, float scalar )
+{
+    result->vec128 = divf4( vec->vec128, spu_splats(scalar) );
+}
+
+static inline void vmathV4Neg( VmathVector4 *result, const VmathVector4 *vec )
+{
+    result->vec128 = negatef4( vec->vec128 );
+}
+
+static inline void vmathV4MulPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
+{
+    result->vec128 = spu_mul( vec0->vec128, vec1->vec128 );
+}
+
+static inline void vmathV4DivPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
+{
+    result->vec128 = divf4( vec0->vec128, vec1->vec128 );
+}
+
+static inline void vmathV4RecipPerElem( VmathVector4 *result, const VmathVector4 *vec )
+{
+    result->vec128 = recipf4( vec->vec128 );
+}
+
+static inline void vmathV4SqrtPerElem( VmathVector4 *result, const VmathVector4 *vec )
+{
+    result->vec128 = sqrtf4( vec->vec128 );
+}
+
+static inline void vmathV4RsqrtPerElem( VmathVector4 *result, const VmathVector4 *vec )
+{
+    result->vec128 = rsqrtf4( vec->vec128 );
+}
+
+static inline void vmathV4AbsPerElem( VmathVector4 *result, const VmathVector4 *vec )
+{
+    result->vec128 = fabsf4( vec->vec128 );
+}
+
+static inline void vmathV4CopySignPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
+{
+    result->vec128 = copysignf4( vec0->vec128, vec1->vec128 );
+}
+
+static inline void vmathV4MaxPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
+{
+    result->vec128 = fmaxf4( vec0->vec128, vec1->vec128 );
+}
+
+static inline float vmathV4MaxElem( const VmathVector4 *vec )
+{
+    vec_float4 result;
+    result = fmaxf4( spu_promote( spu_extract( vec->vec128, 1 ), 0 ), vec->vec128 );
+    result = fmaxf4( spu_promote( spu_extract( vec->vec128, 2 ), 0 ), result );
+    result = fmaxf4( spu_promote( spu_extract( vec->vec128, 3 ), 0 ), result );
+    return spu_extract( result, 0 );
+}
+
+static inline void vmathV4MinPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 )
+{
+    result->vec128 = fminf4( vec0->vec128, vec1->vec128 );
+}
+
+static inline float vmathV4MinElem( const VmathVector4 *vec )
+{
+    vec_float4 result;
+    result = fminf4( spu_promote( spu_extract( vec->vec128, 1 ), 0 ), vec->vec128 );
+    result = fminf4( spu_promote( spu_extract( vec->vec128, 2 ), 0 ), result );
+    result = fminf4( spu_promote( spu_extract( vec->vec128, 3 ), 0 ), result );
+    return spu_extract( result, 0 );
+}
+
+static inline float vmathV4Sum( const VmathVector4 *vec )
+{
+    return
+        spu_extract( vec->vec128, 0 ) +
+        spu_extract( vec->vec128, 1 ) +
+        spu_extract( vec->vec128, 2 ) +
+        spu_extract( vec->vec128, 3 );
+}
+
+static inline float vmathV4Dot( const VmathVector4 *vec0, const VmathVector4 *vec1 )
+{
+    return spu_extract( _vmathVfDot4( vec0->vec128, vec1->vec128 ), 0 );
+}
+
+static inline float vmathV4LengthSqr( const VmathVector4 *vec )
+{
+    return spu_extract( _vmathVfDot4( vec->vec128, vec->vec128 ), 0 );
+}
+
+static inline float vmathV4Length( const VmathVector4 *vec )
+{
+    return sqrtf( vmathV4LengthSqr( vec ) );
+}
+
+static inline void vmathV4Normalize( VmathVector4 *result, const VmathVector4 *vec )
+{
+    vec_float4 dot = _vmathVfDot4( vec->vec128, vec->vec128 );
+    result->vec128 = spu_mul( vec->vec128, rsqrtf4( dot ) );
+}
+
+static inline void vmathV4Select( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1, unsigned int select1 )
+{
+    result->vec128 = spu_sel( vec0->vec128, vec1->vec128, spu_splats( (unsigned int)-(select1 > 0) ) );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathV4Print( const VmathVector4 *vec )
+{
+    union { vec_float4 v; float s[4]; } tmp;
+    tmp.v = vec->vec128;
+    printf( "( %f %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] );
+}
+
+static inline void vmathV4Prints( const VmathVector4 *vec, const char *name )
+{
+    union { vec_float4 v; float s[4]; } tmp;
+    tmp.v = vec->vec128;
+    printf( "%s: ( %f %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] );
+}
+
+#endif
+
+static inline void vmathP3Copy( VmathPoint3 *result, const VmathPoint3 *pnt )
+{
+    result->vec128 = pnt->vec128;
+}
+
+static inline void vmathP3MakeFromElems( VmathPoint3 *result, float _x, float _y, float _z )
+{
+    result->vec128 = (vec_float4){ _x, _y, _z, 0.0f  };
+}
+
+static inline void vmathP3MakeFromV3( VmathPoint3 *result, const VmathVector3 *vec )
+{
+    result->vec128 = vec->vec128;
+}
+
+static inline void vmathP3MakeFromScalar( VmathPoint3 *result, float scalar )
+{
+    result->vec128 = spu_splats( scalar );
+}
+
+static inline void vmathP3MakeFrom128( VmathPoint3 *result, vec_float4 vf4 )
+{
+    result->vec128 = vf4;
+}
+
+static inline void vmathP3Lerp( VmathPoint3 *result, float t, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
+{
+    VmathVector3 tmpV3_0, tmpV3_1;
+    vmathP3Sub( &tmpV3_0, pnt1, pnt0 );
+    vmathV3ScalarMul( &tmpV3_1, &tmpV3_0, t );
+    vmathP3AddV3( result, pnt0, &tmpV3_1 );
+}
+
+static inline vec_float4 vmathP3Get128( const VmathPoint3 *pnt )
+{
+    return pnt->vec128;
+}
+
+static inline void vmathP3StoreXYZ( const VmathPoint3 *pnt, vec_float4 *quad )
+{
+    vec_float4 dstVec = *quad;
+    vec_uint4 mask = (vec_uint4)spu_maskb(0x000f);
+    dstVec = spu_sel(pnt->vec128, dstVec, mask);
+    *quad = dstVec;
+}
+
+static inline void vmathP3LoadXYZArray( VmathPoint3 *pnt0, VmathPoint3 *pnt1, VmathPoint3 *pnt2, VmathPoint3 *pnt3, const vec_float4 *threeQuads )
+{
+    vec_float4 xyzx, yzxy, zxyz, xyz1, xyz2, xyz3;
+    xyzx = threeQuads[0];
+    yzxy = threeQuads[1];
+    zxyz = threeQuads[2];
+    xyz1 = spu_shuffle( xyzx, yzxy, _VECTORMATH_SHUF_WABC );
+    xyz2 = spu_shuffle( yzxy, zxyz, _VECTORMATH_SHUF_ZWAB );
+    xyz3 = spu_rlqwbyte( zxyz, 4 );
+    pnt0->vec128 = xyzx;
+    pnt1->vec128 = xyz1;
+    pnt2->vec128 = xyz2;
+    pnt3->vec128 = xyz3;
+}
+
+static inline void vmathP3StoreXYZArray( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, const VmathPoint3 *pnt2, const VmathPoint3 *pnt3, vec_float4 *threeQuads )
+{
+    vec_float4 xyzx, yzxy, zxyz;
+    xyzx = spu_shuffle( pnt0->vec128, pnt1->vec128, _VECTORMATH_SHUF_XYZA );
+    yzxy = spu_shuffle( pnt1->vec128, pnt2->vec128, _VECTORMATH_SHUF_YZAB );
+    zxyz = spu_shuffle( pnt2->vec128, pnt3->vec128, _VECTORMATH_SHUF_ZABC );
+    threeQuads[0] = xyzx;
+    threeQuads[1] = yzxy;
+    threeQuads[2] = zxyz;
+}
+
+static inline void vmathP3StoreHalfFloats( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, const VmathPoint3 *pnt2, const VmathPoint3 *pnt3, const VmathPoint3 *pnt4, const VmathPoint3 *pnt5, const VmathPoint3 *pnt6, const VmathPoint3 *pnt7, vec_ushort8 *threeQuads )
+{
+    vec_float4 xyz0[3];
+    vec_float4 xyz1[3];
+    vmathP3StoreXYZArray( pnt0, pnt1, pnt2, pnt3, xyz0 );
+    vmathP3StoreXYZArray( pnt4, pnt5, pnt6, pnt7, xyz1 );
+    threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
+    threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
+    threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
+}
+
+static inline void vmathP3SetX( VmathPoint3 *result, float _x )
+{
+    result->vec128 = spu_insert( _x, result->vec128, 0 );
+}
+
+static inline float vmathP3GetX( const VmathPoint3 *pnt )
+{
+    return spu_extract( pnt->vec128, 0 );
+}
+
+static inline void vmathP3SetY( VmathPoint3 *result, float _y )
+{
+    result->vec128 = spu_insert( _y, result->vec128, 1 );
+}
+
+static inline float vmathP3GetY( const VmathPoint3 *pnt )
+{
+    return spu_extract( pnt->vec128, 1 );
+}
+
+static inline void vmathP3SetZ( VmathPoint3 *result, float _z )
+{
+    result->vec128 = spu_insert( _z, result->vec128, 2 );
+}
+
+static inline float vmathP3GetZ( const VmathPoint3 *pnt )
+{
+    return spu_extract( pnt->vec128, 2 );
+}
+
+static inline void vmathP3SetElem( VmathPoint3 *result, int idx, float value )
+{
+    result->vec128 = spu_insert( value, result->vec128, idx );
+}
+
+static inline float vmathP3GetElem( const VmathPoint3 *pnt, int idx )
+{
+    return spu_extract( pnt->vec128, idx );
+}
+
+static inline void vmathP3Sub( VmathVector3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
+{
+    result->vec128 = spu_sub( pnt0->vec128, pnt1->vec128 );
+}
+
+static inline void vmathP3AddV3( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *vec1 )
+{
+    result->vec128 = spu_add( pnt->vec128, vec1->vec128 );
+}
+
+static inline void vmathP3SubV3( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *vec1 )
+{
+    result->vec128 = spu_sub( pnt->vec128, vec1->vec128 );
+}
+
+static inline void vmathP3MulPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
+{
+    result->vec128 = spu_mul( pnt0->vec128, pnt1->vec128 );
+}
+
+static inline void vmathP3DivPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
+{
+    result->vec128 = divf4( pnt0->vec128, pnt1->vec128 );
+}
+
+static inline void vmathP3RecipPerElem( VmathPoint3 *result, const VmathPoint3 *pnt )
+{
+    result->vec128 = recipf4( pnt->vec128 );
+}
+
+static inline void vmathP3SqrtPerElem( VmathPoint3 *result, const VmathPoint3 *pnt )
+{
+    result->vec128 = sqrtf4( pnt->vec128 );
+}
+
+static inline void vmathP3RsqrtPerElem( VmathPoint3 *result, const VmathPoint3 *pnt )
+{
+    result->vec128 = rsqrtf4( pnt->vec128 );
+}
+
+static inline void vmathP3AbsPerElem( VmathPoint3 *result, const VmathPoint3 *pnt )
+{
+    result->vec128 = fabsf4( pnt->vec128 );
+}
+
+static inline void vmathP3CopySignPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
+{
+    result->vec128 = copysignf4( pnt0->vec128, pnt1->vec128 );
+}
+
+static inline void vmathP3MaxPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
+{
+    result->vec128 = fmaxf4( pnt0->vec128, pnt1->vec128 );
+}
+
+static inline float vmathP3MaxElem( const VmathPoint3 *pnt )
+{
+    vec_float4 result;
+    result = fmaxf4( spu_promote( spu_extract( pnt->vec128, 1 ), 0 ), pnt->vec128 );
+    result = fmaxf4( spu_promote( spu_extract( pnt->vec128, 2 ), 0 ), result );
+    return spu_extract( result, 0 );
+}
+
+static inline void vmathP3MinPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
+{
+    result->vec128 = fminf4( pnt0->vec128, pnt1->vec128 );
+}
+
+static inline float vmathP3MinElem( const VmathPoint3 *pnt )
+{
+    vec_float4 result;
+    result = fminf4( spu_promote( spu_extract( pnt->vec128, 1 ), 0 ), pnt->vec128 );
+    result = fminf4( spu_promote( spu_extract( pnt->vec128, 2 ), 0 ), result );
+    return spu_extract( result, 0 );
+}
+
+static inline float vmathP3Sum( const VmathPoint3 *pnt )
+{
+    return
+        spu_extract( pnt->vec128, 0 ) +
+        spu_extract( pnt->vec128, 1 ) +
+        spu_extract( pnt->vec128, 2 );
+}
+
+static inline void vmathP3Scale( VmathPoint3 *result, const VmathPoint3 *pnt, float scaleVal )
+{
+    VmathPoint3 tmpP3_0;
+    vmathP3MakeFromScalar( &tmpP3_0, scaleVal );
+    vmathP3MulPerElem( result, pnt, &tmpP3_0 );
+}
+
+static inline void vmathP3NonUniformScale( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *scaleVec )
+{
+    VmathPoint3 tmpP3_0;
+    vmathP3MakeFromV3( &tmpP3_0, scaleVec );
+    vmathP3MulPerElem( result, pnt, &tmpP3_0 );
+}
+
+static inline float vmathP3Projection( const VmathPoint3 *pnt, const VmathVector3 *unitVec )
+{
+    return spu_extract( _vmathVfDot3( pnt->vec128, unitVec->vec128 ), 0 );
+}
+
+static inline float vmathP3DistSqrFromOrigin( const VmathPoint3 *pnt )
+{
+    VmathVector3 tmpV3_0;
+    vmathV3MakeFromP3( &tmpV3_0, pnt );
+    return vmathV3LengthSqr( &tmpV3_0 );
+}
+
+static inline float vmathP3DistFromOrigin( const VmathPoint3 *pnt )
+{
+    VmathVector3 tmpV3_0;
+    vmathV3MakeFromP3( &tmpV3_0, pnt );
+    return vmathV3Length( &tmpV3_0 );
+}
+
+static inline float vmathP3DistSqr( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
+{
+    VmathVector3 tmpV3_0;
+    vmathP3Sub( &tmpV3_0, pnt1, pnt0 );
+    return vmathV3LengthSqr( &tmpV3_0 );
+}
+
+static inline float vmathP3Dist( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 )
+{
+    VmathVector3 tmpV3_0;
+    vmathP3Sub( &tmpV3_0, pnt1, pnt0 );
+    return vmathV3Length( &tmpV3_0 );
+}
+
+static inline void vmathP3Select( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, unsigned int select1 )
+{
+    result->vec128 = spu_sel( pnt0->vec128, pnt1->vec128, spu_splats( (unsigned int)-(select1 > 0) ) );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathP3Print( const VmathPoint3 *pnt )
+{
+    union { vec_float4 v; float s[4]; } tmp;
+    tmp.v = pnt->vec128;
+    printf( "( %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2] );
+}
+
+static inline void vmathP3Prints( const VmathPoint3 *pnt, const char *name )
+{
+    union { vec_float4 v; float s[4]; } tmp;
+    tmp.v = pnt->vec128;
+    printf( "%s: ( %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2] );
+}
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/spu/c/vec_aos_v.h b/Extras/vectormathlibrary/include/vectormath/spu/c/vec_aos_v.h
index 7cf0fa9cc..7b41134e6 100644
--- a/Extras/vectormathlibrary/include/vectormath/spu/c/vec_aos_v.h
+++ b/Extras/vectormathlibrary/include/vectormath/spu/c/vec_aos_v.h
@@ -1,951 +1,951 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_VEC_AOS_V_C_H
-#define _VECTORMATH_VEC_AOS_V_C_H
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/*-----------------------------------------------------------------------------
- * Constants
- * for shuffles, words are labeled [x,y,z,w] [a,b,c,d]
- */
-#define _VECTORMATH_SHUF_X 0x00010203
-#define _VECTORMATH_SHUF_Y 0x04050607
-#define _VECTORMATH_SHUF_Z 0x08090a0b
-#define _VECTORMATH_SHUF_W 0x0c0d0e0f
-#define _VECTORMATH_SHUF_A 0x10111213
-#define _VECTORMATH_SHUF_B 0x14151617
-#define _VECTORMATH_SHUF_C 0x18191a1b
-#define _VECTORMATH_SHUF_D 0x1c1d1e1f
-#define _VECTORMATH_SHUF_0 0x80808080
-#define _VECTORMATH_SHUF_XYZA (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_A }
-#define _VECTORMATH_SHUF_ZXYW (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_W }
-#define _VECTORMATH_SHUF_YZXW (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_W }
-#define _VECTORMATH_SHUF_WABC (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_C }
-#define _VECTORMATH_SHUF_ZWAB (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B }
-#define _VECTORMATH_SHUF_XYZA (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_A }
-#define _VECTORMATH_SHUF_YZAB (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B }
-#define _VECTORMATH_SHUF_ZABC (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_C }
-#define _VECTORMATH_UNIT_1000 (vec_float4){ 1.0f, 0.0f, 0.0f, 0.0f }
-#define _VECTORMATH_UNIT_0100 (vec_float4){ 0.0f, 1.0f, 0.0f, 0.0f }
-#define _VECTORMATH_UNIT_0010 (vec_float4){ 0.0f, 0.0f, 1.0f, 0.0f }
-#define _VECTORMATH_UNIT_0001 (vec_float4){ 0.0f, 0.0f, 0.0f, 1.0f }
-#define _VECTORMATH_SLERP_TOL 0.999f
-
-/*-----------------------------------------------------------------------------
- * Definitions
- */
-#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
-#define _VECTORMATH_INTERNAL_FUNCTIONS
-
-#endif
-
-static inline VmathVector3 vmathV3MakeFromElems_V( float _x, float _y, float _z )
-{
-    VmathVector3 result;
-    vmathV3MakeFromElems(&result, _x, _y, _z);
-    return result;
-}
-
-static inline VmathVector3 vmathV3MakeFromP3_V( VmathPoint3 pnt )
-{
-    VmathVector3 result;
-    vmathV3MakeFromP3(&result, &pnt);
-    return result;
-}
-
-static inline VmathVector3 vmathV3MakeFromScalar_V( float scalar )
-{
-    VmathVector3 result;
-    vmathV3MakeFromScalar(&result, scalar);
-    return result;
-}
-
-static inline VmathVector3 vmathV3MakeFrom128_V( vec_float4 vf4 )
-{
-    VmathVector3 result;
-    vmathV3MakeFrom128(&result, vf4);
-    return result;
-}
-
-static inline VmathVector3 vmathV3MakeXAxis_V( )
-{
-    VmathVector3 result;
-    vmathV3MakeXAxis(&result);
-    return result;
-}
-
-static inline VmathVector3 vmathV3MakeYAxis_V( )
-{
-    VmathVector3 result;
-    vmathV3MakeYAxis(&result);
-    return result;
-}
-
-static inline VmathVector3 vmathV3MakeZAxis_V( )
-{
-    VmathVector3 result;
-    vmathV3MakeZAxis(&result);
-    return result;
-}
-
-static inline VmathVector3 vmathV3Lerp_V( float t, VmathVector3 vec0, VmathVector3 vec1 )
-{
-    VmathVector3 result;
-    vmathV3Lerp(&result, t, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathVector3 vmathV3Slerp_V( float t, VmathVector3 unitVec0, VmathVector3 unitVec1 )
-{
-    VmathVector3 result;
-    vmathV3Slerp(&result, t, &unitVec0, &unitVec1);
-    return result;
-}
-
-static inline vec_float4 vmathV3Get128_V( VmathVector3 vec )
-{
-    return vmathV3Get128(&vec);
-}
-
-static inline void vmathV3StoreXYZ_V( VmathVector3 vec, vec_float4 *quad )
-{
-    vmathV3StoreXYZ(&vec, quad);
-}
-
-static inline void vmathV3LoadXYZArray_V( VmathVector3 *vec0, VmathVector3 *vec1, VmathVector3 *vec2, VmathVector3 *vec3, const vec_float4 *threeQuads )
-{
-    vmathV3LoadXYZArray(vec0, vec1, vec2, vec3, threeQuads);
-}
-
-static inline void vmathV3StoreXYZArray_V( VmathVector3 vec0, VmathVector3 vec1, VmathVector3 vec2, VmathVector3 vec3, vec_float4 *threeQuads )
-{
-    vmathV3StoreXYZArray(&vec0, &vec1, &vec2, &vec3, threeQuads);
-}
-
-static inline void vmathV3StoreHalfFloats_V( VmathVector3 vec0, VmathVector3 vec1, VmathVector3 vec2, VmathVector3 vec3, VmathVector3 vec4, VmathVector3 vec5, VmathVector3 vec6, VmathVector3 vec7, vec_ushort8 *threeQuads )
-{
-    vmathV3StoreHalfFloats(&vec0, &vec1, &vec2, &vec3, &vec4, &vec5, &vec6, &vec7, threeQuads);
-}
-
-static inline void vmathV3SetX_V( VmathVector3 *result, float _x )
-{
-    vmathV3SetX(result, _x);
-}
-
-static inline float vmathV3GetX_V( VmathVector3 vec )
-{
-    return vmathV3GetX(&vec);
-}
-
-static inline void vmathV3SetY_V( VmathVector3 *result, float _y )
-{
-    vmathV3SetY(result, _y);
-}
-
-static inline float vmathV3GetY_V( VmathVector3 vec )
-{
-    return vmathV3GetY(&vec);
-}
-
-static inline void vmathV3SetZ_V( VmathVector3 *result, float _z )
-{
-    vmathV3SetZ(result, _z);
-}
-
-static inline float vmathV3GetZ_V( VmathVector3 vec )
-{
-    return vmathV3GetZ(&vec);
-}
-
-static inline void vmathV3SetElem_V( VmathVector3 *result, int idx, float value )
-{
-    vmathV3SetElem(result, idx, value);
-}
-
-static inline float vmathV3GetElem_V( VmathVector3 vec, int idx )
-{
-    return vmathV3GetElem(&vec, idx);
-}
-
-static inline VmathVector3 vmathV3Add_V( VmathVector3 vec0, VmathVector3 vec1 )
-{
-    VmathVector3 result;
-    vmathV3Add(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathVector3 vmathV3Sub_V( VmathVector3 vec0, VmathVector3 vec1 )
-{
-    VmathVector3 result;
-    vmathV3Sub(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathPoint3 vmathV3AddP3_V( VmathVector3 vec, VmathPoint3 pnt1 )
-{
-    VmathPoint3 result;
-    vmathV3AddP3(&result, &vec, &pnt1);
-    return result;
-}
-
-static inline VmathVector3 vmathV3ScalarMul_V( VmathVector3 vec, float scalar )
-{
-    VmathVector3 result;
-    vmathV3ScalarMul(&result, &vec, scalar);
-    return result;
-}
-
-static inline VmathVector3 vmathV3ScalarDiv_V( VmathVector3 vec, float scalar )
-{
-    VmathVector3 result;
-    vmathV3ScalarDiv(&result, &vec, scalar);
-    return result;
-}
-
-static inline VmathVector3 vmathV3Neg_V( VmathVector3 vec )
-{
-    VmathVector3 result;
-    vmathV3Neg(&result, &vec);
-    return result;
-}
-
-static inline VmathVector3 vmathV3MulPerElem_V( VmathVector3 vec0, VmathVector3 vec1 )
-{
-    VmathVector3 result;
-    vmathV3MulPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathVector3 vmathV3DivPerElem_V( VmathVector3 vec0, VmathVector3 vec1 )
-{
-    VmathVector3 result;
-    vmathV3DivPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathVector3 vmathV3RecipPerElem_V( VmathVector3 vec )
-{
-    VmathVector3 result;
-    vmathV3RecipPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathVector3 vmathV3SqrtPerElem_V( VmathVector3 vec )
-{
-    VmathVector3 result;
-    vmathV3SqrtPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathVector3 vmathV3RsqrtPerElem_V( VmathVector3 vec )
-{
-    VmathVector3 result;
-    vmathV3RsqrtPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathVector3 vmathV3AbsPerElem_V( VmathVector3 vec )
-{
-    VmathVector3 result;
-    vmathV3AbsPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathVector3 vmathV3CopySignPerElem_V( VmathVector3 vec0, VmathVector3 vec1 )
-{
-    VmathVector3 result;
-    vmathV3CopySignPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathVector3 vmathV3MaxPerElem_V( VmathVector3 vec0, VmathVector3 vec1 )
-{
-    VmathVector3 result;
-    vmathV3MaxPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline float vmathV3MaxElem_V( VmathVector3 vec )
-{
-    return vmathV3MaxElem(&vec);
-}
-
-static inline VmathVector3 vmathV3MinPerElem_V( VmathVector3 vec0, VmathVector3 vec1 )
-{
-    VmathVector3 result;
-    vmathV3MinPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline float vmathV3MinElem_V( VmathVector3 vec )
-{
-    return vmathV3MinElem(&vec);
-}
-
-static inline float vmathV3Sum_V( VmathVector3 vec )
-{
-    return vmathV3Sum(&vec);
-}
-
-static inline float vmathV3Dot_V( VmathVector3 vec0, VmathVector3 vec1 )
-{
-    return vmathV3Dot(&vec0, &vec1);
-}
-
-static inline float vmathV3LengthSqr_V( VmathVector3 vec )
-{
-    return vmathV3LengthSqr(&vec);
-}
-
-static inline float vmathV3Length_V( VmathVector3 vec )
-{
-    return vmathV3Length(&vec);
-}
-
-static inline VmathVector3 vmathV3Normalize_V( VmathVector3 vec )
-{
-    VmathVector3 result;
-    vmathV3Normalize(&result, &vec);
-    return result;
-}
-
-static inline VmathVector3 vmathV3Cross_V( VmathVector3 vec0, VmathVector3 vec1 )
-{
-    VmathVector3 result;
-    vmathV3Cross(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathVector3 vmathV3Select_V( VmathVector3 vec0, VmathVector3 vec1, unsigned int select1 )
-{
-    VmathVector3 result;
-    vmathV3Select(&result, &vec0, &vec1, select1);
-    return result;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathV3Print_V( VmathVector3 vec )
-{
-    vmathV3Print(&vec);
-}
-
-static inline void vmathV3Prints_V( VmathVector3 vec, const char *name )
-{
-    vmathV3Prints(&vec, name);
-}
-
-#endif
-
-static inline VmathVector4 vmathV4MakeFromElems_V( float _x, float _y, float _z, float _w )
-{
-    VmathVector4 result;
-    vmathV4MakeFromElems(&result, _x, _y, _z, _w);
-    return result;
-}
-
-static inline VmathVector4 vmathV4MakeFromV3Scalar_V( VmathVector3 xyz, float _w )
-{
-    VmathVector4 result;
-    vmathV4MakeFromV3Scalar(&result, &xyz, _w);
-    return result;
-}
-
-static inline VmathVector4 vmathV4MakeFromV3_V( VmathVector3 vec )
-{
-    VmathVector4 result;
-    vmathV4MakeFromV3(&result, &vec);
-    return result;
-}
-
-static inline VmathVector4 vmathV4MakeFromP3_V( VmathPoint3 pnt )
-{
-    VmathVector4 result;
-    vmathV4MakeFromP3(&result, &pnt);
-    return result;
-}
-
-static inline VmathVector4 vmathV4MakeFromQ_V( VmathQuat quat )
-{
-    VmathVector4 result;
-    vmathV4MakeFromQ(&result, &quat);
-    return result;
-}
-
-static inline VmathVector4 vmathV4MakeFromScalar_V( float scalar )
-{
-    VmathVector4 result;
-    vmathV4MakeFromScalar(&result, scalar);
-    return result;
-}
-
-static inline VmathVector4 vmathV4MakeFrom128_V( vec_float4 vf4 )
-{
-    VmathVector4 result;
-    vmathV4MakeFrom128(&result, vf4);
-    return result;
-}
-
-static inline VmathVector4 vmathV4MakeXAxis_V( )
-{
-    VmathVector4 result;
-    vmathV4MakeXAxis(&result);
-    return result;
-}
-
-static inline VmathVector4 vmathV4MakeYAxis_V( )
-{
-    VmathVector4 result;
-    vmathV4MakeYAxis(&result);
-    return result;
-}
-
-static inline VmathVector4 vmathV4MakeZAxis_V( )
-{
-    VmathVector4 result;
-    vmathV4MakeZAxis(&result);
-    return result;
-}
-
-static inline VmathVector4 vmathV4MakeWAxis_V( )
-{
-    VmathVector4 result;
-    vmathV4MakeWAxis(&result);
-    return result;
-}
-
-static inline VmathVector4 vmathV4Lerp_V( float t, VmathVector4 vec0, VmathVector4 vec1 )
-{
-    VmathVector4 result;
-    vmathV4Lerp(&result, t, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathVector4 vmathV4Slerp_V( float t, VmathVector4 unitVec0, VmathVector4 unitVec1 )
-{
-    VmathVector4 result;
-    vmathV4Slerp(&result, t, &unitVec0, &unitVec1);
-    return result;
-}
-
-static inline vec_float4 vmathV4Get128_V( VmathVector4 vec )
-{
-    return vmathV4Get128(&vec);
-}
-
-static inline void vmathV4StoreHalfFloats_V( VmathVector4 vec0, VmathVector4 vec1, VmathVector4 vec2, VmathVector4 vec3, vec_ushort8 *twoQuads )
-{
-    vmathV4StoreHalfFloats(&vec0, &vec1, &vec2, &vec3, twoQuads);
-}
-
-static inline void vmathV4SetXYZ_V( VmathVector4 *result, VmathVector3 vec )
-{
-    vmathV4SetXYZ(result, &vec);
-}
-
-static inline VmathVector3 vmathV4GetXYZ_V( VmathVector4 vec )
-{
-    VmathVector3 result;
-    vmathV4GetXYZ(&result, &vec);
-    return result;
-}
-
-static inline void vmathV4SetX_V( VmathVector4 *result, float _x )
-{
-    vmathV4SetX(result, _x);
-}
-
-static inline float vmathV4GetX_V( VmathVector4 vec )
-{
-    return vmathV4GetX(&vec);
-}
-
-static inline void vmathV4SetY_V( VmathVector4 *result, float _y )
-{
-    vmathV4SetY(result, _y);
-}
-
-static inline float vmathV4GetY_V( VmathVector4 vec )
-{
-    return vmathV4GetY(&vec);
-}
-
-static inline void vmathV4SetZ_V( VmathVector4 *result, float _z )
-{
-    vmathV4SetZ(result, _z);
-}
-
-static inline float vmathV4GetZ_V( VmathVector4 vec )
-{
-    return vmathV4GetZ(&vec);
-}
-
-static inline void vmathV4SetW_V( VmathVector4 *result, float _w )
-{
-    vmathV4SetW(result, _w);
-}
-
-static inline float vmathV4GetW_V( VmathVector4 vec )
-{
-    return vmathV4GetW(&vec);
-}
-
-static inline void vmathV4SetElem_V( VmathVector4 *result, int idx, float value )
-{
-    vmathV4SetElem(result, idx, value);
-}
-
-static inline float vmathV4GetElem_V( VmathVector4 vec, int idx )
-{
-    return vmathV4GetElem(&vec, idx);
-}
-
-static inline VmathVector4 vmathV4Add_V( VmathVector4 vec0, VmathVector4 vec1 )
-{
-    VmathVector4 result;
-    vmathV4Add(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathVector4 vmathV4Sub_V( VmathVector4 vec0, VmathVector4 vec1 )
-{
-    VmathVector4 result;
-    vmathV4Sub(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathVector4 vmathV4ScalarMul_V( VmathVector4 vec, float scalar )
-{
-    VmathVector4 result;
-    vmathV4ScalarMul(&result, &vec, scalar);
-    return result;
-}
-
-static inline VmathVector4 vmathV4ScalarDiv_V( VmathVector4 vec, float scalar )
-{
-    VmathVector4 result;
-    vmathV4ScalarDiv(&result, &vec, scalar);
-    return result;
-}
-
-static inline VmathVector4 vmathV4Neg_V( VmathVector4 vec )
-{
-    VmathVector4 result;
-    vmathV4Neg(&result, &vec);
-    return result;
-}
-
-static inline VmathVector4 vmathV4MulPerElem_V( VmathVector4 vec0, VmathVector4 vec1 )
-{
-    VmathVector4 result;
-    vmathV4MulPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathVector4 vmathV4DivPerElem_V( VmathVector4 vec0, VmathVector4 vec1 )
-{
-    VmathVector4 result;
-    vmathV4DivPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathVector4 vmathV4RecipPerElem_V( VmathVector4 vec )
-{
-    VmathVector4 result;
-    vmathV4RecipPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathVector4 vmathV4SqrtPerElem_V( VmathVector4 vec )
-{
-    VmathVector4 result;
-    vmathV4SqrtPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathVector4 vmathV4RsqrtPerElem_V( VmathVector4 vec )
-{
-    VmathVector4 result;
-    vmathV4RsqrtPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathVector4 vmathV4AbsPerElem_V( VmathVector4 vec )
-{
-    VmathVector4 result;
-    vmathV4AbsPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathVector4 vmathV4CopySignPerElem_V( VmathVector4 vec0, VmathVector4 vec1 )
-{
-    VmathVector4 result;
-    vmathV4CopySignPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathVector4 vmathV4MaxPerElem_V( VmathVector4 vec0, VmathVector4 vec1 )
-{
-    VmathVector4 result;
-    vmathV4MaxPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline float vmathV4MaxElem_V( VmathVector4 vec )
-{
-    return vmathV4MaxElem(&vec);
-}
-
-static inline VmathVector4 vmathV4MinPerElem_V( VmathVector4 vec0, VmathVector4 vec1 )
-{
-    VmathVector4 result;
-    vmathV4MinPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline float vmathV4MinElem_V( VmathVector4 vec )
-{
-    return vmathV4MinElem(&vec);
-}
-
-static inline float vmathV4Sum_V( VmathVector4 vec )
-{
-    return vmathV4Sum(&vec);
-}
-
-static inline float vmathV4Dot_V( VmathVector4 vec0, VmathVector4 vec1 )
-{
-    return vmathV4Dot(&vec0, &vec1);
-}
-
-static inline float vmathV4LengthSqr_V( VmathVector4 vec )
-{
-    return vmathV4LengthSqr(&vec);
-}
-
-static inline float vmathV4Length_V( VmathVector4 vec )
-{
-    return vmathV4Length(&vec);
-}
-
-static inline VmathVector4 vmathV4Normalize_V( VmathVector4 vec )
-{
-    VmathVector4 result;
-    vmathV4Normalize(&result, &vec);
-    return result;
-}
-
-static inline VmathVector4 vmathV4Select_V( VmathVector4 vec0, VmathVector4 vec1, unsigned int select1 )
-{
-    VmathVector4 result;
-    vmathV4Select(&result, &vec0, &vec1, select1);
-    return result;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathV4Print_V( VmathVector4 vec )
-{
-    vmathV4Print(&vec);
-}
-
-static inline void vmathV4Prints_V( VmathVector4 vec, const char *name )
-{
-    vmathV4Prints(&vec, name);
-}
-
-#endif
-
-static inline VmathPoint3 vmathP3MakeFromElems_V( float _x, float _y, float _z )
-{
-    VmathPoint3 result;
-    vmathP3MakeFromElems(&result, _x, _y, _z);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3MakeFromV3_V( VmathVector3 vec )
-{
-    VmathPoint3 result;
-    vmathP3MakeFromV3(&result, &vec);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3MakeFromScalar_V( float scalar )
-{
-    VmathPoint3 result;
-    vmathP3MakeFromScalar(&result, scalar);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3MakeFrom128_V( vec_float4 vf4 )
-{
-    VmathPoint3 result;
-    vmathP3MakeFrom128(&result, vf4);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3Lerp_V( float t, VmathPoint3 pnt0, VmathPoint3 pnt1 )
-{
-    VmathPoint3 result;
-    vmathP3Lerp(&result, t, &pnt0, &pnt1);
-    return result;
-}
-
-static inline vec_float4 vmathP3Get128_V( VmathPoint3 pnt )
-{
-    return vmathP3Get128(&pnt);
-}
-
-static inline void vmathP3StoreXYZ_V( VmathPoint3 pnt, vec_float4 *quad )
-{
-    vmathP3StoreXYZ(&pnt, quad);
-}
-
-static inline void vmathP3LoadXYZArray_V( VmathPoint3 *pnt0, VmathPoint3 *pnt1, VmathPoint3 *pnt2, VmathPoint3 *pnt3, const vec_float4 *threeQuads )
-{
-    vmathP3LoadXYZArray(pnt0, pnt1, pnt2, pnt3, threeQuads);
-}
-
-static inline void vmathP3StoreXYZArray_V( VmathPoint3 pnt0, VmathPoint3 pnt1, VmathPoint3 pnt2, VmathPoint3 pnt3, vec_float4 *threeQuads )
-{
-    vmathP3StoreXYZArray(&pnt0, &pnt1, &pnt2, &pnt3, threeQuads);
-}
-
-static inline void vmathP3StoreHalfFloats_V( VmathPoint3 pnt0, VmathPoint3 pnt1, VmathPoint3 pnt2, VmathPoint3 pnt3, VmathPoint3 pnt4, VmathPoint3 pnt5, VmathPoint3 pnt6, VmathPoint3 pnt7, vec_ushort8 *threeQuads )
-{
-    vmathP3StoreHalfFloats(&pnt0, &pnt1, &pnt2, &pnt3, &pnt4, &pnt5, &pnt6, &pnt7, threeQuads);
-}
-
-static inline void vmathP3SetX_V( VmathPoint3 *result, float _x )
-{
-    vmathP3SetX(result, _x);
-}
-
-static inline float vmathP3GetX_V( VmathPoint3 pnt )
-{
-    return vmathP3GetX(&pnt);
-}
-
-static inline void vmathP3SetY_V( VmathPoint3 *result, float _y )
-{
-    vmathP3SetY(result, _y);
-}
-
-static inline float vmathP3GetY_V( VmathPoint3 pnt )
-{
-    return vmathP3GetY(&pnt);
-}
-
-static inline void vmathP3SetZ_V( VmathPoint3 *result, float _z )
-{
-    vmathP3SetZ(result, _z);
-}
-
-static inline float vmathP3GetZ_V( VmathPoint3 pnt )
-{
-    return vmathP3GetZ(&pnt);
-}
-
-static inline void vmathP3SetElem_V( VmathPoint3 *result, int idx, float value )
-{
-    vmathP3SetElem(result, idx, value);
-}
-
-static inline float vmathP3GetElem_V( VmathPoint3 pnt, int idx )
-{
-    return vmathP3GetElem(&pnt, idx);
-}
-
-static inline VmathVector3 vmathP3Sub_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
-{
-    VmathVector3 result;
-    vmathP3Sub(&result, &pnt0, &pnt1);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3AddV3_V( VmathPoint3 pnt, VmathVector3 vec1 )
-{
-    VmathPoint3 result;
-    vmathP3AddV3(&result, &pnt, &vec1);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3SubV3_V( VmathPoint3 pnt, VmathVector3 vec1 )
-{
-    VmathPoint3 result;
-    vmathP3SubV3(&result, &pnt, &vec1);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3MulPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
-{
-    VmathPoint3 result;
-    vmathP3MulPerElem(&result, &pnt0, &pnt1);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3DivPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
-{
-    VmathPoint3 result;
-    vmathP3DivPerElem(&result, &pnt0, &pnt1);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3RecipPerElem_V( VmathPoint3 pnt )
-{
-    VmathPoint3 result;
-    vmathP3RecipPerElem(&result, &pnt);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3SqrtPerElem_V( VmathPoint3 pnt )
-{
-    VmathPoint3 result;
-    vmathP3SqrtPerElem(&result, &pnt);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3RsqrtPerElem_V( VmathPoint3 pnt )
-{
-    VmathPoint3 result;
-    vmathP3RsqrtPerElem(&result, &pnt);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3AbsPerElem_V( VmathPoint3 pnt )
-{
-    VmathPoint3 result;
-    vmathP3AbsPerElem(&result, &pnt);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3CopySignPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
-{
-    VmathPoint3 result;
-    vmathP3CopySignPerElem(&result, &pnt0, &pnt1);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3MaxPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
-{
-    VmathPoint3 result;
-    vmathP3MaxPerElem(&result, &pnt0, &pnt1);
-    return result;
-}
-
-static inline float vmathP3MaxElem_V( VmathPoint3 pnt )
-{
-    return vmathP3MaxElem(&pnt);
-}
-
-static inline VmathPoint3 vmathP3MinPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
-{
-    VmathPoint3 result;
-    vmathP3MinPerElem(&result, &pnt0, &pnt1);
-    return result;
-}
-
-static inline float vmathP3MinElem_V( VmathPoint3 pnt )
-{
-    return vmathP3MinElem(&pnt);
-}
-
-static inline float vmathP3Sum_V( VmathPoint3 pnt )
-{
-    return vmathP3Sum(&pnt);
-}
-
-static inline VmathPoint3 vmathP3Scale_V( VmathPoint3 pnt, float scaleVal )
-{
-    VmathPoint3 result;
-    vmathP3Scale(&result, &pnt, scaleVal);
-    return result;
-}
-
-static inline VmathPoint3 vmathP3NonUniformScale_V( VmathPoint3 pnt, VmathVector3 scaleVec )
-{
-    VmathPoint3 result;
-    vmathP3NonUniformScale(&result, &pnt, &scaleVec);
-    return result;
-}
-
-static inline float vmathP3Projection_V( VmathPoint3 pnt, VmathVector3 unitVec )
-{
-    return vmathP3Projection(&pnt, &unitVec);
-}
-
-static inline float vmathP3DistSqrFromOrigin_V( VmathPoint3 pnt )
-{
-    return vmathP3DistSqrFromOrigin(&pnt);
-}
-
-static inline float vmathP3DistFromOrigin_V( VmathPoint3 pnt )
-{
-    return vmathP3DistFromOrigin(&pnt);
-}
-
-static inline float vmathP3DistSqr_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
-{
-    return vmathP3DistSqr(&pnt0, &pnt1);
-}
-
-static inline float vmathP3Dist_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
-{
-    return vmathP3Dist(&pnt0, &pnt1);
-}
-
-static inline VmathPoint3 vmathP3Select_V( VmathPoint3 pnt0, VmathPoint3 pnt1, unsigned int select1 )
-{
-    VmathPoint3 result;
-    vmathP3Select(&result, &pnt0, &pnt1, select1);
-    return result;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathP3Print_V( VmathPoint3 pnt )
-{
-    vmathP3Print(&pnt);
-}
-
-static inline void vmathP3Prints_V( VmathPoint3 pnt, const char *name )
-{
-    vmathP3Prints(&pnt, name);
-}
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_VEC_AOS_V_C_H
+#define _VECTORMATH_VEC_AOS_V_C_H
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*-----------------------------------------------------------------------------
+ * Constants
+ * for shuffles, words are labeled [x,y,z,w] [a,b,c,d]
+ */
+#define _VECTORMATH_SHUF_X 0x00010203
+#define _VECTORMATH_SHUF_Y 0x04050607
+#define _VECTORMATH_SHUF_Z 0x08090a0b
+#define _VECTORMATH_SHUF_W 0x0c0d0e0f
+#define _VECTORMATH_SHUF_A 0x10111213
+#define _VECTORMATH_SHUF_B 0x14151617
+#define _VECTORMATH_SHUF_C 0x18191a1b
+#define _VECTORMATH_SHUF_D 0x1c1d1e1f
+#define _VECTORMATH_SHUF_0 0x80808080
+#define _VECTORMATH_SHUF_XYZA (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_A }
+#define _VECTORMATH_SHUF_ZXYW (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_W }
+#define _VECTORMATH_SHUF_YZXW (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_W }
+#define _VECTORMATH_SHUF_WABC (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_C }
+#define _VECTORMATH_SHUF_ZWAB (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B }
+/* (a second, token-identical #define of _VECTORMATH_SHUF_XYZA used to sit here; the definition at the head of this list is the one in effect) */
+#define _VECTORMATH_SHUF_YZAB (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B }
+#define _VECTORMATH_SHUF_ZABC (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_C }
+#define _VECTORMATH_UNIT_1000 (vec_float4){ 1.0f, 0.0f, 0.0f, 0.0f }
+#define _VECTORMATH_UNIT_0100 (vec_float4){ 0.0f, 1.0f, 0.0f, 0.0f }
+#define _VECTORMATH_UNIT_0010 (vec_float4){ 0.0f, 0.0f, 1.0f, 0.0f }
+#define _VECTORMATH_UNIT_0001 (vec_float4){ 0.0f, 0.0f, 0.0f, 1.0f }
+#define _VECTORMATH_SLERP_TOL 0.999f
+
+/*-----------------------------------------------------------------------------
+ * Definitions
+ */
+#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
+#define _VECTORMATH_INTERNAL_FUNCTIONS
+
+#endif
+
+static inline VmathVector3 vmathV3MakeFromElems_V( float _x, float _y, float _z )
+{   /* By-value form: has vmathV3MakeFromElems fill a local from ( _x, _y, _z ) and returns that local. */
+    VmathVector3 result;
+    vmathV3MakeFromElems(&result, _x, _y, _z);
+    return result;
+}
+
+static inline VmathVector3 vmathV3MakeFromP3_V( VmathPoint3 pnt )
+{   /* By-value form: passes the address of the pnt copy to vmathV3MakeFromP3 and returns the filled local. */
+    VmathVector3 result;
+    vmathV3MakeFromP3(&result, &pnt);
+    return result;
+}
+
+static inline VmathVector3 vmathV3MakeFromScalar_V( float scalar )
+{   /* By-value form: has vmathV3MakeFromScalar fill a local from scalar and returns that local. */
+    VmathVector3 result;
+    vmathV3MakeFromScalar(&result, scalar);
+    return result;
+}
+
+static inline VmathVector3 vmathV3MakeFrom128_V( vec_float4 vf4 )
+{   /* By-value form: has vmathV3MakeFrom128 fill a local from the quadword vf4 and returns that local. */
+    VmathVector3 result;
+    vmathV3MakeFrom128(&result, vf4);
+    return result;
+}
+
+static inline VmathVector3 vmathV3MakeXAxis_V( void )
+{   /* By-value form: returns the local filled by vmathV3MakeXAxis. Declared ( void ) so C sees a real prototype. */
+    VmathVector3 result;
+    vmathV3MakeXAxis(&result);
+    return result;
+}
+
+static inline VmathVector3 vmathV3MakeYAxis_V( void )
+{   /* By-value form: returns the local filled by vmathV3MakeYAxis. Declared ( void ) so C sees a real prototype. */
+    VmathVector3 result;
+    vmathV3MakeYAxis(&result);
+    return result;
+}
+
+static inline VmathVector3 vmathV3MakeZAxis_V( void )
+{   /* By-value form: returns the local filled by vmathV3MakeZAxis. Declared ( void ) so C sees a real prototype. */
+    VmathVector3 result;
+    vmathV3MakeZAxis(&result);
+    return result;
+}
+
+static inline VmathVector3 vmathV3Lerp_V( float t, VmathVector3 vec0, VmathVector3 vec1 )
+{   /* By-value form: forwards t and the addresses of the vec0/vec1 copies to vmathV3Lerp, returns the filled local. */
+    VmathVector3 result;
+    vmathV3Lerp(&result, t, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathVector3 vmathV3Slerp_V( float t, VmathVector3 unitVec0, VmathVector3 unitVec1 )
+{   /* By-value form: forwards t and the addresses of the unitVec0/unitVec1 copies to vmathV3Slerp, returns the filled local. */
+    VmathVector3 result;
+    vmathV3Slerp(&result, t, &unitVec0, &unitVec1);
+    return result;
+}
+
+static inline vec_float4 vmathV3Get128_V( VmathVector3 vec )
+{   /* By-value form: returns whatever vmathV3Get128 yields for the local copy of vec. */
+    return vmathV3Get128(&vec);
+}
+
+static inline void vmathV3StoreXYZ_V( VmathVector3 vec, vec_float4 *quad )
+{   /* By-value form: passes the address of the vec copy and the caller's quad pointer to vmathV3StoreXYZ. */
+    vmathV3StoreXYZ(&vec, quad);
+}
+
+static inline void vmathV3LoadXYZArray_V( VmathVector3 *vec0, VmathVector3 *vec1, VmathVector3 *vec2, VmathVector3 *vec3, const vec_float4 *threeQuads )
+{   /* Straight pass-through to vmathV3LoadXYZArray: every argument is already a pointer and is forwarded unchanged. */
+    vmathV3LoadXYZArray(vec0, vec1, vec2, vec3, threeQuads);
+}
+
+static inline void vmathV3StoreXYZArray_V( VmathVector3 vec0, VmathVector3 vec1, VmathVector3 vec2, VmathVector3 vec3, vec_float4 *threeQuads )
+{   /* By-value form: passes the addresses of the four vector copies plus threeQuads to vmathV3StoreXYZArray. */
+    vmathV3StoreXYZArray(&vec0, &vec1, &vec2, &vec3, threeQuads);
+}
+
+static inline void vmathV3StoreHalfFloats_V( VmathVector3 vec0, VmathVector3 vec1, VmathVector3 vec2, VmathVector3 vec3, VmathVector3 vec4, VmathVector3 vec5, VmathVector3 vec6, VmathVector3 vec7, vec_ushort8 *threeQuads )
+{   /* By-value form: passes the addresses of the eight vector copies plus threeQuads to vmathV3StoreHalfFloats. */
+    vmathV3StoreHalfFloats(&vec0, &vec1, &vec2, &vec3, &vec4, &vec5, &vec6, &vec7, threeQuads);
+}
+
+static inline void vmathV3SetX_V( VmathVector3 *result, float _x )
+{   /* Straight pass-through to vmathV3SetX; result is modified through the caller's pointer. */
+    vmathV3SetX(result, _x);
+}
+
+static inline float vmathV3GetX_V( VmathVector3 vec )
+{   /* By-value form: returns vmathV3GetX applied to the local copy of vec. */
+    return vmathV3GetX(&vec);
+}
+
+static inline void vmathV3SetY_V( VmathVector3 *result, float _y )
+{
+    vmathV3SetY(result, _y);
+}
+
+static inline float vmathV3GetY_V( VmathVector3 vec )
+{
+    return vmathV3GetY(&vec);
+}
+
+static inline void vmathV3SetZ_V( VmathVector3 *result, float _z )
+{
+    vmathV3SetZ(result, _z);
+}
+
+static inline float vmathV3GetZ_V( VmathVector3 vec )
+{
+    return vmathV3GetZ(&vec);
+}
+
+static inline void vmathV3SetElem_V( VmathVector3 *result, int idx, float value )
+{
+    vmathV3SetElem(result, idx, value);
+}
+
+static inline float vmathV3GetElem_V( VmathVector3 vec, int idx )
+{
+    return vmathV3GetElem(&vec, idx);
+}
+
+static inline VmathVector3 vmathV3Add_V( VmathVector3 vec0, VmathVector3 vec1 )
+{
+    VmathVector3 result;
+    vmathV3Add(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathVector3 vmathV3Sub_V( VmathVector3 vec0, VmathVector3 vec1 )
+{
+    VmathVector3 result;
+    vmathV3Sub(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathPoint3 vmathV3AddP3_V( VmathVector3 vec, VmathPoint3 pnt1 )
+{
+    VmathPoint3 result;
+    vmathV3AddP3(&result, &vec, &pnt1);
+    return result;
+}
+
+static inline VmathVector3 vmathV3ScalarMul_V( VmathVector3 vec, float scalar )
+{
+    VmathVector3 result;
+    vmathV3ScalarMul(&result, &vec, scalar);
+    return result;
+}
+
+static inline VmathVector3 vmathV3ScalarDiv_V( VmathVector3 vec, float scalar )
+{
+    VmathVector3 result;
+    vmathV3ScalarDiv(&result, &vec, scalar);
+    return result;
+}
+
+static inline VmathVector3 vmathV3Neg_V( VmathVector3 vec )
+{
+    VmathVector3 result;
+    vmathV3Neg(&result, &vec);
+    return result;
+}
+
+static inline VmathVector3 vmathV3MulPerElem_V( VmathVector3 vec0, VmathVector3 vec1 )
+{
+    VmathVector3 result;
+    vmathV3MulPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathVector3 vmathV3DivPerElem_V( VmathVector3 vec0, VmathVector3 vec1 )
+{
+    VmathVector3 result;
+    vmathV3DivPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathVector3 vmathV3RecipPerElem_V( VmathVector3 vec )
+{
+    VmathVector3 result;
+    vmathV3RecipPerElem(&result, &vec);
+    return result;
+}
+
+static inline VmathVector3 vmathV3SqrtPerElem_V( VmathVector3 vec )
+{
+    VmathVector3 result;
+    vmathV3SqrtPerElem(&result, &vec);
+    return result;
+}
+
+static inline VmathVector3 vmathV3RsqrtPerElem_V( VmathVector3 vec )
+{
+    VmathVector3 result;
+    vmathV3RsqrtPerElem(&result, &vec);
+    return result;
+}
+
+static inline VmathVector3 vmathV3AbsPerElem_V( VmathVector3 vec )
+{
+    VmathVector3 result;
+    vmathV3AbsPerElem(&result, &vec);
+    return result;
+}
+
+static inline VmathVector3 vmathV3CopySignPerElem_V( VmathVector3 vec0, VmathVector3 vec1 )
+{
+    VmathVector3 result;
+    vmathV3CopySignPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathVector3 vmathV3MaxPerElem_V( VmathVector3 vec0, VmathVector3 vec1 )
+{
+    VmathVector3 result;
+    vmathV3MaxPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline float vmathV3MaxElem_V( VmathVector3 vec )
+{
+    return vmathV3MaxElem(&vec);
+}
+
+static inline VmathVector3 vmathV3MinPerElem_V( VmathVector3 vec0, VmathVector3 vec1 )
+{
+    VmathVector3 result;
+    vmathV3MinPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline float vmathV3MinElem_V( VmathVector3 vec )
+{
+    return vmathV3MinElem(&vec);
+}
+
+static inline float vmathV3Sum_V( VmathVector3 vec )
+{
+    return vmathV3Sum(&vec);
+}
+
+static inline float vmathV3Dot_V( VmathVector3 vec0, VmathVector3 vec1 )
+{
+    return vmathV3Dot(&vec0, &vec1);
+}
+
+static inline float vmathV3LengthSqr_V( VmathVector3 vec )
+{
+    return vmathV3LengthSqr(&vec);
+}
+
+static inline float vmathV3Length_V( VmathVector3 vec )
+{
+    return vmathV3Length(&vec);
+}
+
+static inline VmathVector3 vmathV3Normalize_V( VmathVector3 vec )
+{
+    VmathVector3 result;
+    vmathV3Normalize(&result, &vec);
+    return result;
+}
+
+static inline VmathVector3 vmathV3Cross_V( VmathVector3 vec0, VmathVector3 vec1 )
+{
+    VmathVector3 result;
+    vmathV3Cross(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathVector3 vmathV3Select_V( VmathVector3 vec0, VmathVector3 vec1, unsigned int select1 )
+{
+    VmathVector3 result;
+    vmathV3Select(&result, &vec0, &vec1, select1);
+    return result;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathV3Print_V( VmathVector3 vec )
+{
+    vmathV3Print(&vec);
+}
+
+static inline void vmathV3Prints_V( VmathVector3 vec, const char *name )
+{
+    vmathV3Prints(&vec, name);
+}
+
+#endif
+
+static inline VmathVector4 vmathV4MakeFromElems_V( float _x, float _y, float _z, float _w )
+{
+    VmathVector4 result;
+    vmathV4MakeFromElems(&result, _x, _y, _z, _w);
+    return result;
+}
+
+static inline VmathVector4 vmathV4MakeFromV3Scalar_V( VmathVector3 xyz, float _w )
+{
+    VmathVector4 result;
+    vmathV4MakeFromV3Scalar(&result, &xyz, _w);
+    return result;
+}
+
+static inline VmathVector4 vmathV4MakeFromV3_V( VmathVector3 vec )
+{
+    VmathVector4 result;
+    vmathV4MakeFromV3(&result, &vec);
+    return result;
+}
+
+static inline VmathVector4 vmathV4MakeFromP3_V( VmathPoint3 pnt )
+{
+    VmathVector4 result;
+    vmathV4MakeFromP3(&result, &pnt);
+    return result;
+}
+
+static inline VmathVector4 vmathV4MakeFromQ_V( VmathQuat quat )
+{
+    VmathVector4 result;
+    vmathV4MakeFromQ(&result, &quat);
+    return result;
+}
+
+static inline VmathVector4 vmathV4MakeFromScalar_V( float scalar )
+{
+    VmathVector4 result;
+    vmathV4MakeFromScalar(&result, scalar);
+    return result;
+}
+
+static inline VmathVector4 vmathV4MakeFrom128_V( vec_float4 vf4 )
+{
+    VmathVector4 result;
+    vmathV4MakeFrom128(&result, vf4);
+    return result;
+}
+
+static inline VmathVector4 vmathV4MakeXAxis_V( )
+{
+    VmathVector4 result;
+    vmathV4MakeXAxis(&result);
+    return result;
+}
+
+static inline VmathVector4 vmathV4MakeYAxis_V( )
+{
+    VmathVector4 result;
+    vmathV4MakeYAxis(&result);
+    return result;
+}
+
+static inline VmathVector4 vmathV4MakeZAxis_V( )
+{
+    VmathVector4 result;
+    vmathV4MakeZAxis(&result);
+    return result;
+}
+
+static inline VmathVector4 vmathV4MakeWAxis_V( )
+{
+    VmathVector4 result;
+    vmathV4MakeWAxis(&result);
+    return result;
+}
+
+static inline VmathVector4 vmathV4Lerp_V( float t, VmathVector4 vec0, VmathVector4 vec1 )
+{
+    VmathVector4 result;
+    vmathV4Lerp(&result, t, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathVector4 vmathV4Slerp_V( float t, VmathVector4 unitVec0, VmathVector4 unitVec1 )
+{
+    VmathVector4 result;
+    vmathV4Slerp(&result, t, &unitVec0, &unitVec1);
+    return result;
+}
+
+static inline vec_float4 vmathV4Get128_V( VmathVector4 vec )
+{
+    return vmathV4Get128(&vec);
+}
+
+static inline void vmathV4StoreHalfFloats_V( VmathVector4 vec0, VmathVector4 vec1, VmathVector4 vec2, VmathVector4 vec3, vec_ushort8 *twoQuads )
+{
+    vmathV4StoreHalfFloats(&vec0, &vec1, &vec2, &vec3, twoQuads);
+}
+
+static inline void vmathV4SetXYZ_V( VmathVector4 *result, VmathVector3 vec )
+{
+    vmathV4SetXYZ(result, &vec);
+}
+
+static inline VmathVector3 vmathV4GetXYZ_V( VmathVector4 vec )
+{
+    VmathVector3 result;
+    vmathV4GetXYZ(&result, &vec);
+    return result;
+}
+
+static inline void vmathV4SetX_V( VmathVector4 *result, float _x )
+{
+    vmathV4SetX(result, _x);
+}
+
+static inline float vmathV4GetX_V( VmathVector4 vec )
+{
+    return vmathV4GetX(&vec);
+}
+
+static inline void vmathV4SetY_V( VmathVector4 *result, float _y )
+{
+    vmathV4SetY(result, _y);
+}
+
+static inline float vmathV4GetY_V( VmathVector4 vec )
+{
+    return vmathV4GetY(&vec);
+}
+
+static inline void vmathV4SetZ_V( VmathVector4 *result, float _z )
+{
+    vmathV4SetZ(result, _z);
+}
+
+static inline float vmathV4GetZ_V( VmathVector4 vec )
+{
+    return vmathV4GetZ(&vec);
+}
+
+static inline void vmathV4SetW_V( VmathVector4 *result, float _w )
+{
+    vmathV4SetW(result, _w);
+}
+
+static inline float vmathV4GetW_V( VmathVector4 vec )
+{
+    return vmathV4GetW(&vec);
+}
+
+static inline void vmathV4SetElem_V( VmathVector4 *result, int idx, float value )
+{
+    vmathV4SetElem(result, idx, value);
+}
+
+static inline float vmathV4GetElem_V( VmathVector4 vec, int idx )
+{
+    return vmathV4GetElem(&vec, idx);
+}
+
+static inline VmathVector4 vmathV4Add_V( VmathVector4 vec0, VmathVector4 vec1 )
+{
+    VmathVector4 result;
+    vmathV4Add(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathVector4 vmathV4Sub_V( VmathVector4 vec0, VmathVector4 vec1 )
+{
+    VmathVector4 result;
+    vmathV4Sub(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathVector4 vmathV4ScalarMul_V( VmathVector4 vec, float scalar )
+{
+    VmathVector4 result;
+    vmathV4ScalarMul(&result, &vec, scalar);
+    return result;
+}
+
+static inline VmathVector4 vmathV4ScalarDiv_V( VmathVector4 vec, float scalar )
+{
+    VmathVector4 result;
+    vmathV4ScalarDiv(&result, &vec, scalar);
+    return result;
+}
+
+static inline VmathVector4 vmathV4Neg_V( VmathVector4 vec )
+{
+    VmathVector4 result;
+    vmathV4Neg(&result, &vec);
+    return result;
+}
+
+static inline VmathVector4 vmathV4MulPerElem_V( VmathVector4 vec0, VmathVector4 vec1 )
+{
+    VmathVector4 result;
+    vmathV4MulPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathVector4 vmathV4DivPerElem_V( VmathVector4 vec0, VmathVector4 vec1 )
+{
+    VmathVector4 result;
+    vmathV4DivPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathVector4 vmathV4RecipPerElem_V( VmathVector4 vec )
+{
+    VmathVector4 result;
+    vmathV4RecipPerElem(&result, &vec);
+    return result;
+}
+
+static inline VmathVector4 vmathV4SqrtPerElem_V( VmathVector4 vec )
+{
+    VmathVector4 result;
+    vmathV4SqrtPerElem(&result, &vec);
+    return result;
+}
+
+static inline VmathVector4 vmathV4RsqrtPerElem_V( VmathVector4 vec )
+{
+    VmathVector4 result;
+    vmathV4RsqrtPerElem(&result, &vec);
+    return result;
+}
+
+static inline VmathVector4 vmathV4AbsPerElem_V( VmathVector4 vec )
+{
+    VmathVector4 result;
+    vmathV4AbsPerElem(&result, &vec);
+    return result;
+}
+
+static inline VmathVector4 vmathV4CopySignPerElem_V( VmathVector4 vec0, VmathVector4 vec1 )
+{
+    VmathVector4 result;
+    vmathV4CopySignPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathVector4 vmathV4MaxPerElem_V( VmathVector4 vec0, VmathVector4 vec1 )
+{
+    VmathVector4 result;
+    vmathV4MaxPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline float vmathV4MaxElem_V( VmathVector4 vec )
+{
+    return vmathV4MaxElem(&vec);
+}
+
+static inline VmathVector4 vmathV4MinPerElem_V( VmathVector4 vec0, VmathVector4 vec1 )
+{
+    VmathVector4 result;
+    vmathV4MinPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline float vmathV4MinElem_V( VmathVector4 vec )
+{
+    return vmathV4MinElem(&vec);
+}
+
+static inline float vmathV4Sum_V( VmathVector4 vec )
+{
+    return vmathV4Sum(&vec);
+}
+
+static inline float vmathV4Dot_V( VmathVector4 vec0, VmathVector4 vec1 )
+{
+    return vmathV4Dot(&vec0, &vec1);
+}
+
+static inline float vmathV4LengthSqr_V( VmathVector4 vec )
+{
+    return vmathV4LengthSqr(&vec);
+}
+
+static inline float vmathV4Length_V( VmathVector4 vec )
+{
+    return vmathV4Length(&vec);
+}
+
+static inline VmathVector4 vmathV4Normalize_V( VmathVector4 vec )
+{
+    VmathVector4 result;
+    vmathV4Normalize(&result, &vec);
+    return result;
+}
+
+static inline VmathVector4 vmathV4Select_V( VmathVector4 vec0, VmathVector4 vec1, unsigned int select1 )
+{
+    VmathVector4 result;
+    vmathV4Select(&result, &vec0, &vec1, select1);
+    return result;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathV4Print_V( VmathVector4 vec )
+{
+    vmathV4Print(&vec);
+}
+
+static inline void vmathV4Prints_V( VmathVector4 vec, const char *name )
+{
+    vmathV4Prints(&vec, name);
+}
+
+#endif
+
+static inline VmathPoint3 vmathP3MakeFromElems_V( float _x, float _y, float _z )
+{
+    VmathPoint3 result;
+    vmathP3MakeFromElems(&result, _x, _y, _z);
+    return result;
+}
+
+static inline VmathPoint3 vmathP3MakeFromV3_V( VmathVector3 vec )
+{
+    VmathPoint3 result;
+    vmathP3MakeFromV3(&result, &vec);
+    return result;
+}
+
+static inline VmathPoint3 vmathP3MakeFromScalar_V( float scalar )
+{
+    VmathPoint3 result;
+    vmathP3MakeFromScalar(&result, scalar);
+    return result;
+}
+
+static inline VmathPoint3 vmathP3MakeFrom128_V( vec_float4 vf4 )
+{
+    VmathPoint3 result;
+    vmathP3MakeFrom128(&result, vf4);
+    return result;
+}
+
+static inline VmathPoint3 vmathP3Lerp_V( float t, VmathPoint3 pnt0, VmathPoint3 pnt1 )
+{
+    VmathPoint3 result;
+    vmathP3Lerp(&result, t, &pnt0, &pnt1);
+    return result;
+}
+
+static inline vec_float4 vmathP3Get128_V( VmathPoint3 pnt )
+{
+    return vmathP3Get128(&pnt);
+}
+
+static inline void vmathP3StoreXYZ_V( VmathPoint3 pnt, vec_float4 *quad )
+{
+    vmathP3StoreXYZ(&pnt, quad);
+}
+
+static inline void vmathP3LoadXYZArray_V( VmathPoint3 *pnt0, VmathPoint3 *pnt1, VmathPoint3 *pnt2, VmathPoint3 *pnt3, const vec_float4 *threeQuads )
+{
+    vmathP3LoadXYZArray(pnt0, pnt1, pnt2, pnt3, threeQuads);
+}
+
+static inline void vmathP3StoreXYZArray_V( VmathPoint3 pnt0, VmathPoint3 pnt1, VmathPoint3 pnt2, VmathPoint3 pnt3, vec_float4 *threeQuads )
+{
+    vmathP3StoreXYZArray(&pnt0, &pnt1, &pnt2, &pnt3, threeQuads);
+}
+
+static inline void vmathP3StoreHalfFloats_V( VmathPoint3 pnt0, VmathPoint3 pnt1, VmathPoint3 pnt2, VmathPoint3 pnt3, VmathPoint3 pnt4, VmathPoint3 pnt5, VmathPoint3 pnt6, VmathPoint3 pnt7, vec_ushort8 *threeQuads )
+{
+    vmathP3StoreHalfFloats(&pnt0, &pnt1, &pnt2, &pnt3, &pnt4, &pnt5, &pnt6, &pnt7, threeQuads);
+}
+
+static inline void vmathP3SetX_V( VmathPoint3 *result, float _x )
+{
+    vmathP3SetX(result, _x);
+}
+
+static inline float vmathP3GetX_V( VmathPoint3 pnt )
+{
+    return vmathP3GetX(&pnt);
+}
+
+static inline void vmathP3SetY_V( VmathPoint3 *result, float _y )
+{
+    vmathP3SetY(result, _y);
+}
+
+static inline float vmathP3GetY_V( VmathPoint3 pnt )
+{
+    return vmathP3GetY(&pnt);
+}
+
+static inline void vmathP3SetZ_V( VmathPoint3 *result, float _z )
+{
+    vmathP3SetZ(result, _z);
+}
+
+static inline float vmathP3GetZ_V( VmathPoint3 pnt )
+{
+    return vmathP3GetZ(&pnt);
+}
+
+static inline void vmathP3SetElem_V( VmathPoint3 *result, int idx, float value )
+{
+    vmathP3SetElem(result, idx, value);
+}
+
+static inline float vmathP3GetElem_V( VmathPoint3 pnt, int idx )
+{
+    return vmathP3GetElem(&pnt, idx);
+}
+
+static inline VmathVector3 vmathP3Sub_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
+{
+    VmathVector3 result;
+    vmathP3Sub(&result, &pnt0, &pnt1);
+    return result;
+}
+
+static inline VmathPoint3 vmathP3AddV3_V( VmathPoint3 pnt, VmathVector3 vec1 )
+{
+    VmathPoint3 result;
+    vmathP3AddV3(&result, &pnt, &vec1);
+    return result;
+}
+
+static inline VmathPoint3 vmathP3SubV3_V( VmathPoint3 pnt, VmathVector3 vec1 )
+{
+    VmathPoint3 result;
+    vmathP3SubV3(&result, &pnt, &vec1);
+    return result;
+}
+
+static inline VmathPoint3 vmathP3MulPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
+{
+    VmathPoint3 result;
+    vmathP3MulPerElem(&result, &pnt0, &pnt1);
+    return result;
+}
+
+static inline VmathPoint3 vmathP3DivPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
+{
+    VmathPoint3 result;
+    vmathP3DivPerElem(&result, &pnt0, &pnt1);
+    return result;
+}
+
+static inline VmathPoint3 vmathP3RecipPerElem_V( VmathPoint3 pnt )
+{
+    VmathPoint3 result;
+    vmathP3RecipPerElem(&result, &pnt);
+    return result;
+}
+
+static inline VmathPoint3 vmathP3SqrtPerElem_V( VmathPoint3 pnt )
+{
+    VmathPoint3 result;
+    vmathP3SqrtPerElem(&result, &pnt);
+    return result;
+}
+
+static inline VmathPoint3 vmathP3RsqrtPerElem_V( VmathPoint3 pnt )
+{
+    VmathPoint3 result;
+    vmathP3RsqrtPerElem(&result, &pnt);
+    return result;
+}
+
+static inline VmathPoint3 vmathP3AbsPerElem_V( VmathPoint3 pnt )
+{
+    VmathPoint3 result;
+    vmathP3AbsPerElem(&result, &pnt);
+    return result;
+}
+
+static inline VmathPoint3 vmathP3CopySignPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
+{
+    VmathPoint3 result;
+    vmathP3CopySignPerElem(&result, &pnt0, &pnt1);
+    return result;
+}
+
+static inline VmathPoint3 vmathP3MaxPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
+{
+    VmathPoint3 result;
+    vmathP3MaxPerElem(&result, &pnt0, &pnt1);
+    return result;
+}
+
+static inline float vmathP3MaxElem_V( VmathPoint3 pnt )
+{
+    return vmathP3MaxElem(&pnt);
+}
+
+static inline VmathPoint3 vmathP3MinPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
+{
+    VmathPoint3 result;
+    vmathP3MinPerElem(&result, &pnt0, &pnt1);
+    return result;
+}
+
+static inline float vmathP3MinElem_V( VmathPoint3 pnt )
+{
+    return vmathP3MinElem(&pnt);
+}
+
+static inline float vmathP3Sum_V( VmathPoint3 pnt )
+{
+    return vmathP3Sum(&pnt);
+}
+
+static inline VmathPoint3 vmathP3Scale_V( VmathPoint3 pnt, float scaleVal )
+{
+    VmathPoint3 result;
+    vmathP3Scale(&result, &pnt, scaleVal);
+    return result;
+}
+
+static inline VmathPoint3 vmathP3NonUniformScale_V( VmathPoint3 pnt, VmathVector3 scaleVec )
+{
+    VmathPoint3 result;
+    vmathP3NonUniformScale(&result, &pnt, &scaleVec);
+    return result;
+}
+
+static inline float vmathP3Projection_V( VmathPoint3 pnt, VmathVector3 unitVec )
+{
+    return vmathP3Projection(&pnt, &unitVec);
+}
+
+static inline float vmathP3DistSqrFromOrigin_V( VmathPoint3 pnt )
+{
+    return vmathP3DistSqrFromOrigin(&pnt);
+}
+
+static inline float vmathP3DistFromOrigin_V( VmathPoint3 pnt )
+{
+    return vmathP3DistFromOrigin(&pnt);
+}
+
+static inline float vmathP3DistSqr_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
+{
+    return vmathP3DistSqr(&pnt0, &pnt1);
+}
+
+static inline float vmathP3Dist_V( VmathPoint3 pnt0, VmathPoint3 pnt1 )
+{
+    return vmathP3Dist(&pnt0, &pnt1);
+}
+
+static inline VmathPoint3 vmathP3Select_V( VmathPoint3 pnt0, VmathPoint3 pnt1, unsigned int select1 )
+{
+    VmathPoint3 result;
+    vmathP3Select(&result, &pnt0, &pnt1, select1);
+    return result;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathP3Print_V( VmathPoint3 pnt )
+{
+    vmathP3Print(&pnt);
+}
+
+static inline void vmathP3Prints_V( VmathPoint3 pnt, const char *name )
+{
+    vmathP3Prints(&pnt, name);
+}
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/spu/c/vec_soa.h b/Extras/vectormathlibrary/include/vectormath/spu/c/vec_soa.h
index 608b38562..fde4b0015 100644
--- a/Extras/vectormathlibrary/include/vectormath/spu/c/vec_soa.h
+++ b/Extras/vectormathlibrary/include/vectormath/spu/c/vec_soa.h
@@ -1,1237 +1,1237 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_VEC_SOA_C_H
-#define _VECTORMATH_VEC_SOA_C_H
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/*-----------------------------------------------------------------------------
- * Constants
- * for shuffles, words are labeled [x,y,z,w] [a,b,c,d]
- */
-#define _VECTORMATH_SHUF_X 0x00010203
-#define _VECTORMATH_SHUF_Y 0x04050607
-#define _VECTORMATH_SHUF_Z 0x08090a0b
-#define _VECTORMATH_SHUF_W 0x0c0d0e0f
-#define _VECTORMATH_SHUF_A 0x10111213
-#define _VECTORMATH_SHUF_B 0x14151617
-#define _VECTORMATH_SHUF_C 0x18191a1b
-#define _VECTORMATH_SHUF_D 0x1c1d1e1f
-#define _VECTORMATH_SHUF_0 0x80808080
-#define _VECTORMATH_SHUF_XAYB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_B })
-#define _VECTORMATH_SHUF_ZCWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_D })
-#define _VECTORMATH_SHUF_ZBW0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_0 })
-#define _VECTORMATH_SHUF_XCY0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_0 })
-#define _VECTORMATH_SHUF_ZDW0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_D, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_0 })
-#define _VECTORMATH_SHUF_XAZC ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_C })
-#define _VECTORMATH_SHUF_ZDXB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_D, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_B })
-#define _VECTORMATH_SHUF_YBWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_D })
-#define _VECTORMATH_SHUF_XDZB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_D, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_B })
-#define _VECTORMATH_SHUF_YAWC ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_C })
-#define _VECTORMATH_SHUF_ZBXD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_D })
-#define _VECTORMATH_SHUF_XYCD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_D })
-#define _VECTORMATH_SLERP_TOL 0.999f
-
-/*-----------------------------------------------------------------------------
- * Definitions
- */
-#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
-#define _VECTORMATH_INTERNAL_FUNCTIONS
-
-#endif
-
-static inline void vmathSoaV3Copy( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
-{
-    result->x = vec->x;
-    result->y = vec->y;
-    result->z = vec->z;
-}
-
-static inline void vmathSoaV3MakeFromElems( VmathSoaVector3 *result, vec_float4 _x, vec_float4 _y, vec_float4 _z )
-{
-    result->x = _x;
-    result->y = _y;
-    result->z = _z;
-}
-
-static inline void vmathSoaV3MakeFromP3( VmathSoaVector3 *result, const VmathSoaPoint3 *pnt )
-{
-    result->x = pnt->x;
-    result->y = pnt->y;
-    result->z = pnt->z;
-}
-
-static inline void vmathSoaV3MakeFromScalar( VmathSoaVector3 *result, vec_float4 scalar )
-{
-    result->x = scalar;
-    result->y = scalar;
-    result->z = scalar;
-}
-
-static inline void vmathSoaV3MakeFromAos( VmathSoaVector3 *result, const VmathVector3 *vec )
-{
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
-    vec_float4 vec128 = vec->vec128;
-    result->x = spu_shuffle( vec128, vec128, shuffle_xxxx );
-    result->y = spu_shuffle( vec128, vec128, shuffle_yyyy );
-    result->z = spu_shuffle( vec128, vec128, shuffle_zzzz );
-}
-
-static inline void vmathSoaV3MakeFrom4Aos( VmathSoaVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1, const VmathVector3 *vec2, const VmathVector3 *vec3 )
-{
-    vec_float4 tmp0, tmp1, tmp2, tmp3;
-    tmp0 = spu_shuffle( vec0->vec128, vec2->vec128, _VECTORMATH_SHUF_XAYB );
-    tmp1 = spu_shuffle( vec1->vec128, vec3->vec128, _VECTORMATH_SHUF_XAYB );
-    tmp2 = spu_shuffle( vec0->vec128, vec2->vec128, _VECTORMATH_SHUF_ZCWD );
-    tmp3 = spu_shuffle( vec1->vec128, vec3->vec128, _VECTORMATH_SHUF_ZCWD );
-    result->x = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_XAYB );
-    result->y = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_ZCWD );
-    result->z = spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_XAYB );
-}
-
-static inline void vmathSoaV3MakeXAxis( VmathSoaVector3 *result )
-{
-    vmathSoaV3MakeFromElems( result, spu_splats(1.0f), spu_splats(0.0f), spu_splats(0.0f) );
-}
-
-static inline void vmathSoaV3MakeYAxis( VmathSoaVector3 *result )
-{
-    vmathSoaV3MakeFromElems( result, spu_splats(0.0f), spu_splats(1.0f), spu_splats(0.0f) );
-}
-
-static inline void vmathSoaV3MakeZAxis( VmathSoaVector3 *result )
-{
-    vmathSoaV3MakeFromElems( result, spu_splats(0.0f), spu_splats(0.0f), spu_splats(1.0f) );
-}
-
-static inline void vmathSoaV3Lerp( VmathSoaVector3 *result, vec_float4 t, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
-{
-    VmathSoaVector3 tmpV3_0, tmpV3_1;
-    vmathSoaV3Sub( &tmpV3_0, vec1, vec0 );
-    vmathSoaV3ScalarMul( &tmpV3_1, &tmpV3_0, t );
-    vmathSoaV3Add( result, vec0, &tmpV3_1 );
-}
-
-static inline void vmathSoaV3Slerp( VmathSoaVector3 *result, vec_float4 t, const VmathSoaVector3 *unitVec0, const VmathSoaVector3 *unitVec1 )
-{
-    VmathSoaVector3 tmpV3_0, tmpV3_1;
-    vec_float4 recipSinAngle, scale0, scale1, cosAngle, angle;
-    vec_uint4 selectMask;
-    cosAngle = vmathSoaV3Dot( unitVec0, unitVec1 );
-    selectMask = (vec_uint4)spu_cmpgt( spu_splats(_VECTORMATH_SLERP_TOL), cosAngle );
-    angle = acosf4( cosAngle );
-    recipSinAngle = recipf4( sinf4( angle ) );
-    scale0 = spu_sel( spu_sub( spu_splats(1.0f), t ), spu_mul( sinf4( spu_mul( spu_sub( spu_splats(1.0f), t ), angle ) ), recipSinAngle ), selectMask );
-    scale1 = spu_sel( t, spu_mul( sinf4( spu_mul( t, angle ) ), recipSinAngle ), selectMask );
-    vmathSoaV3ScalarMul( &tmpV3_0, unitVec0, scale0 );
-    vmathSoaV3ScalarMul( &tmpV3_1, unitVec1, scale1 );
-    vmathSoaV3Add( result, &tmpV3_0, &tmpV3_1 );
-}
-
-static inline void vmathSoaV3Get4Aos( const VmathSoaVector3 *vec, VmathVector3 *result0, VmathVector3 *result1, VmathVector3 *result2, VmathVector3 *result3 )
-{
-    vec_float4 tmp0, tmp1;
-    tmp0 = spu_shuffle( vec->x, vec->z, _VECTORMATH_SHUF_XAYB );
-    tmp1 = spu_shuffle( vec->x, vec->z, _VECTORMATH_SHUF_ZCWD );
-    vmathV3MakeFrom128( result0, spu_shuffle( tmp0, vec->y, _VECTORMATH_SHUF_XAYB ) );
-    vmathV3MakeFrom128( result1, spu_shuffle( tmp0, vec->y, _VECTORMATH_SHUF_ZBW0 ) );
-    vmathV3MakeFrom128( result2, spu_shuffle( tmp1, vec->y, _VECTORMATH_SHUF_XCY0 ) );
-    vmathV3MakeFrom128( result3, spu_shuffle( tmp1, vec->y, _VECTORMATH_SHUF_ZDW0 ) );
-}
-
-static inline void vmathSoaV3LoadXYZArray( VmathSoaVector3 *vec, const vec_float4 *threeQuads )
-{
-    vec_float4 xyxy, yzyz, zxzx, xyzx, yzxy, zxyz;
-    xyzx = threeQuads[0];
-    yzxy = threeQuads[1];
-    zxyz = threeQuads[2];
-    xyxy = spu_shuffle( xyzx, yzxy, _VECTORMATH_SHUF_XYCD );
-    zxzx = spu_shuffle( zxyz, xyzx, _VECTORMATH_SHUF_XYCD );
-    yzyz = spu_shuffle( yzxy, zxyz, _VECTORMATH_SHUF_XYCD );
-    vmathSoaV3SetX( vec, spu_shuffle( xyxy, zxzx, _VECTORMATH_SHUF_XDZB ) );
-    vmathSoaV3SetY( vec, spu_shuffle( xyxy, yzyz, _VECTORMATH_SHUF_YAWC ) );
-    vmathSoaV3SetZ( vec, spu_shuffle( zxzx, yzyz, _VECTORMATH_SHUF_ZBXD ) );
-}
-
-static inline void vmathSoaV3StoreXYZArray( const VmathSoaVector3 *vec, vec_float4 *threeQuads )
-{
-    vec_float4 xyzx, yzxy, zxyz, xyxy, zxzx, yzyz;
-    xyxy = spu_shuffle( vec->x, vec->y, _VECTORMATH_SHUF_XAZC );
-    zxzx = spu_shuffle( vec->z, vec->x, _VECTORMATH_SHUF_ZDXB );
-    yzyz = spu_shuffle( vec->y, vec->z, _VECTORMATH_SHUF_YBWD );
-    xyzx = spu_shuffle( xyxy, zxzx, _VECTORMATH_SHUF_XYCD );
-    yzxy = spu_shuffle( yzyz, xyxy, _VECTORMATH_SHUF_XYCD );
-    zxyz = spu_shuffle( zxzx, yzyz, _VECTORMATH_SHUF_XYCD );
-    threeQuads[0] = xyzx;
-    threeQuads[1] = yzxy;
-    threeQuads[2] = zxyz;
-}
-
-static inline void vmathSoaV3StoreHalfFloats( const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1, vec_ushort8 *threeQuads )
-{
-    vec_float4 xyz0[3];
-    vec_float4 xyz1[3];
-    vmathSoaV3StoreXYZArray( vec0, xyz0 );
-    vmathSoaV3StoreXYZArray( vec1, xyz1 );
-    threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
-    threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
-    threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
-}
-
-static inline void vmathSoaV3SetX( VmathSoaVector3 *result, vec_float4 _x )
-{
-    result->x = _x;
-}
-
-static inline vec_float4 vmathSoaV3GetX( const VmathSoaVector3 *vec )
-{
-    return vec->x;
-}
-
-static inline void vmathSoaV3SetY( VmathSoaVector3 *result, vec_float4 _y )
-{
-    result->y = _y;
-}
-
-static inline vec_float4 vmathSoaV3GetY( const VmathSoaVector3 *vec )
-{
-    return vec->y;
-}
-
-static inline void vmathSoaV3SetZ( VmathSoaVector3 *result, vec_float4 _z )
-{
-    result->z = _z;
-}
-
-static inline vec_float4 vmathSoaV3GetZ( const VmathSoaVector3 *vec )
-{
-    return vec->z;
-}
-
-static inline void vmathSoaV3SetElem( VmathSoaVector3 *result, int idx, vec_float4 value )
-{
-    *(&result->x + idx) = value;
-}
-
-static inline vec_float4 vmathSoaV3GetElem( const VmathSoaVector3 *vec, int idx )
-{
-    return *(&vec->x + idx);
-}
-
-static inline void vmathSoaV3Add( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
-{
-    result->x = spu_add( vec0->x, vec1->x );
-    result->y = spu_add( vec0->y, vec1->y );
-    result->z = spu_add( vec0->z, vec1->z );
-}
-
-static inline void vmathSoaV3Sub( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
-{
-    result->x = spu_sub( vec0->x, vec1->x );
-    result->y = spu_sub( vec0->y, vec1->y );
-    result->z = spu_sub( vec0->z, vec1->z );
-}
-
-static inline void vmathSoaV3AddP3( VmathSoaPoint3 *result, const VmathSoaVector3 *vec, const VmathSoaPoint3 *pnt1 )
-{
-    result->x = spu_add( vec->x, pnt1->x );
-    result->y = spu_add( vec->y, pnt1->y );
-    result->z = spu_add( vec->z, pnt1->z );
-}
-
-static inline void vmathSoaV3ScalarMul( VmathSoaVector3 *result, const VmathSoaVector3 *vec, vec_float4 scalar )
-{
-    result->x = spu_mul( vec->x, scalar );
-    result->y = spu_mul( vec->y, scalar );
-    result->z = spu_mul( vec->z, scalar );
-}
-
-static inline void vmathSoaV3ScalarDiv( VmathSoaVector3 *result, const VmathSoaVector3 *vec, vec_float4 scalar )
-{
-    result->x = divf4( vec->x, scalar );
-    result->y = divf4( vec->y, scalar );
-    result->z = divf4( vec->z, scalar );
-}
-
-static inline void vmathSoaV3Neg( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
-{
-    result->x = negatef4( vec->x );
-    result->y = negatef4( vec->y );
-    result->z = negatef4( vec->z );
-}
-
-static inline void vmathSoaV3MulPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
-{
-    result->x = spu_mul( vec0->x, vec1->x );
-    result->y = spu_mul( vec0->y, vec1->y );
-    result->z = spu_mul( vec0->z, vec1->z );
-}
-
-static inline void vmathSoaV3DivPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
-{
-    result->x = divf4( vec0->x, vec1->x );
-    result->y = divf4( vec0->y, vec1->y );
-    result->z = divf4( vec0->z, vec1->z );
-}
-
-static inline void vmathSoaV3RecipPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
-{
-    result->x = recipf4( vec->x );
-    result->y = recipf4( vec->y );
-    result->z = recipf4( vec->z );
-}
-
-static inline void vmathSoaV3SqrtPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
-{
-    result->x = sqrtf4( vec->x );
-    result->y = sqrtf4( vec->y );
-    result->z = sqrtf4( vec->z );
-}
-
-static inline void vmathSoaV3RsqrtPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
-{
-    result->x = rsqrtf4( vec->x );
-    result->y = rsqrtf4( vec->y );
-    result->z = rsqrtf4( vec->z );
-}
-
-static inline void vmathSoaV3AbsPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
-{
-    result->x = fabsf4( vec->x );
-    result->y = fabsf4( vec->y );
-    result->z = fabsf4( vec->z );
-}
-
-static inline void vmathSoaV3CopySignPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
-{
-    result->x = copysignf4( vec0->x, vec1->x );
-    result->y = copysignf4( vec0->y, vec1->y );
-    result->z = copysignf4( vec0->z, vec1->z );
-}
-
-static inline void vmathSoaV3MaxPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
-{
-    result->x = fmaxf4( vec0->x, vec1->x );
-    result->y = fmaxf4( vec0->y, vec1->y );
-    result->z = fmaxf4( vec0->z, vec1->z );
-}
-
-static inline vec_float4 vmathSoaV3MaxElem( const VmathSoaVector3 *vec )
-{
-    vec_float4 result;
-    result = fmaxf4( vec->x, vec->y );
-    result = fmaxf4( vec->z, result );
-    return result;
-}
-
-static inline void vmathSoaV3MinPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
-{
-    result->x = fminf4( vec0->x, vec1->x );
-    result->y = fminf4( vec0->y, vec1->y );
-    result->z = fminf4( vec0->z, vec1->z );
-}
-
-static inline vec_float4 vmathSoaV3MinElem( const VmathSoaVector3 *vec )
-{
-    vec_float4 result;
-    result = fminf4( vec->x, vec->y );
-    result = fminf4( vec->z, result );
-    return result;
-}
-
-static inline vec_float4 vmathSoaV3Sum( const VmathSoaVector3 *vec )
-{
-    vec_float4 result;
-    result = spu_add( vec->x, vec->y );
-    result = spu_add( result, vec->z );
-    return result;
-}
-
-static inline vec_float4 vmathSoaV3Dot( const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
-{
-    vec_float4 result;
-    result = spu_mul( vec0->x, vec1->x );
-    result = spu_add( result, spu_mul( vec0->y, vec1->y ) );
-    result = spu_add( result, spu_mul( vec0->z, vec1->z ) );
-    return result;
-}
-
-static inline vec_float4 vmathSoaV3LengthSqr( const VmathSoaVector3 *vec )
-{
-    vec_float4 result;
-    result = spu_mul( vec->x, vec->x );
-    result = spu_add( result, spu_mul( vec->y, vec->y ) );
-    result = spu_add( result, spu_mul( vec->z, vec->z ) );
-    return result;
-}
-
-static inline vec_float4 vmathSoaV3Length( const VmathSoaVector3 *vec )
-{
-    return sqrtf4( vmathSoaV3LengthSqr( vec ) );
-}
-
-static inline void vmathSoaV3Normalize( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
-{
-    vec_float4 lenSqr, lenInv;
-    lenSqr = vmathSoaV3LengthSqr( vec );
-    lenInv = rsqrtf4( lenSqr );
-    result->x = spu_mul( vec->x, lenInv );
-    result->y = spu_mul( vec->y, lenInv );
-    result->z = spu_mul( vec->z, lenInv );
-}
-
-static inline void vmathSoaV3Cross( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
-{
-    vec_float4 tmpX, tmpY, tmpZ;
-    tmpX = spu_sub( spu_mul( vec0->y, vec1->z ), spu_mul( vec0->z, vec1->y ) );
-    tmpY = spu_sub( spu_mul( vec0->z, vec1->x ), spu_mul( vec0->x, vec1->z ) );
-    tmpZ = spu_sub( spu_mul( vec0->x, vec1->y ), spu_mul( vec0->y, vec1->x ) );
-    vmathSoaV3MakeFromElems( result, tmpX, tmpY, tmpZ );
-}
-
-static inline void vmathSoaV3Select( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1, vec_uint4 select1 )
-{
-    result->x = spu_sel( vec0->x, vec1->x, select1 );
-    result->y = spu_sel( vec0->y, vec1->y, select1 );
-    result->z = spu_sel( vec0->z, vec1->z, select1 );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathSoaV3Print( const VmathSoaVector3 *vec )
-{
-    VmathVector3 vec0, vec1, vec2, vec3;
-    vmathSoaV3Get4Aos( vec, &vec0, &vec1, &vec2, &vec3 );
-    printf("slot 0:\n");
-    vmathV3Print( &vec0 );
-    printf("slot 1:\n");
-    vmathV3Print( &vec1 );
-    printf("slot 2:\n");
-    vmathV3Print( &vec2 );
-    printf("slot 3:\n");
-    vmathV3Print( &vec3 );
-}
-
-static inline void vmathSoaV3Prints( const VmathSoaVector3 *vec, const char *name )
-{
-    VmathVector3 vec0, vec1, vec2, vec3;
-    printf( "%s:\n", name );
-    vmathSoaV3Get4Aos( vec, &vec0, &vec1, &vec2, &vec3 );
-    printf("slot 0:\n");
-    vmathV3Print( &vec0 );
-    printf("slot 1:\n");
-    vmathV3Print( &vec1 );
-    printf("slot 2:\n");
-    vmathV3Print( &vec2 );
-    printf("slot 3:\n");
-    vmathV3Print( &vec3 );
-}
-
-#endif
-
-static inline void vmathSoaV4Copy( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
-{
-    result->x = vec->x;
-    result->y = vec->y;
-    result->z = vec->z;
-    result->w = vec->w;
-}
-
-static inline void vmathSoaV4MakeFromElems( VmathSoaVector4 *result, vec_float4 _x, vec_float4 _y, vec_float4 _z, vec_float4 _w )
-{
-    result->x = _x;
-    result->y = _y;
-    result->z = _z;
-    result->w = _w;
-}
-
-static inline void vmathSoaV4MakeFromV3Scalar( VmathSoaVector4 *result, const VmathSoaVector3 *xyz, vec_float4 _w )
-{
-    vmathSoaV4SetXYZ( result, xyz );
-    vmathSoaV4SetW( result, _w );
-}
-
-static inline void vmathSoaV4MakeFromV3( VmathSoaVector4 *result, const VmathSoaVector3 *vec )
-{
-    result->x = vec->x;
-    result->y = vec->y;
-    result->z = vec->z;
-    result->w = spu_splats(0.0f);
-}
-
-static inline void vmathSoaV4MakeFromP3( VmathSoaVector4 *result, const VmathSoaPoint3 *pnt )
-{
-    result->x = pnt->x;
-    result->y = pnt->y;
-    result->z = pnt->z;
-    result->w = spu_splats(1.0f);
-}
-
-static inline void vmathSoaV4MakeFromQ( VmathSoaVector4 *result, const VmathSoaQuat *quat )
-{
-    result->x = quat->x;
-    result->y = quat->y;
-    result->z = quat->z;
-    result->w = quat->w;
-}
-
-static inline void vmathSoaV4MakeFromScalar( VmathSoaVector4 *result, vec_float4 scalar )
-{
-    result->x = scalar;
-    result->y = scalar;
-    result->z = scalar;
-    result->w = scalar;
-}
-
-static inline void vmathSoaV4MakeFromAos( VmathSoaVector4 *result, const VmathVector4 *vec )
-{
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
-    vec_uchar16 shuffle_wwww = (vec_uchar16)spu_splats((int)0x0c0d0e0f);
-    vec_float4 vec128 = vec->vec128;
-    result->x = spu_shuffle( vec128, vec128, shuffle_xxxx );
-    result->y = spu_shuffle( vec128, vec128, shuffle_yyyy );
-    result->z = spu_shuffle( vec128, vec128, shuffle_zzzz );
-    result->w = spu_shuffle( vec128, vec128, shuffle_wwww );
-}
-
-static inline void vmathSoaV4MakeFrom4Aos( VmathSoaVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1, const VmathVector4 *vec2, const VmathVector4 *vec3 )
-{
-    vec_float4 tmp0, tmp1, tmp2, tmp3;
-    tmp0 = spu_shuffle( vec0->vec128, vec2->vec128, _VECTORMATH_SHUF_XAYB );
-    tmp1 = spu_shuffle( vec1->vec128, vec3->vec128, _VECTORMATH_SHUF_XAYB );
-    tmp2 = spu_shuffle( vec0->vec128, vec2->vec128, _VECTORMATH_SHUF_ZCWD );
-    tmp3 = spu_shuffle( vec1->vec128, vec3->vec128, _VECTORMATH_SHUF_ZCWD );
-    result->x = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_XAYB );
-    result->y = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_ZCWD );
-    result->z = spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_XAYB );
-    result->w = spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_ZCWD );
-}
-
-static inline void vmathSoaV4MakeXAxis( VmathSoaVector4 *result )
-{
-    vmathSoaV4MakeFromElems( result, spu_splats(1.0f), spu_splats(0.0f), spu_splats(0.0f), spu_splats(0.0f) );
-}
-
-static inline void vmathSoaV4MakeYAxis( VmathSoaVector4 *result )
-{
-    vmathSoaV4MakeFromElems( result, spu_splats(0.0f), spu_splats(1.0f), spu_splats(0.0f), spu_splats(0.0f) );
-}
-
-static inline void vmathSoaV4MakeZAxis( VmathSoaVector4 *result )
-{
-    vmathSoaV4MakeFromElems( result, spu_splats(0.0f), spu_splats(0.0f), spu_splats(1.0f), spu_splats(0.0f) );
-}
-
-static inline void vmathSoaV4MakeWAxis( VmathSoaVector4 *result )
-{
-    vmathSoaV4MakeFromElems( result, spu_splats(0.0f), spu_splats(0.0f), spu_splats(0.0f), spu_splats(1.0f) );
-}
-
-static inline void vmathSoaV4Lerp( VmathSoaVector4 *result, vec_float4 t, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
-{
-    VmathSoaVector4 tmpV4_0, tmpV4_1;
-    vmathSoaV4Sub( &tmpV4_0, vec1, vec0 );
-    vmathSoaV4ScalarMul( &tmpV4_1, &tmpV4_0, t );
-    vmathSoaV4Add( result, vec0, &tmpV4_1 );
-}
-
-static inline void vmathSoaV4Slerp( VmathSoaVector4 *result, vec_float4 t, const VmathSoaVector4 *unitVec0, const VmathSoaVector4 *unitVec1 )
-{
-    VmathSoaVector4 tmpV4_0, tmpV4_1;
-    vec_float4 recipSinAngle, scale0, scale1, cosAngle, angle;
-    vec_uint4 selectMask;
-    cosAngle = vmathSoaV4Dot( unitVec0, unitVec1 );
-    selectMask = (vec_uint4)spu_cmpgt( spu_splats(_VECTORMATH_SLERP_TOL), cosAngle );
-    angle = acosf4( cosAngle );
-    recipSinAngle = recipf4( sinf4( angle ) );
-    scale0 = spu_sel( spu_sub( spu_splats(1.0f), t ), spu_mul( sinf4( spu_mul( spu_sub( spu_splats(1.0f), t ), angle ) ), recipSinAngle ), selectMask );
-    scale1 = spu_sel( t, spu_mul( sinf4( spu_mul( t, angle ) ), recipSinAngle ), selectMask );
-    vmathSoaV4ScalarMul( &tmpV4_0, unitVec0, scale0 );
-    vmathSoaV4ScalarMul( &tmpV4_1, unitVec1, scale1 );
-    vmathSoaV4Add( result, &tmpV4_0, &tmpV4_1 );
-}
-
-static inline void vmathSoaV4Get4Aos( const VmathSoaVector4 *vec, VmathVector4 *result0, VmathVector4 *result1, VmathVector4 *result2, VmathVector4 *result3 )
-{
-    vec_float4 tmp0, tmp1, tmp2, tmp3;
-    tmp0 = spu_shuffle( vec->x, vec->z, _VECTORMATH_SHUF_XAYB );
-    tmp1 = spu_shuffle( vec->y, vec->w, _VECTORMATH_SHUF_XAYB );
-    tmp2 = spu_shuffle( vec->x, vec->z, _VECTORMATH_SHUF_ZCWD );
-    tmp3 = spu_shuffle( vec->y, vec->w, _VECTORMATH_SHUF_ZCWD );
-    vmathV4MakeFrom128( result0, spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_XAYB ) );
-    vmathV4MakeFrom128( result1, spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_ZCWD ) );
-    vmathV4MakeFrom128( result2, spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_XAYB ) );
-    vmathV4MakeFrom128( result3, spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_ZCWD ) );
-}
-
-static inline void vmathSoaV4StoreHalfFloats( const VmathSoaVector4 *vec, vec_ushort8 *twoQuads )
-{
-    VmathVector4 v0, v1, v2, v3;
-    vmathSoaV4Get4Aos( vec, &v0, &v1, &v2, &v3 );
-    twoQuads[0] = _vmath2VfToHalfFloats(v0.vec128, v1.vec128);
-    twoQuads[1] = _vmath2VfToHalfFloats(v2.vec128, v3.vec128);
-}
-
-static inline void vmathSoaV4SetXYZ( VmathSoaVector4 *result, const VmathSoaVector3 *vec )
-{
-    result->x = vec->x;
-    result->y = vec->y;
-    result->z = vec->z;
-}
-
-static inline void vmathSoaV4GetXYZ( VmathSoaVector3 *result, const VmathSoaVector4 *vec )
-{
-    vmathSoaV3MakeFromElems( result, vec->x, vec->y, vec->z );
-}
-
-static inline void vmathSoaV4SetX( VmathSoaVector4 *result, vec_float4 _x )
-{
-    result->x = _x;
-}
-
-static inline vec_float4 vmathSoaV4GetX( const VmathSoaVector4 *vec )
-{
-    return vec->x;
-}
-
-static inline void vmathSoaV4SetY( VmathSoaVector4 *result, vec_float4 _y )
-{
-    result->y = _y;
-}
-
-static inline vec_float4 vmathSoaV4GetY( const VmathSoaVector4 *vec )
-{
-    return vec->y;
-}
-
-static inline void vmathSoaV4SetZ( VmathSoaVector4 *result, vec_float4 _z )
-{
-    result->z = _z;
-}
-
-static inline vec_float4 vmathSoaV4GetZ( const VmathSoaVector4 *vec )
-{
-    return vec->z;
-}
-
-static inline void vmathSoaV4SetW( VmathSoaVector4 *result, vec_float4 _w )
-{
-    result->w = _w;
-}
-
-static inline vec_float4 vmathSoaV4GetW( const VmathSoaVector4 *vec )
-{
-    return vec->w;
-}
-
-static inline void vmathSoaV4SetElem( VmathSoaVector4 *result, int idx, vec_float4 value )
-{
-    *(&result->x + idx) = value;
-}
-
-static inline vec_float4 vmathSoaV4GetElem( const VmathSoaVector4 *vec, int idx )
-{
-    return *(&vec->x + idx);
-}
-
-static inline void vmathSoaV4Add( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
-{
-    result->x = spu_add( vec0->x, vec1->x );
-    result->y = spu_add( vec0->y, vec1->y );
-    result->z = spu_add( vec0->z, vec1->z );
-    result->w = spu_add( vec0->w, vec1->w );
-}
-
-static inline void vmathSoaV4Sub( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
-{
-    result->x = spu_sub( vec0->x, vec1->x );
-    result->y = spu_sub( vec0->y, vec1->y );
-    result->z = spu_sub( vec0->z, vec1->z );
-    result->w = spu_sub( vec0->w, vec1->w );
-}
-
-static inline void vmathSoaV4ScalarMul( VmathSoaVector4 *result, const VmathSoaVector4 *vec, vec_float4 scalar )
-{
-    result->x = spu_mul( vec->x, scalar );
-    result->y = spu_mul( vec->y, scalar );
-    result->z = spu_mul( vec->z, scalar );
-    result->w = spu_mul( vec->w, scalar );
-}
-
-static inline void vmathSoaV4ScalarDiv( VmathSoaVector4 *result, const VmathSoaVector4 *vec, vec_float4 scalar )
-{
-    result->x = divf4( vec->x, scalar );
-    result->y = divf4( vec->y, scalar );
-    result->z = divf4( vec->z, scalar );
-    result->w = divf4( vec->w, scalar );
-}
-
-static inline void vmathSoaV4Neg( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
-{
-    result->x = negatef4( vec->x );
-    result->y = negatef4( vec->y );
-    result->z = negatef4( vec->z );
-    result->w = negatef4( vec->w );
-}
-
-static inline void vmathSoaV4MulPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
-{
-    result->x = spu_mul( vec0->x, vec1->x );
-    result->y = spu_mul( vec0->y, vec1->y );
-    result->z = spu_mul( vec0->z, vec1->z );
-    result->w = spu_mul( vec0->w, vec1->w );
-}
-
-static inline void vmathSoaV4DivPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
-{
-    result->x = divf4( vec0->x, vec1->x );
-    result->y = divf4( vec0->y, vec1->y );
-    result->z = divf4( vec0->z, vec1->z );
-    result->w = divf4( vec0->w, vec1->w );
-}
-
-static inline void vmathSoaV4RecipPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
-{
-    result->x = recipf4( vec->x );
-    result->y = recipf4( vec->y );
-    result->z = recipf4( vec->z );
-    result->w = recipf4( vec->w );
-}
-
-static inline void vmathSoaV4SqrtPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
-{
-    result->x = sqrtf4( vec->x );
-    result->y = sqrtf4( vec->y );
-    result->z = sqrtf4( vec->z );
-    result->w = sqrtf4( vec->w );
-}
-
-static inline void vmathSoaV4RsqrtPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
-{
-    result->x = rsqrtf4( vec->x );
-    result->y = rsqrtf4( vec->y );
-    result->z = rsqrtf4( vec->z );
-    result->w = rsqrtf4( vec->w );
-}
-
-static inline void vmathSoaV4AbsPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
-{
-    result->x = fabsf4( vec->x );
-    result->y = fabsf4( vec->y );
-    result->z = fabsf4( vec->z );
-    result->w = fabsf4( vec->w );
-}
-
-static inline void vmathSoaV4CopySignPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
-{
-    result->x = copysignf4( vec0->x, vec1->x );
-    result->y = copysignf4( vec0->y, vec1->y );
-    result->z = copysignf4( vec0->z, vec1->z );
-    result->w = copysignf4( vec0->w, vec1->w );
-}
-
-static inline void vmathSoaV4MaxPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
-{
-    result->x = fmaxf4( vec0->x, vec1->x );
-    result->y = fmaxf4( vec0->y, vec1->y );
-    result->z = fmaxf4( vec0->z, vec1->z );
-    result->w = fmaxf4( vec0->w, vec1->w );
-}
-
-static inline vec_float4 vmathSoaV4MaxElem( const VmathSoaVector4 *vec )
-{
-    vec_float4 result;
-    result = fmaxf4( vec->x, vec->y );
-    result = fmaxf4( vec->z, result );
-    result = fmaxf4( vec->w, result );
-    return result;
-}
-
-static inline void vmathSoaV4MinPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
-{
-    result->x = fminf4( vec0->x, vec1->x );
-    result->y = fminf4( vec0->y, vec1->y );
-    result->z = fminf4( vec0->z, vec1->z );
-    result->w = fminf4( vec0->w, vec1->w );
-}
-
-static inline vec_float4 vmathSoaV4MinElem( const VmathSoaVector4 *vec )
-{
-    vec_float4 result;
-    result = fminf4( vec->x, vec->y );
-    result = fminf4( vec->z, result );
-    result = fminf4( vec->w, result );
-    return result;
-}
-
-static inline vec_float4 vmathSoaV4Sum( const VmathSoaVector4 *vec )
-{
-    vec_float4 result;
-    result = spu_add( vec->x, vec->y );
-    result = spu_add( result, vec->z );
-    result = spu_add( result, vec->w );
-    return result;
-}
-
-static inline vec_float4 vmathSoaV4Dot( const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
-{
-    vec_float4 result;
-    result = spu_mul( vec0->x, vec1->x );
-    result = spu_add( result, spu_mul( vec0->y, vec1->y ) );
-    result = spu_add( result, spu_mul( vec0->z, vec1->z ) );
-    result = spu_add( result, spu_mul( vec0->w, vec1->w ) );
-    return result;
-}
-
-static inline vec_float4 vmathSoaV4LengthSqr( const VmathSoaVector4 *vec )
-{
-    vec_float4 result;
-    result = spu_mul( vec->x, vec->x );
-    result = spu_add( result, spu_mul( vec->y, vec->y ) );
-    result = spu_add( result, spu_mul( vec->z, vec->z ) );
-    result = spu_add( result, spu_mul( vec->w, vec->w ) );
-    return result;
-}
-
-static inline vec_float4 vmathSoaV4Length( const VmathSoaVector4 *vec )
-{
-    return sqrtf4( vmathSoaV4LengthSqr( vec ) );
-}
-
-static inline void vmathSoaV4Normalize( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
-{
-    vec_float4 lenSqr, lenInv;
-    lenSqr = vmathSoaV4LengthSqr( vec );
-    lenInv = rsqrtf4( lenSqr );
-    result->x = spu_mul( vec->x, lenInv );
-    result->y = spu_mul( vec->y, lenInv );
-    result->z = spu_mul( vec->z, lenInv );
-    result->w = spu_mul( vec->w, lenInv );
-}
-
-static inline void vmathSoaV4Select( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1, vec_uint4 select1 )
-{
-    result->x = spu_sel( vec0->x, vec1->x, select1 );
-    result->y = spu_sel( vec0->y, vec1->y, select1 );
-    result->z = spu_sel( vec0->z, vec1->z, select1 );
-    result->w = spu_sel( vec0->w, vec1->w, select1 );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathSoaV4Print( const VmathSoaVector4 *vec )
-{
-    VmathVector4 vec0, vec1, vec2, vec3;
-    vmathSoaV4Get4Aos( vec, &vec0, &vec1, &vec2, &vec3 );
-    printf("slot 0:\n");
-    vmathV4Print( &vec0 );
-    printf("slot 1:\n");
-    vmathV4Print( &vec1 );
-    printf("slot 2:\n");
-    vmathV4Print( &vec2 );
-    printf("slot 3:\n");
-    vmathV4Print( &vec3 );
-}
-
-static inline void vmathSoaV4Prints( const VmathSoaVector4 *vec, const char *name )
-{
-    VmathVector4 vec0, vec1, vec2, vec3;
-    printf( "%s:\n", name );
-    vmathSoaV4Get4Aos( vec, &vec0, &vec1, &vec2, &vec3 );
-    printf("slot 0:\n");
-    vmathV4Print( &vec0 );
-    printf("slot 1:\n");
-    vmathV4Print( &vec1 );
-    printf("slot 2:\n");
-    vmathV4Print( &vec2 );
-    printf("slot 3:\n");
-    vmathV4Print( &vec3 );
-}
-
-#endif
-
-static inline void vmathSoaP3Copy( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt )
-{
-    result->x = pnt->x;
-    result->y = pnt->y;
-    result->z = pnt->z;
-}
-
-static inline void vmathSoaP3MakeFromElems( VmathSoaPoint3 *result, vec_float4 _x, vec_float4 _y, vec_float4 _z )
-{
-    result->x = _x;
-    result->y = _y;
-    result->z = _z;
-}
-
-static inline void vmathSoaP3MakeFromV3( VmathSoaPoint3 *result, const VmathSoaVector3 *vec )
-{
-    result->x = vec->x;
-    result->y = vec->y;
-    result->z = vec->z;
-}
-
-static inline void vmathSoaP3MakeFromScalar( VmathSoaPoint3 *result, vec_float4 scalar )
-{
-    result->x = scalar;
-    result->y = scalar;
-    result->z = scalar;
-}
-
-static inline void vmathSoaP3MakeFromAos( VmathSoaPoint3 *result, const VmathPoint3 *pnt )
-{
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
-    vec_float4 vec128 = pnt->vec128;
-    result->x = spu_shuffle( vec128, vec128, shuffle_xxxx );
-    result->y = spu_shuffle( vec128, vec128, shuffle_yyyy );
-    result->z = spu_shuffle( vec128, vec128, shuffle_zzzz );
-}
-
-static inline void vmathSoaP3MakeFrom4Aos( VmathSoaPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, const VmathPoint3 *pnt2, const VmathPoint3 *pnt3 )
-{
-    vec_float4 tmp0, tmp1, tmp2, tmp3;
-    tmp0 = spu_shuffle( pnt0->vec128, pnt2->vec128, _VECTORMATH_SHUF_XAYB );
-    tmp1 = spu_shuffle( pnt1->vec128, pnt3->vec128, _VECTORMATH_SHUF_XAYB );
-    tmp2 = spu_shuffle( pnt0->vec128, pnt2->vec128, _VECTORMATH_SHUF_ZCWD );
-    tmp3 = spu_shuffle( pnt1->vec128, pnt3->vec128, _VECTORMATH_SHUF_ZCWD );
-    result->x = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_XAYB );
-    result->y = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_ZCWD );
-    result->z = spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_XAYB );
-}
-
-static inline void vmathSoaP3Lerp( VmathSoaPoint3 *result, vec_float4 t, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
-{
-    VmathSoaVector3 tmpV3_0, tmpV3_1;
-    vmathSoaP3Sub( &tmpV3_0, pnt1, pnt0 );
-    vmathSoaV3ScalarMul( &tmpV3_1, &tmpV3_0, t );
-    vmathSoaP3AddV3( result, pnt0, &tmpV3_1 );
-}
-
-static inline void vmathSoaP3Get4Aos( const VmathSoaPoint3 *pnt, VmathPoint3 *result0, VmathPoint3 *result1, VmathPoint3 *result2, VmathPoint3 *result3 )
-{
-    vec_float4 tmp0, tmp1;
-    tmp0 = spu_shuffle( pnt->x, pnt->z, _VECTORMATH_SHUF_XAYB );
-    tmp1 = spu_shuffle( pnt->x, pnt->z, _VECTORMATH_SHUF_ZCWD );
-    vmathP3MakeFrom128( result0, spu_shuffle( tmp0, pnt->y, _VECTORMATH_SHUF_XAYB ) );
-    vmathP3MakeFrom128( result1, spu_shuffle( tmp0, pnt->y, _VECTORMATH_SHUF_ZBW0 ) );
-    vmathP3MakeFrom128( result2, spu_shuffle( tmp1, pnt->y, _VECTORMATH_SHUF_XCY0 ) );
-    vmathP3MakeFrom128( result3, spu_shuffle( tmp1, pnt->y, _VECTORMATH_SHUF_ZDW0 ) );
-}
-
-static inline void vmathSoaP3LoadXYZArray( VmathSoaPoint3 *vec, const vec_float4 *threeQuads )
-{
-    vec_float4 xyxy, yzyz, zxzx, xyzx, yzxy, zxyz;
-    xyzx = threeQuads[0];
-    yzxy = threeQuads[1];
-    zxyz = threeQuads[2];
-    xyxy = spu_shuffle( xyzx, yzxy, _VECTORMATH_SHUF_XYCD );
-    zxzx = spu_shuffle( zxyz, xyzx, _VECTORMATH_SHUF_XYCD );
-    yzyz = spu_shuffle( yzxy, zxyz, _VECTORMATH_SHUF_XYCD );
-    vmathSoaP3SetX( vec, spu_shuffle( xyxy, zxzx, _VECTORMATH_SHUF_XDZB ) );
-    vmathSoaP3SetY( vec, spu_shuffle( xyxy, yzyz, _VECTORMATH_SHUF_YAWC ) );
-    vmathSoaP3SetZ( vec, spu_shuffle( zxzx, yzyz, _VECTORMATH_SHUF_ZBXD ) );
-}
-
-static inline void vmathSoaP3StoreXYZArray( const VmathSoaPoint3 *vec, vec_float4 *threeQuads )
-{
-    vec_float4 xyzx, yzxy, zxyz, xyxy, zxzx, yzyz;
-    xyxy = spu_shuffle( vec->x, vec->y, _VECTORMATH_SHUF_XAZC );
-    zxzx = spu_shuffle( vec->z, vec->x, _VECTORMATH_SHUF_ZDXB );
-    yzyz = spu_shuffle( vec->y, vec->z, _VECTORMATH_SHUF_YBWD );
-    xyzx = spu_shuffle( xyxy, zxzx, _VECTORMATH_SHUF_XYCD );
-    yzxy = spu_shuffle( yzyz, xyxy, _VECTORMATH_SHUF_XYCD );
-    zxyz = spu_shuffle( zxzx, yzyz, _VECTORMATH_SHUF_XYCD );
-    threeQuads[0] = xyzx;
-    threeQuads[1] = yzxy;
-    threeQuads[2] = zxyz;
-}
-
-static inline void vmathSoaP3StoreHalfFloats( const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1, vec_ushort8 *threeQuads )
-{
-    vec_float4 xyz0[3];
-    vec_float4 xyz1[3];
-    vmathSoaP3StoreXYZArray( pnt0, xyz0 );
-    vmathSoaP3StoreXYZArray( pnt1, xyz1 );
-    threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
-    threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
-    threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
-}
-
-static inline void vmathSoaP3SetX( VmathSoaPoint3 *result, vec_float4 _x )
-{
-    result->x = _x;
-}
-
-static inline vec_float4 vmathSoaP3GetX( const VmathSoaPoint3 *pnt )
-{
-    return pnt->x;
-}
-
-static inline void vmathSoaP3SetY( VmathSoaPoint3 *result, vec_float4 _y )
-{
-    result->y = _y;
-}
-
-static inline vec_float4 vmathSoaP3GetY( const VmathSoaPoint3 *pnt )
-{
-    return pnt->y;
-}
-
-static inline void vmathSoaP3SetZ( VmathSoaPoint3 *result, vec_float4 _z )
-{
-    result->z = _z;
-}
-
-static inline vec_float4 vmathSoaP3GetZ( const VmathSoaPoint3 *pnt )
-{
-    return pnt->z;
-}
-
-static inline void vmathSoaP3SetElem( VmathSoaPoint3 *result, int idx, vec_float4 value )
-{
-    *(&result->x + idx) = value;
-}
-
-static inline vec_float4 vmathSoaP3GetElem( const VmathSoaPoint3 *pnt, int idx )
-{
-    return *(&pnt->x + idx);
-}
-
-static inline void vmathSoaP3Sub( VmathSoaVector3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
-{
-    result->x = spu_sub( pnt0->x, pnt1->x );
-    result->y = spu_sub( pnt0->y, pnt1->y );
-    result->z = spu_sub( pnt0->z, pnt1->z );
-}
-
-static inline void vmathSoaP3AddV3( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, const VmathSoaVector3 *vec1 )
-{
-    result->x = spu_add( pnt->x, vec1->x );
-    result->y = spu_add( pnt->y, vec1->y );
-    result->z = spu_add( pnt->z, vec1->z );
-}
-
-static inline void vmathSoaP3SubV3( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, const VmathSoaVector3 *vec1 )
-{
-    result->x = spu_sub( pnt->x, vec1->x );
-    result->y = spu_sub( pnt->y, vec1->y );
-    result->z = spu_sub( pnt->z, vec1->z );
-}
-
-static inline void vmathSoaP3MulPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
-{
-    result->x = spu_mul( pnt0->x, pnt1->x );
-    result->y = spu_mul( pnt0->y, pnt1->y );
-    result->z = spu_mul( pnt0->z, pnt1->z );
-}
-
-static inline void vmathSoaP3DivPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
-{
-    result->x = divf4( pnt0->x, pnt1->x );
-    result->y = divf4( pnt0->y, pnt1->y );
-    result->z = divf4( pnt0->z, pnt1->z );
-}
-
-static inline void vmathSoaP3RecipPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt )
-{
-    result->x = recipf4( pnt->x );
-    result->y = recipf4( pnt->y );
-    result->z = recipf4( pnt->z );
-}
-
-static inline void vmathSoaP3SqrtPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt )
-{
-    result->x = sqrtf4( pnt->x );
-    result->y = sqrtf4( pnt->y );
-    result->z = sqrtf4( pnt->z );
-}
-
-static inline void vmathSoaP3RsqrtPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt )
-{
-    result->x = rsqrtf4( pnt->x );
-    result->y = rsqrtf4( pnt->y );
-    result->z = rsqrtf4( pnt->z );
-}
-
-static inline void vmathSoaP3AbsPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt )
-{
-    result->x = fabsf4( pnt->x );
-    result->y = fabsf4( pnt->y );
-    result->z = fabsf4( pnt->z );
-}
-
-static inline void vmathSoaP3CopySignPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
-{
-    result->x = copysignf4( pnt0->x, pnt1->x );
-    result->y = copysignf4( pnt0->y, pnt1->y );
-    result->z = copysignf4( pnt0->z, pnt1->z );
-}
-
-static inline void vmathSoaP3MaxPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
-{
-    result->x = fmaxf4( pnt0->x, pnt1->x );
-    result->y = fmaxf4( pnt0->y, pnt1->y );
-    result->z = fmaxf4( pnt0->z, pnt1->z );
-}
-
-static inline vec_float4 vmathSoaP3MaxElem( const VmathSoaPoint3 *pnt )
-{
-    vec_float4 result;
-    result = fmaxf4( pnt->x, pnt->y );
-    result = fmaxf4( pnt->z, result );
-    return result;
-}
-
-static inline void vmathSoaP3MinPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
-{
-    result->x = fminf4( pnt0->x, pnt1->x );
-    result->y = fminf4( pnt0->y, pnt1->y );
-    result->z = fminf4( pnt0->z, pnt1->z );
-}
-
-static inline vec_float4 vmathSoaP3MinElem( const VmathSoaPoint3 *pnt )
-{
-    vec_float4 result;
-    result = fminf4( pnt->x, pnt->y );
-    result = fminf4( pnt->z, result );
-    return result;
-}
-
-static inline vec_float4 vmathSoaP3Sum( const VmathSoaPoint3 *pnt )
-{
-    vec_float4 result;
-    result = spu_add( pnt->x, pnt->y );
-    result = spu_add( result, pnt->z );
-    return result;
-}
-
-static inline void vmathSoaP3Scale( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, vec_float4 scaleVal )
-{
-    VmathSoaPoint3 tmpP3_0;
-    vmathSoaP3MakeFromScalar( &tmpP3_0, scaleVal );
-    vmathSoaP3MulPerElem( result, pnt, &tmpP3_0 );
-}
-
-static inline void vmathSoaP3NonUniformScale( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, const VmathSoaVector3 *scaleVec )
-{
-    VmathSoaPoint3 tmpP3_0;
-    vmathSoaP3MakeFromV3( &tmpP3_0, scaleVec );
-    vmathSoaP3MulPerElem( result, pnt, &tmpP3_0 );
-}
-
-static inline vec_float4 vmathSoaP3Projection( const VmathSoaPoint3 *pnt, const VmathSoaVector3 *unitVec )
-{
-    vec_float4 result;
-    result = spu_mul( pnt->x, unitVec->x );
-    result = spu_add( result, spu_mul( pnt->y, unitVec->y ) );
-    result = spu_add( result, spu_mul( pnt->z, unitVec->z ) );
-    return result;
-}
-
-static inline vec_float4 vmathSoaP3DistSqrFromOrigin( const VmathSoaPoint3 *pnt )
-{
-    VmathSoaVector3 tmpV3_0;
-    vmathSoaV3MakeFromP3( &tmpV3_0, pnt );
-    return vmathSoaV3LengthSqr( &tmpV3_0 );
-}
-
-static inline vec_float4 vmathSoaP3DistFromOrigin( const VmathSoaPoint3 *pnt )
-{
-    VmathSoaVector3 tmpV3_0;
-    vmathSoaV3MakeFromP3( &tmpV3_0, pnt );
-    return vmathSoaV3Length( &tmpV3_0 );
-}
-
-static inline vec_float4 vmathSoaP3DistSqr( const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
-{
-    VmathSoaVector3 tmpV3_0;
-    vmathSoaP3Sub( &tmpV3_0, pnt1, pnt0 );
-    return vmathSoaV3LengthSqr( &tmpV3_0 );
-}
-
-static inline vec_float4 vmathSoaP3Dist( const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
-{
-    VmathSoaVector3 tmpV3_0;
-    vmathSoaP3Sub( &tmpV3_0, pnt1, pnt0 );
-    return vmathSoaV3Length( &tmpV3_0 );
-}
-
-static inline void vmathSoaP3Select( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1, vec_uint4 select1 )
-{
-    result->x = spu_sel( pnt0->x, pnt1->x, select1 );
-    result->y = spu_sel( pnt0->y, pnt1->y, select1 );
-    result->z = spu_sel( pnt0->z, pnt1->z, select1 );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathSoaP3Print( const VmathSoaPoint3 *pnt )
-{
-    VmathPoint3 vec0, vec1, vec2, vec3;
-    vmathSoaP3Get4Aos( pnt, &vec0, &vec1, &vec2, &vec3 );
-    printf("slot 0:\n");
-    vmathP3Print( &vec0 );
-    printf("slot 1:\n");
-    vmathP3Print( &vec1 );
-    printf("slot 2:\n");
-    vmathP3Print( &vec2 );
-    printf("slot 3:\n");
-    vmathP3Print( &vec3 );
-}
-
-static inline void vmathSoaP3Prints( const VmathSoaPoint3 *pnt, const char *name )
-{
-    VmathPoint3 vec0, vec1, vec2, vec3;
-    printf( "%s:\n", name );
-    vmathSoaP3Get4Aos( pnt, &vec0, &vec1, &vec2, &vec3 );
-    printf("slot 0:\n");
-    vmathP3Print( &vec0 );
-    printf("slot 1:\n");
-    vmathP3Print( &vec1 );
-    printf("slot 2:\n");
-    vmathP3Print( &vec2 );
-    printf("slot 3:\n");
-    vmathP3Print( &vec3 );
-}
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_VEC_SOA_C_H
+#define _VECTORMATH_VEC_SOA_C_H
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*-----------------------------------------------------------------------------
+ * Constants
+ * Shuffle words: [x,y,z,w] select from the first operand, [a,b,c,d] from the second, and 0 yields a zero word.
+ */
+#define _VECTORMATH_SHUF_X 0x00010203 /* byte indices 0..3: word x of the first shuffle operand */
+#define _VECTORMATH_SHUF_Y 0x04050607
+#define _VECTORMATH_SHUF_Z 0x08090a0b
+#define _VECTORMATH_SHUF_W 0x0c0d0e0f
+#define _VECTORMATH_SHUF_A 0x10111213 /* byte indices 16..19: first word of the second shuffle operand */
+#define _VECTORMATH_SHUF_B 0x14151617
+#define _VECTORMATH_SHUF_C 0x18191a1b
+#define _VECTORMATH_SHUF_D 0x1c1d1e1f
+#define _VECTORMATH_SHUF_0 0x80808080 /* used below wherever an output word should be zero */
+#define _VECTORMATH_SHUF_XAYB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_B })
+#define _VECTORMATH_SHUF_ZCWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_D })
+#define _VECTORMATH_SHUF_ZBW0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_0 })
+#define _VECTORMATH_SHUF_XCY0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_0 })
+#define _VECTORMATH_SHUF_ZDW0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_D, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_0 })
+#define _VECTORMATH_SHUF_XAZC ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_C })
+#define _VECTORMATH_SHUF_ZDXB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_D, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_B })
+#define _VECTORMATH_SHUF_YBWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_D })
+#define _VECTORMATH_SHUF_XDZB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_D, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_B })
+#define _VECTORMATH_SHUF_YAWC ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_C })
+#define _VECTORMATH_SHUF_ZBXD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_D })
+#define _VECTORMATH_SHUF_XYCD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_D })
+#define _VECTORMATH_SLERP_TOL 0.999f /* cosine threshold: the slerp functions use sine weights only where the dot product is below this */
+
+/*-----------------------------------------------------------------------------
+ * Definitions
+ */
+#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
+#define _VECTORMATH_INTERNAL_FUNCTIONS
+
+#endif
+
+static inline void vmathSoaV3Copy( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
+{
+    result->x = vec->x; /* member-wise copy: one quadword per component */
+    result->y = vec->y;
+    result->z = vec->z;
+}
+
+static inline void vmathSoaV3MakeFromElems( VmathSoaVector3 *result, vec_float4 _x, vec_float4 _y, vec_float4 _z )
+{
+    result->x = _x; /* each argument carries one component for all four slots */
+    result->y = _y;
+    result->z = _z;
+}
+
+/* Builds a vector from a point: the x, y and z quadwords are taken over unchanged. */
+static inline void vmathSoaV3MakeFromP3( VmathSoaVector3 *result, const VmathSoaPoint3 *pnt )
+{
+    /* the element constructor performs the three stores */
+    vmathSoaV3MakeFromElems( result, pnt->x, pnt->y, pnt->z );
+}
+
+/* Sets every component of result to the same quadword of per-slot scalars. */
+static inline void vmathSoaV3MakeFromScalar( VmathSoaVector3 *result, vec_float4 scalar )
+{
+    /* x, y and z all receive the identical value */
+    vmathSoaV3MakeFromElems( result, scalar, scalar, scalar );
+}
+
+/* Broadcasts one AoS vector: every slot of result's x, y and z holds that vector's x, y and z word respectively. */
+static inline void vmathSoaV3MakeFromAos( VmathSoaVector3 *result, const VmathVector3 *vec )
+{
+    vec_float4 aos = vec->vec128;
+    vec_uchar16 splatX = (vec_uchar16)spu_splats((int)_VECTORMATH_SHUF_X);
+    vec_uchar16 splatY = (vec_uchar16)spu_splats((int)_VECTORMATH_SHUF_Y);
+    vec_uchar16 splatZ = (vec_uchar16)spu_splats((int)_VECTORMATH_SHUF_Z);
+    vmathSoaV3MakeFromElems( result, spu_shuffle( aos, aos, splatX ),
+                             spu_shuffle( aos, aos, splatY ), spu_shuffle( aos, aos, splatZ ) );
+}
+
+/* Transposes four AoS vectors into SoA form; slot i of each component comes from vec<i>. */
+static inline void vmathSoaV3MakeFrom4Aos( VmathSoaVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1, const VmathVector3 *vec2, const VmathVector3 *vec3 )
+{
+    vec_float4 xy02 = spu_shuffle( vec0->vec128, vec2->vec128, _VECTORMATH_SHUF_XAYB ); /* x0 x2 y0 y2 */
+    vec_float4 xy13 = spu_shuffle( vec1->vec128, vec3->vec128, _VECTORMATH_SHUF_XAYB ); /* x1 x3 y1 y3 */
+    vec_float4 zw02 = spu_shuffle( vec0->vec128, vec2->vec128, _VECTORMATH_SHUF_ZCWD ); /* z0 z2 w0 w2 */
+    vec_float4 zw13 = spu_shuffle( vec1->vec128, vec3->vec128, _VECTORMATH_SHUF_ZCWD ); /* z1 z3 w1 w3 */
+    result->x = spu_shuffle( xy02, xy13, _VECTORMATH_SHUF_XAYB ); /* x0 x1 x2 x3 */
+    result->y = spu_shuffle( xy02, xy13, _VECTORMATH_SHUF_ZCWD ); /* y0 y1 y2 y3 */
+    result->z = spu_shuffle( zw02, zw13, _VECTORMATH_SHUF_XAYB ); /* z0 z1 z2 z3 */
+}
+
+static inline void vmathSoaV3MakeXAxis( VmathSoaVector3 *result )
+{
+    vmathSoaV3MakeFromElems( result, spu_splats(1.0f), spu_splats(0.0f), spu_splats(0.0f) ); /* (1,0,0) in every slot */
+}
+
+static inline void vmathSoaV3MakeYAxis( VmathSoaVector3 *result )
+{
+    vmathSoaV3MakeFromElems( result, spu_splats(0.0f), spu_splats(1.0f), spu_splats(0.0f) ); /* (0,1,0) in every slot */
+}
+
+static inline void vmathSoaV3MakeZAxis( VmathSoaVector3 *result )
+{
+    vmathSoaV3MakeFromElems( result, spu_splats(0.0f), spu_splats(0.0f), spu_splats(1.0f) ); /* (0,0,1) in every slot */
+}
+
+static inline void vmathSoaV3Lerp( VmathSoaVector3 *result, vec_float4 t, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
+{
+    VmathSoaVector3 delta; /* holds (vec1 - vec0), then that difference scaled by t */
+    vmathSoaV3Sub( &delta, vec1, vec0 );
+    vmathSoaV3ScalarMul( &delta, &delta, t ); /* in place: each component reads only itself */
+    vmathSoaV3Add( result, vec0, &delta ); /* vec0 + t * (vec1 - vec0), per slot */
+}
+
+/* Per-slot spherical interpolation of two unit vectors; slots whose dot product is not below _VECTORMATH_SLERP_TOL use the linear weights (1-t, t). */
+static inline void vmathSoaV3Slerp( VmathSoaVector3 *result, vec_float4 t, const VmathSoaVector3 *unitVec0, const VmathSoaVector3 *unitVec1 )
+{
+    VmathSoaVector3 part0, part1;
+    vec_float4 cosAngle = vmathSoaV3Dot( unitVec0, unitVec1 );
+    vec_uint4 useSinWeights = (vec_uint4)spu_cmpgt( spu_splats(_VECTORMATH_SLERP_TOL), cosAngle ); /* tol > cos */
+    vec_float4 angle = acosf4( cosAngle );
+    vec_float4 recipSinAngle = recipf4( sinf4( angle ) );
+    vec_float4 oneMinusT = spu_sub( spu_splats(1.0f), t );
+    vec_float4 scale0 = spu_sel( oneMinusT, spu_mul( sinf4( spu_mul( oneMinusT, angle ) ), recipSinAngle ), useSinWeights );
+    vec_float4 scale1 = spu_sel( t, spu_mul( sinf4( spu_mul( t, angle ) ), recipSinAngle ), useSinWeights );
+    vmathSoaV3ScalarMul( &part0, unitVec0, scale0 );
+    vmathSoaV3ScalarMul( &part1, unitVec1, scale1 );
+    vmathSoaV3Add( result, &part0, &part1 );
+}
+
+/* Splits an SoA vector into four AoS vectors, one per slot; result<i> receives slot i. */
+static inline void vmathSoaV3Get4Aos( const VmathSoaVector3 *vec, VmathVector3 *result0, VmathVector3 *result1, VmathVector3 *result2, VmathVector3 *result3 )
+{
+    vec_float4 xz01 = spu_shuffle( vec->x, vec->z, _VECTORMATH_SHUF_XAYB ); /* x0 z0 x1 z1 */
+    vec_float4 xz23 = spu_shuffle( vec->x, vec->z, _VECTORMATH_SHUF_ZCWD ); /* x2 z2 x3 z3 */
+    vmathV3MakeFrom128( result0, spu_shuffle( xz01, vec->y, _VECTORMATH_SHUF_XAYB ) ); /* x0 y0 z0 y1 (4th word is y1, not 0) */
+    vmathV3MakeFrom128( result1, spu_shuffle( xz01, vec->y, _VECTORMATH_SHUF_ZBW0 ) ); /* x1 y1 z1 0 */
+    vmathV3MakeFrom128( result2, spu_shuffle( xz23, vec->y, _VECTORMATH_SHUF_XCY0 ) ); /* x2 y2 z2 0 */
+    vmathV3MakeFrom128( result3, spu_shuffle( xz23, vec->y, _VECTORMATH_SHUF_ZDW0 ) ); /* x3 y3 z3 0 */
+}
+
+/* Reads four packed xyz triples (12 floats in three quadwords) and regroups them into SoA x, y and z. */
+static inline void vmathSoaV3LoadXYZArray( VmathSoaVector3 *vec, const vec_float4 *threeQuads )
+{
+    vec_float4 quad0 = threeQuads[0]; /* x0 y0 z0 x1 */
+    vec_float4 quad1 = threeQuads[1]; /* y1 z1 x2 y2 */
+    vec_float4 quad2 = threeQuads[2]; /* z2 x3 y3 z3 */
+    vec_float4 pairXY = spu_shuffle( quad0, quad1, _VECTORMATH_SHUF_XYCD ); /* x0 y0 x2 y2 */
+    vec_float4 pairZX = spu_shuffle( quad2, quad0, _VECTORMATH_SHUF_XYCD ); /* z2 x3 z0 x1 */
+    vec_float4 pairYZ = spu_shuffle( quad1, quad2, _VECTORMATH_SHUF_XYCD ); /* y1 z1 y3 z3 */
+    vec->x = spu_shuffle( pairXY, pairZX, _VECTORMATH_SHUF_XDZB ); /* x0 x1 x2 x3 */
+    vec->y = spu_shuffle( pairXY, pairYZ, _VECTORMATH_SHUF_YAWC ); /* y0 y1 y2 y3 */
+    vec->z = spu_shuffle( pairZX, pairYZ, _VECTORMATH_SHUF_ZBXD ); /* z0 z1 z2 z3 */
+}
+
+/* Writes the four slots of an SoA vector as packed xyz triples into three quadwords. */
+static inline void vmathSoaV3StoreXYZArray( const VmathSoaVector3 *vec, vec_float4 *threeQuads )
+{
+    vec_float4 pairXY = spu_shuffle( vec->x, vec->y, _VECTORMATH_SHUF_XAZC ); /* x0 y0 x2 y2 */
+    vec_float4 pairZX = spu_shuffle( vec->z, vec->x, _VECTORMATH_SHUF_ZDXB ); /* z2 x3 z0 x1 */
+    vec_float4 pairYZ = spu_shuffle( vec->y, vec->z, _VECTORMATH_SHUF_YBWD ); /* y1 z1 y3 z3 */
+    vec_float4 quad0 = spu_shuffle( pairXY, pairZX, _VECTORMATH_SHUF_XYCD ); /* x0 y0 z0 x1 */
+    vec_float4 quad1 = spu_shuffle( pairYZ, pairXY, _VECTORMATH_SHUF_XYCD ); /* y1 z1 x2 y2 */
+    vec_float4 quad2 = spu_shuffle( pairZX, pairYZ, _VECTORMATH_SHUF_XYCD ); /* z2 x3 y3 z3 */
+    threeQuads[0] = quad0; /* all three quadwords are formed before the first store */
+    threeQuads[1] = quad1;
+    threeQuads[2] = quad2;
+}
+
+/* Unpacks vec0 and vec1 to xyz triples (three quadwords each) and passes the six quadwords pairwise to _vmath2VfToHalfFloats. */
+static inline void vmathSoaV3StoreHalfFloats( const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1, vec_ushort8 *threeQuads )
+{
+    vec_float4 triples0[3], triples1[3];
+    vmathSoaV3StoreXYZArray( vec0, triples0 );
+    vmathSoaV3StoreXYZArray( vec1, triples1 );
+    threeQuads[0] = _vmath2VfToHalfFloats(triples0[0], triples0[1]);
+    threeQuads[1] = _vmath2VfToHalfFloats(triples0[2], triples1[0]); /* this output straddles vec0 and vec1 */
+    threeQuads[2] = _vmath2VfToHalfFloats(triples1[1], triples1[2]);
+}
+
+static inline void vmathSoaV3SetX( VmathSoaVector3 *result, vec_float4 _x )
+{
+    result->x = _x; /* replaces only the x quadword; y and z are not written */
+}
+
+static inline vec_float4 vmathSoaV3GetX( const VmathSoaVector3 *vec )
+{
+    return vec->x; /* the x values of all four slots, by value */
+}
+
+static inline void vmathSoaV3SetY( VmathSoaVector3 *result, vec_float4 _y )
+{
+    result->y = _y; /* replaces only the y quadword; x and z are not written */
+}
+
+static inline vec_float4 vmathSoaV3GetY( const VmathSoaVector3 *vec )
+{
+    return vec->y; /* the y values of all four slots, by value */
+}
+
+static inline void vmathSoaV3SetZ( VmathSoaVector3 *result, vec_float4 _z )
+{
+    result->z = _z; /* replaces only the z quadword; x and y are not written */
+}
+
+static inline vec_float4 vmathSoaV3GetZ( const VmathSoaVector3 *vec )
+{
+    return vec->z; /* the z values of all four slots, by value */
+}
+
+static inline void vmathSoaV3SetElem( VmathSoaVector3 *result, int idx, vec_float4 value )
+{
+    *(&result->x + idx) = value; /* idx steps in whole quadwords from x and is not range-checked; presumably 0..2 = x/y/z (confirm struct layout) */
+}
+
+static inline vec_float4 vmathSoaV3GetElem( const VmathSoaVector3 *vec, int idx )
+{
+    return *(&vec->x + idx); /* idx steps in whole quadwords from x and is not range-checked; presumably 0..2 = x/y/z (confirm struct layout) */
+}
+
+/* result = vec0 + vec1, formed with spu_add on each component quadword. */
+static inline void vmathSoaV3Add( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
+{
+    vmathSoaV3MakeFromElems( result, spu_add( vec0->x, vec1->x ),
+                             spu_add( vec0->y, vec1->y ), spu_add( vec0->z, vec1->z ) );
+}
+
+/* result = vec0 - vec1, formed with spu_sub on each component quadword. */
+static inline void vmathSoaV3Sub( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
+{
+    vmathSoaV3MakeFromElems( result, spu_sub( vec0->x, vec1->x ),
+                             spu_sub( vec0->y, vec1->y ), spu_sub( vec0->z, vec1->z ) );
+}
+
+static inline void vmathSoaV3AddP3( VmathSoaPoint3 *result, const VmathSoaVector3 *vec, const VmathSoaPoint3 *pnt1 )
+{
+    result->x = spu_add( vec->x, pnt1->x ); /* vector + point, written to a point, one component per line */
+    result->y = spu_add( vec->y, pnt1->y );
+    result->z = spu_add( vec->z, pnt1->z );
+}
+
+/* Multiplies every component by scalar; each slot uses its own entry of scalar. */
+static inline void vmathSoaV3ScalarMul( VmathSoaVector3 *result, const VmathSoaVector3 *vec, vec_float4 scalar )
+{
+    vmathSoaV3MakeFromElems( result, spu_mul( vec->x, scalar ),
+                             spu_mul( vec->y, scalar ), spu_mul( vec->z, scalar ) );
+}
+
+/* Divides every component by scalar using divf4; no check for zero divisors is made here. */
+static inline void vmathSoaV3ScalarDiv( VmathSoaVector3 *result, const VmathSoaVector3 *vec, vec_float4 scalar )
+{
+    vmathSoaV3MakeFromElems( result, divf4( vec->x, scalar ),
+                             divf4( vec->y, scalar ), divf4( vec->z, scalar ) );
+}
+
+/* Applies negatef4 to each of the three component quadwords. */
+static inline void vmathSoaV3Neg( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
+{
+    vmathSoaV3MakeFromElems( result, negatef4( vec->x ),
+                             negatef4( vec->y ), negatef4( vec->z ) );
+}
+
+/* Component-wise product: x with x, y with y, z with z. */
+static inline void vmathSoaV3MulPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
+{
+    vmathSoaV3MakeFromElems( result, spu_mul( vec0->x, vec1->x ),
+                             spu_mul( vec0->y, vec1->y ), spu_mul( vec0->z, vec1->z ) );
+}
+
+/* Component-wise quotient vec0 / vec1 via divf4; zero divisors are not checked here. */
+static inline void vmathSoaV3DivPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
+{
+    vmathSoaV3MakeFromElems( result, divf4( vec0->x, vec1->x ),
+                             divf4( vec0->y, vec1->y ), divf4( vec0->z, vec1->z ) );
+}
+
+/* Applies recipf4 to each of the three component quadwords. */
+static inline void vmathSoaV3RecipPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
+{
+    vmathSoaV3MakeFromElems( result, recipf4( vec->x ),
+                             recipf4( vec->y ), recipf4( vec->z ) );
+}
+
+/* Applies sqrtf4 to each of the three component quadwords. */
+static inline void vmathSoaV3SqrtPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
+{
+    vmathSoaV3MakeFromElems( result, sqrtf4( vec->x ),
+                             sqrtf4( vec->y ), sqrtf4( vec->z ) );
+}
+
+/* Applies rsqrtf4 to each of the three component quadwords. */
+static inline void vmathSoaV3RsqrtPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
+{
+    vmathSoaV3MakeFromElems( result, rsqrtf4( vec->x ),
+                             rsqrtf4( vec->y ), rsqrtf4( vec->z ) );
+}
+
+/* Applies fabsf4 to each of the three component quadwords. */
+static inline void vmathSoaV3AbsPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
+{
+    vmathSoaV3MakeFromElems( result, fabsf4( vec->x ),
+                             fabsf4( vec->y ), fabsf4( vec->z ) );
+}
+
+/* Applies copysignf4( vec0 component, vec1 component ) to x, y and z. */
+static inline void vmathSoaV3CopySignPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
+{
+    vmathSoaV3MakeFromElems( result, copysignf4( vec0->x, vec1->x ),
+                             copysignf4( vec0->y, vec1->y ), copysignf4( vec0->z, vec1->z ) );
+}
+
+/* Component-wise fmaxf4 of vec0 and vec1. */
+static inline void vmathSoaV3MaxPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
+{
+    vmathSoaV3MakeFromElems( result, fmaxf4( vec0->x, vec1->x ),
+                             fmaxf4( vec0->y, vec1->y ), fmaxf4( vec0->z, vec1->z ) );
+}
+
+/* Reduces x, y and z with fmaxf4, independently in each slot. */
+static inline vec_float4 vmathSoaV3MaxElem( const VmathSoaVector3 *vec )
+{
+    /* operand order is deliberate: x against y first, then z against that */
+    vec_float4 maxXY = fmaxf4( vec->x, vec->y );
+    return fmaxf4( vec->z, maxXY );
+}
+
+/* Component-wise fminf4 of vec0 and vec1. */
+static inline void vmathSoaV3MinPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
+{
+    vmathSoaV3MakeFromElems( result, fminf4( vec0->x, vec1->x ),
+                             fminf4( vec0->y, vec1->y ), fminf4( vec0->z, vec1->z ) );
+}
+
+/* Reduces x, y and z with fminf4, independently in each slot. */
+static inline vec_float4 vmathSoaV3MinElem( const VmathSoaVector3 *vec )
+{
+    /* operand order is deliberate: x against y first, then z against that */
+    vec_float4 minXY = fminf4( vec->x, vec->y );
+    return fminf4( vec->z, minXY );
+}
+
+/* Adds the three components of each slot as (x + y) + z. */
+static inline vec_float4 vmathSoaV3Sum( const VmathSoaVector3 *vec )
+{
+    /* floating-point addition is order-sensitive, so x + y is formed first */
+    vec_float4 xPlusY = spu_add( vec->x, vec->y );
+    return spu_add( xPlusY, vec->z );
+}
+
+/* Per-slot dot product, summed as (x0*x1 + y0*y1) + z0*z1. */
+static inline vec_float4 vmathSoaV3Dot( const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
+{
+    vec_float4 prodX = spu_mul( vec0->x, vec1->x );
+    vec_float4 prodY = spu_mul( vec0->y, vec1->y );
+    vec_float4 prodZ = spu_mul( vec0->z, vec1->z );
+    return spu_add( spu_add( prodX, prodY ), prodZ );
+}
+
+/* Per-slot squared length, summed as (x*x + y*y) + z*z. */
+static inline vec_float4 vmathSoaV3LengthSqr( const VmathSoaVector3 *vec )
+{
+    vec_float4 sqrX = spu_mul( vec->x, vec->x );
+    vec_float4 sqrY = spu_mul( vec->y, vec->y );
+    vec_float4 sqrZ = spu_mul( vec->z, vec->z );
+    return spu_add( spu_add( sqrX, sqrY ), sqrZ );
+}
+
+static inline vec_float4 vmathSoaV3Length( const VmathSoaVector3 *vec )
+{
+    return sqrtf4( vmathSoaV3LengthSqr( vec ) ); /* sqrtf4 of the per-slot squared length */
+}
+
+/* Scales vec by rsqrtf4 of its squared length, slot by slot. */
+/* NOTE(review): zero length is not guarded; the outcome then depends on what rsqrtf4 returns for 0. */
+static inline void vmathSoaV3Normalize( VmathSoaVector3 *result, const VmathSoaVector3 *vec )
+{
+    vec_float4 lenSqr = vmathSoaV3LengthSqr( vec );
+    vec_float4 lenInv = rsqrtf4( lenSqr );
+    vmathSoaV3MakeFromElems( result, spu_mul( vec->x, lenInv ),
+                             spu_mul( vec->y, lenInv ), spu_mul( vec->z, lenInv ) );
+}
+
+/* Per-slot cross product vec0 x vec1; all three components are computed before result is written. */
+static inline void vmathSoaV3Cross( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 )
+{
+    vec_float4 crossX = spu_sub( spu_mul( vec0->y, vec1->z ), spu_mul( vec0->z, vec1->y ) );
+    vec_float4 crossY = spu_sub( spu_mul( vec0->z, vec1->x ), spu_mul( vec0->x, vec1->z ) );
+    vec_float4 crossZ = spu_sub( spu_mul( vec0->x, vec1->y ), spu_mul( vec0->y, vec1->x ) );
+    vmathSoaV3MakeFromElems( result, crossX, crossY, crossZ );
+}
+
+/* Bitwise spu_sel per component: bits set in select1 take vec1, clear bits take vec0. */
+static inline void vmathSoaV3Select( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1, vec_uint4 select1 )
+{
+    vmathSoaV3MakeFromElems( result, spu_sel( vec0->x, vec1->x, select1 ),
+                             spu_sel( vec0->y, vec1->y, select1 ), spu_sel( vec0->z, vec1->z, select1 ) );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathSoaV3Print( const VmathSoaVector3 *vec )
+{
+    VmathVector3 vec0, vec1, vec2, vec3;
+    vmathSoaV3Get4Aos( vec, &vec0, &vec1, &vec2, &vec3 ); /* unpack to one AoS vector per slot */
+    printf("slot 0:\n");
+    vmathV3Print( &vec0 ); /* formatting of each vector is left to vmathV3Print */
+    printf("slot 1:\n");
+    vmathV3Print( &vec1 );
+    printf("slot 2:\n");
+    vmathV3Print( &vec2 );
+    printf("slot 3:\n");
+    vmathV3Print( &vec3 );
+}
+
+static inline void vmathSoaV3Prints( const VmathSoaVector3 *vec, const char *name )
+{
+    VmathVector3 vec0, vec1, vec2, vec3;
+    printf( "%s:\n", name ); /* caller-supplied label goes out before the slot dump */
+    vmathSoaV3Get4Aos( vec, &vec0, &vec1, &vec2, &vec3 ); /* unpack to one AoS vector per slot */
+    printf("slot 0:\n");
+    vmathV3Print( &vec0 );
+    printf("slot 1:\n");
+    vmathV3Print( &vec1 );
+    printf("slot 2:\n");
+    vmathV3Print( &vec2 );
+    printf("slot 3:\n");
+    vmathV3Print( &vec3 );
+}
+
+#endif
+
+static inline void vmathSoaV4Copy( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
+{
+    result->x = vec->x; /* member-wise copy: one quadword per component */
+    result->y = vec->y;
+    result->z = vec->z;
+    result->w = vec->w;
+}
+
+static inline void vmathSoaV4MakeFromElems( VmathSoaVector4 *result, vec_float4 _x, vec_float4 _y, vec_float4 _z, vec_float4 _w )
+{
+    result->x = _x; /* each argument carries one component for all four slots */
+    result->y = _y;
+    result->z = _z;
+    result->w = _w;
+}
+
+/* Builds a 4-vector from xyz plus a separately supplied w quadword. */
+static inline void vmathSoaV4MakeFromV3Scalar( VmathSoaVector4 *result, const VmathSoaVector3 *xyz, vec_float4 _w )
+{
+    vmathSoaV4MakeFromElems( result, xyz->x, xyz->y, xyz->z, _w );
+}
+
+/* Extends a 3-vector to four components with w = 0.0f in every slot. */
+static inline void vmathSoaV4MakeFromV3( VmathSoaVector4 *result, const VmathSoaVector3 *vec )
+{
+    vmathSoaV4MakeFromElems( result,
+                             vec->x, vec->y, vec->z,
+                             spu_splats(0.0f) );
+}
+
+/* Extends a point to four components with w = 1.0f in every slot. */
+static inline void vmathSoaV4MakeFromP3( VmathSoaVector4 *result, const VmathSoaPoint3 *pnt )
+{
+    vmathSoaV4MakeFromElems( result,
+                             pnt->x, pnt->y, pnt->z,
+                             spu_splats(1.0f) );
+}
+
+/* Takes over the x, y, z and w quadwords of a quaternion unchanged. */
+static inline void vmathSoaV4MakeFromQ( VmathSoaVector4 *result, const VmathSoaQuat *quat )
+{
+    vmathSoaV4MakeFromElems( result,
+                             quat->x, quat->y,
+                             quat->z, quat->w );
+}
+
+/* Sets every component of result to the same quadword of per-slot scalars. */
+static inline void vmathSoaV4MakeFromScalar( VmathSoaVector4 *result, vec_float4 scalar )
+{
+    vmathSoaV4MakeFromElems( result,
+                             scalar, scalar,
+                             scalar, scalar );
+}
+
+/* Broadcasts one AoS vector: every slot of result's x, y, z and w holds that vector's matching word. */
+static inline void vmathSoaV4MakeFromAos( VmathSoaVector4 *result, const VmathVector4 *vec )
+{
+    vec_float4 aos = vec->vec128;
+    vec_uchar16 splatX = (vec_uchar16)spu_splats((int)_VECTORMATH_SHUF_X);
+    vec_uchar16 splatY = (vec_uchar16)spu_splats((int)_VECTORMATH_SHUF_Y);
+    vec_uchar16 splatZ = (vec_uchar16)spu_splats((int)_VECTORMATH_SHUF_Z);
+    vec_uchar16 splatW = (vec_uchar16)spu_splats((int)_VECTORMATH_SHUF_W);
+    vmathSoaV4MakeFromElems( result,
+                             spu_shuffle( aos, aos, splatX ), spu_shuffle( aos, aos, splatY ),
+                             spu_shuffle( aos, aos, splatZ ), spu_shuffle( aos, aos, splatW ) );
+}
+
+/* Transposes four AoS vectors into SoA form; slot i of each component comes from vec<i>. */
+static inline void vmathSoaV4MakeFrom4Aos( VmathSoaVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1, const VmathVector4 *vec2, const VmathVector4 *vec3 )
+{
+    vec_float4 xy02 = spu_shuffle( vec0->vec128, vec2->vec128, _VECTORMATH_SHUF_XAYB ); /* x0 x2 y0 y2 */
+    vec_float4 xy13 = spu_shuffle( vec1->vec128, vec3->vec128, _VECTORMATH_SHUF_XAYB ); /* x1 x3 y1 y3 */
+    vec_float4 zw02 = spu_shuffle( vec0->vec128, vec2->vec128, _VECTORMATH_SHUF_ZCWD ); /* z0 z2 w0 w2 */
+    vec_float4 zw13 = spu_shuffle( vec1->vec128, vec3->vec128, _VECTORMATH_SHUF_ZCWD ); /* z1 z3 w1 w3 */
+    result->x = spu_shuffle( xy02, xy13, _VECTORMATH_SHUF_XAYB ); /* x0 x1 x2 x3 */
+    result->y = spu_shuffle( xy02, xy13, _VECTORMATH_SHUF_ZCWD ); /* y0 y1 y2 y3 */
+    result->z = spu_shuffle( zw02, zw13, _VECTORMATH_SHUF_XAYB ); /* z0 z1 z2 z3 */
+    result->w = spu_shuffle( zw02, zw13, _VECTORMATH_SHUF_ZCWD ); /* w0 w1 w2 w3 */
+}
+
+static inline void vmathSoaV4MakeXAxis( VmathSoaVector4 *result )
+{
+    vmathSoaV4MakeFromElems( result, spu_splats(1.0f), spu_splats(0.0f), spu_splats(0.0f), spu_splats(0.0f) ); /* (1,0,0,0) in every slot */
+}
+
+static inline void vmathSoaV4MakeYAxis( VmathSoaVector4 *result )
+{
+    vmathSoaV4MakeFromElems( result, spu_splats(0.0f), spu_splats(1.0f), spu_splats(0.0f), spu_splats(0.0f) ); /* (0,1,0,0) in every slot */
+}
+
+static inline void vmathSoaV4MakeZAxis( VmathSoaVector4 *result )
+{
+    vmathSoaV4MakeFromElems( result, spu_splats(0.0f), spu_splats(0.0f), spu_splats(1.0f), spu_splats(0.0f) ); /* (0,0,1,0) in every slot */
+}
+
+static inline void vmathSoaV4MakeWAxis( VmathSoaVector4 *result )
+{
+    vmathSoaV4MakeFromElems( result, spu_splats(0.0f), spu_splats(0.0f), spu_splats(0.0f), spu_splats(1.0f) ); /* (0,0,0,1) in every slot */
+}
+
+static inline void vmathSoaV4Lerp( VmathSoaVector4 *result, vec_float4 t, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
+{
+    VmathSoaVector4 delta; /* holds (vec1 - vec0), then that difference scaled by t */
+    vmathSoaV4Sub( &delta, vec1, vec0 );
+    vmathSoaV4ScalarMul( &delta, &delta, t ); /* in place: each component reads only itself */
+    vmathSoaV4Add( result, vec0, &delta ); /* vec0 + t * (vec1 - vec0), per slot */
+}
+
+/* Per-slot spherical interpolation of two unit vectors; slots whose dot product is not below _VECTORMATH_SLERP_TOL use the linear weights (1-t, t). */
+static inline void vmathSoaV4Slerp( VmathSoaVector4 *result, vec_float4 t, const VmathSoaVector4 *unitVec0, const VmathSoaVector4 *unitVec1 )
+{
+    VmathSoaVector4 part0, part1;
+    vec_float4 cosAngle = vmathSoaV4Dot( unitVec0, unitVec1 );
+    vec_uint4 useSinWeights = (vec_uint4)spu_cmpgt( spu_splats(_VECTORMATH_SLERP_TOL), cosAngle ); /* tol > cos */
+    vec_float4 angle = acosf4( cosAngle );
+    vec_float4 recipSinAngle = recipf4( sinf4( angle ) );
+    vec_float4 oneMinusT = spu_sub( spu_splats(1.0f), t );
+    vec_float4 scale0 = spu_sel( oneMinusT, spu_mul( sinf4( spu_mul( oneMinusT, angle ) ), recipSinAngle ), useSinWeights );
+    vec_float4 scale1 = spu_sel( t, spu_mul( sinf4( spu_mul( t, angle ) ), recipSinAngle ), useSinWeights );
+    vmathSoaV4ScalarMul( &part0, unitVec0, scale0 );
+    vmathSoaV4ScalarMul( &part1, unitVec1, scale1 );
+    vmathSoaV4Add( result, &part0, &part1 );
+}
+
+/* Splits an SoA vector into four AoS vectors, one per slot; result<i> receives slot i. */
+static inline void vmathSoaV4Get4Aos( const VmathSoaVector4 *vec, VmathVector4 *result0, VmathVector4 *result1, VmathVector4 *result2, VmathVector4 *result3 )
+{
+    vec_float4 xz01 = spu_shuffle( vec->x, vec->z, _VECTORMATH_SHUF_XAYB ); /* x0 z0 x1 z1 */
+    vec_float4 yw01 = spu_shuffle( vec->y, vec->w, _VECTORMATH_SHUF_XAYB ); /* y0 w0 y1 w1 */
+    vec_float4 xz23 = spu_shuffle( vec->x, vec->z, _VECTORMATH_SHUF_ZCWD ); /* x2 z2 x3 z3 */
+    vec_float4 yw23 = spu_shuffle( vec->y, vec->w, _VECTORMATH_SHUF_ZCWD ); /* y2 w2 y3 w3 */
+    vmathV4MakeFrom128( result0, spu_shuffle( xz01, yw01, _VECTORMATH_SHUF_XAYB ) ); /* x0 y0 z0 w0 */
+    vmathV4MakeFrom128( result1, spu_shuffle( xz01, yw01, _VECTORMATH_SHUF_ZCWD ) ); /* x1 y1 z1 w1 */
+    vmathV4MakeFrom128( result2, spu_shuffle( xz23, yw23, _VECTORMATH_SHUF_XAYB ) ); /* x2 y2 z2 w2 */
+    vmathV4MakeFrom128( result3, spu_shuffle( xz23, yw23, _VECTORMATH_SHUF_ZCWD ) ); /* x3 y3 z3 w3 */
+}
+
+static inline void vmathSoaV4StoreHalfFloats( const VmathSoaVector4 *vec, vec_ushort8 *twoQuads )
+{
+    VmathVector4 slot0, slot1, slot2, slot3; /* AoS form of the four slots */
+    vmathSoaV4Get4Aos( vec, &slot0, &slot1, &slot2, &slot3 );
+    twoQuads[0] = _vmath2VfToHalfFloats(slot0.vec128, slot1.vec128); /* slots 0 and 1 go into the first output quadword */
+    twoQuads[1] = _vmath2VfToHalfFloats(slot2.vec128, slot3.vec128); /* slots 2 and 3 go into the second */
+}
+
+static inline void vmathSoaV4SetXYZ( VmathSoaVector4 *result, const VmathSoaVector3 *vec )
+{
+    result->x = vec->x; /* only x, y and z are replaced; result->w is not written */
+    result->y = vec->y;
+    result->z = vec->z;
+}
+
+static inline void vmathSoaV4GetXYZ( VmathSoaVector3 *result, const VmathSoaVector4 *vec )
+{
+    vmathSoaV3MakeFromElems( result, vec->x, vec->y, vec->z ); /* w is dropped */
+}
+
+static inline void vmathSoaV4SetX( VmathSoaVector4 *result, vec_float4 _x )
+{
+    result->x = _x; /* replaces only the x quadword; the other components are not written */
+}
+
+static inline vec_float4 vmathSoaV4GetX( const VmathSoaVector4 *vec )
+{
+    return vec->x; /* the x values of all four slots, by value */
+}
+
+static inline void vmathSoaV4SetY( VmathSoaVector4 *result, vec_float4 _y )
+{
+    result->y = _y; /* replaces only the y quadword; the other components are not written */
+}
+
+static inline vec_float4 vmathSoaV4GetY( const VmathSoaVector4 *vec )
+{
+    return vec->y; /* the y values of all four slots, by value */
+}
+
+static inline void vmathSoaV4SetZ( VmathSoaVector4 *result, vec_float4 _z )
+{
+    result->z = _z; /* replaces only the z quadword; the other components are not written */
+}
+
+static inline vec_float4 vmathSoaV4GetZ( const VmathSoaVector4 *vec )
+{
+    return vec->z; /* the z values of all four slots, by value */
+}
+
+static inline void vmathSoaV4SetW( VmathSoaVector4 *result, vec_float4 _w )
+{
+    result->w = _w; /* replaces only the w quadword; the other components are not written */
+}
+
+static inline vec_float4 vmathSoaV4GetW( const VmathSoaVector4 *vec )
+{
+    return vec->w; /* the w values of all four slots, by value */
+}
+
+static inline void vmathSoaV4SetElem( VmathSoaVector4 *result, int idx, vec_float4 value )
+{
+    *(&result->x + idx) = value; /* idx steps in whole quadwords from x and is not range-checked; presumably 0..3 = x/y/z/w (confirm struct layout) */
+}
+
+static inline vec_float4 vmathSoaV4GetElem( const VmathSoaVector4 *vec, int idx )
+{
+    return *(&vec->x + idx); /* idx steps in whole quadwords from x and is not range-checked; presumably 0..3 = x/y/z/w (confirm struct layout) */
+}
+
+/* result = vec0 + vec1, formed with spu_add on each component quadword. */
+static inline void vmathSoaV4Add( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
+{
+    vmathSoaV4MakeFromElems( result,
+                             spu_add( vec0->x, vec1->x ), spu_add( vec0->y, vec1->y ),
+                             spu_add( vec0->z, vec1->z ), spu_add( vec0->w, vec1->w ) );
+}
+
+/* result = vec0 - vec1, formed with spu_sub on each component quadword. */
+static inline void vmathSoaV4Sub( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
+{
+    vmathSoaV4MakeFromElems( result,
+                             spu_sub( vec0->x, vec1->x ), spu_sub( vec0->y, vec1->y ),
+                             spu_sub( vec0->z, vec1->z ), spu_sub( vec0->w, vec1->w ) );
+}
+
+/* Multiplies every component by scalar; each slot uses its own entry of scalar. */
+static inline void vmathSoaV4ScalarMul( VmathSoaVector4 *result, const VmathSoaVector4 *vec, vec_float4 scalar )
+{
+    vmathSoaV4MakeFromElems( result,
+                             spu_mul( vec->x, scalar ), spu_mul( vec->y, scalar ),
+                             spu_mul( vec->z, scalar ), spu_mul( vec->w, scalar ) );
+}
+
+/* Divides every component by scalar using divf4; no check for zero divisors is made here. */
+static inline void vmathSoaV4ScalarDiv( VmathSoaVector4 *result, const VmathSoaVector4 *vec, vec_float4 scalar )
+{
+    vmathSoaV4MakeFromElems( result,
+                             divf4( vec->x, scalar ), divf4( vec->y, scalar ),
+                             divf4( vec->z, scalar ), divf4( vec->w, scalar ) );
+}
+
+/* Applies negatef4 to each of the four component quadwords. */
+static inline void vmathSoaV4Neg( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
+{
+    vmathSoaV4MakeFromElems( result,
+                             negatef4( vec->x ), negatef4( vec->y ),
+                             negatef4( vec->z ), negatef4( vec->w ) );
+}
+
+/* Component-wise product: x with x, y with y, z with z, w with w. */
+static inline void vmathSoaV4MulPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
+{
+    vmathSoaV4MakeFromElems( result,
+                             spu_mul( vec0->x, vec1->x ), spu_mul( vec0->y, vec1->y ),
+                             spu_mul( vec0->z, vec1->z ), spu_mul( vec0->w, vec1->w ) );
+}
+
+/* Component-wise quotient vec0 / vec1 via divf4; zero divisors are not checked here. */
+static inline void vmathSoaV4DivPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
+{
+    vmathSoaV4MakeFromElems( result,
+                             divf4( vec0->x, vec1->x ), divf4( vec0->y, vec1->y ),
+                             divf4( vec0->z, vec1->z ), divf4( vec0->w, vec1->w ) );
+}
+
+/* Applies recipf4 to each of the four component quadwords. */
+static inline void vmathSoaV4RecipPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
+{
+    vmathSoaV4MakeFromElems( result,
+                             recipf4( vec->x ), recipf4( vec->y ),
+                             recipf4( vec->z ), recipf4( vec->w ) );
+}
+
+/* Applies sqrtf4 to each of the four component quadwords. */
+static inline void vmathSoaV4SqrtPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
+{
+    vmathSoaV4MakeFromElems( result,
+                             sqrtf4( vec->x ), sqrtf4( vec->y ),
+                             sqrtf4( vec->z ), sqrtf4( vec->w ) );
+}
+
+/* Applies rsqrtf4 to each of the four component quadwords. */
+static inline void vmathSoaV4RsqrtPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
+{
+    vmathSoaV4MakeFromElems( result,
+                             rsqrtf4( vec->x ), rsqrtf4( vec->y ),
+                             rsqrtf4( vec->z ), rsqrtf4( vec->w ) );
+}
+
+/* Applies fabsf4 to each of the four component quadwords. */
+static inline void vmathSoaV4AbsPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
+{
+    vmathSoaV4MakeFromElems( result,
+                             fabsf4( vec->x ), fabsf4( vec->y ),
+                             fabsf4( vec->z ), fabsf4( vec->w ) );
+}
+
+/* Applies copysignf4( vec0 component, vec1 component ) to x, y, z and w. */
+static inline void vmathSoaV4CopySignPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
+{
+    vmathSoaV4MakeFromElems( result,
+                             copysignf4( vec0->x, vec1->x ), copysignf4( vec0->y, vec1->y ),
+                             copysignf4( vec0->z, vec1->z ), copysignf4( vec0->w, vec1->w ) );
+}
+
+/* Component-wise fmaxf4 of vec0 and vec1. */
+static inline void vmathSoaV4MaxPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
+{
+    vmathSoaV4MakeFromElems( result,
+                             fmaxf4( vec0->x, vec1->x ), fmaxf4( vec0->y, vec1->y ),
+                             fmaxf4( vec0->z, vec1->z ), fmaxf4( vec0->w, vec1->w ) );
+}
+
+/* Reduces x, y, z and w with fmaxf4, independently in each slot. */
+static inline vec_float4 vmathSoaV4MaxElem( const VmathSoaVector4 *vec )
+{
+    /* operand order is deliberate: each new component is the first argument */
+    vec_float4 maxXY = fmaxf4( vec->x, vec->y );
+    vec_float4 maxXYZ = fmaxf4( vec->z, maxXY );
+    return fmaxf4( vec->w, maxXYZ );
+}
+
+/* Component-wise fminf4 of vec0 and vec1. */
+static inline void vmathSoaV4MinPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
+{
+    vmathSoaV4MakeFromElems( result,
+                             fminf4( vec0->x, vec1->x ), fminf4( vec0->y, vec1->y ),
+                             fminf4( vec0->z, vec1->z ), fminf4( vec0->w, vec1->w ) );
+}
+
+/* Reduces x, y, z and w with fminf4, independently in each slot. */
+static inline vec_float4 vmathSoaV4MinElem( const VmathSoaVector4 *vec )
+{
+    /* operand order is deliberate: each new component is the first argument */
+    vec_float4 minXY = fminf4( vec->x, vec->y );
+    vec_float4 minXYZ = fminf4( vec->z, minXY );
+    return fminf4( vec->w, minXYZ );
+}
+
+/* Adds the four components of each slot as ((x + y) + z) + w. */
+static inline vec_float4 vmathSoaV4Sum( const VmathSoaVector4 *vec )
+{
+    /* floating-point addition is order-sensitive, so the left-to-right order is fixed */
+    vec_float4 sumXY = spu_add( vec->x, vec->y );
+    vec_float4 sumXYZ = spu_add( sumXY, vec->z );
+    return spu_add( sumXYZ, vec->w );
+}
+
+static inline vec_float4 vmathSoaV4Dot( const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 )
+{
+    /* Accumulate the component products x, y, z, w in that order using separate spu_mul / spu_add steps. */
+    vec_float4 dot = spu_mul( vec0->x, vec1->x );
+    dot = spu_add( dot, spu_mul( vec0->y, vec1->y ) );
+    dot = spu_add( dot, spu_mul( vec0->z, vec1->z ) );
+    dot = spu_add( dot, spu_mul( vec0->w, vec1->w ) );
+    return dot;
+}
+
+static inline vec_float4 vmathSoaV4LengthSqr( const VmathSoaVector4 *vec )
+{
+    /* Sum of squares x*x + y*y + z*z + w*w, accumulated in that order with spu_mul / spu_add. */
+    vec_float4 lenSqr = spu_mul( vec->x, vec->x );
+    lenSqr = spu_add( lenSqr, spu_mul( vec->y, vec->y ) );
+    lenSqr = spu_add( lenSqr, spu_mul( vec->z, vec->z ) );
+    lenSqr = spu_add( lenSqr, spu_mul( vec->w, vec->w ) );
+    return lenSqr;
+}
+
+static inline vec_float4 vmathSoaV4Length( const VmathSoaVector4 *vec )
+{   /* Length computed as sqrtf4 of the squared length returned by vmathSoaV4LengthSqr. */
+    return sqrtf4( vmathSoaV4LengthSqr( vec ) );
+}
+
+static inline void vmathSoaV4Normalize( VmathSoaVector4 *result, const VmathSoaVector4 *vec )
+{
+    /* Multiply every component by rsqrtf4 of the squared length (from vmathSoaV4LengthSqr).
+       Note: nothing here guards against a squared length of zero. */
+    const vec_float4 invLen = rsqrtf4( vmathSoaV4LengthSqr( vec ) );
+    result->x = spu_mul( vec->x, invLen );
+    result->y = spu_mul( vec->y, invLen );
+    result->z = spu_mul( vec->z, invLen );
+    result->w = spu_mul( vec->w, invLen );
+}
+
+static inline void vmathSoaV4Select( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1, vec_uint4 select1 )
+{   /* Apply spu_sel with the same select1 mask to x, y, z and w; vec0 is the first operand, vec1 the second. */
+    result->x = spu_sel( vec0->x, vec1->x, select1 );
+    result->y = spu_sel( vec0->y, vec1->y, select1 );
+    result->z = spu_sel( vec0->z, vec1->z, select1 );
+    result->w = spu_sel( vec0->w, vec1->w, select1 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathSoaV4Print( const VmathSoaVector4 *vec )
+{
+    VmathVector4 slot0, slot1, slot2, slot3; /* one AoS vector per SIMD slot */
+    vmathSoaV4Get4Aos( vec, &slot0, &slot1, &slot2, &slot3 ); /* unpack, then dump each slot under its header */
+    printf("slot 0:\n");
+    vmathV4Print( &slot0 );
+    printf("slot 1:\n");
+    vmathV4Print( &slot1 );
+    printf("slot 2:\n");
+    vmathV4Print( &slot2 );
+    printf("slot 3:\n");
+    vmathV4Print( &slot3 );
+}
+
+static inline void vmathSoaV4Prints( const VmathSoaVector4 *vec, const char *name )
+{
+    /*
+     * Labelled variant of vmathSoaV4Print: emit "<name>:" on its own line,
+     * then hand off to vmathSoaV4Print for the per-slot dump.  The slot
+     * headers and vector formatting therefore come from that routine, and
+     * the output sequence is: name line, then "slot 0:" .. "slot 3:", each
+     * followed by the vmathV4Print output for that slot's AoS vector.
+     *
+     * Like vmathSoaV4Print, this is compiled only under _VECTORMATH_DEBUG.
+     */
+    printf( "%s:\n", name );
+    vmathSoaV4Print( vec );
+}
+
+#endif
+
+static inline void vmathSoaP3Copy( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt )
+{   /* Copy pnt's x, y and z components into result. */
+    result->x = pnt->x;
+    result->y = pnt->y;
+    result->z = pnt->z;
+}
+
+static inline void vmathSoaP3MakeFromElems( VmathSoaPoint3 *result, vec_float4 _x, vec_float4 _y, vec_float4 _z )
+{   /* Build a point from three separate component quadwords. */
+    result->x = _x;
+    result->y = _y;
+    result->z = _z;
+}
+
+static inline void vmathSoaP3MakeFromV3( VmathSoaPoint3 *result, const VmathSoaVector3 *vec )
+{   /* Reinterpret a 3-D vector as a point by copying x, y and z unchanged. */
+    result->x = vec->x;
+    result->y = vec->y;
+    result->z = vec->z;
+}
+
+static inline void vmathSoaP3MakeFromScalar( VmathSoaPoint3 *result, vec_float4 scalar )
+{   /* Set x, y and z all to the same quadword, scalar. */
+    result->x = scalar;
+    result->y = scalar;
+    result->z = scalar;
+}
+
+static inline void vmathSoaP3MakeFromAos( VmathSoaPoint3 *result, const VmathPoint3 *pnt )
+{
+    vec_uchar16 pickWord0 = (vec_uchar16)spu_splats((int)0x00010203); /* shuffle pattern: bytes 0-3 repeated in every word */
+    vec_uchar16 pickWord1 = (vec_uchar16)spu_splats((int)0x04050607); /* shuffle pattern: bytes 4-7 repeated in every word */
+    vec_uchar16 pickWord2 = (vec_uchar16)spu_splats((int)0x08090a0b); /* shuffle pattern: bytes 8-11 repeated in every word */
+    vec_float4 aos = pnt->vec128; /* read the AoS quadword once, before any store to result */
+    result->x = spu_shuffle( aos, aos, pickWord0 );
+    result->y = spu_shuffle( aos, aos, pickWord1 );
+    result->z = spu_shuffle( aos, aos, pickWord2 );
+}
+
+static inline void vmathSoaP3MakeFrom4Aos( VmathSoaPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, const VmathPoint3 *pnt2, const VmathPoint3 *pnt3 )
+{
+    vec_float4 xy02, xy13, zw02, zw13; /* interleaved halves of the four inputs; all read before result is written */
+    xy02 = spu_shuffle( pnt0->vec128, pnt2->vec128, _VECTORMATH_SHUF_XAYB ); /* words 0,1 of pnt0 interleaved with pnt2 */
+    xy13 = spu_shuffle( pnt1->vec128, pnt3->vec128, _VECTORMATH_SHUF_XAYB ); /* words 0,1 of pnt1 interleaved with pnt3 */
+    zw02 = spu_shuffle( pnt0->vec128, pnt2->vec128, _VECTORMATH_SHUF_ZCWD ); /* words 2,3 of pnt0 interleaved with pnt2 */
+    zw13 = spu_shuffle( pnt1->vec128, pnt3->vec128, _VECTORMATH_SHUF_ZCWD ); /* words 2,3 of pnt1 interleaved with pnt3 */
+    result->x = spu_shuffle( xy02, xy13, _VECTORMATH_SHUF_XAYB ); /* word 0 of pnt0..pnt3, in order */
+    result->y = spu_shuffle( xy02, xy13, _VECTORMATH_SHUF_ZCWD ); /* word 1 of pnt0..pnt3, in order */
+    result->z = spu_shuffle( zw02, zw13, _VECTORMATH_SHUF_XAYB ); /* word 2 of pnt0..pnt3, in order */
+}
+
+static inline void vmathSoaP3Lerp( VmathSoaPoint3 *result, vec_float4 t, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
+{
+    VmathSoaVector3 delta, step; /* result = pnt0 + t * ( pnt1 - pnt0 ), built in three helper calls */
+    vmathSoaP3Sub( &delta, pnt1, pnt0 );
+    vmathSoaV3ScalarMul( &step, &delta, t );
+    vmathSoaP3AddV3( result, pnt0, &step );
+}
+
+static inline void vmathSoaP3Get4Aos( const VmathSoaPoint3 *pnt, VmathPoint3 *result0, VmathPoint3 *result1, VmathPoint3 *result2, VmathPoint3 *result3 )
+{
+    vec_float4 xz01, xz23; /* x and z interleaved for slots 0,1 and for slots 2,3 */
+    xz01 = spu_shuffle( pnt->x, pnt->z, _VECTORMATH_SHUF_XAYB );
+    xz23 = spu_shuffle( pnt->x, pnt->z, _VECTORMATH_SHUF_ZCWD );
+    vmathP3MakeFrom128( result0, spu_shuffle( xz01, pnt->y, _VECTORMATH_SHUF_XAYB ) ); /* slot 0; last word is taken from y, not zeroed */
+    vmathP3MakeFrom128( result1, spu_shuffle( xz01, pnt->y, _VECTORMATH_SHUF_ZBW0 ) ); /* slot 1 */
+    vmathP3MakeFrom128( result2, spu_shuffle( xz23, pnt->y, _VECTORMATH_SHUF_XCY0 ) ); /* slot 2 */
+    vmathP3MakeFrom128( result3, spu_shuffle( xz23, pnt->y, _VECTORMATH_SHUF_ZDW0 ) ); /* slot 3 */
+}
+
+static inline void vmathSoaP3LoadXYZArray( VmathSoaPoint3 *vec, const vec_float4 *threeQuads )
+{
+    /* De-interleave three quadwords of packed x,y,z triples into separate x, y and z quadwords. */
+    const vec_float4 quad0 = threeQuads[0]; /* x0 y0 z0 x1 */
+    const vec_float4 quad1 = threeQuads[1]; /* y1 z1 x2 y2 */
+    const vec_float4 quad2 = threeQuads[2]; /* z2 x3 y3 z3 */
+    const vec_float4 x0y0x2y2 = spu_shuffle( quad0, quad1, _VECTORMATH_SHUF_XYCD );
+    const vec_float4 z2x3z0x1 = spu_shuffle( quad2, quad0, _VECTORMATH_SHUF_XYCD );
+    const vec_float4 y1z1y3z3 = spu_shuffle( quad1, quad2, _VECTORMATH_SHUF_XYCD );
+    vec->x = spu_shuffle( x0y0x2y2, z2x3z0x1, _VECTORMATH_SHUF_XDZB );
+    vec->y = spu_shuffle( x0y0x2y2, y1z1y3z3, _VECTORMATH_SHUF_YAWC );
+    vec->z = spu_shuffle( z2x3z0x1, y1z1y3z3, _VECTORMATH_SHUF_ZBXD );
+}
+
+static inline void vmathSoaP3StoreXYZArray( const VmathSoaPoint3 *vec, vec_float4 *threeQuads )
+{
+    /* Interleave the x, y and z quadwords into three quadwords of packed x,y,z triples. */
+    const vec_float4 x0y0x2y2 = spu_shuffle( vec->x, vec->y, _VECTORMATH_SHUF_XAZC );
+    const vec_float4 z2x3z0x1 = spu_shuffle( vec->z, vec->x, _VECTORMATH_SHUF_ZDXB );
+    const vec_float4 y1z1y3z3 = spu_shuffle( vec->y, vec->z, _VECTORMATH_SHUF_YBWD );
+    const vec_float4 quad0 = spu_shuffle( x0y0x2y2, z2x3z0x1, _VECTORMATH_SHUF_XYCD ); /* x0 y0 z0 x1 */
+    const vec_float4 quad1 = spu_shuffle( y1z1y3z3, x0y0x2y2, _VECTORMATH_SHUF_XYCD ); /* y1 z1 x2 y2 */
+    const vec_float4 quad2 = spu_shuffle( z2x3z0x1, y1z1y3z3, _VECTORMATH_SHUF_XYCD ); /* z2 x3 y3 z3 */
+    threeQuads[0] = quad0;
+    threeQuads[1] = quad1;
+    threeQuads[2] = quad2;
+}
+
+static inline void vmathSoaP3StoreHalfFloats( const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1, vec_ushort8 *threeQuads )
+{
+    vec_float4 packed0[3]; /* pnt0 as three packed-XYZ float quadwords */
+    vec_float4 packed1[3]; /* pnt1 as three packed-XYZ float quadwords */
+    vmathSoaP3StoreXYZArray( pnt0, packed0 );
+    vmathSoaP3StoreXYZArray( pnt1, packed1 );
+    threeQuads[0] = _vmath2VfToHalfFloats(packed0[0], packed0[1]); /* six float quadwords are converted pairwise, */
+    threeQuads[1] = _vmath2VfToHalfFloats(packed0[2], packed1[0]); /* pnt0's data first, then pnt1's, giving     */
+    threeQuads[2] = _vmath2VfToHalfFloats(packed1[1], packed1[2]); /* three vec_ushort8 output quadwords          */
+}
+
+static inline void vmathSoaP3SetX( VmathSoaPoint3 *result, vec_float4 _x )
+{   /* Overwrite the x component only; y and z are left untouched. */
+    result->x = _x;
+}
+
+static inline vec_float4 vmathSoaP3GetX( const VmathSoaPoint3 *pnt )
+{   /* Return the x component quadword. */
+    return pnt->x;
+}
+
+static inline void vmathSoaP3SetY( VmathSoaPoint3 *result, vec_float4 _y )
+{   /* Overwrite the y component only; x and z are left untouched. */
+    result->y = _y;
+}
+
+static inline vec_float4 vmathSoaP3GetY( const VmathSoaPoint3 *pnt )
+{   /* Return the y component quadword. */
+    return pnt->y;
+}
+
+static inline void vmathSoaP3SetZ( VmathSoaPoint3 *result, vec_float4 _z )
+{   /* Overwrite the z component only; x and y are left untouched. */
+    result->z = _z;
+}
+
+static inline vec_float4 vmathSoaP3GetZ( const VmathSoaPoint3 *pnt )
+{   /* Return the z component quadword. */
+    return pnt->z;
+}
+
+static inline void vmathSoaP3SetElem( VmathSoaPoint3 *result, int idx, vec_float4 value )
+{   /* Write value to the idx-th quadword counted from &result->x. idx is not range-checked. NOTE(review): relies on x, y, z being contiguous in the struct - confirm. */
+    *(&result->x + idx) = value;
+}
+
+static inline vec_float4 vmathSoaP3GetElem( const VmathSoaPoint3 *pnt, int idx )
+{   /* Read the idx-th quadword counted from &pnt->x. idx is not range-checked. NOTE(review): relies on x, y, z being contiguous in the struct - confirm. */
+    return *(&pnt->x + idx);
+}
+
+static inline void vmathSoaP3Sub( VmathSoaVector3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
+{   /* Point minus point gives a vector: result = pnt0 - pnt1, per component. */
+    result->x = spu_sub( pnt0->x, pnt1->x );
+    result->y = spu_sub( pnt0->y, pnt1->y );
+    result->z = spu_sub( pnt0->z, pnt1->z );
+}
+
+static inline void vmathSoaP3AddV3( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, const VmathSoaVector3 *vec1 )
+{   /* Point plus vector gives a point: result = pnt + vec1, per component. */
+    result->x = spu_add( pnt->x, vec1->x );
+    result->y = spu_add( pnt->y, vec1->y );
+    result->z = spu_add( pnt->z, vec1->z );
+}
+
+static inline void vmathSoaP3SubV3( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, const VmathSoaVector3 *vec1 )
+{   /* Point minus vector gives a point: result = pnt - vec1, per component. */
+    result->x = spu_sub( pnt->x, vec1->x );
+    result->y = spu_sub( pnt->y, vec1->y );
+    result->z = spu_sub( pnt->z, vec1->z );
+}
+
+static inline void vmathSoaP3MulPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
+{   /* Per component: result = spu_mul( pnt0, pnt1 ) for x, y and z. */
+    result->x = spu_mul( pnt0->x, pnt1->x );
+    result->y = spu_mul( pnt0->y, pnt1->y );
+    result->z = spu_mul( pnt0->z, pnt1->z );
+}
+
+static inline void vmathSoaP3DivPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
+{   /* Per component: result = divf4( pnt0, pnt1 ), i.e. pnt0 is the first argument and pnt1 the second. */
+    result->x = divf4( pnt0->x, pnt1->x );
+    result->y = divf4( pnt0->y, pnt1->y );
+    result->z = divf4( pnt0->z, pnt1->z );
+}
+
+static inline void vmathSoaP3RecipPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt )
+{   /* Store recipf4 of each of pnt's x, y and z components in result. */
+    result->x = recipf4( pnt->x );
+    result->y = recipf4( pnt->y );
+    result->z = recipf4( pnt->z );
+}
+
+static inline void vmathSoaP3SqrtPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt )
+{   /* Store sqrtf4 of each of pnt's x, y and z components in result. */
+    result->x = sqrtf4( pnt->x );
+    result->y = sqrtf4( pnt->y );
+    result->z = sqrtf4( pnt->z );
+}
+
+static inline void vmathSoaP3RsqrtPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt )
+{   /* Store rsqrtf4 of each of pnt's x, y and z components in result. */
+    result->x = rsqrtf4( pnt->x );
+    result->y = rsqrtf4( pnt->y );
+    result->z = rsqrtf4( pnt->z );
+}
+
+static inline void vmathSoaP3AbsPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt )
+{   /* Store fabsf4 of each of pnt's x, y and z components in result. */
+    result->x = fabsf4( pnt->x );
+    result->y = fabsf4( pnt->y );
+    result->z = fabsf4( pnt->z );
+}
+
+static inline void vmathSoaP3CopySignPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
+{   /* Per component: result = copysignf4( pnt0, pnt1 ); pnt0 is passed first and pnt1 second. */
+    result->x = copysignf4( pnt0->x, pnt1->x );
+    result->y = copysignf4( pnt0->y, pnt1->y );
+    result->z = copysignf4( pnt0->z, pnt1->z );
+}
+
+static inline void vmathSoaP3MaxPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
+{   /* Per component: result = fmaxf4( pnt0, pnt1 ) for x, y and z. */
+    result->x = fmaxf4( pnt0->x, pnt1->x );
+    result->y = fmaxf4( pnt0->y, pnt1->y );
+    result->z = fmaxf4( pnt0->z, pnt1->z );
+}
+
+static inline vec_float4 vmathSoaP3MaxElem( const VmathSoaPoint3 *pnt )
+{
+    /* Reduce the three components with fmaxf4: first x against y,
+       then z against that intermediate (z stays the first argument). */
+    const vec_float4 maxXY = fmaxf4( pnt->x, pnt->y );
+    return fmaxf4( pnt->z, maxXY );
+}
+
+static inline void vmathSoaP3MinPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
+{   /* Per component: result = fminf4( pnt0, pnt1 ) for x, y and z. */
+    result->x = fminf4( pnt0->x, pnt1->x );
+    result->y = fminf4( pnt0->y, pnt1->y );
+    result->z = fminf4( pnt0->z, pnt1->z );
+}
+
+static inline vec_float4 vmathSoaP3MinElem( const VmathSoaPoint3 *pnt )
+{
+    /* Reduce the three components with fminf4: first x against y,
+       then z against that intermediate (z stays the first argument). */
+    const vec_float4 minXY = fminf4( pnt->x, pnt->y );
+    return fminf4( pnt->z, minXY );
+}
+
+static inline vec_float4 vmathSoaP3Sum( const VmathSoaPoint3 *pnt )
+{
+    /* Add the three components with spu_add, left to right:
+       (x + y) + z.  The association order is kept as-is. */
+    const vec_float4 sumXY = spu_add( pnt->x, pnt->y );
+    return spu_add( sumXY, pnt->z );
+}
+
+static inline void vmathSoaP3Scale( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, vec_float4 scaleVal )
+{
+    result->x = spu_mul( pnt->x, scaleVal ); /* uniform scale: every component is multiplied */
+    result->y = spu_mul( pnt->y, scaleVal ); /* by the same scaleVal quadword, in x, y, z    */
+    result->z = spu_mul( pnt->z, scaleVal ); /* order                                        */
+}
+
+static inline void vmathSoaP3NonUniformScale( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, const VmathSoaVector3 *scaleVec )
+{
+    VmathSoaPoint3 scaleAsPnt; /* copy of scaleVec in point form, so the point-by-point multiply can be reused */
+    vmathSoaP3MakeFromV3( &scaleAsPnt, scaleVec );
+    vmathSoaP3MulPerElem( result, pnt, &scaleAsPnt ); /* result = pnt * scaleVec, per component */
+}
+
+static inline vec_float4 vmathSoaP3Projection( const VmathSoaPoint3 *pnt, const VmathSoaVector3 *unitVec )
+{
+    /* Dot product of pnt's coordinates with unitVec, accumulated x, y, z; unitVec is not normalized here. */
+    vec_float4 proj = spu_mul( pnt->x, unitVec->x );
+    proj = spu_add( proj, spu_mul( pnt->y, unitVec->y ) );
+    proj = spu_add( proj, spu_mul( pnt->z, unitVec->z ) );
+    return proj;
+}
+
+static inline vec_float4 vmathSoaP3DistSqrFromOrigin( const VmathSoaPoint3 *pnt )
+{
+    VmathSoaVector3 fromOrigin; /* pnt converted to vector form */
+    vmathSoaV3MakeFromP3( &fromOrigin, pnt );
+    return vmathSoaV3LengthSqr( &fromOrigin ); /* squared distance = squared length of that vector */
+}
+
+static inline vec_float4 vmathSoaP3DistFromOrigin( const VmathSoaPoint3 *pnt )
+{
+    VmathSoaVector3 fromOrigin; /* pnt converted to vector form */
+    vmathSoaV3MakeFromP3( &fromOrigin, pnt );
+    return vmathSoaV3Length( &fromOrigin ); /* distance = length of that vector */
+}
+
+static inline vec_float4 vmathSoaP3DistSqr( const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
+{
+    VmathSoaVector3 offset; /* pnt1 - pnt0 */
+    vmathSoaP3Sub( &offset, pnt1, pnt0 );
+    return vmathSoaV3LengthSqr( &offset ); /* squared distance = squared length of the offset */
+}
+
+static inline vec_float4 vmathSoaP3Dist( const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 )
+{
+    VmathSoaVector3 offset; /* pnt1 - pnt0 */
+    vmathSoaP3Sub( &offset, pnt1, pnt0 );
+    return vmathSoaV3Length( &offset ); /* distance = length of the offset */
+}
+
+static inline void vmathSoaP3Select( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1, vec_uint4 select1 )
+{   /* Apply spu_sel with the same select1 mask to x, y and z; pnt0 is the first operand, pnt1 the second. */
+    result->x = spu_sel( pnt0->x, pnt1->x, select1 );
+    result->y = spu_sel( pnt0->y, pnt1->y, select1 );
+    result->z = spu_sel( pnt0->z, pnt1->z, select1 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathSoaP3Print( const VmathSoaPoint3 *pnt )
+{
+    VmathPoint3 slot0, slot1, slot2, slot3; /* one AoS point per SIMD slot */
+    vmathSoaP3Get4Aos( pnt, &slot0, &slot1, &slot2, &slot3 ); /* unpack, then dump each slot under its header */
+    printf("slot 0:\n");
+    vmathP3Print( &slot0 );
+    printf("slot 1:\n");
+    vmathP3Print( &slot1 );
+    printf("slot 2:\n");
+    vmathP3Print( &slot2 );
+    printf("slot 3:\n");
+    vmathP3Print( &slot3 );
+}
+
+static inline void vmathSoaP3Prints( const VmathSoaPoint3 *pnt, const char *name )
+{
+    /*
+     * Labelled variant of vmathSoaP3Print: emit "<name>:" on its own line,
+     * then hand off to vmathSoaP3Print for the per-slot dump.  The slot
+     * headers and point formatting therefore come from that routine, and
+     * the output sequence is: name line, then "slot 0:" .. "slot 3:", each
+     * followed by the vmathP3Print output for that slot's AoS point.
+     *
+     * Like vmathSoaP3Print, this is compiled only under _VECTORMATH_DEBUG.
+     */
+    printf( "%s:\n", name );
+    vmathSoaP3Print( pnt );
+}
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/spu/c/vec_soa_v.h b/Extras/vectormathlibrary/include/vectormath/spu/c/vec_soa_v.h
index 560356a77..788fa5c39 100644
--- a/Extras/vectormathlibrary/include/vectormath/spu/c/vec_soa_v.h
+++ b/Extras/vectormathlibrary/include/vectormath/spu/c/vec_soa_v.h
@@ -1,962 +1,962 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_VEC_SOA_V_C_H
-#define _VECTORMATH_VEC_SOA_V_C_H
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/*-----------------------------------------------------------------------------
- * Constants
- * for shuffles, words are labeled [x,y,z,w] [a,b,c,d]
- */
-#define _VECTORMATH_SHUF_X 0x00010203
-#define _VECTORMATH_SHUF_Y 0x04050607
-#define _VECTORMATH_SHUF_Z 0x08090a0b
-#define _VECTORMATH_SHUF_W 0x0c0d0e0f
-#define _VECTORMATH_SHUF_A 0x10111213
-#define _VECTORMATH_SHUF_B 0x14151617
-#define _VECTORMATH_SHUF_C 0x18191a1b
-#define _VECTORMATH_SHUF_D 0x1c1d1e1f
-#define _VECTORMATH_SHUF_0 0x80808080
-#define _VECTORMATH_SHUF_XAYB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_B })
-#define _VECTORMATH_SHUF_ZCWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_D })
-#define _VECTORMATH_SHUF_ZBW0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_0 })
-#define _VECTORMATH_SHUF_XCY0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_0 })
-#define _VECTORMATH_SHUF_ZDW0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_D, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_0 })
-#define _VECTORMATH_SHUF_XAZC ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_C })
-#define _VECTORMATH_SHUF_ZDXB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_D, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_B })
-#define _VECTORMATH_SHUF_YBWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_D })
-#define _VECTORMATH_SHUF_XDZB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_D, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_B })
-#define _VECTORMATH_SHUF_YAWC ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_C })
-#define _VECTORMATH_SHUF_ZBXD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_D })
-#define _VECTORMATH_SHUF_XYCD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_D })
-#define _VECTORMATH_SLERP_TOL 0.999f
-
-/*-----------------------------------------------------------------------------
- * Definitions
- */
-#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
-#define _VECTORMATH_INTERNAL_FUNCTIONS
-
-#endif
-
-static inline VmathSoaVector3 vmathSoaV3MakeFromElems_V( vec_float4 _x, vec_float4 _y, vec_float4 _z )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3MakeFromElems(&result, _x, _y, _z);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3MakeFromP3_V( VmathSoaPoint3 pnt )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3MakeFromP3(&result, &pnt);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3MakeFromScalar_V( vec_float4 scalar )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3MakeFromScalar(&result, scalar);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3MakeFromAos_V( VmathVector3 vec )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3MakeFromAos(&result, &vec);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3MakeFrom4Aos_V( VmathVector3 vec0, VmathVector3 vec1, VmathVector3 vec2, VmathVector3 vec3 )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3MakeFrom4Aos(&result, &vec0, &vec1, &vec2, &vec3);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3MakeXAxis_V( )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3MakeXAxis(&result);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3MakeYAxis_V( )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3MakeYAxis(&result);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3MakeZAxis_V( )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3MakeZAxis(&result);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3Lerp_V( vec_float4 t, VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3Lerp(&result, t, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3Slerp_V( vec_float4 t, VmathSoaVector3 unitVec0, VmathSoaVector3 unitVec1 )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3Slerp(&result, t, &unitVec0, &unitVec1);
-    return result;
-}
-
-static inline void vmathSoaV3Get4Aos_V( VmathSoaVector3 vec, VmathVector3 *result0, VmathVector3 *result1, VmathVector3 *result2, VmathVector3 *result3 )
-{
-    vmathSoaV3Get4Aos(&vec, result0, result1, result2, result3);
-}
-
-static inline void vmathSoaV3LoadXYZArray_V( VmathSoaVector3 *vec, const vec_float4 *threeQuads )
-{
-    vmathSoaV3LoadXYZArray(vec, threeQuads);
-}
-
-static inline void vmathSoaV3StoreXYZArray_V( VmathSoaVector3 vec, vec_float4 *threeQuads )
-{
-    vmathSoaV3StoreXYZArray(&vec, threeQuads);
-}
-
-static inline void vmathSoaV3StoreHalfFloats_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1, vec_ushort8 *threeQuads )
-{
-    vmathSoaV3StoreHalfFloats(&vec0, &vec1, threeQuads);
-}
-
-static inline void vmathSoaV3SetX_V( VmathSoaVector3 *result, vec_float4 _x )
-{
-    vmathSoaV3SetX(result, _x);
-}
-
-static inline vec_float4 vmathSoaV3GetX_V( VmathSoaVector3 vec )
-{
-    return vmathSoaV3GetX(&vec);
-}
-
-static inline void vmathSoaV3SetY_V( VmathSoaVector3 *result, vec_float4 _y )
-{
-    vmathSoaV3SetY(result, _y);
-}
-
-static inline vec_float4 vmathSoaV3GetY_V( VmathSoaVector3 vec )
-{
-    return vmathSoaV3GetY(&vec);
-}
-
-static inline void vmathSoaV3SetZ_V( VmathSoaVector3 *result, vec_float4 _z )
-{
-    vmathSoaV3SetZ(result, _z);
-}
-
-static inline vec_float4 vmathSoaV3GetZ_V( VmathSoaVector3 vec )
-{
-    return vmathSoaV3GetZ(&vec);
-}
-
-static inline void vmathSoaV3SetElem_V( VmathSoaVector3 *result, int idx, vec_float4 value )
-{
-    vmathSoaV3SetElem(result, idx, value);
-}
-
-static inline vec_float4 vmathSoaV3GetElem_V( VmathSoaVector3 vec, int idx )
-{
-    return vmathSoaV3GetElem(&vec, idx);
-}
-
-static inline VmathSoaVector3 vmathSoaV3Add_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3Add(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3Sub_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3Sub(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathSoaPoint3 vmathSoaV3AddP3_V( VmathSoaVector3 vec, VmathSoaPoint3 pnt1 )
-{
-    VmathSoaPoint3 result;
-    vmathSoaV3AddP3(&result, &vec, &pnt1);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3ScalarMul_V( VmathSoaVector3 vec, vec_float4 scalar )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3ScalarMul(&result, &vec, scalar);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3ScalarDiv_V( VmathSoaVector3 vec, vec_float4 scalar )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3ScalarDiv(&result, &vec, scalar);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3Neg_V( VmathSoaVector3 vec )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3Neg(&result, &vec);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3MulPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3MulPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3DivPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3DivPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3RecipPerElem_V( VmathSoaVector3 vec )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3RecipPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3SqrtPerElem_V( VmathSoaVector3 vec )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3SqrtPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3RsqrtPerElem_V( VmathSoaVector3 vec )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3RsqrtPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3AbsPerElem_V( VmathSoaVector3 vec )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3AbsPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3CopySignPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3CopySignPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3MaxPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3MaxPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline vec_float4 vmathSoaV3MaxElem_V( VmathSoaVector3 vec )
-{
-    return vmathSoaV3MaxElem(&vec);
-}
-
-static inline VmathSoaVector3 vmathSoaV3MinPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3MinPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline vec_float4 vmathSoaV3MinElem_V( VmathSoaVector3 vec )
-{
-    return vmathSoaV3MinElem(&vec);
-}
-
-static inline vec_float4 vmathSoaV3Sum_V( VmathSoaVector3 vec )
-{
-    return vmathSoaV3Sum(&vec);
-}
-
-static inline vec_float4 vmathSoaV3Dot_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
-{
-    return vmathSoaV3Dot(&vec0, &vec1);
-}
-
-static inline vec_float4 vmathSoaV3LengthSqr_V( VmathSoaVector3 vec )
-{
-    return vmathSoaV3LengthSqr(&vec);
-}
-
-static inline vec_float4 vmathSoaV3Length_V( VmathSoaVector3 vec )
-{
-    return vmathSoaV3Length(&vec);
-}
-
-static inline VmathSoaVector3 vmathSoaV3Normalize_V( VmathSoaVector3 vec )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3Normalize(&result, &vec);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3Cross_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3Cross(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathSoaVector3 vmathSoaV3Select_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1, vec_uint4 select1 )
-{
-    VmathSoaVector3 result;
-    vmathSoaV3Select(&result, &vec0, &vec1, select1);
-    return result;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathSoaV3Print_V( VmathSoaVector3 vec )
-{
-    vmathSoaV3Print(&vec);
-}
-
-static inline void vmathSoaV3Prints_V( VmathSoaVector3 vec, const char *name )
-{
-    vmathSoaV3Prints(&vec, name);
-}
-
-#endif
-
-static inline VmathSoaVector4 vmathSoaV4MakeFromElems_V( vec_float4 _x, vec_float4 _y, vec_float4 _z, vec_float4 _w )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4MakeFromElems(&result, _x, _y, _z, _w);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4MakeFromV3Scalar_V( VmathSoaVector3 xyz, vec_float4 _w )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4MakeFromV3Scalar(&result, &xyz, _w);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4MakeFromV3_V( VmathSoaVector3 vec )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4MakeFromV3(&result, &vec);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4MakeFromP3_V( VmathSoaPoint3 pnt )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4MakeFromP3(&result, &pnt);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4MakeFromQ_V( VmathSoaQuat quat )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4MakeFromQ(&result, &quat);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4MakeFromScalar_V( vec_float4 scalar )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4MakeFromScalar(&result, scalar);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4MakeFromAos_V( VmathVector4 vec )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4MakeFromAos(&result, &vec);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4MakeFrom4Aos_V( VmathVector4 vec0, VmathVector4 vec1, VmathVector4 vec2, VmathVector4 vec3 )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4MakeFrom4Aos(&result, &vec0, &vec1, &vec2, &vec3);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4MakeXAxis_V( )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4MakeXAxis(&result);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4MakeYAxis_V( )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4MakeYAxis(&result);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4MakeZAxis_V( )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4MakeZAxis(&result);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4MakeWAxis_V( )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4MakeWAxis(&result);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4Lerp_V( vec_float4 t, VmathSoaVector4 vec0, VmathSoaVector4 vec1 )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4Lerp(&result, t, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4Slerp_V( vec_float4 t, VmathSoaVector4 unitVec0, VmathSoaVector4 unitVec1 )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4Slerp(&result, t, &unitVec0, &unitVec1);
-    return result;
-}
-
-static inline void vmathSoaV4Get4Aos_V( VmathSoaVector4 vec, VmathVector4 *result0, VmathVector4 *result1, VmathVector4 *result2, VmathVector4 *result3 )
-{
-    vmathSoaV4Get4Aos(&vec, result0, result1, result2, result3);
-}
-
-static inline void vmathSoaV4StoreHalfFloats_V( VmathSoaVector4 vec, vec_ushort8 *twoQuads )
-{
-    vmathSoaV4StoreHalfFloats(&vec, twoQuads);
-}
-
-static inline void vmathSoaV4SetXYZ_V( VmathSoaVector4 *result, VmathSoaVector3 vec )
-{
-    vmathSoaV4SetXYZ(result, &vec);
-}
-
-static inline VmathSoaVector3 vmathSoaV4GetXYZ_V( VmathSoaVector4 vec )
-{
-    VmathSoaVector3 result;
-    vmathSoaV4GetXYZ(&result, &vec);
-    return result;
-}
-
-static inline void vmathSoaV4SetX_V( VmathSoaVector4 *result, vec_float4 _x )
-{
-    vmathSoaV4SetX(result, _x);
-}
-
-static inline vec_float4 vmathSoaV4GetX_V( VmathSoaVector4 vec )
-{
-    return vmathSoaV4GetX(&vec);
-}
-
-static inline void vmathSoaV4SetY_V( VmathSoaVector4 *result, vec_float4 _y )
-{
-    vmathSoaV4SetY(result, _y);
-}
-
-static inline vec_float4 vmathSoaV4GetY_V( VmathSoaVector4 vec )
-{
-    return vmathSoaV4GetY(&vec);
-}
-
-static inline void vmathSoaV4SetZ_V( VmathSoaVector4 *result, vec_float4 _z )
-{
-    vmathSoaV4SetZ(result, _z);
-}
-
-static inline vec_float4 vmathSoaV4GetZ_V( VmathSoaVector4 vec )
-{
-    return vmathSoaV4GetZ(&vec);
-}
-
-static inline void vmathSoaV4SetW_V( VmathSoaVector4 *result, vec_float4 _w )
-{
-    vmathSoaV4SetW(result, _w);
-}
-
-static inline vec_float4 vmathSoaV4GetW_V( VmathSoaVector4 vec )
-{
-    return vmathSoaV4GetW(&vec);
-}
-
-static inline void vmathSoaV4SetElem_V( VmathSoaVector4 *result, int idx, vec_float4 value )
-{
-    vmathSoaV4SetElem(result, idx, value);
-}
-
-static inline vec_float4 vmathSoaV4GetElem_V( VmathSoaVector4 vec, int idx )
-{
-    return vmathSoaV4GetElem(&vec, idx);
-}
-
-static inline VmathSoaVector4 vmathSoaV4Add_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4Add(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4Sub_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4Sub(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4ScalarMul_V( VmathSoaVector4 vec, vec_float4 scalar )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4ScalarMul(&result, &vec, scalar);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4ScalarDiv_V( VmathSoaVector4 vec, vec_float4 scalar )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4ScalarDiv(&result, &vec, scalar);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4Neg_V( VmathSoaVector4 vec )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4Neg(&result, &vec);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4MulPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4MulPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4DivPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4DivPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4RecipPerElem_V( VmathSoaVector4 vec )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4RecipPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4SqrtPerElem_V( VmathSoaVector4 vec )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4SqrtPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4RsqrtPerElem_V( VmathSoaVector4 vec )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4RsqrtPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4AbsPerElem_V( VmathSoaVector4 vec )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4AbsPerElem(&result, &vec);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4CopySignPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4CopySignPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4MaxPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4MaxPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline vec_float4 vmathSoaV4MaxElem_V( VmathSoaVector4 vec )
-{
-    return vmathSoaV4MaxElem(&vec);
-}
-
-static inline VmathSoaVector4 vmathSoaV4MinPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4MinPerElem(&result, &vec0, &vec1);
-    return result;
-}
-
-static inline vec_float4 vmathSoaV4MinElem_V( VmathSoaVector4 vec )
-{
-    return vmathSoaV4MinElem(&vec);
-}
-
-static inline vec_float4 vmathSoaV4Sum_V( VmathSoaVector4 vec )
-{
-    return vmathSoaV4Sum(&vec);
-}
-
-static inline vec_float4 vmathSoaV4Dot_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 )
-{
-    return vmathSoaV4Dot(&vec0, &vec1);
-}
-
-static inline vec_float4 vmathSoaV4LengthSqr_V( VmathSoaVector4 vec )
-{
-    return vmathSoaV4LengthSqr(&vec);
-}
-
-static inline vec_float4 vmathSoaV4Length_V( VmathSoaVector4 vec )
-{
-    return vmathSoaV4Length(&vec);
-}
-
-static inline VmathSoaVector4 vmathSoaV4Normalize_V( VmathSoaVector4 vec )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4Normalize(&result, &vec);
-    return result;
-}
-
-static inline VmathSoaVector4 vmathSoaV4Select_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1, vec_uint4 select1 )
-{
-    VmathSoaVector4 result;
-    vmathSoaV4Select(&result, &vec0, &vec1, select1);
-    return result;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathSoaV4Print_V( VmathSoaVector4 vec )
-{
-    vmathSoaV4Print(&vec);
-}
-
-static inline void vmathSoaV4Prints_V( VmathSoaVector4 vec, const char *name )
-{
-    vmathSoaV4Prints(&vec, name);
-}
-
-#endif
-
-static inline VmathSoaPoint3 vmathSoaP3MakeFromElems_V( vec_float4 _x, vec_float4 _y, vec_float4 _z )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3MakeFromElems(&result, _x, _y, _z);
-    return result;
-}
-
-static inline VmathSoaPoint3 vmathSoaP3MakeFromV3_V( VmathSoaVector3 vec )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3MakeFromV3(&result, &vec);
-    return result;
-}
-
-static inline VmathSoaPoint3 vmathSoaP3MakeFromScalar_V( vec_float4 scalar )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3MakeFromScalar(&result, scalar);
-    return result;
-}
-
-static inline VmathSoaPoint3 vmathSoaP3MakeFromAos_V( VmathPoint3 pnt )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3MakeFromAos(&result, &pnt);
-    return result;
-}
-
-static inline VmathSoaPoint3 vmathSoaP3MakeFrom4Aos_V( VmathPoint3 pnt0, VmathPoint3 pnt1, VmathPoint3 pnt2, VmathPoint3 pnt3 )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3MakeFrom4Aos(&result, &pnt0, &pnt1, &pnt2, &pnt3);
-    return result;
-}
-
-static inline VmathSoaPoint3 vmathSoaP3Lerp_V( vec_float4 t, VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3Lerp(&result, t, &pnt0, &pnt1);
-    return result;
-}
-
-static inline void vmathSoaP3Get4Aos_V( VmathSoaPoint3 pnt, VmathPoint3 *result0, VmathPoint3 *result1, VmathPoint3 *result2, VmathPoint3 *result3 )
-{
-    vmathSoaP3Get4Aos(&pnt, result0, result1, result2, result3);
-}
-
-static inline void vmathSoaP3LoadXYZArray_V( VmathSoaPoint3 *vec, const vec_float4 *threeQuads )
-{
-    vmathSoaP3LoadXYZArray(vec, threeQuads);
-}
-
-static inline void vmathSoaP3StoreXYZArray_V( VmathSoaPoint3 vec, vec_float4 *threeQuads )
-{
-    vmathSoaP3StoreXYZArray(&vec, threeQuads);
-}
-
-static inline void vmathSoaP3StoreHalfFloats_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1, vec_ushort8 *threeQuads )
-{
-    vmathSoaP3StoreHalfFloats(&pnt0, &pnt1, threeQuads);
-}
-
-static inline void vmathSoaP3SetX_V( VmathSoaPoint3 *result, vec_float4 _x )
-{
-    vmathSoaP3SetX(result, _x);
-}
-
-static inline vec_float4 vmathSoaP3GetX_V( VmathSoaPoint3 pnt )
-{
-    return vmathSoaP3GetX(&pnt);
-}
-
-static inline void vmathSoaP3SetY_V( VmathSoaPoint3 *result, vec_float4 _y )
-{
-    vmathSoaP3SetY(result, _y);
-}
-
-static inline vec_float4 vmathSoaP3GetY_V( VmathSoaPoint3 pnt )
-{
-    return vmathSoaP3GetY(&pnt);
-}
-
-static inline void vmathSoaP3SetZ_V( VmathSoaPoint3 *result, vec_float4 _z )
-{
-    vmathSoaP3SetZ(result, _z);
-}
-
-static inline vec_float4 vmathSoaP3GetZ_V( VmathSoaPoint3 pnt )
-{
-    return vmathSoaP3GetZ(&pnt);
-}
-
-static inline void vmathSoaP3SetElem_V( VmathSoaPoint3 *result, int idx, vec_float4 value )
-{
-    vmathSoaP3SetElem(result, idx, value);
-}
-
-static inline vec_float4 vmathSoaP3GetElem_V( VmathSoaPoint3 pnt, int idx )
-{
-    return vmathSoaP3GetElem(&pnt, idx);
-}
-
-static inline VmathSoaVector3 vmathSoaP3Sub_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 )
-{
-    VmathSoaVector3 result;
-    vmathSoaP3Sub(&result, &pnt0, &pnt1);
-    return result;
-}
-
-static inline VmathSoaPoint3 vmathSoaP3AddV3_V( VmathSoaPoint3 pnt, VmathSoaVector3 vec1 )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3AddV3(&result, &pnt, &vec1);
-    return result;
-}
-
-static inline VmathSoaPoint3 vmathSoaP3SubV3_V( VmathSoaPoint3 pnt, VmathSoaVector3 vec1 )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3SubV3(&result, &pnt, &vec1);
-    return result;
-}
-
-static inline VmathSoaPoint3 vmathSoaP3MulPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3MulPerElem(&result, &pnt0, &pnt1);
-    return result;
-}
-
-static inline VmathSoaPoint3 vmathSoaP3DivPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3DivPerElem(&result, &pnt0, &pnt1);
-    return result;
-}
-
-static inline VmathSoaPoint3 vmathSoaP3RecipPerElem_V( VmathSoaPoint3 pnt )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3RecipPerElem(&result, &pnt);
-    return result;
-}
-
-static inline VmathSoaPoint3 vmathSoaP3SqrtPerElem_V( VmathSoaPoint3 pnt )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3SqrtPerElem(&result, &pnt);
-    return result;
-}
-
-static inline VmathSoaPoint3 vmathSoaP3RsqrtPerElem_V( VmathSoaPoint3 pnt )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3RsqrtPerElem(&result, &pnt);
-    return result;
-}
-
-static inline VmathSoaPoint3 vmathSoaP3AbsPerElem_V( VmathSoaPoint3 pnt )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3AbsPerElem(&result, &pnt);
-    return result;
-}
-
-static inline VmathSoaPoint3 vmathSoaP3CopySignPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3CopySignPerElem(&result, &pnt0, &pnt1);
-    return result;
-}
-
-static inline VmathSoaPoint3 vmathSoaP3MaxPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3MaxPerElem(&result, &pnt0, &pnt1);
-    return result;
-}
-
-static inline vec_float4 vmathSoaP3MaxElem_V( VmathSoaPoint3 pnt )
-{
-    return vmathSoaP3MaxElem(&pnt);
-}
-
-static inline VmathSoaPoint3 vmathSoaP3MinPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3MinPerElem(&result, &pnt0, &pnt1);
-    return result;
-}
-
-static inline vec_float4 vmathSoaP3MinElem_V( VmathSoaPoint3 pnt )
-{
-    return vmathSoaP3MinElem(&pnt);
-}
-
-static inline vec_float4 vmathSoaP3Sum_V( VmathSoaPoint3 pnt )
-{
-    return vmathSoaP3Sum(&pnt);
-}
-
-static inline VmathSoaPoint3 vmathSoaP3Scale_V( VmathSoaPoint3 pnt, vec_float4 scaleVal )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3Scale(&result, &pnt, scaleVal);
-    return result;
-}
-
-static inline VmathSoaPoint3 vmathSoaP3NonUniformScale_V( VmathSoaPoint3 pnt, VmathSoaVector3 scaleVec )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3NonUniformScale(&result, &pnt, &scaleVec);
-    return result;
-}
-
-static inline vec_float4 vmathSoaP3Projection_V( VmathSoaPoint3 pnt, VmathSoaVector3 unitVec )
-{
-    return vmathSoaP3Projection(&pnt, &unitVec);
-}
-
-static inline vec_float4 vmathSoaP3DistSqrFromOrigin_V( VmathSoaPoint3 pnt )
-{
-    return vmathSoaP3DistSqrFromOrigin(&pnt);
-}
-
-static inline vec_float4 vmathSoaP3DistFromOrigin_V( VmathSoaPoint3 pnt )
-{
-    return vmathSoaP3DistFromOrigin(&pnt);
-}
-
-static inline vec_float4 vmathSoaP3DistSqr_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 )
-{
-    return vmathSoaP3DistSqr(&pnt0, &pnt1);
-}
-
-static inline vec_float4 vmathSoaP3Dist_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 )
-{
-    return vmathSoaP3Dist(&pnt0, &pnt1);
-}
-
-static inline VmathSoaPoint3 vmathSoaP3Select_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1, vec_uint4 select1 )
-{
-    VmathSoaPoint3 result;
-    vmathSoaP3Select(&result, &pnt0, &pnt1, select1);
-    return result;
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-static inline void vmathSoaP3Print_V( VmathSoaPoint3 pnt )
-{
-    vmathSoaP3Print(&pnt);
-}
-
-static inline void vmathSoaP3Prints_V( VmathSoaPoint3 pnt, const char *name )
-{
-    vmathSoaP3Prints(&pnt, name);
-}
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_VEC_SOA_V_C_H
+#define _VECTORMATH_VEC_SOA_V_C_H
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*-----------------------------------------------------------------------------
+ * Constants
+ * for shuffles, words are labeled [x,y,z,w] [a,b,c,d]
+ */
+#define _VECTORMATH_SHUF_X 0x00010203
+#define _VECTORMATH_SHUF_Y 0x04050607
+#define _VECTORMATH_SHUF_Z 0x08090a0b
+#define _VECTORMATH_SHUF_W 0x0c0d0e0f
+#define _VECTORMATH_SHUF_A 0x10111213
+#define _VECTORMATH_SHUF_B 0x14151617
+#define _VECTORMATH_SHUF_C 0x18191a1b
+#define _VECTORMATH_SHUF_D 0x1c1d1e1f
+#define _VECTORMATH_SHUF_0 0x80808080
+#define _VECTORMATH_SHUF_XAYB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_B })
+#define _VECTORMATH_SHUF_ZCWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_D })
+#define _VECTORMATH_SHUF_ZBW0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_0 })
+#define _VECTORMATH_SHUF_XCY0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_0 })
+#define _VECTORMATH_SHUF_ZDW0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_D, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_0 })
+#define _VECTORMATH_SHUF_XAZC ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_C })
+#define _VECTORMATH_SHUF_ZDXB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_D, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_B })
+#define _VECTORMATH_SHUF_YBWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_D })
+#define _VECTORMATH_SHUF_XDZB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_D, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_B })
+#define _VECTORMATH_SHUF_YAWC ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_C })
+#define _VECTORMATH_SHUF_ZBXD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_D })
+#define _VECTORMATH_SHUF_XYCD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_D })
+#define _VECTORMATH_SLERP_TOL 0.999f
+
+/*-----------------------------------------------------------------------------
+ * Definitions
+ */
+#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
+#define _VECTORMATH_INTERNAL_FUNCTIONS
+
+#endif
+
+static inline VmathSoaVector3 vmathSoaV3MakeFromElems_V( vec_float4 _x, vec_float4 _y, vec_float4 _z )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3MakeFromElems(&result, _x, _y, _z);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3MakeFromP3_V( VmathSoaPoint3 pnt )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3MakeFromP3(&result, &pnt);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3MakeFromScalar_V( vec_float4 scalar )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3MakeFromScalar(&result, scalar);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3MakeFromAos_V( VmathVector3 vec )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3MakeFromAos(&result, &vec);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3MakeFrom4Aos_V( VmathVector3 vec0, VmathVector3 vec1, VmathVector3 vec2, VmathVector3 vec3 )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3MakeFrom4Aos(&result, &vec0, &vec1, &vec2, &vec3);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3MakeXAxis_V( )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3MakeXAxis(&result);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3MakeYAxis_V( )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3MakeYAxis(&result);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3MakeZAxis_V( )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3MakeZAxis(&result);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3Lerp_V( vec_float4 t, VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3Lerp(&result, t, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3Slerp_V( vec_float4 t, VmathSoaVector3 unitVec0, VmathSoaVector3 unitVec1 )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3Slerp(&result, t, &unitVec0, &unitVec1);
+    return result;
+}
+
+static inline void vmathSoaV3Get4Aos_V( VmathSoaVector3 vec, VmathVector3 *result0, VmathVector3 *result1, VmathVector3 *result2, VmathVector3 *result3 )
+{
+    vmathSoaV3Get4Aos(&vec, result0, result1, result2, result3);
+}
+
+static inline void vmathSoaV3LoadXYZArray_V( VmathSoaVector3 *vec, const vec_float4 *threeQuads )
+{
+    vmathSoaV3LoadXYZArray(vec, threeQuads);
+}
+
+static inline void vmathSoaV3StoreXYZArray_V( VmathSoaVector3 vec, vec_float4 *threeQuads )
+{
+    vmathSoaV3StoreXYZArray(&vec, threeQuads);
+}
+
+static inline void vmathSoaV3StoreHalfFloats_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1, vec_ushort8 *threeQuads )
+{
+    vmathSoaV3StoreHalfFloats(&vec0, &vec1, threeQuads);
+}
+
+static inline void vmathSoaV3SetX_V( VmathSoaVector3 *result, vec_float4 _x )
+{
+    vmathSoaV3SetX(result, _x);
+}
+
+static inline vec_float4 vmathSoaV3GetX_V( VmathSoaVector3 vec )
+{
+    return vmathSoaV3GetX(&vec);
+}
+
+static inline void vmathSoaV3SetY_V( VmathSoaVector3 *result, vec_float4 _y )
+{
+    vmathSoaV3SetY(result, _y);
+}
+
+static inline vec_float4 vmathSoaV3GetY_V( VmathSoaVector3 vec )
+{
+    return vmathSoaV3GetY(&vec);
+}
+
+static inline void vmathSoaV3SetZ_V( VmathSoaVector3 *result, vec_float4 _z )
+{
+    vmathSoaV3SetZ(result, _z);
+}
+
+static inline vec_float4 vmathSoaV3GetZ_V( VmathSoaVector3 vec )
+{
+    return vmathSoaV3GetZ(&vec);
+}
+
+static inline void vmathSoaV3SetElem_V( VmathSoaVector3 *result, int idx, vec_float4 value )
+{
+    vmathSoaV3SetElem(result, idx, value);
+}
+
+static inline vec_float4 vmathSoaV3GetElem_V( VmathSoaVector3 vec, int idx )
+{
+    return vmathSoaV3GetElem(&vec, idx);
+}
+
+static inline VmathSoaVector3 vmathSoaV3Add_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3Add(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3Sub_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3Sub(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathSoaPoint3 vmathSoaV3AddP3_V( VmathSoaVector3 vec, VmathSoaPoint3 pnt1 )
+{
+    VmathSoaPoint3 result;
+    vmathSoaV3AddP3(&result, &vec, &pnt1);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3ScalarMul_V( VmathSoaVector3 vec, vec_float4 scalar )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3ScalarMul(&result, &vec, scalar);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3ScalarDiv_V( VmathSoaVector3 vec, vec_float4 scalar )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3ScalarDiv(&result, &vec, scalar);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3Neg_V( VmathSoaVector3 vec )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3Neg(&result, &vec);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3MulPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3MulPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3DivPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3DivPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3RecipPerElem_V( VmathSoaVector3 vec )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3RecipPerElem(&result, &vec);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3SqrtPerElem_V( VmathSoaVector3 vec )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3SqrtPerElem(&result, &vec);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3RsqrtPerElem_V( VmathSoaVector3 vec )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3RsqrtPerElem(&result, &vec);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3AbsPerElem_V( VmathSoaVector3 vec )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3AbsPerElem(&result, &vec);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3CopySignPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3CopySignPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3MaxPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3MaxPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline vec_float4 vmathSoaV3MaxElem_V( VmathSoaVector3 vec )
+{
+    return vmathSoaV3MaxElem(&vec);
+}
+
+static inline VmathSoaVector3 vmathSoaV3MinPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3MinPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline vec_float4 vmathSoaV3MinElem_V( VmathSoaVector3 vec )
+{
+    return vmathSoaV3MinElem(&vec);
+}
+
+static inline vec_float4 vmathSoaV3Sum_V( VmathSoaVector3 vec )
+{
+    return vmathSoaV3Sum(&vec);
+}
+
+static inline vec_float4 vmathSoaV3Dot_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
+{
+    return vmathSoaV3Dot(&vec0, &vec1);
+}
+
+static inline vec_float4 vmathSoaV3LengthSqr_V( VmathSoaVector3 vec )
+{
+    return vmathSoaV3LengthSqr(&vec);
+}
+
+static inline vec_float4 vmathSoaV3Length_V( VmathSoaVector3 vec )
+{
+    return vmathSoaV3Length(&vec);
+}
+
+static inline VmathSoaVector3 vmathSoaV3Normalize_V( VmathSoaVector3 vec )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3Normalize(&result, &vec);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3Cross_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3Cross(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathSoaVector3 vmathSoaV3Select_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1, vec_uint4 select1 )
+{
+    VmathSoaVector3 result;
+    vmathSoaV3Select(&result, &vec0, &vec1, select1);
+    return result;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathSoaV3Print_V( VmathSoaVector3 vec )
+{
+    vmathSoaV3Print(&vec);
+}
+
+static inline void vmathSoaV3Prints_V( VmathSoaVector3 vec, const char *name )
+{
+    vmathSoaV3Prints(&vec, name);
+}
+
+#endif
+
+static inline VmathSoaVector4 vmathSoaV4MakeFromElems_V( vec_float4 _x, vec_float4 _y, vec_float4 _z, vec_float4 _w )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4MakeFromElems(&result, _x, _y, _z, _w);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4MakeFromV3Scalar_V( VmathSoaVector3 xyz, vec_float4 _w )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4MakeFromV3Scalar(&result, &xyz, _w);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4MakeFromV3_V( VmathSoaVector3 vec )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4MakeFromV3(&result, &vec);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4MakeFromP3_V( VmathSoaPoint3 pnt )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4MakeFromP3(&result, &pnt);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4MakeFromQ_V( VmathSoaQuat quat )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4MakeFromQ(&result, &quat);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4MakeFromScalar_V( vec_float4 scalar )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4MakeFromScalar(&result, scalar);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4MakeFromAos_V( VmathVector4 vec )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4MakeFromAos(&result, &vec);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4MakeFrom4Aos_V( VmathVector4 vec0, VmathVector4 vec1, VmathVector4 vec2, VmathVector4 vec3 )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4MakeFrom4Aos(&result, &vec0, &vec1, &vec2, &vec3);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4MakeXAxis_V( )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4MakeXAxis(&result);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4MakeYAxis_V( )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4MakeYAxis(&result);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4MakeZAxis_V( )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4MakeZAxis(&result);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4MakeWAxis_V( )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4MakeWAxis(&result);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4Lerp_V( vec_float4 t, VmathSoaVector4 vec0, VmathSoaVector4 vec1 )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4Lerp(&result, t, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4Slerp_V( vec_float4 t, VmathSoaVector4 unitVec0, VmathSoaVector4 unitVec1 )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4Slerp(&result, t, &unitVec0, &unitVec1);
+    return result;
+}
+
+static inline void vmathSoaV4Get4Aos_V( VmathSoaVector4 vec, VmathVector4 *result0, VmathVector4 *result1, VmathVector4 *result2, VmathVector4 *result3 )
+{
+    vmathSoaV4Get4Aos(&vec, result0, result1, result2, result3);
+}
+
+static inline void vmathSoaV4StoreHalfFloats_V( VmathSoaVector4 vec, vec_ushort8 *twoQuads )
+{
+    vmathSoaV4StoreHalfFloats(&vec, twoQuads);
+}
+
+static inline void vmathSoaV4SetXYZ_V( VmathSoaVector4 *result, VmathSoaVector3 vec )
+{
+    vmathSoaV4SetXYZ(result, &vec);
+}
+
+static inline VmathSoaVector3 vmathSoaV4GetXYZ_V( VmathSoaVector4 vec )
+{
+    VmathSoaVector3 result;
+    vmathSoaV4GetXYZ(&result, &vec);
+    return result;
+}
+
+static inline void vmathSoaV4SetX_V( VmathSoaVector4 *result, vec_float4 _x )
+{
+    vmathSoaV4SetX(result, _x);
+}
+
+static inline vec_float4 vmathSoaV4GetX_V( VmathSoaVector4 vec )
+{
+    return vmathSoaV4GetX(&vec);
+}
+
+static inline void vmathSoaV4SetY_V( VmathSoaVector4 *result, vec_float4 _y )
+{
+    vmathSoaV4SetY(result, _y);
+}
+
+static inline vec_float4 vmathSoaV4GetY_V( VmathSoaVector4 vec )
+{
+    return vmathSoaV4GetY(&vec);
+}
+
+static inline void vmathSoaV4SetZ_V( VmathSoaVector4 *result, vec_float4 _z )
+{
+    vmathSoaV4SetZ(result, _z);
+}
+
+static inline vec_float4 vmathSoaV4GetZ_V( VmathSoaVector4 vec )
+{
+    return vmathSoaV4GetZ(&vec);
+}
+
+static inline void vmathSoaV4SetW_V( VmathSoaVector4 *result, vec_float4 _w )
+{
+    vmathSoaV4SetW(result, _w);
+}
+
+static inline vec_float4 vmathSoaV4GetW_V( VmathSoaVector4 vec )
+{
+    return vmathSoaV4GetW(&vec);
+}
+
+static inline void vmathSoaV4SetElem_V( VmathSoaVector4 *result, int idx, vec_float4 value )
+{
+    vmathSoaV4SetElem(result, idx, value);
+}
+
+static inline vec_float4 vmathSoaV4GetElem_V( VmathSoaVector4 vec, int idx )
+{
+    return vmathSoaV4GetElem(&vec, idx);
+}
+
+static inline VmathSoaVector4 vmathSoaV4Add_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4Add(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4Sub_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4Sub(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4ScalarMul_V( VmathSoaVector4 vec, vec_float4 scalar )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4ScalarMul(&result, &vec, scalar);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4ScalarDiv_V( VmathSoaVector4 vec, vec_float4 scalar )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4ScalarDiv(&result, &vec, scalar);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4Neg_V( VmathSoaVector4 vec )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4Neg(&result, &vec);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4MulPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4MulPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4DivPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4DivPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4RecipPerElem_V( VmathSoaVector4 vec )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4RecipPerElem(&result, &vec);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4SqrtPerElem_V( VmathSoaVector4 vec )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4SqrtPerElem(&result, &vec);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4RsqrtPerElem_V( VmathSoaVector4 vec )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4RsqrtPerElem(&result, &vec);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4AbsPerElem_V( VmathSoaVector4 vec )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4AbsPerElem(&result, &vec);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4CopySignPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4CopySignPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline VmathSoaVector4 vmathSoaV4MaxPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4MaxPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline vec_float4 vmathSoaV4MaxElem_V( VmathSoaVector4 vec )
+{
+    return vmathSoaV4MaxElem(&vec);
+}
+
+static inline VmathSoaVector4 vmathSoaV4MinPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 )
+{
+    VmathSoaVector4 result;
+    vmathSoaV4MinPerElem(&result, &vec0, &vec1);
+    return result;
+}
+
+static inline vec_float4 vmathSoaV4MinElem_V( VmathSoaVector4 vec )
+{
+    return vmathSoaV4MinElem(&vec);
+}
+
+static inline vec_float4 vmathSoaV4Sum_V( VmathSoaVector4 vec )
+{
+    return vmathSoaV4Sum(&vec);
+}
+
+static inline vec_float4 vmathSoaV4Dot_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 )
+{
+    return vmathSoaV4Dot(&vec0, &vec1);
+}
+
+static inline vec_float4 vmathSoaV4LengthSqr_V( VmathSoaVector4 vec )
+{ /* By-value form of vmathSoaV4LengthSqr: passes the address of the local copy of vec and returns the callee's value. */
+    return vmathSoaV4LengthSqr(&vec);
+}
+
+static inline vec_float4 vmathSoaV4Length_V( VmathSoaVector4 vec )
+{ /* By-value form of vmathSoaV4Length: passes the address of the local copy of vec and returns the callee's value. */
+    return vmathSoaV4Length(&vec);
+}
+
+static inline VmathSoaVector4 vmathSoaV4Normalize_V( VmathSoaVector4 vec )
+{ /* By-value form of vmathSoaV4Normalize: returns the value instead of writing through a pointer. */
+    VmathSoaVector4 out;
+    vmathSoaV4Normalize( &out, &vec );
+    return out;
+}
+
+static inline VmathSoaVector4 vmathSoaV4Select_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1, vec_uint4 select1 )
+{ /* By-value form of vmathSoaV4Select: vectors go by address, select1 is forwarded unchanged. */
+    VmathSoaVector4 out;
+    vmathSoaV4Select( &out, &vec0, &vec1, select1 );
+    return out;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathSoaV4Print_V( VmathSoaVector4 vec )
+{ /* By-value form of vmathSoaV4Print (compiled only under _VECTORMATH_DEBUG): passes the address of the local copy. */
+    vmathSoaV4Print(&vec);
+}
+
+static inline void vmathSoaV4Prints_V( VmathSoaVector4 vec, const char *name )
+{ /* By-value form of vmathSoaV4Prints (compiled only under _VECTORMATH_DEBUG): vec goes by address, name is forwarded unchanged. */
+    vmathSoaV4Prints(&vec, name);
+}
+
+#endif
+
+static inline VmathSoaPoint3 vmathSoaP3MakeFromElems_V( vec_float4 _x, vec_float4 _y, vec_float4 _z )
+{ /* By-value form of vmathSoaP3MakeFromElems: the three inputs are forwarded unchanged; the point is returned. */
+    VmathSoaPoint3 out;
+    vmathSoaP3MakeFromElems( &out, _x, _y, _z );
+    return out;
+}
+
+static inline VmathSoaPoint3 vmathSoaP3MakeFromV3_V( VmathSoaVector3 vec )
+{ /* By-value form of vmathSoaP3MakeFromV3: vec goes by address; the point is returned. */
+    VmathSoaPoint3 out;
+    vmathSoaP3MakeFromV3( &out, &vec );
+    return out;
+}
+
+static inline VmathSoaPoint3 vmathSoaP3MakeFromScalar_V( vec_float4 scalar )
+{ /* By-value form of vmathSoaP3MakeFromScalar: scalar is forwarded unchanged; the point is returned. */
+    VmathSoaPoint3 out;
+    vmathSoaP3MakeFromScalar( &out, scalar );
+    return out;
+}
+
+static inline VmathSoaPoint3 vmathSoaP3MakeFromAos_V( VmathPoint3 pnt )
+{ /* By-value form of vmathSoaP3MakeFromAos: pnt goes by address; the SoA point is returned. */
+    VmathSoaPoint3 out;
+    vmathSoaP3MakeFromAos( &out, &pnt );
+    return out;
+}
+
+static inline VmathSoaPoint3 vmathSoaP3MakeFrom4Aos_V( VmathPoint3 pnt0, VmathPoint3 pnt1, VmathPoint3 pnt2, VmathPoint3 pnt3 )
+{ /* By-value form of vmathSoaP3MakeFrom4Aos: the four points go by address, in order pnt0..pnt3. */
+    VmathSoaPoint3 out;
+    vmathSoaP3MakeFrom4Aos( &out, &pnt0, &pnt1, &pnt2, &pnt3 );
+    return out;
+}
+
+static inline VmathSoaPoint3 vmathSoaP3Lerp_V( vec_float4 t, VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 )
+{ /* By-value form of vmathSoaP3Lerp: t is forwarded unchanged, both points go by address. */
+    VmathSoaPoint3 out;
+    vmathSoaP3Lerp( &out, t, &pnt0, &pnt1 );
+    return out;
+}
+
+static inline void vmathSoaP3Get4Aos_V( VmathSoaPoint3 pnt, VmathPoint3 *result0, VmathPoint3 *result1, VmathPoint3 *result2, VmathPoint3 *result3 )
+{ /* By-value form of vmathSoaP3Get4Aos: pnt goes by address; the four output pointers are forwarded unchanged. */
+    vmathSoaP3Get4Aos(&pnt, result0, result1, result2, result3);
+}
+
+static inline void vmathSoaP3LoadXYZArray_V( VmathSoaPoint3 *vec, const vec_float4 *threeQuads )
+{ /* Forwards both pointer arguments unchanged to vmathSoaP3LoadXYZArray; vec stays a pointer in this variant. */
+    vmathSoaP3LoadXYZArray(vec, threeQuads);
+}
+
+static inline void vmathSoaP3StoreXYZArray_V( VmathSoaPoint3 vec, vec_float4 *threeQuads )
+{ /* By-value form of vmathSoaP3StoreXYZArray: vec goes by address; threeQuads is forwarded unchanged. */
+    vmathSoaP3StoreXYZArray(&vec, threeQuads);
+}
+
+static inline void vmathSoaP3StoreHalfFloats_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1, vec_ushort8 *threeQuads )
+{ /* By-value form of vmathSoaP3StoreHalfFloats: both points go by address; threeQuads is forwarded unchanged. */
+    vmathSoaP3StoreHalfFloats(&pnt0, &pnt1, threeQuads);
+}
+
+static inline void vmathSoaP3SetX_V( VmathSoaPoint3 *result, vec_float4 _x )
+{ /* Forwards result (still a pointer in this variant) and _x unchanged to vmathSoaP3SetX. */
+    vmathSoaP3SetX(result, _x);
+}
+
+static inline vec_float4 vmathSoaP3GetX_V( VmathSoaPoint3 pnt )
+{ /* By-value form of vmathSoaP3GetX: passes the address of the local copy of pnt and returns the callee's value. */
+    return vmathSoaP3GetX(&pnt);
+}
+
+static inline void vmathSoaP3SetY_V( VmathSoaPoint3 *result, vec_float4 _y )
+{ /* Forwards result (still a pointer in this variant) and _y unchanged to vmathSoaP3SetY. */
+    vmathSoaP3SetY(result, _y);
+}
+
+static inline vec_float4 vmathSoaP3GetY_V( VmathSoaPoint3 pnt )
+{ /* By-value form of vmathSoaP3GetY: passes the address of the local copy of pnt and returns the callee's value. */
+    return vmathSoaP3GetY(&pnt);
+}
+
+static inline void vmathSoaP3SetZ_V( VmathSoaPoint3 *result, vec_float4 _z )
+{ /* Forwards result (still a pointer in this variant) and _z unchanged to vmathSoaP3SetZ. */
+    vmathSoaP3SetZ(result, _z);
+}
+
+static inline vec_float4 vmathSoaP3GetZ_V( VmathSoaPoint3 pnt )
+{ /* By-value form of vmathSoaP3GetZ: passes the address of the local copy of pnt and returns the callee's value. */
+    return vmathSoaP3GetZ(&pnt);
+}
+
+static inline void vmathSoaP3SetElem_V( VmathSoaPoint3 *result, int idx, vec_float4 value )
+{ /* Forwards result (still a pointer in this variant), idx and value unchanged to vmathSoaP3SetElem. */
+    vmathSoaP3SetElem(result, idx, value);
+}
+
+static inline vec_float4 vmathSoaP3GetElem_V( VmathSoaPoint3 pnt, int idx )
+{ /* By-value form of vmathSoaP3GetElem: pnt goes by address, idx is forwarded unchanged; returns the callee's value. */
+    return vmathSoaP3GetElem(&pnt, idx);
+}
+
+static inline VmathSoaVector3 vmathSoaP3Sub_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 )
+{ /* By-value form of vmathSoaP3Sub: both points go by address; note the return type is a VmathSoaVector3. */
+    VmathSoaVector3 out;
+    vmathSoaP3Sub( &out, &pnt0, &pnt1 );
+    return out;
+}
+
+static inline VmathSoaPoint3 vmathSoaP3AddV3_V( VmathSoaPoint3 pnt, VmathSoaVector3 vec1 )
+{ /* By-value form of vmathSoaP3AddV3: the point and the vector are passed on by address, in that order. */
+    VmathSoaPoint3 out;
+    vmathSoaP3AddV3( &out, &pnt, &vec1 );
+    return out;
+}
+
+static inline VmathSoaPoint3 vmathSoaP3SubV3_V( VmathSoaPoint3 pnt, VmathSoaVector3 vec1 )
+{ /* By-value form of vmathSoaP3SubV3: the point and the vector are passed on by address, in that order. */
+    VmathSoaPoint3 out;
+    vmathSoaP3SubV3( &out, &pnt, &vec1 );
+    return out;
+}
+
+static inline VmathSoaPoint3 vmathSoaP3MulPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 )
+{ /* By-value form of vmathSoaP3MulPerElem: both operands are passed on by address, in the same order. */
+    VmathSoaPoint3 out;
+    vmathSoaP3MulPerElem( &out, &pnt0, &pnt1 );
+    return out;
+}
+
+static inline VmathSoaPoint3 vmathSoaP3DivPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 )
+{ /* By-value form of vmathSoaP3DivPerElem: both operands are passed on by address, in the same order. */
+    VmathSoaPoint3 out;
+    vmathSoaP3DivPerElem( &out, &pnt0, &pnt1 );
+    return out;
+}
+
+static inline VmathSoaPoint3 vmathSoaP3RecipPerElem_V( VmathSoaPoint3 pnt )
+{ /* By-value form of vmathSoaP3RecipPerElem: returns the value instead of writing through a pointer. */
+    VmathSoaPoint3 out;
+    vmathSoaP3RecipPerElem( &out, &pnt );
+    return out;
+}
+
+static inline VmathSoaPoint3 vmathSoaP3SqrtPerElem_V( VmathSoaPoint3 pnt )
+{ /* By-value form of vmathSoaP3SqrtPerElem: returns the value instead of writing through a pointer. */
+    VmathSoaPoint3 out;
+    vmathSoaP3SqrtPerElem( &out, &pnt );
+    return out;
+}
+
+static inline VmathSoaPoint3 vmathSoaP3RsqrtPerElem_V( VmathSoaPoint3 pnt )
+{ /* By-value form of vmathSoaP3RsqrtPerElem: returns the value instead of writing through a pointer. */
+    VmathSoaPoint3 out;
+    vmathSoaP3RsqrtPerElem( &out, &pnt );
+    return out;
+}
+
+static inline VmathSoaPoint3 vmathSoaP3AbsPerElem_V( VmathSoaPoint3 pnt )
+{ /* By-value form of vmathSoaP3AbsPerElem: returns the value instead of writing through a pointer. */
+    VmathSoaPoint3 out;
+    vmathSoaP3AbsPerElem( &out, &pnt );
+    return out;
+}
+
+static inline VmathSoaPoint3 vmathSoaP3CopySignPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 )
+{ /* By-value form of vmathSoaP3CopySignPerElem: both operands are passed on by address, in the same order. */
+    VmathSoaPoint3 out;
+    vmathSoaP3CopySignPerElem( &out, &pnt0, &pnt1 );
+    return out;
+}
+
+static inline VmathSoaPoint3 vmathSoaP3MaxPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 )
+{ /* By-value form of vmathSoaP3MaxPerElem: both operands are passed on by address, in the same order. */
+    VmathSoaPoint3 out;
+    vmathSoaP3MaxPerElem( &out, &pnt0, &pnt1 );
+    return out;
+}
+
+static inline vec_float4 vmathSoaP3MaxElem_V( VmathSoaPoint3 pnt )
+{ /* By-value form of vmathSoaP3MaxElem: passes the address of the local copy of pnt and returns the callee's value. */
+    return vmathSoaP3MaxElem(&pnt);
+}
+
+static inline VmathSoaPoint3 vmathSoaP3MinPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 )
+{ /* By-value form of vmathSoaP3MinPerElem: both operands are passed on by address, in the same order. */
+    VmathSoaPoint3 out;
+    vmathSoaP3MinPerElem( &out, &pnt0, &pnt1 );
+    return out;
+}
+
+static inline vec_float4 vmathSoaP3MinElem_V( VmathSoaPoint3 pnt )
+{ /* By-value form of vmathSoaP3MinElem: passes the address of the local copy of pnt and returns the callee's value. */
+    return vmathSoaP3MinElem(&pnt);
+}
+
+static inline vec_float4 vmathSoaP3Sum_V( VmathSoaPoint3 pnt )
+{ /* By-value form of vmathSoaP3Sum: passes the address of the local copy of pnt and returns the callee's value. */
+    return vmathSoaP3Sum(&pnt);
+}
+
+static inline VmathSoaPoint3 vmathSoaP3Scale_V( VmathSoaPoint3 pnt, vec_float4 scaleVal )
+{ /* By-value form of vmathSoaP3Scale: pnt goes by address, scaleVal is forwarded unchanged. */
+    VmathSoaPoint3 out;
+    vmathSoaP3Scale( &out, &pnt, scaleVal );
+    return out;
+}
+
+static inline VmathSoaPoint3 vmathSoaP3NonUniformScale_V( VmathSoaPoint3 pnt, VmathSoaVector3 scaleVec )
+{ /* By-value form of vmathSoaP3NonUniformScale: the point and the scale vector are passed on by address. */
+    VmathSoaPoint3 out;
+    vmathSoaP3NonUniformScale( &out, &pnt, &scaleVec );
+    return out;
+}
+
+static inline vec_float4 vmathSoaP3Projection_V( VmathSoaPoint3 pnt, VmathSoaVector3 unitVec )
+{ /* By-value form of vmathSoaP3Projection: passes the addresses of pnt and unitVec, in order, and returns the callee's value. */
+    return vmathSoaP3Projection(&pnt, &unitVec);
+}
+
+static inline vec_float4 vmathSoaP3DistSqrFromOrigin_V( VmathSoaPoint3 pnt )
+{ /* By-value form of vmathSoaP3DistSqrFromOrigin: passes the address of the local copy of pnt and returns the callee's value. */
+    return vmathSoaP3DistSqrFromOrigin(&pnt);
+}
+
+static inline vec_float4 vmathSoaP3DistFromOrigin_V( VmathSoaPoint3 pnt )
+{ /* By-value form of vmathSoaP3DistFromOrigin: passes the address of the local copy of pnt and returns the callee's value. */
+    return vmathSoaP3DistFromOrigin(&pnt);
+}
+
+static inline vec_float4 vmathSoaP3DistSqr_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 )
+{ /* By-value form of vmathSoaP3DistSqr: passes the addresses of both local copies, in order, and returns the callee's value. */
+    return vmathSoaP3DistSqr(&pnt0, &pnt1);
+}
+
+static inline vec_float4 vmathSoaP3Dist_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 )
+{ /* By-value form of vmathSoaP3Dist: passes the addresses of both local copies, in order, and returns the callee's value. */
+    return vmathSoaP3Dist(&pnt0, &pnt1);
+}
+
+static inline VmathSoaPoint3 vmathSoaP3Select_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1, vec_uint4 select1 )
+{ /* By-value form of vmathSoaP3Select: points go by address, select1 is forwarded unchanged. */
+    VmathSoaPoint3 out;
+    vmathSoaP3Select( &out, &pnt0, &pnt1, select1 );
+    return out;
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+static inline void vmathSoaP3Print_V( VmathSoaPoint3 pnt )
+{ /* By-value form of vmathSoaP3Print (compiled only under _VECTORMATH_DEBUG): passes the address of the local copy. */
+    vmathSoaP3Print(&pnt);
+}
+
+static inline void vmathSoaP3Prints_V( VmathSoaPoint3 pnt, const char *name )
+{ /* By-value form of vmathSoaP3Prints (compiled only under _VECTORMATH_DEBUG): pnt goes by address, name is forwarded unchanged. */
+    vmathSoaP3Prints(&pnt, name);
+}
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/spu/c/vectormath_aos.h b/Extras/vectormathlibrary/include/vectormath/spu/c/vectormath_aos.h
index 12ee919da..5fa9950e8 100644
--- a/Extras/vectormathlibrary/include/vectormath/spu/c/vectormath_aos.h
+++ b/Extras/vectormathlibrary/include/vectormath/spu/c/vectormath_aos.h
@@ -1,1952 +1,1951 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_AOS_C_H
-#define _VECTORMATH_AOS_C_H
-
-#include <math.h>
-#include "spu2vmx.h"
-#include "simdmath.h"
-#include "stdio.h"
-
-#ifdef _VECTORMATH_DEBUG
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-#ifndef _VECTORMATH_AOS_C_TYPES_H
-#define _VECTORMATH_AOS_C_TYPES_H
-
-/* A 3-D vector in array-of-structures format
- */
-typedef struct _VmathVector3
-{
-    vec_float4 vec128;
-} VmathVector3;
-
-/* A 4-D vector in array-of-structures format
- */
-typedef struct _VmathVector4
-{
-    vec_float4 vec128;
-} VmathVector4;
-
-/* A 3-D point in array-of-structures format
- */
-typedef struct _VmathPoint3
-{
-    vec_float4 vec128;
-} VmathPoint3;
-
-/* A quaternion in array-of-structures format
- */
-typedef struct _VmathQuat
-{
-    vec_float4 vec128;
-} VmathQuat;
-
-/* A 3x3 matrix in array-of-structures format
- */
-typedef struct _VmathMatrix3
-{
-    VmathVector3 col0;
-    VmathVector3 col1;
-    VmathVector3 col2;
-} VmathMatrix3;
-
-/* A 4x4 matrix in array-of-structures format
- */
-typedef struct _VmathMatrix4
-{
-    VmathVector4 col0;
-    VmathVector4 col1;
-    VmathVector4 col2;
-    VmathVector4 col3;
-} VmathMatrix4;
-
-/* A 3x4 transformation matrix in array-of-structures format
- */
-typedef struct _VmathTransform3
-{
-    VmathVector3 col0;
-    VmathVector3 col1;
-    VmathVector3 col2;
-    VmathVector3 col3;
-} VmathTransform3;
-
-#endif
-
-/*
- * Copy a 3-D vector
- */
-static inline void vmathV3Copy( VmathVector3 *result, const VmathVector3 *vec );
-
-/*
- * Construct a 3-D vector from x, y, and z elements
- */
-static inline void vmathV3MakeFromElems( VmathVector3 *result, float x, float y, float z );
-
-/*
- * Copy elements from a 3-D point into a 3-D vector
- */
-static inline void vmathV3MakeFromP3( VmathVector3 *result, const VmathPoint3 *pnt );
-
-/*
- * Set all elements of a 3-D vector to the same scalar value
- */
-static inline void vmathV3MakeFromScalar( VmathVector3 *result, float scalar );
-
-/*
- * Set vector float data in a 3-D vector
- */
-static inline void vmathV3MakeFrom128( VmathVector3 *result, vec_float4 vf4 );
-
-/*
- * Get vector float data from a 3-D vector
- */
-static inline vec_float4 vmathV3Get128( const VmathVector3 *vec );
-
-/*
- * Set the x element of a 3-D vector
- */
-static inline void vmathV3SetX( VmathVector3 *result, float x );
-
-/*
- * Set the y element of a 3-D vector
- */
-static inline void vmathV3SetY( VmathVector3 *result, float y );
-
-/*
- * Set the z element of a 3-D vector
- */
-static inline void vmathV3SetZ( VmathVector3 *result, float z );
-
-/*
- * Get the x element of a 3-D vector
- */
-static inline float vmathV3GetX( const VmathVector3 *vec );
-
-/*
- * Get the y element of a 3-D vector
- */
-static inline float vmathV3GetY( const VmathVector3 *vec );
-
-/*
- * Get the z element of a 3-D vector
- */
-static inline float vmathV3GetZ( const VmathVector3 *vec );
-
-/*
- * Set an x, y, or z element of a 3-D vector by index
- */
-static inline void vmathV3SetElem( VmathVector3 *result, int idx, float value );
-
-/*
- * Get an x, y, or z element of a 3-D vector by index
- */
-static inline float vmathV3GetElem( const VmathVector3 *vec, int idx );
-
-/*
- * Add two 3-D vectors
- */
-static inline void vmathV3Add( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
-
-/*
- * Subtract a 3-D vector from another 3-D vector
- */
-static inline void vmathV3Sub( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
-
-/*
- * Add a 3-D vector to a 3-D point
- */
-static inline void vmathV3AddP3( VmathPoint3 *result, const VmathVector3 *vec, const VmathPoint3 *pnt );
-
-/*
- * Multiply a 3-D vector by a scalar
- */
-static inline void vmathV3ScalarMul( VmathVector3 *result, const VmathVector3 *vec, float scalar );
-
-/*
- * Divide a 3-D vector by a scalar
- */
-static inline void vmathV3ScalarDiv( VmathVector3 *result, const VmathVector3 *vec, float scalar );
-
-/*
- * Negate all elements of a 3-D vector
- */
-static inline void vmathV3Neg( VmathVector3 *result, const VmathVector3 *vec );
-
-/*
- * Construct x axis
- */
-static inline void vmathV3MakeXAxis( VmathVector3 *result );
-
-/*
- * Construct y axis
- */
-static inline void vmathV3MakeYAxis( VmathVector3 *result );
-
-/*
- * Construct z axis
- */
-static inline void vmathV3MakeZAxis( VmathVector3 *result );
-
-/*
- * Multiply two 3-D vectors per element
- */
-static inline void vmathV3MulPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
-
-/*
- * Divide two 3-D vectors per element
- * NOTE: 
- * Floating-point behavior matches standard library function divf4.
- */
-static inline void vmathV3DivPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
-
-/*
- * Compute the reciprocal of a 3-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function recipf4.
- */
-static inline void vmathV3RecipPerElem( VmathVector3 *result, const VmathVector3 *vec );
-
-/*
- * Compute the square root of a 3-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function sqrtf4.
- */
-static inline void vmathV3SqrtPerElem( VmathVector3 *result, const VmathVector3 *vec );
-
-/*
- * Compute the reciprocal square root of a 3-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function rsqrtf4.
- */
-static inline void vmathV3RsqrtPerElem( VmathVector3 *result, const VmathVector3 *vec );
-
-/*
- * Compute the absolute value of a 3-D vector per element
- */
-static inline void vmathV3AbsPerElem( VmathVector3 *result, const VmathVector3 *vec );
-
-/*
- * Copy sign from one 3-D vector to another, per element
- */
-static inline void vmathV3CopySignPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
-
-/*
- * Maximum of two 3-D vectors per element
- */
-static inline void vmathV3MaxPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
-
-/*
- * Minimum of two 3-D vectors per element
- */
-static inline void vmathV3MinPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
-
-/*
- * Maximum element of a 3-D vector
- */
-static inline float vmathV3MaxElem( const VmathVector3 *vec );
-
-/*
- * Minimum element of a 3-D vector
- */
-static inline float vmathV3MinElem( const VmathVector3 *vec );
-
-/*
- * Compute the sum of all elements of a 3-D vector
- */
-static inline float vmathV3Sum( const VmathVector3 *vec );
-
-/*
- * Compute the dot product of two 3-D vectors
- */
-static inline float vmathV3Dot( const VmathVector3 *vec0, const VmathVector3 *vec1 );
-
-/*
- * Compute the square of the length of a 3-D vector
- */
-static inline float vmathV3LengthSqr( const VmathVector3 *vec );
-
-/*
- * Compute the length of a 3-D vector
- */
-static inline float vmathV3Length( const VmathVector3 *vec );
-
-/*
- * Normalize a 3-D vector
- * NOTE: 
- * The result is unpredictable when all elements of vec are at or near zero.
- */
-static inline void vmathV3Normalize( VmathVector3 *result, const VmathVector3 *vec );
-
-/*
- * Compute cross product of two 3-D vectors
- */
-static inline void vmathV3Cross( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
-
-/*
- * Outer product of two 3-D vectors
- */
-static inline void vmathV3Outer( VmathMatrix3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
-
-/*
- * Pre-multiply a row vector by a 3x3 matrix
- * NOTE: 
- * Slower than column post-multiply.
- */
-static inline void vmathV3RowMul( VmathVector3 *result, const VmathVector3 *vec, const VmathMatrix3 *mat );
-
-/*
- * Cross-product matrix of a 3-D vector
- */
-static inline void vmathV3CrossMatrix( VmathMatrix3 *result, const VmathVector3 *vec );
-
-/*
- * Create cross-product matrix and multiply
- * NOTE: 
- * Faster than separately creating a cross-product matrix and multiplying.
- */
-static inline void vmathV3CrossMatrixMul( VmathMatrix3 *result, const VmathVector3 *vec, const VmathMatrix3 *mat );
-
-/*
- * Linear interpolation between two 3-D vectors
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline void vmathV3Lerp( VmathVector3 *result, float t, const VmathVector3 *vec0, const VmathVector3 *vec1 );
-
-/*
- * Spherical linear interpolation between two 3-D vectors
- * NOTE: 
- * The result is unpredictable if the vectors point in opposite directions.
- * Does not clamp t between 0 and 1.
- */
-static inline void vmathV3Slerp( VmathVector3 *result, float t, const VmathVector3 *unitVec0, const VmathVector3 *unitVec1 );
-
-/*
- * Conditionally select between two 3-D vectors
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline void vmathV3Select( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1, unsigned int select1 );
-
-/*
- * Store x, y, and z elements of a 3-D vector in the first three words of a quadword.
- * The value of the fourth word (the word with the highest address) remains unchanged
- */
-static inline void vmathV3StoreXYZ( const VmathVector3 *vec, vec_float4 *quad );
-
-/*
- * Load four three-float 3-D vectors, stored in three quadwords
- */
-static inline void vmathV3LoadXYZArray( VmathVector3 *vec0, VmathVector3 *vec1, VmathVector3 *vec2, VmathVector3 *vec3, const vec_float4 *threeQuads );
-
-/*
- * Store four 3-D vectors in three quadwords
- */
-static inline void vmathV3StoreXYZArray( const VmathVector3 *vec0, const VmathVector3 *vec1, const VmathVector3 *vec2, const VmathVector3 *vec3, vec_float4 *threeQuads );
-
-/*
- * Store eight 3-D vectors as half-floats
- */
-static inline void vmathV3StoreHalfFloats( const VmathVector3 *vec0, const VmathVector3 *vec1, const VmathVector3 *vec2, const VmathVector3 *vec3, const VmathVector3 *vec4, const VmathVector3 *vec5, const VmathVector3 *vec6, const VmathVector3 *vec7, vec_ushort8 *threeQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3-D vector
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathV3Print( const VmathVector3 *vec );
-
-/*
- * Print a 3-D vector and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathV3Prints( const VmathVector3 *vec, const char *name );
-
-#endif
-
-/*
- * Copy a 4-D vector
- */
-static inline void vmathV4Copy( VmathVector4 *result, const VmathVector4 *vec );
-
-/*
- * Construct a 4-D vector from x, y, z, and w elements
- */
-static inline void vmathV4MakeFromElems( VmathVector4 *result, float x, float y, float z, float w );
-
-/*
- * Construct a 4-D vector from a 3-D vector and a scalar
- */
-static inline void vmathV4MakeFromV3Scalar( VmathVector4 *result, const VmathVector3 *xyz, float w );
-
-/*
- * Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0
- */
-static inline void vmathV4MakeFromV3( VmathVector4 *result, const VmathVector3 *vec );
-
-/*
- * Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1
- */
-static inline void vmathV4MakeFromP3( VmathVector4 *result, const VmathPoint3 *pnt );
-
-/*
- * Copy elements from a quaternion into a 4-D vector
- */
-static inline void vmathV4MakeFromQ( VmathVector4 *result, const VmathQuat *quat );
-
-/*
- * Set all elements of a 4-D vector to the same scalar value
- */
-static inline void vmathV4MakeFromScalar( VmathVector4 *result, float scalar );
-
-/*
- * Set vector float data in a 4-D vector
- */
-static inline void vmathV4MakeFrom128( VmathVector4 *result, vec_float4 vf4 );
-
-/*
- * Get vector float data from a 4-D vector
- */
-static inline vec_float4 vmathV4Get128( const VmathVector4 *vec );
-
-/*
- * Set the x, y, and z elements of a 4-D vector
- * NOTE: 
- * This function does not change the w element.
- */
-static inline void vmathV4SetXYZ( VmathVector4 *result, const VmathVector3 *vec );
-
-/*
- * Get the x, y, and z elements of a 4-D vector
- */
-static inline void vmathV4GetXYZ( VmathVector3 *result, const VmathVector4 *vec );
-
-/*
- * Set the x element of a 4-D vector
- */
-static inline void vmathV4SetX( VmathVector4 *result, float x );
-
-/*
- * Set the y element of a 4-D vector
- */
-static inline void vmathV4SetY( VmathVector4 *result, float y );
-
-/*
- * Set the z element of a 4-D vector
- */
-static inline void vmathV4SetZ( VmathVector4 *result, float z );
-
-/*
- * Set the w element of a 4-D vector
- */
-static inline void vmathV4SetW( VmathVector4 *result, float w );
-
-/*
- * Get the x element of a 4-D vector
- */
-static inline float vmathV4GetX( const VmathVector4 *vec );
-
-/*
- * Get the y element of a 4-D vector
- */
-static inline float vmathV4GetY( const VmathVector4 *vec );
-
-/*
- * Get the z element of a 4-D vector
- */
-static inline float vmathV4GetZ( const VmathVector4 *vec );
-
-/*
- * Get the w element of a 4-D vector
- */
-static inline float vmathV4GetW( const VmathVector4 *vec );
-
-/*
- * Set an x, y, z, or w element of a 4-D vector by index
- */
-static inline void vmathV4SetElem( VmathVector4 *result, int idx, float value );
-
-/*
- * Get an x, y, z, or w element of a 4-D vector by index
- */
-static inline float vmathV4GetElem( const VmathVector4 *vec, int idx );
-
-/*
- * Add two 4-D vectors
- */
-static inline void vmathV4Add( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
-
-/*
- * Subtract a 4-D vector from another 4-D vector
- */
-static inline void vmathV4Sub( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
-
-/*
- * Multiply a 4-D vector by a scalar
- */
-static inline void vmathV4ScalarMul( VmathVector4 *result, const VmathVector4 *vec, float scalar );
-
-/*
- * Divide a 4-D vector by a scalar
- */
-static inline void vmathV4ScalarDiv( VmathVector4 *result, const VmathVector4 *vec, float scalar );
-
-/*
- * Negate all elements of a 4-D vector
- */
-static inline void vmathV4Neg( VmathVector4 *result, const VmathVector4 *vec );
-
-/*
- * Construct x axis
- */
-static inline void vmathV4MakeXAxis( VmathVector4 *result );
-
-/*
- * Construct y axis
- */
-static inline void vmathV4MakeYAxis( VmathVector4 *result );
-
-/*
- * Construct z axis
- */
-static inline void vmathV4MakeZAxis( VmathVector4 *result );
-
-/*
- * Construct w axis
- */
-static inline void vmathV4MakeWAxis( VmathVector4 *result );
-
-/*
- * Multiply two 4-D vectors per element
- */
-static inline void vmathV4MulPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
-
-/*
- * Divide two 4-D vectors per element
- * NOTE: 
- * Floating-point behavior matches standard library function divf4.
- */
-static inline void vmathV4DivPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
-
-/*
- * Compute the reciprocal of a 4-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function recipf4.
- */
-static inline void vmathV4RecipPerElem( VmathVector4 *result, const VmathVector4 *vec );
-
-/*
- * Compute the square root of a 4-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function sqrtf4.
- */
-static inline void vmathV4SqrtPerElem( VmathVector4 *result, const VmathVector4 *vec );
-
-/*
- * Compute the reciprocal square root of a 4-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function rsqrtf4.
- */
-static inline void vmathV4RsqrtPerElem( VmathVector4 *result, const VmathVector4 *vec );
-
-/*
- * Compute the absolute value of a 4-D vector per element
- */
-static inline void vmathV4AbsPerElem( VmathVector4 *result, const VmathVector4 *vec );
-
-/*
- * Copy sign from one 4-D vector to another, per element
- */
-static inline void vmathV4CopySignPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
-
-/*
- * Maximum of two 4-D vectors per element
- */
-static inline void vmathV4MaxPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
-
-/*
- * Minimum of two 4-D vectors per element
- */
-static inline void vmathV4MinPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
-
-/*
- * Maximum element of a 4-D vector
- */
-static inline float vmathV4MaxElem( const VmathVector4 *vec );
-
-/*
- * Minimum element of a 4-D vector
- */
-static inline float vmathV4MinElem( const VmathVector4 *vec );
-
-/*
- * Compute the sum of all elements of a 4-D vector
- */
-static inline float vmathV4Sum( const VmathVector4 *vec );
-
-/*
- * Compute the dot product of two 4-D vectors
- */
-static inline float vmathV4Dot( const VmathVector4 *vec0, const VmathVector4 *vec1 );
-
-/*
- * Compute the square of the length of a 4-D vector
- */
-static inline float vmathV4LengthSqr( const VmathVector4 *vec );
-
-/*
- * Compute the length of a 4-D vector
- */
-static inline float vmathV4Length( const VmathVector4 *vec );
-
-/*
- * Normalize a 4-D vector
- * NOTE: 
- * The result is unpredictable when all elements of vec are at or near zero.
- */
-static inline void vmathV4Normalize( VmathVector4 *result, const VmathVector4 *vec );
-
-/*
- * Outer product of two 4-D vectors
- */
-static inline void vmathV4Outer( VmathMatrix4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
-
-/*
- * Linear interpolation between two 4-D vectors
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline void vmathV4Lerp( VmathVector4 *result, float t, const VmathVector4 *vec0, const VmathVector4 *vec1 );
-
-/*
- * Spherical linear interpolation between two 4-D vectors
- * NOTE: 
- * The result is unpredictable if the vectors point in opposite directions.
- * Does not clamp t between 0 and 1.
- */
-static inline void vmathV4Slerp( VmathVector4 *result, float t, const VmathVector4 *unitVec0, const VmathVector4 *unitVec1 );
-
-/*
- * Conditionally select between two 4-D vectors
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline void vmathV4Select( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1, unsigned int select1 );
-
-/*
- * Store four 4-D vectors as half-floats
- */
-static inline void vmathV4StoreHalfFloats( const VmathVector4 *vec0, const VmathVector4 *vec1, const VmathVector4 *vec2, const VmathVector4 *vec3, vec_ushort8 *twoQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 4-D vector
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathV4Print( const VmathVector4 *vec );
-
-/*
- * Print a 4-D vector and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathV4Prints( const VmathVector4 *vec, const char *name );
-
-#endif
-
-/*
- * Copy a 3-D point
- */
-static inline void vmathP3Copy( VmathPoint3 *result, const VmathPoint3 *pnt );
-
-/*
- * Construct a 3-D point from x, y, and z elements
- */
-static inline void vmathP3MakeFromElems( VmathPoint3 *result, float x, float y, float z );
-
-/*
- * Copy elements from a 3-D vector into a 3-D point
- */
-static inline void vmathP3MakeFromV3( VmathPoint3 *result, const VmathVector3 *vec );
-
-/*
- * Set all elements of a 3-D point to the same scalar value
- */
-static inline void vmathP3MakeFromScalar( VmathPoint3 *result, float scalar );
-
-/*
- * Set vector float data in a 3-D point
- */
-static inline void vmathP3MakeFrom128( VmathPoint3 *result, vec_float4 vf4 );
-
-/*
- * Get vector float data from a 3-D point
- */
-static inline vec_float4 vmathP3Get128( const VmathPoint3 *pnt );
-
-/*
- * Set the x element of a 3-D point
- */
-static inline void vmathP3SetX( VmathPoint3 *result, float x );
-
-/*
- * Set the y element of a 3-D point
- */
-static inline void vmathP3SetY( VmathPoint3 *result, float y );
-
-/*
- * Set the z element of a 3-D point
- */
-static inline void vmathP3SetZ( VmathPoint3 *result, float z );
-
-/*
- * Get the x element of a 3-D point
- */
-static inline float vmathP3GetX( const VmathPoint3 *pnt );
-
-/*
- * Get the y element of a 3-D point
- */
-static inline float vmathP3GetY( const VmathPoint3 *pnt );
-
-/*
- * Get the z element of a 3-D point
- */
-static inline float vmathP3GetZ( const VmathPoint3 *pnt );
-
-/*
- * Set an x, y, or z element of a 3-D point by index
- */
-static inline void vmathP3SetElem( VmathPoint3 *result, int idx, float value );
-
-/*
- * Get an x, y, or z element of a 3-D point by index
- */
-static inline float vmathP3GetElem( const VmathPoint3 *pnt, int idx );
-
-/*
- * Subtract a 3-D point from another 3-D point
- */
-static inline void vmathP3Sub( VmathVector3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
-
-/*
- * Add a 3-D point to a 3-D vector
- */
-static inline void vmathP3AddV3( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *vec );
-
-/*
- * Subtract a 3-D vector from a 3-D point
- */
-static inline void vmathP3SubV3( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *vec );
-
-/*
- * Multiply two 3-D points per element
- */
-static inline void vmathP3MulPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
-
-/*
- * Divide two 3-D points per element
- * NOTE: 
- * Floating-point behavior matches standard library function divf4.
- */
-static inline void vmathP3DivPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
-
-/*
- * Compute the reciprocal of a 3-D point per element
- * NOTE: 
- * Floating-point behavior matches standard library function recipf4.
- */
-static inline void vmathP3RecipPerElem( VmathPoint3 *result, const VmathPoint3 *pnt );
-
-/*
- * Compute the square root of a 3-D point per element
- * NOTE: 
- * Floating-point behavior matches standard library function sqrtf4.
- */
-static inline void vmathP3SqrtPerElem( VmathPoint3 *result, const VmathPoint3 *pnt );
-
-/*
- * Compute the reciprocal square root of a 3-D point per element
- * NOTE: 
- * Floating-point behavior matches standard library function rsqrtf4.
- */
-static inline void vmathP3RsqrtPerElem( VmathPoint3 *result, const VmathPoint3 *pnt );
-
-/*
- * Compute the absolute value of a 3-D point per element
- */
-static inline void vmathP3AbsPerElem( VmathPoint3 *result, const VmathPoint3 *pnt );
-
-/*
- * Copy sign from one 3-D point to another, per element
- */
-static inline void vmathP3CopySignPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
-
-/*
- * Maximum of two 3-D points per element
- */
-static inline void vmathP3MaxPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
-
-/*
- * Minimum of two 3-D points per element
- */
-static inline void vmathP3MinPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
-
-/*
- * Maximum element of a 3-D point
- */
-static inline float vmathP3MaxElem( const VmathPoint3 *pnt );
-
-/*
- * Minimum element of a 3-D point
- */
-static inline float vmathP3MinElem( const VmathPoint3 *pnt );
-
-/*
- * Compute the sum of all elements of a 3-D point
- */
-static inline float vmathP3Sum( const VmathPoint3 *pnt );
-
-/*
- * Apply uniform scale to a 3-D point
- */
-static inline void vmathP3Scale( VmathPoint3 *result, const VmathPoint3 *pnt, float scaleVal );
-
-/*
- * Apply non-uniform scale to a 3-D point
- */
-static inline void vmathP3NonUniformScale( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *scaleVec );
-
-/*
- * Scalar projection of a 3-D point on a unit-length 3-D vector
- */
-static inline float vmathP3Projection( const VmathPoint3 *pnt, const VmathVector3 *unitVec );
-
-/*
- * Compute the square of the distance of a 3-D point from the coordinate-system origin
- */
-static inline float vmathP3DistSqrFromOrigin( const VmathPoint3 *pnt );
-
-/*
- * Compute the distance of a 3-D point from the coordinate-system origin
- */
-static inline float vmathP3DistFromOrigin( const VmathPoint3 *pnt );
-
-/*
- * Compute the square of the distance between two 3-D points
- */
-static inline float vmathP3DistSqr( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
-
-/*
- * Compute the distance between two 3-D points
- */
-static inline float vmathP3Dist( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
-
-/*
- * Linear interpolation between two 3-D points
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline void vmathP3Lerp( VmathPoint3 *result, float t, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
-
-/*
- * Conditionally select between two 3-D points
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline void vmathP3Select( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, unsigned int select1 );
-
-/*
- * Store x, y, and z elements of a 3-D point in the first three words of a quadword.
- * The value of the fourth word (the word with the highest address) remains unchanged
- */
-static inline void vmathP3StoreXYZ( const VmathPoint3 *pnt, vec_float4 *quad );
-
-/*
- * Load four three-float 3-D points, stored in three quadwords
- */
-static inline void vmathP3LoadXYZArray( VmathPoint3 *pnt0, VmathPoint3 *pnt1, VmathPoint3 *pnt2, VmathPoint3 *pnt3, const vec_float4 *threeQuads );
-
-/*
- * Store four 3-D points in three quadwords
- */
-static inline void vmathP3StoreXYZArray( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, const VmathPoint3 *pnt2, const VmathPoint3 *pnt3, vec_float4 *threeQuads );
-
-/*
- * Store eight 3-D points as half-floats
- */
-static inline void vmathP3StoreHalfFloats( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, const VmathPoint3 *pnt2, const VmathPoint3 *pnt3, const VmathPoint3 *pnt4, const VmathPoint3 *pnt5, const VmathPoint3 *pnt6, const VmathPoint3 *pnt7, vec_ushort8 *threeQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3-D point
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathP3Print( const VmathPoint3 *pnt );
-
-/*
- * Print a 3-D point and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathP3Prints( const VmathPoint3 *pnt, const char *name );
-
-#endif
-
-/*
- * Copy a quaternion
- */
-static inline void vmathQCopy( VmathQuat *result, const VmathQuat *quat );
-
-/*
- * Construct a quaternion from x, y, z, and w elements
- */
-static inline void vmathQMakeFromElems( VmathQuat *result, float x, float y, float z, float w );
-
-/*
- * Construct a quaternion from a 3-D vector and a scalar
- */
-static inline void vmathQMakeFromV3Scalar( VmathQuat *result, const VmathVector3 *xyz, float w );
-
-/*
- * Copy elements from a 4-D vector into a quaternion
- */
-static inline void vmathQMakeFromV4( VmathQuat *result, const VmathVector4 *vec );
-
-/*
- * Convert a rotation matrix to a unit-length quaternion
- */
-static inline void vmathQMakeFromM3( VmathQuat *result, const VmathMatrix3 *rotMat );
-
-/*
- * Set all elements of a quaternion to the same scalar value
- */
-static inline void vmathQMakeFromScalar( VmathQuat *result, float scalar );
-
-/*
- * Set vector float data in a quaternion
- */
-static inline void vmathQMakeFrom128( VmathQuat *result, vec_float4 vf4 );
-
-/*
- * Get vector float data from a quaternion
- */
-static inline vec_float4 vmathQGet128( const VmathQuat *quat );
-
-/*
- * Set the x, y, and z elements of a quaternion
- * NOTE: 
- * This function does not change the w element.
- */
-static inline void vmathQSetXYZ( VmathQuat *result, const VmathVector3 *vec );
-
-/*
- * Get the x, y, and z elements of a quaternion
- */
-static inline void vmathQGetXYZ( VmathVector3 *result, const VmathQuat *quat );
-
-/*
- * Set the x element of a quaternion
- */
-static inline void vmathQSetX( VmathQuat *result, float x );
-
-/*
- * Set the y element of a quaternion
- */
-static inline void vmathQSetY( VmathQuat *result, float y );
-
-/*
- * Set the z element of a quaternion
- */
-static inline void vmathQSetZ( VmathQuat *result, float z );
-
-/*
- * Set the w element of a quaternion
- */
-static inline void vmathQSetW( VmathQuat *result, float w );
-
-/*
- * Get the x element of a quaternion
- */
-static inline float vmathQGetX( const VmathQuat *quat );
-
-/*
- * Get the y element of a quaternion
- */
-static inline float vmathQGetY( const VmathQuat *quat );
-
-/*
- * Get the z element of a quaternion
- */
-static inline float vmathQGetZ( const VmathQuat *quat );
-
-/*
- * Get the w element of a quaternion
- */
-static inline float vmathQGetW( const VmathQuat *quat );
-
-/*
- * Set an x, y, z, or w element of a quaternion by index
- */
-static inline void vmathQSetElem( VmathQuat *result, int idx, float value );
-
-/*
- * Get an x, y, z, or w element of a quaternion by index
- */
-static inline float vmathQGetElem( const VmathQuat *quat, int idx );
-
-/*
- * Add two quaternions
- */
-static inline void vmathQAdd( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1 );
-
-/*
- * Subtract a quaternion from another quaternion
- */
-static inline void vmathQSub( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1 );
-
-/*
- * Multiply two quaternions
- */
-static inline void vmathQMul( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1 );
-
-/*
- * Multiply a quaternion by a scalar
- */
-static inline void vmathQScalarMul( VmathQuat *result, const VmathQuat *quat, float scalar );
-
-/*
- * Divide a quaternion by a scalar
- */
-static inline void vmathQScalarDiv( VmathQuat *result, const VmathQuat *quat, float scalar );
-
-/*
- * Negate all elements of a quaternion
- */
-static inline void vmathQNeg( VmathQuat *result, const VmathQuat *quat );
-
-/*
- * Construct an identity quaternion
- */
-static inline void vmathQMakeIdentity( VmathQuat *result );
-
-/*
- * Construct a quaternion to rotate between two unit-length 3-D vectors
- * NOTE: 
- * The result is unpredictable if unitVec0 and unitVec1 point in opposite directions.
- */
-static inline void vmathQMakeRotationArc( VmathQuat *result, const VmathVector3 *unitVec0, const VmathVector3 *unitVec1 );
-
-/*
- * Construct a quaternion to rotate around a unit-length 3-D vector
- */
-static inline void vmathQMakeRotationAxis( VmathQuat *result, float radians, const VmathVector3 *unitVec );
-
-/*
- * Construct a quaternion to rotate around the x axis
- */
-static inline void vmathQMakeRotationX( VmathQuat *result, float radians );
-
-/*
- * Construct a quaternion to rotate around the y axis
- */
-static inline void vmathQMakeRotationY( VmathQuat *result, float radians );
-
-/*
- * Construct a quaternion to rotate around the z axis
- */
-static inline void vmathQMakeRotationZ( VmathQuat *result, float radians );
-
-/*
- * Compute the conjugate of a quaternion
- */
-static inline void vmathQConj( VmathQuat *result, const VmathQuat *quat );
-
-/*
- * Use a unit-length quaternion to rotate a 3-D vector
- */
-static inline void vmathQRotate( VmathVector3 *result, const VmathQuat *unitQuat, const VmathVector3 *vec );
-
-/*
- * Compute the dot product of two quaternions
- */
-static inline float vmathQDot( const VmathQuat *quat0, const VmathQuat *quat1 );
-
-/*
- * Compute the norm of a quaternion
- */
-static inline float vmathQNorm( const VmathQuat *quat );
-
-/*
- * Compute the length of a quaternion
- */
-static inline float vmathQLength( const VmathQuat *quat );
-
-/*
- * Normalize a quaternion
- * NOTE: 
- * The result is unpredictable when all elements of quat are at or near zero.
- */
-static inline void vmathQNormalize( VmathQuat *result, const VmathQuat *quat );
-
-/*
- * Linear interpolation between two quaternions
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline void vmathQLerp( VmathQuat *result, float t, const VmathQuat *quat0, const VmathQuat *quat1 );
-
-/*
- * Spherical linear interpolation between two quaternions
- * NOTE: 
- * Interpolates along the shortest path between orientations.
- * Does not clamp t between 0 and 1.
- */
-static inline void vmathQSlerp( VmathQuat *result, float t, const VmathQuat *unitQuat0, const VmathQuat *unitQuat1 );
-
-/*
- * Spherical quadrangle interpolation
- */
-static inline void vmathQSquad( VmathQuat *result, float t, const VmathQuat *unitQuat0, const VmathQuat *unitQuat1, const VmathQuat *unitQuat2, const VmathQuat *unitQuat3 );
-
-/*
- * Conditionally select between two quaternions
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline void vmathQSelect( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1, unsigned int select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a quaternion
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathQPrint( const VmathQuat *quat );
-
-/*
- * Print a quaternion and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathQPrints( const VmathQuat *quat, const char *name );
-
-#endif
-
-/*
- * Copy a 3x3 matrix
- */
-static inline void vmathM3Copy( VmathMatrix3 *result, const VmathMatrix3 *mat );
-
-/*
- * Construct a 3x3 matrix containing the specified columns
- */
-static inline void vmathM3MakeFromCols( VmathMatrix3 *result, const VmathVector3 *col0, const VmathVector3 *col1, const VmathVector3 *col2 );
-
-/*
- * Construct a 3x3 rotation matrix from a unit-length quaternion
- */
-static inline void vmathM3MakeFromQ( VmathMatrix3 *result, const VmathQuat *unitQuat );
-
-/*
- * Set all elements of a 3x3 matrix to the same scalar value
- */
-static inline void vmathM3MakeFromScalar( VmathMatrix3 *result, float scalar );
-
-/*
- * Set column 0 of a 3x3 matrix
- */
-static inline void vmathM3SetCol0( VmathMatrix3 *result, const VmathVector3 *col0 );
-
-/*
- * Set column 1 of a 3x3 matrix
- */
-static inline void vmathM3SetCol1( VmathMatrix3 *result, const VmathVector3 *col1 );
-
-/*
- * Set column 2 of a 3x3 matrix
- */
-static inline void vmathM3SetCol2( VmathMatrix3 *result, const VmathVector3 *col2 );
-
-/*
- * Get column 0 of a 3x3 matrix
- */
-static inline void vmathM3GetCol0( VmathVector3 *result, const VmathMatrix3 *mat );
-
-/*
- * Get column 1 of a 3x3 matrix
- */
-static inline void vmathM3GetCol1( VmathVector3 *result, const VmathMatrix3 *mat );
-
-/*
- * Get column 2 of a 3x3 matrix
- */
-static inline void vmathM3GetCol2( VmathVector3 *result, const VmathMatrix3 *mat );
-
-/*
- * Set the column of a 3x3 matrix referred to by the specified index
- */
-static inline void vmathM3SetCol( VmathMatrix3 *result, int col, const VmathVector3 *vec );
-
-/*
- * Set the row of a 3x3 matrix referred to by the specified index
- */
-static inline void vmathM3SetRow( VmathMatrix3 *result, int row, const VmathVector3 *vec );
-
-/*
- * Get the column of a 3x3 matrix referred to by the specified index
- */
-static inline void vmathM3GetCol( VmathVector3 *result, const VmathMatrix3 *mat, int col );
-
-/*
- * Get the row of a 3x3 matrix referred to by the specified index
- */
-static inline void vmathM3GetRow( VmathVector3 *result, const VmathMatrix3 *mat, int row );
-
-/*
- * Set the element of a 3x3 matrix referred to by column and row indices
- */
-static inline void vmathM3SetElem( VmathMatrix3 *result, int col, int row, float val );
-
-/*
- * Get the element of a 3x3 matrix referred to by column and row indices
- */
-static inline float vmathM3GetElem( const VmathMatrix3 *mat, int col, int row );
-
-/*
- * Add two 3x3 matrices
- */
-static inline void vmathM3Add( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 );
-
-/*
- * Subtract a 3x3 matrix from another 3x3 matrix
- */
-static inline void vmathM3Sub( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 );
-
-/*
- * Negate all elements of a 3x3 matrix
- */
-static inline void vmathM3Neg( VmathMatrix3 *result, const VmathMatrix3 *mat );
-
-/*
- * Multiply a 3x3 matrix by a scalar
- */
-static inline void vmathM3ScalarMul( VmathMatrix3 *result, const VmathMatrix3 *mat, float scalar );
-
-/*
- * Multiply a 3x3 matrix by a 3-D vector
- */
-static inline void vmathM3MulV3( VmathVector3 *result, const VmathMatrix3 *mat, const VmathVector3 *vec );
-
-/*
- * Multiply two 3x3 matrices
- */
-static inline void vmathM3Mul( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 );
-
-/*
- * Construct an identity 3x3 matrix
- */
-static inline void vmathM3MakeIdentity( VmathMatrix3 *result );
-
-/*
- * Construct a 3x3 matrix to rotate around the x axis
- */
-static inline void vmathM3MakeRotationX( VmathMatrix3 *result, float radians );
-
-/*
- * Construct a 3x3 matrix to rotate around the y axis
- */
-static inline void vmathM3MakeRotationY( VmathMatrix3 *result, float radians );
-
-/*
- * Construct a 3x3 matrix to rotate around the z axis
- */
-static inline void vmathM3MakeRotationZ( VmathMatrix3 *result, float radians );
-
-/*
- * Construct a 3x3 matrix to rotate around the x, y, and z axes
- */
-static inline void vmathM3MakeRotationZYX( VmathMatrix3 *result, const VmathVector3 *radiansXYZ );
-
-/*
- * Construct a 3x3 matrix to rotate around a unit-length 3-D vector
- */
-static inline void vmathM3MakeRotationAxis( VmathMatrix3 *result, float radians, const VmathVector3 *unitVec );
-
-/*
- * Construct a rotation matrix from a unit-length quaternion
- */
-static inline void vmathM3MakeRotationQ( VmathMatrix3 *result, const VmathQuat *unitQuat );
-
-/*
- * Construct a 3x3 matrix to perform scaling
- */
-static inline void vmathM3MakeScale( VmathMatrix3 *result, const VmathVector3 *scaleVec );
-
-/*
- * Append (post-multiply) a scale transformation to a 3x3 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline void vmathM3AppendScale( VmathMatrix3 *result, const VmathMatrix3 *mat, const VmathVector3 *scaleVec );
-
-/*
- * Prepend (pre-multiply) a scale transformation to a 3x3 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline void vmathM3PrependScale( VmathMatrix3 *result, const VmathVector3 *scaleVec, const VmathMatrix3 *mat );
-
-/*
- * Multiply two 3x3 matrices per element
- */
-static inline void vmathM3MulPerElem( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 );
-
-/*
- * Compute the absolute value of a 3x3 matrix per element
- */
-static inline void vmathM3AbsPerElem( VmathMatrix3 *result, const VmathMatrix3 *mat );
-
-/*
- * Transpose of a 3x3 matrix
- */
-static inline void vmathM3Transpose( VmathMatrix3 *result, const VmathMatrix3 *mat );
-
-/*
- * Compute the inverse of a 3x3 matrix
- * NOTE: 
- * Result is unpredictable when the determinant of mat is equal to or near 0.
- */
-static inline void vmathM3Inverse( VmathMatrix3 *result, const VmathMatrix3 *mat );
-
-/*
- * Determinant of a 3x3 matrix
- */
-static inline float vmathM3Determinant( const VmathMatrix3 *mat );
-
-/*
- * Conditionally select between two 3x3 matrices
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline void vmathM3Select( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1, unsigned int select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3x3 matrix
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathM3Print( const VmathMatrix3 *mat );
-
-/*
- * Print a 3x3 matrix and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathM3Prints( const VmathMatrix3 *mat, const char *name );
-
-#endif
-
-/*
- * Copy a 4x4 matrix
- */
-static inline void vmathM4Copy( VmathMatrix4 *result, const VmathMatrix4 *mat );
-
-/*
- * Construct a 4x4 matrix containing the specified columns
- */
-static inline void vmathM4MakeFromCols( VmathMatrix4 *result, const VmathVector4 *col0, const VmathVector4 *col1, const VmathVector4 *col2, const VmathVector4 *col3 );
-
-/*
- * Construct a 4x4 matrix from a 3x4 transformation matrix
- */
-static inline void vmathM4MakeFromT3( VmathMatrix4 *result, const VmathTransform3 *mat );
-
-/*
- * Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector
- */
-static inline void vmathM4MakeFromM3V3( VmathMatrix4 *result, const VmathMatrix3 *mat, const VmathVector3 *translateVec );
-
-/*
- * Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector
- */
-static inline void vmathM4MakeFromQV3( VmathMatrix4 *result, const VmathQuat *unitQuat, const VmathVector3 *translateVec );
-
-/*
- * Set all elements of a 4x4 matrix to the same scalar value
- */
-static inline void vmathM4MakeFromScalar( VmathMatrix4 *result, float scalar );
-
-/*
- * Set the upper-left 3x3 submatrix
- * NOTE: 
- * This function does not change the bottom row elements.
- */
-static inline void vmathM4SetUpper3x3( VmathMatrix4 *result, const VmathMatrix3 *mat3 );
-
-/*
- * Get the upper-left 3x3 submatrix of a 4x4 matrix
- */
-static inline void vmathM4GetUpper3x3( VmathMatrix3 *result, const VmathMatrix4 *mat );
-
-/*
- * Set translation component
- * NOTE: 
- * This function does not change the bottom row elements.
- */
-static inline void vmathM4SetTranslation( VmathMatrix4 *result, const VmathVector3 *translateVec );
-
-/*
- * Get the translation component of a 4x4 matrix
- */
-static inline void vmathM4GetTranslation( VmathVector3 *result, const VmathMatrix4 *mat );
-
-/*
- * Set column 0 of a 4x4 matrix
- */
-static inline void vmathM4SetCol0( VmathMatrix4 *result, const VmathVector4 *col0 );
-
-/*
- * Set column 1 of a 4x4 matrix
- */
-static inline void vmathM4SetCol1( VmathMatrix4 *result, const VmathVector4 *col1 );
-
-/*
- * Set column 2 of a 4x4 matrix
- */
-static inline void vmathM4SetCol2( VmathMatrix4 *result, const VmathVector4 *col2 );
-
-/*
- * Set column 3 of a 4x4 matrix
- */
-static inline void vmathM4SetCol3( VmathMatrix4 *result, const VmathVector4 *col3 );
-
-/*
- * Get column 0 of a 4x4 matrix
- */
-static inline void vmathM4GetCol0( VmathVector4 *result, const VmathMatrix4 *mat );
-
-/*
- * Get column 1 of a 4x4 matrix
- */
-static inline void vmathM4GetCol1( VmathVector4 *result, const VmathMatrix4 *mat );
-
-/*
- * Get column 2 of a 4x4 matrix
- */
-static inline void vmathM4GetCol2( VmathVector4 *result, const VmathMatrix4 *mat );
-
-/*
- * Get column 3 of a 4x4 matrix
- */
-static inline void vmathM4GetCol3( VmathVector4 *result, const VmathMatrix4 *mat );
-
-/*
- * Set the column of a 4x4 matrix referred to by the specified index
- */
-static inline void vmathM4SetCol( VmathMatrix4 *result, int col, const VmathVector4 *vec );
-
-/*
- * Set the row of a 4x4 matrix referred to by the specified index
- */
-static inline void vmathM4SetRow( VmathMatrix4 *result, int row, const VmathVector4 *vec );
-
-/*
- * Get the column of a 4x4 matrix referred to by the specified index
- */
-static inline void vmathM4GetCol( VmathVector4 *result, const VmathMatrix4 *mat, int col );
-
-/*
- * Get the row of a 4x4 matrix referred to by the specified index
- */
-static inline void vmathM4GetRow( VmathVector4 *result, const VmathMatrix4 *mat, int row );
-
-/*
- * Set the element of a 4x4 matrix referred to by column and row indices
- */
-static inline void vmathM4SetElem( VmathMatrix4 *result, int col, int row, float val );
-
-/*
- * Get the element of a 4x4 matrix referred to by column and row indices
- */
-static inline float vmathM4GetElem( const VmathMatrix4 *mat, int col, int row );
-
-/*
- * Add two 4x4 matrices
- */
-static inline void vmathM4Add( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 );
-
-/*
- * Subtract a 4x4 matrix from another 4x4 matrix
- */
-static inline void vmathM4Sub( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 );
-
-/*
- * Negate all elements of a 4x4 matrix
- */
-static inline void vmathM4Neg( VmathMatrix4 *result, const VmathMatrix4 *mat );
-
-/*
- * Multiply a 4x4 matrix by a scalar
- */
-static inline void vmathM4ScalarMul( VmathMatrix4 *result, const VmathMatrix4 *mat, float scalar );
-
-/*
- * Multiply a 4x4 matrix by a 4-D vector
- */
-static inline void vmathM4MulV4( VmathVector4 *result, const VmathMatrix4 *mat, const VmathVector4 *vec );
-
-/*
- * Multiply a 4x4 matrix by a 3-D vector
- */
-static inline void vmathM4MulV3( VmathVector4 *result, const VmathMatrix4 *mat, const VmathVector3 *vec );
-
-/*
- * Multiply a 4x4 matrix by a 3-D point
- */
-static inline void vmathM4MulP3( VmathVector4 *result, const VmathMatrix4 *mat, const VmathPoint3 *pnt );
-
-/*
- * Multiply two 4x4 matrices
- */
-static inline void vmathM4Mul( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 );
-
-/*
- * Multiply a 4x4 matrix by a 3x4 transformation matrix
- */
-static inline void vmathM4MulT3( VmathMatrix4 *result, const VmathMatrix4 *mat, const VmathTransform3 *tfrm );
-
-/*
- * Construct an identity 4x4 matrix
- */
-static inline void vmathM4MakeIdentity( VmathMatrix4 *result );
-
-/*
- * Construct a 4x4 matrix to rotate around the x axis
- */
-static inline void vmathM4MakeRotationX( VmathMatrix4 *result, float radians );
-
-/*
- * Construct a 4x4 matrix to rotate around the y axis
- */
-static inline void vmathM4MakeRotationY( VmathMatrix4 *result, float radians );
-
-/*
- * Construct a 4x4 matrix to rotate around the z axis
- */
-static inline void vmathM4MakeRotationZ( VmathMatrix4 *result, float radians );
-
-/*
- * Construct a 4x4 matrix to rotate around the x, y, and z axes
- */
-static inline void vmathM4MakeRotationZYX( VmathMatrix4 *result, const VmathVector3 *radiansXYZ );
-
-/*
- * Construct a 4x4 matrix to rotate around a unit-length 3-D vector
- */
-static inline void vmathM4MakeRotationAxis( VmathMatrix4 *result, float radians, const VmathVector3 *unitVec );
-
-/*
- * Construct a rotation matrix from a unit-length quaternion
- */
-static inline void vmathM4MakeRotationQ( VmathMatrix4 *result, const VmathQuat *unitQuat );
-
-/*
- * Construct a 4x4 matrix to perform scaling
- */
-static inline void vmathM4MakeScale( VmathMatrix4 *result, const VmathVector3 *scaleVec );
-
-/*
- * Construct a 4x4 matrix to perform translation
- */
-static inline void vmathM4MakeTranslation( VmathMatrix4 *result, const VmathVector3 *translateVec );
-
-/*
- * Construct viewing matrix based on eye position, position looked at, and up direction
- */
-static inline void vmathM4MakeLookAt( VmathMatrix4 *result, const VmathPoint3 *eyePos, const VmathPoint3 *lookAtPos, const VmathVector3 *upVec );
-
-/*
- * Construct a perspective projection matrix
- */
-static inline void vmathM4MakePerspective( VmathMatrix4 *result, float fovyRadians, float aspect, float zNear, float zFar );
-
-/*
- * Construct a perspective projection matrix based on frustum
- */
-static inline void vmathM4MakeFrustum( VmathMatrix4 *result, float left, float right, float bottom, float top, float zNear, float zFar );
-
-/*
- * Construct an orthographic projection matrix
- */
-static inline void vmathM4MakeOrthographic( VmathMatrix4 *result, float left, float right, float bottom, float top, float zNear, float zFar );
-
-/*
- * Append (post-multiply) a scale transformation to a 4x4 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline void vmathM4AppendScale( VmathMatrix4 *result, const VmathMatrix4 *mat, const VmathVector3 *scaleVec );
-
-/*
- * Prepend (pre-multiply) a scale transformation to a 4x4 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline void vmathM4PrependScale( VmathMatrix4 *result, const VmathVector3 *scaleVec, const VmathMatrix4 *mat );
-
-/*
- * Multiply two 4x4 matrices per element
- */
-static inline void vmathM4MulPerElem( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 );
-
-/*
- * Compute the absolute value of a 4x4 matrix per element
- */
-static inline void vmathM4AbsPerElem( VmathMatrix4 *result, const VmathMatrix4 *mat );
-
-/*
- * Transpose of a 4x4 matrix
- */
-static inline void vmathM4Transpose( VmathMatrix4 *result, const VmathMatrix4 *mat );
-
-/*
- * Compute the inverse of a 4x4 matrix
- * NOTE: 
- * Result is unpredictable when the determinant of mat is equal to or near 0.
- */
-static inline void vmathM4Inverse( VmathMatrix4 *result, const VmathMatrix4 *mat );
-
-/*
- * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix
- * NOTE: 
- * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.  The result is unpredictable when the determinant of mat is equal to or near 0.
- */
-static inline void vmathM4AffineInverse( VmathMatrix4 *result, const VmathMatrix4 *mat );
-
-/*
- * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix
- * NOTE: 
- * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.
- */
-static inline void vmathM4OrthoInverse( VmathMatrix4 *result, const VmathMatrix4 *mat );
-
-/*
- * Determinant of a 4x4 matrix
- */
-static inline float vmathM4Determinant( const VmathMatrix4 *mat );
-
-/*
- * Conditionally select between two 4x4 matrices
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline void vmathM4Select( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1, unsigned int select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 4x4 matrix
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathM4Print( const VmathMatrix4 *mat );
-
-/*
- * Print a 4x4 matrix and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathM4Prints( const VmathMatrix4 *mat, const char *name );
-
-#endif
-
-/*
- * Copy a 3x4 transformation matrix
- */
-static inline void vmathT3Copy( VmathTransform3 *result, const VmathTransform3 *tfrm );
-
-/*
- * Construct a 3x4 transformation matrix containing the specified columns
- */
-static inline void vmathT3MakeFromCols( VmathTransform3 *result, const VmathVector3 *col0, const VmathVector3 *col1, const VmathVector3 *col2, const VmathVector3 *col3 );
-
-/*
- * Construct a 3x4 transformation matrix from a 3x3 matrix and a 3-D vector
- */
-static inline void vmathT3MakeFromM3V3( VmathTransform3 *result, const VmathMatrix3 *tfrm, const VmathVector3 *translateVec );
-
-/*
- * Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector
- */
-static inline void vmathT3MakeFromQV3( VmathTransform3 *result, const VmathQuat *unitQuat, const VmathVector3 *translateVec );
-
-/*
- * Set all elements of a 3x4 transformation matrix to the same scalar value
- */
-static inline void vmathT3MakeFromScalar( VmathTransform3 *result, float scalar );
-
-/*
- * Set the upper-left 3x3 submatrix
- */
-static inline void vmathT3SetUpper3x3( VmathTransform3 *result, const VmathMatrix3 *mat3 );
-
-/*
- * Get the upper-left 3x3 submatrix of a 3x4 transformation matrix
- */
-static inline void vmathT3GetUpper3x3( VmathMatrix3 *result, const VmathTransform3 *tfrm );
-
-/*
- * Set translation component
- */
-static inline void vmathT3SetTranslation( VmathTransform3 *result, const VmathVector3 *translateVec );
-
-/*
- * Get the translation component of a 3x4 transformation matrix
- */
-static inline void vmathT3GetTranslation( VmathVector3 *result, const VmathTransform3 *tfrm );
-
-/*
- * Set column 0 of a 3x4 transformation matrix
- */
-static inline void vmathT3SetCol0( VmathTransform3 *result, const VmathVector3 *col0 );
-
-/*
- * Set column 1 of a 3x4 transformation matrix
- */
-static inline void vmathT3SetCol1( VmathTransform3 *result, const VmathVector3 *col1 );
-
-/*
- * Set column 2 of a 3x4 transformation matrix
- */
-static inline void vmathT3SetCol2( VmathTransform3 *result, const VmathVector3 *col2 );
-
-/*
- * Set column 3 of a 3x4 transformation matrix
- */
-static inline void vmathT3SetCol3( VmathTransform3 *result, const VmathVector3 *col3 );
-
-/*
- * Get column 0 of a 3x4 transformation matrix
- */
-static inline void vmathT3GetCol0( VmathVector3 *result, const VmathTransform3 *tfrm );
-
-/*
- * Get column 1 of a 3x4 transformation matrix
- */
-static inline void vmathT3GetCol1( VmathVector3 *result, const VmathTransform3 *tfrm );
-
-/*
- * Get column 2 of a 3x4 transformation matrix
- */
-static inline void vmathT3GetCol2( VmathVector3 *result, const VmathTransform3 *tfrm );
-
-/*
- * Get column 3 of a 3x4 transformation matrix
- */
-static inline void vmathT3GetCol3( VmathVector3 *result, const VmathTransform3 *tfrm );
-
-/*
- * Set the column of a 3x4 transformation matrix referred to by the specified index
- */
-static inline void vmathT3SetCol( VmathTransform3 *result, int col, const VmathVector3 *vec );
-
-/*
- * Set the row of a 3x4 transformation matrix referred to by the specified index
- */
-static inline void vmathT3SetRow( VmathTransform3 *result, int row, const VmathVector4 *vec );
-
-/*
- * Get the column of a 3x4 transformation matrix referred to by the specified index
- */
-static inline void vmathT3GetCol( VmathVector3 *result, const VmathTransform3 *tfrm, int col );
-
-/*
- * Get the row of a 3x4 transformation matrix referred to by the specified index
- */
-static inline void vmathT3GetRow( VmathVector4 *result, const VmathTransform3 *tfrm, int row );
-
-/*
- * Set the element of a 3x4 transformation matrix referred to by column and row indices
- */
-static inline void vmathT3SetElem( VmathTransform3 *result, int col, int row, float val );
-
-/*
- * Get the element of a 3x4 transformation matrix referred to by column and row indices
- */
-static inline float vmathT3GetElem( const VmathTransform3 *tfrm, int col, int row );
-
-/*
- * Multiply a 3x4 transformation matrix by a 3-D vector
- */
-static inline void vmathT3MulV3( VmathVector3 *result, const VmathTransform3 *tfrm, const VmathVector3 *vec );
-
-/*
- * Multiply a 3x4 transformation matrix by a 3-D point
- */
-static inline void vmathT3MulP3( VmathPoint3 *result, const VmathTransform3 *tfrm, const VmathPoint3 *pnt );
-
-/*
- * Multiply two 3x4 transformation matrices
- */
-static inline void vmathT3Mul( VmathTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1 );
-
-/*
- * Construct an identity 3x4 transformation matrix
- */
-static inline void vmathT3MakeIdentity( VmathTransform3 *result );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the x axis
- */
-static inline void vmathT3MakeRotationX( VmathTransform3 *result, float radians );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the y axis
- */
-static inline void vmathT3MakeRotationY( VmathTransform3 *result, float radians );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the z axis
- */
-static inline void vmathT3MakeRotationZ( VmathTransform3 *result, float radians );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the x, y, and z axes
- */
-static inline void vmathT3MakeRotationZYX( VmathTransform3 *result, const VmathVector3 *radiansXYZ );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector
- */
-static inline void vmathT3MakeRotationAxis( VmathTransform3 *result, float radians, const VmathVector3 *unitVec );
-
-/*
- * Construct a rotation matrix from a unit-length quaternion
- */
-static inline void vmathT3MakeRotationQ( VmathTransform3 *result, const VmathQuat *unitQuat );
-
-/*
- * Construct a 3x4 transformation matrix to perform scaling
- */
-static inline void vmathT3MakeScale( VmathTransform3 *result, const VmathVector3 *scaleVec );
-
-/*
- * Construct a 3x4 transformation matrix to perform translation
- */
-static inline void vmathT3MakeTranslation( VmathTransform3 *result, const VmathVector3 *translateVec );
-
-/*
- * Append (post-multiply) a scale transformation to a 3x4 transformation matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline void vmathT3AppendScale( VmathTransform3 *result, const VmathTransform3 *tfrm, const VmathVector3 *scaleVec );
-
-/*
- * Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline void vmathT3PrependScale( VmathTransform3 *result, const VmathVector3 *scaleVec, const VmathTransform3 *tfrm );
-
-/*
- * Multiply two 3x4 transformation matrices per element
- */
-static inline void vmathT3MulPerElem( VmathTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1 );
-
-/*
- * Compute the absolute value of a 3x4 transformation matrix per element
- */
-static inline void vmathT3AbsPerElem( VmathTransform3 *result, const VmathTransform3 *tfrm );
-
-/*
- * Inverse of a 3x4 transformation matrix
- * NOTE: 
- * Result is unpredictable when the determinant of the left 3x3 submatrix is equal to or near 0.
- */
-static inline void vmathT3Inverse( VmathTransform3 *result, const VmathTransform3 *tfrm );
-
-/*
- * Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix
- * NOTE: 
- * This can be used to achieve better performance than a general inverse when the specified 3x4 transformation matrix meets the given restrictions.
- */
-static inline void vmathT3OrthoInverse( VmathTransform3 *result, const VmathTransform3 *tfrm );
-
-/*
- * Conditionally select between two 3x4 transformation matrices
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline void vmathT3Select( VmathTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1, unsigned int select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3x4 transformation matrix
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathT3Print( const VmathTransform3 *tfrm );
-
-/*
- * Print a 3x4 transformation matrix and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathT3Prints( const VmathTransform3 *tfrm, const char *name );
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#include "vec_aos.h"
-#include "quat_aos.h"
-#include "mat_aos.h"
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_AOS_C_SPU_H
+#define _VECTORMATH_AOS_C_SPU_H
+
+#include <math.h>
+#include <simdmath.h>
+#include <stdio.h>
+
+#ifdef _VECTORMATH_DEBUG
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+#ifndef _VECTORMATH_AOS_C_TYPES_H
+#define _VECTORMATH_AOS_C_TYPES_H
+
+/* A 3-D vector in array-of-structures format.
+ * The whole vector is carried in one vec_float4 member. */
+typedef struct _VmathVector3
+{
+    vec_float4 vec128; /* vector data; see vmathV3MakeFrom128 / vmathV3Get128 */
+} VmathVector3;
+
+/* A 4-D vector in array-of-structures format.
+ * The whole vector is carried in one vec_float4 member. */
+typedef struct _VmathVector4
+{
+    vec_float4 vec128; /* vector data; see vmathV4MakeFrom128 / vmathV4Get128 */
+} VmathVector4;
+
+/* A 3-D point in array-of-structures format.
+ * The whole point is carried in one vec_float4 member. */
+typedef struct _VmathPoint3
+{
+    vec_float4 vec128; /* point data; see vmathP3MakeFrom128 / vmathP3Get128 */
+} VmathPoint3;
+
+/* A quaternion in array-of-structures format.
+ * The whole quaternion is carried in one vec_float4 member. */
+typedef struct _VmathQuat
+{
+    vec_float4 vec128; /* quaternion data */
+} VmathQuat;
+
+/* A 3x3 matrix in array-of-structures format.
+ * Stored as three VmathVector3 columns (col0..col2). */
+typedef struct _VmathMatrix3
+{
+    VmathVector3 col0;
+    VmathVector3 col1;
+    VmathVector3 col2;
+} VmathMatrix3;
+
+/* A 4x4 matrix in array-of-structures format.
+ * Stored as four VmathVector4 columns (col0..col3). */
+typedef struct _VmathMatrix4
+{
+    VmathVector4 col0;
+    VmathVector4 col1;
+    VmathVector4 col2;
+    VmathVector4 col3;
+} VmathMatrix4;
+
+/* A 3x4 transformation matrix in array-of-structures format.
+ * Stored as four VmathVector3 columns (col0..col3). */
+typedef struct _VmathTransform3
+{
+    VmathVector3 col0;
+    VmathVector3 col1;
+    VmathVector3 col2;
+    VmathVector3 col3;
+} VmathTransform3;
+
+#endif
+
+/*
+ * Copy a 3-D vector
+ */
+static inline void vmathV3Copy( VmathVector3 *result, const VmathVector3 *vec );
+
+/*
+ * Construct a 3-D vector from x, y, and z elements
+ */
+static inline void vmathV3MakeFromElems( VmathVector3 *result, float x, float y, float z );
+
+/*
+ * Copy elements from a 3-D point into a 3-D vector
+ */
+static inline void vmathV3MakeFromP3( VmathVector3 *result, const VmathPoint3 *pnt );
+
+/*
+ * Set all elements of a 3-D vector to the same scalar value
+ */
+static inline void vmathV3MakeFromScalar( VmathVector3 *result, float scalar );
+
+/*
+ * Set vector float data in a 3-D vector
+ */
+static inline void vmathV3MakeFrom128( VmathVector3 *result, vec_float4 vf4 );
+
+/*
+ * Get vector float data from a 3-D vector
+ */
+static inline vec_float4 vmathV3Get128( const VmathVector3 *vec );
+
+/*
+ * Set the x element of a 3-D vector
+ */
+static inline void vmathV3SetX( VmathVector3 *result, float x );
+
+/*
+ * Set the y element of a 3-D vector
+ */
+static inline void vmathV3SetY( VmathVector3 *result, float y );
+
+/*
+ * Set the z element of a 3-D vector
+ */
+static inline void vmathV3SetZ( VmathVector3 *result, float z );
+
+/*
+ * Get the x element of a 3-D vector
+ */
+static inline float vmathV3GetX( const VmathVector3 *vec );
+
+/*
+ * Get the y element of a 3-D vector
+ */
+static inline float vmathV3GetY( const VmathVector3 *vec );
+
+/*
+ * Get the z element of a 3-D vector
+ */
+static inline float vmathV3GetZ( const VmathVector3 *vec );
+
+/*
+ * Set an x, y, or z element of a 3-D vector by index
+ */
+static inline void vmathV3SetElem( VmathVector3 *result, int idx, float value );
+
+/*
+ * Get an x, y, or z element of a 3-D vector by index
+ */
+static inline float vmathV3GetElem( const VmathVector3 *vec, int idx );
+
+/*
+ * Add two 3-D vectors
+ */
+static inline void vmathV3Add( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
+
+/*
+ * Subtract a 3-D vector from another 3-D vector
+ */
+static inline void vmathV3Sub( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
+
+/*
+ * Add a 3-D vector to a 3-D point
+ */
+static inline void vmathV3AddP3( VmathPoint3 *result, const VmathVector3 *vec, const VmathPoint3 *pnt );
+
+/*
+ * Multiply a 3-D vector by a scalar
+ */
+static inline void vmathV3ScalarMul( VmathVector3 *result, const VmathVector3 *vec, float scalar );
+
+/*
+ * Divide a 3-D vector by a scalar
+ */
+static inline void vmathV3ScalarDiv( VmathVector3 *result, const VmathVector3 *vec, float scalar );
+
+/*
+ * Negate all elements of a 3-D vector
+ */
+static inline void vmathV3Neg( VmathVector3 *result, const VmathVector3 *vec );
+
+/*
+ * Construct x axis
+ */
+static inline void vmathV3MakeXAxis( VmathVector3 *result );
+
+/*
+ * Construct y axis
+ */
+static inline void vmathV3MakeYAxis( VmathVector3 *result );
+
+/*
+ * Construct z axis
+ */
+static inline void vmathV3MakeZAxis( VmathVector3 *result );
+
+/*
+ * Multiply two 3-D vectors per element
+ */
+static inline void vmathV3MulPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
+
+/*
+ * Divide two 3-D vectors per element
+ * NOTE:
+ * Floating-point behavior matches the simdmath library function divf4.
+ */
+static inline void vmathV3DivPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
+
+/*
+ * Compute the reciprocal of a 3-D vector per element
+ * NOTE:
+ * Floating-point behavior matches the simdmath library function recipf4.
+ */
+static inline void vmathV3RecipPerElem( VmathVector3 *result, const VmathVector3 *vec );
+
+/*
+ * Compute the square root of a 3-D vector per element
+ * NOTE:
+ * Floating-point behavior matches the simdmath library function sqrtf4.
+ */
+static inline void vmathV3SqrtPerElem( VmathVector3 *result, const VmathVector3 *vec );
+
+/*
+ * Compute the reciprocal square root of a 3-D vector per element
+ * NOTE:
+ * Floating-point behavior matches the simdmath library function rsqrtf4.
+ */
+static inline void vmathV3RsqrtPerElem( VmathVector3 *result, const VmathVector3 *vec );
+
+/*
+ * Compute the absolute value of a 3-D vector per element
+ */
+static inline void vmathV3AbsPerElem( VmathVector3 *result, const VmathVector3 *vec );
+
+/*
+ * Copy sign from one 3-D vector to another, per element
+ */
+static inline void vmathV3CopySignPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
+
+/*
+ * Maximum of two 3-D vectors per element
+ */
+static inline void vmathV3MaxPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
+
+/*
+ * Minimum of two 3-D vectors per element
+ */
+static inline void vmathV3MinPerElem( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
+
+/*
+ * Maximum element of a 3-D vector
+ */
+static inline float vmathV3MaxElem( const VmathVector3 *vec );
+
+/*
+ * Minimum element of a 3-D vector
+ */
+static inline float vmathV3MinElem( const VmathVector3 *vec );
+
+/*
+ * Compute the sum of all elements of a 3-D vector
+ */
+static inline float vmathV3Sum( const VmathVector3 *vec );
+
+/*
+ * Compute the dot product of two 3-D vectors
+ */
+static inline float vmathV3Dot( const VmathVector3 *vec0, const VmathVector3 *vec1 );
+
+/*
+ * Compute the square of the length of a 3-D vector
+ */
+static inline float vmathV3LengthSqr( const VmathVector3 *vec );
+
+/*
+ * Compute the length of a 3-D vector
+ */
+static inline float vmathV3Length( const VmathVector3 *vec );
+
+/*
+ * Normalize a 3-D vector
+ * NOTE: 
+ * The result is unpredictable when all elements of vec are at or near zero.
+ */
+static inline void vmathV3Normalize( VmathVector3 *result, const VmathVector3 *vec );
+
+/*
+ * Compute cross product of two 3-D vectors
+ */
+static inline void vmathV3Cross( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
+
+/*
+ * Outer product of two 3-D vectors
+ */
+static inline void vmathV3Outer( VmathMatrix3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1 );
+
+/*
+ * Pre-multiply a row vector by a 3x3 matrix
+ * NOTE: 
+ * Slower than column post-multiply.
+ */
+static inline void vmathV3RowMul( VmathVector3 *result, const VmathVector3 *vec, const VmathMatrix3 *mat );
+
+/*
+ * Cross-product matrix of a 3-D vector
+ */
+static inline void vmathV3CrossMatrix( VmathMatrix3 *result, const VmathVector3 *vec );
+
+/*
+ * Create cross-product matrix and multiply
+ * NOTE: 
+ * Faster than separately creating a cross-product matrix and multiplying.
+ */
+static inline void vmathV3CrossMatrixMul( VmathMatrix3 *result, const VmathVector3 *vec, const VmathMatrix3 *mat );
+
+/*
+ * Linear interpolation between two 3-D vectors
+ * NOTE: 
+ * Does not clamp t between 0 and 1.
+ */
+static inline void vmathV3Lerp( VmathVector3 *result, float t, const VmathVector3 *vec0, const VmathVector3 *vec1 );
+
+/*
+ * Spherical linear interpolation between two 3-D vectors
+ * NOTE: 
+ * The result is unpredictable if the vectors point in opposite directions.
+ * Does not clamp t between 0 and 1.
+ */
+static inline void vmathV3Slerp( VmathVector3 *result, float t, const VmathVector3 *unitVec0, const VmathVector3 *unitVec1 );
+
+/*
+ * Conditionally select between two 3-D vectors
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline void vmathV3Select( VmathVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1, unsigned int select1 );
+
+/*
+ * Store x, y, and z elements of a 3-D vector in the first three words of a quadword.
+ * The value of the fourth word (the word with the highest address) remains unchanged.
+ */
+static inline void vmathV3StoreXYZ( const VmathVector3 *vec, vec_float4 *quad );
+
+/*
+ * Load four three-float 3-D vectors, stored in three quadwords
+ */
+static inline void vmathV3LoadXYZArray( VmathVector3 *vec0, VmathVector3 *vec1, VmathVector3 *vec2, VmathVector3 *vec3, const vec_float4 *threeQuads );
+
+/*
+ * Store four 3-D vectors in three quadwords
+ */
+static inline void vmathV3StoreXYZArray( const VmathVector3 *vec0, const VmathVector3 *vec1, const VmathVector3 *vec2, const VmathVector3 *vec3, vec_float4 *threeQuads );
+
+/*
+ * Store eight 3-D vectors as half-floats
+ */
+static inline void vmathV3StoreHalfFloats( const VmathVector3 *vec0, const VmathVector3 *vec1, const VmathVector3 *vec2, const VmathVector3 *vec3, const VmathVector3 *vec4, const VmathVector3 *vec5, const VmathVector3 *vec6, const VmathVector3 *vec7, vec_ushort8 *threeQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3-D vector
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathV3Print( const VmathVector3 *vec );
+
+/*
+ * Print a 3-D vector and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathV3Prints( const VmathVector3 *vec, const char *name );
+
+#endif
+
+/*
+ * Copy a 4-D vector
+ */
+static inline void vmathV4Copy( VmathVector4 *result, const VmathVector4 *vec );
+
+/*
+ * Construct a 4-D vector from x, y, z, and w elements
+ */
+static inline void vmathV4MakeFromElems( VmathVector4 *result, float x, float y, float z, float w );
+
+/*
+ * Construct a 4-D vector from a 3-D vector and a scalar
+ */
+static inline void vmathV4MakeFromV3Scalar( VmathVector4 *result, const VmathVector3 *xyz, float w );
+
+/*
+ * Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0
+ */
+static inline void vmathV4MakeFromV3( VmathVector4 *result, const VmathVector3 *vec );
+
+/*
+ * Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1
+ */
+static inline void vmathV4MakeFromP3( VmathVector4 *result, const VmathPoint3 *pnt );
+
+/*
+ * Copy elements from a quaternion into a 4-D vector
+ */
+static inline void vmathV4MakeFromQ( VmathVector4 *result, const VmathQuat *quat );
+
+/*
+ * Set all elements of a 4-D vector to the same scalar value
+ */
+static inline void vmathV4MakeFromScalar( VmathVector4 *result, float scalar );
+
+/*
+ * Set vector float data in a 4-D vector
+ */
+static inline void vmathV4MakeFrom128( VmathVector4 *result, vec_float4 vf4 );
+
+/*
+ * Get vector float data from a 4-D vector
+ */
+static inline vec_float4 vmathV4Get128( const VmathVector4 *vec );
+
+/*
+ * Set the x, y, and z elements of a 4-D vector
+ * NOTE: 
+ * This function does not change the w element.
+ */
+static inline void vmathV4SetXYZ( VmathVector4 *result, const VmathVector3 *vec );
+
+/*
+ * Get the x, y, and z elements of a 4-D vector
+ */
+static inline void vmathV4GetXYZ( VmathVector3 *result, const VmathVector4 *vec );
+
+/*
+ * Set the x element of a 4-D vector
+ */
+static inline void vmathV4SetX( VmathVector4 *result, float x );
+
+/*
+ * Set the y element of a 4-D vector
+ */
+static inline void vmathV4SetY( VmathVector4 *result, float y );
+
+/*
+ * Set the z element of a 4-D vector
+ */
+static inline void vmathV4SetZ( VmathVector4 *result, float z );
+
+/*
+ * Set the w element of a 4-D vector
+ */
+static inline void vmathV4SetW( VmathVector4 *result, float w );
+
+/*
+ * Get the x element of a 4-D vector
+ */
+static inline float vmathV4GetX( const VmathVector4 *vec );
+
+/*
+ * Get the y element of a 4-D vector
+ */
+static inline float vmathV4GetY( const VmathVector4 *vec );
+
+/*
+ * Get the z element of a 4-D vector
+ */
+static inline float vmathV4GetZ( const VmathVector4 *vec );
+
+/*
+ * Get the w element of a 4-D vector
+ */
+static inline float vmathV4GetW( const VmathVector4 *vec );
+
+/*
+ * Set an x, y, z, or w element of a 4-D vector by index
+ */
+static inline void vmathV4SetElem( VmathVector4 *result, int idx, float value );
+
+/*
+ * Get an x, y, z, or w element of a 4-D vector by index
+ */
+static inline float vmathV4GetElem( const VmathVector4 *vec, int idx );
+
+/*
+ * Add two 4-D vectors
+ */
+static inline void vmathV4Add( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
+
+/*
+ * Subtract a 4-D vector from another 4-D vector
+ */
+static inline void vmathV4Sub( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
+
+/*
+ * Multiply a 4-D vector by a scalar
+ */
+static inline void vmathV4ScalarMul( VmathVector4 *result, const VmathVector4 *vec, float scalar );
+
+/*
+ * Divide a 4-D vector by a scalar
+ */
+static inline void vmathV4ScalarDiv( VmathVector4 *result, const VmathVector4 *vec, float scalar );
+
+/*
+ * Negate all elements of a 4-D vector
+ */
+static inline void vmathV4Neg( VmathVector4 *result, const VmathVector4 *vec );
+
+/*
+ * Construct x axis
+ */
+static inline void vmathV4MakeXAxis( VmathVector4 *result );
+
+/*
+ * Construct y axis
+ */
+static inline void vmathV4MakeYAxis( VmathVector4 *result );
+
+/*
+ * Construct z axis
+ */
+static inline void vmathV4MakeZAxis( VmathVector4 *result );
+
+/*
+ * Construct w axis
+ */
+static inline void vmathV4MakeWAxis( VmathVector4 *result );
+
+/*
+ * Multiply two 4-D vectors per element
+ */
+static inline void vmathV4MulPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
+
+/*
+ * Divide two 4-D vectors per element
+ * NOTE:
+ * Floating-point behavior matches the simdmath library function divf4.
+ */
+static inline void vmathV4DivPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
+
+/*
+ * Compute the reciprocal of a 4-D vector per element
+ * NOTE:
+ * Floating-point behavior matches the simdmath library function recipf4.
+ */
+static inline void vmathV4RecipPerElem( VmathVector4 *result, const VmathVector4 *vec );
+
+/*
+ * Compute the square root of a 4-D vector per element
+ * NOTE:
+ * Floating-point behavior matches the simdmath library function sqrtf4.
+ */
+static inline void vmathV4SqrtPerElem( VmathVector4 *result, const VmathVector4 *vec );
+
+/*
+ * Compute the reciprocal square root of a 4-D vector per element
+ * NOTE:
+ * Floating-point behavior matches the simdmath library function rsqrtf4.
+ */
+static inline void vmathV4RsqrtPerElem( VmathVector4 *result, const VmathVector4 *vec );
+
+/*
+ * Compute the absolute value of a 4-D vector per element
+ */
+static inline void vmathV4AbsPerElem( VmathVector4 *result, const VmathVector4 *vec );
+
+/*
+ * Copy sign from one 4-D vector to another, per element
+ */
+static inline void vmathV4CopySignPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
+
+/*
+ * Maximum of two 4-D vectors per element
+ */
+static inline void vmathV4MaxPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
+
+/*
+ * Minimum of two 4-D vectors per element
+ */
+static inline void vmathV4MinPerElem( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
+
+/*
+ * Maximum element of a 4-D vector
+ */
+static inline float vmathV4MaxElem( const VmathVector4 *vec );
+
+/*
+ * Minimum element of a 4-D vector
+ */
+static inline float vmathV4MinElem( const VmathVector4 *vec );
+
+/*
+ * Compute the sum of all elements of a 4-D vector
+ */
+static inline float vmathV4Sum( const VmathVector4 *vec );
+
+/*
+ * Compute the dot product of two 4-D vectors
+ */
+static inline float vmathV4Dot( const VmathVector4 *vec0, const VmathVector4 *vec1 );
+
+/*
+ * Compute the square of the length of a 4-D vector
+ */
+static inline float vmathV4LengthSqr( const VmathVector4 *vec );
+
+/*
+ * Compute the length of a 4-D vector
+ */
+static inline float vmathV4Length( const VmathVector4 *vec );
+
+/*
+ * Normalize a 4-D vector
+ * NOTE: 
+ * The result is unpredictable when all elements of vec are at or near zero.
+ */
+static inline void vmathV4Normalize( VmathVector4 *result, const VmathVector4 *vec );
+
+/*
+ * Outer product of two 4-D vectors
+ */
+static inline void vmathV4Outer( VmathMatrix4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1 );
+
+/*
+ * Linear interpolation between two 4-D vectors
+ * NOTE: 
+ * Does not clamp t between 0 and 1.
+ */
+static inline void vmathV4Lerp( VmathVector4 *result, float t, const VmathVector4 *vec0, const VmathVector4 *vec1 );
+
+/*
+ * Spherical linear interpolation between two 4-D vectors
+ * NOTE: 
+ * The result is unpredictable if the vectors point in opposite directions.
+ * Does not clamp t between 0 and 1.
+ */
+static inline void vmathV4Slerp( VmathVector4 *result, float t, const VmathVector4 *unitVec0, const VmathVector4 *unitVec1 );
+
+/*
+ * Conditionally select between two 4-D vectors
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline void vmathV4Select( VmathVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1, unsigned int select1 );
+
+/*
+ * Store four 4-D vectors as half-floats
+ */
+static inline void vmathV4StoreHalfFloats( const VmathVector4 *vec0, const VmathVector4 *vec1, const VmathVector4 *vec2, const VmathVector4 *vec3, vec_ushort8 *twoQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 4-D vector
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathV4Print( const VmathVector4 *vec );
+
+/*
+ * Print a 4-D vector and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathV4Prints( const VmathVector4 *vec, const char *name );
+
+#endif
+
+/*
+ * Copy a 3-D point
+ */
+static inline void vmathP3Copy( VmathPoint3 *result, const VmathPoint3 *pnt );
+
+/*
+ * Construct a 3-D point from x, y, and z elements
+ */
+static inline void vmathP3MakeFromElems( VmathPoint3 *result, float x, float y, float z );
+
+/*
+ * Copy elements from a 3-D vector into a 3-D point
+ */
+static inline void vmathP3MakeFromV3( VmathPoint3 *result, const VmathVector3 *vec );
+
+/*
+ * Set all elements of a 3-D point to the same scalar value
+ */
+static inline void vmathP3MakeFromScalar( VmathPoint3 *result, float scalar );
+
+/*
+ * Set vector float data in a 3-D point
+ */
+static inline void vmathP3MakeFrom128( VmathPoint3 *result, vec_float4 vf4 );
+
+/*
+ * Get vector float data from a 3-D point
+ */
+static inline vec_float4 vmathP3Get128( const VmathPoint3 *pnt );
+
+/*
+ * Set the x element of a 3-D point
+ */
+static inline void vmathP3SetX( VmathPoint3 *result, float x );
+
+/*
+ * Set the y element of a 3-D point
+ */
+static inline void vmathP3SetY( VmathPoint3 *result, float y );
+
+/*
+ * Set the z element of a 3-D point
+ */
+static inline void vmathP3SetZ( VmathPoint3 *result, float z );
+
+/*
+ * Get the x element of a 3-D point
+ */
+static inline float vmathP3GetX( const VmathPoint3 *pnt );
+
+/*
+ * Get the y element of a 3-D point
+ */
+static inline float vmathP3GetY( const VmathPoint3 *pnt );
+
+/*
+ * Get the z element of a 3-D point
+ */
+static inline float vmathP3GetZ( const VmathPoint3 *pnt );
+
+/*
+ * Set an x, y, or z element of a 3-D point by index
+ */
+static inline void vmathP3SetElem( VmathPoint3 *result, int idx, float value );
+
+/*
+ * Get an x, y, or z element of a 3-D point by index
+ */
+static inline float vmathP3GetElem( const VmathPoint3 *pnt, int idx );
+
+/*
+ * Subtract a 3-D point from another 3-D point
+ */
+static inline void vmathP3Sub( VmathVector3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
+
+/*
+ * Add a 3-D point to a 3-D vector
+ */
+static inline void vmathP3AddV3( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *vec );
+
+/*
+ * Subtract a 3-D vector from a 3-D point
+ */
+static inline void vmathP3SubV3( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *vec );
+
+/*
+ * Multiply two 3-D points per element
+ */
+static inline void vmathP3MulPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
+
+/*
+ * Divide two 3-D points per element
+ * NOTE:
+ * Floating-point behavior matches the simdmath library function divf4.
+ */
+static inline void vmathP3DivPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
+
+/*
+ * Compute the reciprocal of a 3-D point per element
+ * NOTE:
+ * Floating-point behavior matches the simdmath library function recipf4.
+ */
+static inline void vmathP3RecipPerElem( VmathPoint3 *result, const VmathPoint3 *pnt );
+
+/*
+ * Compute the square root of a 3-D point per element
+ * NOTE:
+ * Floating-point behavior matches the simdmath library function sqrtf4.
+ */
+static inline void vmathP3SqrtPerElem( VmathPoint3 *result, const VmathPoint3 *pnt );
+
+/*
+ * Compute the reciprocal square root of a 3-D point per element
+ * NOTE:
+ * Floating-point behavior matches the simdmath library function rsqrtf4.
+ */
+static inline void vmathP3RsqrtPerElem( VmathPoint3 *result, const VmathPoint3 *pnt );
+
+/*
+ * Compute the absolute value of a 3-D point per element
+ */
+static inline void vmathP3AbsPerElem( VmathPoint3 *result, const VmathPoint3 *pnt );
+
+/*
+ * Copy sign from one 3-D point to another, per element
+ */
+static inline void vmathP3CopySignPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
+
+/*
+ * Maximum of two 3-D points per element
+ */
+static inline void vmathP3MaxPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
+
+/*
+ * Minimum of two 3-D points per element
+ */
+static inline void vmathP3MinPerElem( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
+
+/*
+ * Maximum element of a 3-D point
+ */
+static inline float vmathP3MaxElem( const VmathPoint3 *pnt );
+
+/*
+ * Minimum element of a 3-D point
+ */
+static inline float vmathP3MinElem( const VmathPoint3 *pnt );
+
+/*
+ * Compute the sum of all elements of a 3-D point
+ */
+static inline float vmathP3Sum( const VmathPoint3 *pnt );
+
+/*
+ * Apply uniform scale to a 3-D point
+ */
+static inline void vmathP3Scale( VmathPoint3 *result, const VmathPoint3 *pnt, float scaleVal );
+
+/*
+ * Apply non-uniform scale to a 3-D point
+ */
+static inline void vmathP3NonUniformScale( VmathPoint3 *result, const VmathPoint3 *pnt, const VmathVector3 *scaleVec );
+
+/*
+ * Scalar projection of a 3-D point on a unit-length 3-D vector
+ */
+static inline float vmathP3Projection( const VmathPoint3 *pnt, const VmathVector3 *unitVec );
+
+/*
+ * Compute the square of the distance of a 3-D point from the coordinate-system origin
+ */
+static inline float vmathP3DistSqrFromOrigin( const VmathPoint3 *pnt );
+
+/*
+ * Compute the distance of a 3-D point from the coordinate-system origin
+ */
+static inline float vmathP3DistFromOrigin( const VmathPoint3 *pnt );
+
+/*
+ * Compute the square of the distance between two 3-D points
+ */
+static inline float vmathP3DistSqr( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
+
+/*
+ * Compute the distance between two 3-D points
+ */
+static inline float vmathP3Dist( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
+
+/*
+ * Linear interpolation between two 3-D points
+ * NOTE: 
+ * Does not clamp t between 0 and 1.
+ */
+static inline void vmathP3Lerp( VmathPoint3 *result, float t, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1 );
+
+/*
+ * Conditionally select between two 3-D points
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline void vmathP3Select( VmathPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, unsigned int select1 );
+
+/*
+ * Store x, y, and z elements of a 3-D point in the first three words of a quadword.
+ * The value of the fourth word (the word with the highest address) remains unchanged.
+ */
+static inline void vmathP3StoreXYZ( const VmathPoint3 *pnt, vec_float4 *quad );
+
+/*
+ * Load four three-float 3-D points, stored in three quadwords
+ */
+static inline void vmathP3LoadXYZArray( VmathPoint3 *pnt0, VmathPoint3 *pnt1, VmathPoint3 *pnt2, VmathPoint3 *pnt3, const vec_float4 *threeQuads );
+
+/*
+ * Store four 3-D points in three quadwords
+ */
+static inline void vmathP3StoreXYZArray( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, const VmathPoint3 *pnt2, const VmathPoint3 *pnt3, vec_float4 *threeQuads );
+
+/*
+ * Store eight 3-D points as half-floats in three quadwords
+ */
+static inline void vmathP3StoreHalfFloats( const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, const VmathPoint3 *pnt2, const VmathPoint3 *pnt3, const VmathPoint3 *pnt4, const VmathPoint3 *pnt5, const VmathPoint3 *pnt6, const VmathPoint3 *pnt7, vec_ushort8 *threeQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3-D point
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathP3Print( const VmathPoint3 *pnt );
+
+/*
+ * Print a 3-D point and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathP3Prints( const VmathPoint3 *pnt, const char *name );
+
+#endif
+
+/*
+ * Copy a quaternion
+ */
+static inline void vmathQCopy( VmathQuat *result, const VmathQuat *quat );
+
+/*
+ * Construct a quaternion from x, y, z, and w elements
+ */
+static inline void vmathQMakeFromElems( VmathQuat *result, float x, float y, float z, float w );
+
+/*
+ * Construct a quaternion from a 3-D vector and a scalar
+ */
+static inline void vmathQMakeFromV3Scalar( VmathQuat *result, const VmathVector3 *xyz, float w );
+
+/*
+ * Copy elements from a 4-D vector into a quaternion
+ */
+static inline void vmathQMakeFromV4( VmathQuat *result, const VmathVector4 *vec );
+
+/*
+ * Convert a rotation matrix to a unit-length quaternion
+ */
+static inline void vmathQMakeFromM3( VmathQuat *result, const VmathMatrix3 *rotMat );
+
+/*
+ * Set all elements of a quaternion to the same scalar value
+ */
+static inline void vmathQMakeFromScalar( VmathQuat *result, float scalar );
+
+/*
+ * Set vector float data in a quaternion
+ */
+static inline void vmathQMakeFrom128( VmathQuat *result, vec_float4 vf4 );
+
+/*
+ * Get vector float data from a quaternion
+ */
+static inline vec_float4 vmathQGet128( const VmathQuat *quat );
+
+/*
+ * Set the x, y, and z elements of a quaternion
+ * NOTE: 
+ * This function does not change the w element.
+ */
+static inline void vmathQSetXYZ( VmathQuat *result, const VmathVector3 *vec );
+
+/*
+ * Get the x, y, and z elements of a quaternion
+ */
+static inline void vmathQGetXYZ( VmathVector3 *result, const VmathQuat *quat );
+
+/*
+ * Set the x element of a quaternion
+ */
+static inline void vmathQSetX( VmathQuat *result, float x );
+
+/*
+ * Set the y element of a quaternion
+ */
+static inline void vmathQSetY( VmathQuat *result, float y );
+
+/*
+ * Set the z element of a quaternion
+ */
+static inline void vmathQSetZ( VmathQuat *result, float z );
+
+/*
+ * Set the w element of a quaternion
+ */
+static inline void vmathQSetW( VmathQuat *result, float w );
+
+/*
+ * Get the x element of a quaternion
+ */
+static inline float vmathQGetX( const VmathQuat *quat );
+
+/*
+ * Get the y element of a quaternion
+ */
+static inline float vmathQGetY( const VmathQuat *quat );
+
+/*
+ * Get the z element of a quaternion
+ */
+static inline float vmathQGetZ( const VmathQuat *quat );
+
+/*
+ * Get the w element of a quaternion
+ */
+static inline float vmathQGetW( const VmathQuat *quat );
+
+/*
+ * Set an x, y, z, or w element of a quaternion by index
+ */
+static inline void vmathQSetElem( VmathQuat *result, int idx, float value );
+
+/*
+ * Get an x, y, z, or w element of a quaternion by index
+ */
+static inline float vmathQGetElem( const VmathQuat *quat, int idx );
+
+/*
+ * Add two quaternions
+ */
+static inline void vmathQAdd( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1 );
+
+/*
+ * Subtract a quaternion from another quaternion
+ */
+static inline void vmathQSub( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1 );
+
+/*
+ * Multiply two quaternions
+ */
+static inline void vmathQMul( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1 );
+
+/*
+ * Multiply a quaternion by a scalar
+ */
+static inline void vmathQScalarMul( VmathQuat *result, const VmathQuat *quat, float scalar );
+
+/*
+ * Divide a quaternion by a scalar
+ */
+static inline void vmathQScalarDiv( VmathQuat *result, const VmathQuat *quat, float scalar );
+
+/*
+ * Negate all elements of a quaternion
+ */
+static inline void vmathQNeg( VmathQuat *result, const VmathQuat *quat );
+
+/*
+ * Construct an identity quaternion
+ */
+static inline void vmathQMakeIdentity( VmathQuat *result );
+
+/*
+ * Construct a quaternion to rotate between two unit-length 3-D vectors
+ * NOTE: 
+ * The result is unpredictable if unitVec0 and unitVec1 point in opposite directions.
+ */
+static inline void vmathQMakeRotationArc( VmathQuat *result, const VmathVector3 *unitVec0, const VmathVector3 *unitVec1 );
+
+/*
+ * Construct a quaternion to rotate around a unit-length 3-D vector
+ */
+static inline void vmathQMakeRotationAxis( VmathQuat *result, float radians, const VmathVector3 *unitVec );
+
+/*
+ * Construct a quaternion to rotate around the x axis
+ */
+static inline void vmathQMakeRotationX( VmathQuat *result, float radians );
+
+/*
+ * Construct a quaternion to rotate around the y axis
+ */
+static inline void vmathQMakeRotationY( VmathQuat *result, float radians );
+
+/*
+ * Construct a quaternion to rotate around the z axis
+ */
+static inline void vmathQMakeRotationZ( VmathQuat *result, float radians );
+
+/*
+ * Compute the conjugate of a quaternion
+ */
+static inline void vmathQConj( VmathQuat *result, const VmathQuat *quat );
+
+/*
+ * Use a unit-length quaternion to rotate a 3-D vector
+ */
+static inline void vmathQRotate( VmathVector3 *result, const VmathQuat *unitQuat, const VmathVector3 *vec );
+
+/*
+ * Compute the dot product of two quaternions
+ */
+static inline float vmathQDot( const VmathQuat *quat0, const VmathQuat *quat1 );
+
+/*
+ * Compute the norm of a quaternion (NOTE(review): presumably the squared length, since vmathQLength is declared separately - confirm against the implementation)
+ */
+static inline float vmathQNorm( const VmathQuat *quat );
+
+/*
+ * Compute the length of a quaternion
+ */
+static inline float vmathQLength( const VmathQuat *quat );
+
+/*
+ * Normalize a quaternion
+ * NOTE: 
+ * The result is unpredictable when all elements of quat are at or near zero.
+ */
+static inline void vmathQNormalize( VmathQuat *result, const VmathQuat *quat );
+
+/*
+ * Linear interpolation between two quaternions
+ * NOTE: 
+ * Does not clamp t between 0 and 1.
+ */
+static inline void vmathQLerp( VmathQuat *result, float t, const VmathQuat *quat0, const VmathQuat *quat1 );
+
+/*
+ * Spherical linear interpolation between two quaternions
+ * NOTE: 
+ * Interpolates along the shortest path between orientations.
+ * Does not clamp t between 0 and 1.
+ */
+static inline void vmathQSlerp( VmathQuat *result, float t, const VmathQuat *unitQuat0, const VmathQuat *unitQuat1 );
+
+/*
+ * Spherical quadrangle interpolation
+ */
+static inline void vmathQSquad( VmathQuat *result, float t, const VmathQuat *unitQuat0, const VmathQuat *unitQuat1, const VmathQuat *unitQuat2, const VmathQuat *unitQuat3 );
+
+/*
+ * Conditionally select between two quaternions
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline void vmathQSelect( VmathQuat *result, const VmathQuat *quat0, const VmathQuat *quat1, unsigned int select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a quaternion
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathQPrint( const VmathQuat *quat );
+
+/*
+ * Print a quaternion and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathQPrints( const VmathQuat *quat, const char *name );
+
+#endif
+
+/*
+ * Copy a 3x3 matrix
+ */
+static inline void vmathM3Copy( VmathMatrix3 *result, const VmathMatrix3 *mat );
+
+/*
+ * Construct a 3x3 matrix containing the specified columns
+ */
+static inline void vmathM3MakeFromCols( VmathMatrix3 *result, const VmathVector3 *col0, const VmathVector3 *col1, const VmathVector3 *col2 );
+
+/*
+ * Construct a 3x3 rotation matrix from a unit-length quaternion
+ */
+static inline void vmathM3MakeFromQ( VmathMatrix3 *result, const VmathQuat *unitQuat );
+
+/*
+ * Set all elements of a 3x3 matrix to the same scalar value
+ */
+static inline void vmathM3MakeFromScalar( VmathMatrix3 *result, float scalar );
+
+/*
+ * Set column 0 of a 3x3 matrix
+ */
+static inline void vmathM3SetCol0( VmathMatrix3 *result, const VmathVector3 *col0 );
+
+/*
+ * Set column 1 of a 3x3 matrix
+ */
+static inline void vmathM3SetCol1( VmathMatrix3 *result, const VmathVector3 *col1 );
+
+/*
+ * Set column 2 of a 3x3 matrix
+ */
+static inline void vmathM3SetCol2( VmathMatrix3 *result, const VmathVector3 *col2 );
+
+/*
+ * Get column 0 of a 3x3 matrix
+ */
+static inline void vmathM3GetCol0( VmathVector3 *result, const VmathMatrix3 *mat );
+
+/*
+ * Get column 1 of a 3x3 matrix
+ */
+static inline void vmathM3GetCol1( VmathVector3 *result, const VmathMatrix3 *mat );
+
+/*
+ * Get column 2 of a 3x3 matrix
+ */
+static inline void vmathM3GetCol2( VmathVector3 *result, const VmathMatrix3 *mat );
+
+/*
+ * Set the column of a 3x3 matrix referred to by the specified index
+ */
+static inline void vmathM3SetCol( VmathMatrix3 *result, int col, const VmathVector3 *vec );
+
+/*
+ * Set the row of a 3x3 matrix referred to by the specified index
+ */
+static inline void vmathM3SetRow( VmathMatrix3 *result, int row, const VmathVector3 *vec );
+
+/*
+ * Get the column of a 3x3 matrix referred to by the specified index
+ */
+static inline void vmathM3GetCol( VmathVector3 *result, const VmathMatrix3 *mat, int col );
+
+/*
+ * Get the row of a 3x3 matrix referred to by the specified index
+ */
+static inline void vmathM3GetRow( VmathVector3 *result, const VmathMatrix3 *mat, int row );
+
+/*
+ * Set the element of a 3x3 matrix referred to by column and row indices
+ */
+static inline void vmathM3SetElem( VmathMatrix3 *result, int col, int row, float val );
+
+/*
+ * Get the element of a 3x3 matrix referred to by column and row indices
+ */
+static inline float vmathM3GetElem( const VmathMatrix3 *mat, int col, int row );
+
+/*
+ * Add two 3x3 matrices
+ */
+static inline void vmathM3Add( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 );
+
+/*
+ * Subtract a 3x3 matrix from another 3x3 matrix
+ */
+static inline void vmathM3Sub( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 );
+
+/*
+ * Negate all elements of a 3x3 matrix
+ */
+static inline void vmathM3Neg( VmathMatrix3 *result, const VmathMatrix3 *mat );
+
+/*
+ * Multiply a 3x3 matrix by a scalar
+ */
+static inline void vmathM3ScalarMul( VmathMatrix3 *result, const VmathMatrix3 *mat, float scalar );
+
+/*
+ * Multiply a 3x3 matrix by a 3-D vector
+ */
+static inline void vmathM3MulV3( VmathVector3 *result, const VmathMatrix3 *mat, const VmathVector3 *vec );
+
+/*
+ * Multiply two 3x3 matrices
+ */
+static inline void vmathM3Mul( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 );
+
+/*
+ * Construct an identity 3x3 matrix
+ */
+static inline void vmathM3MakeIdentity( VmathMatrix3 *result );
+
+/*
+ * Construct a 3x3 matrix to rotate around the x axis
+ */
+static inline void vmathM3MakeRotationX( VmathMatrix3 *result, float radians );
+
+/*
+ * Construct a 3x3 matrix to rotate around the y axis
+ */
+static inline void vmathM3MakeRotationY( VmathMatrix3 *result, float radians );
+
+/*
+ * Construct a 3x3 matrix to rotate around the z axis
+ */
+static inline void vmathM3MakeRotationZ( VmathMatrix3 *result, float radians );
+
+/*
+ * Construct a 3x3 matrix to rotate around the x, y, and z axes
+ */
+static inline void vmathM3MakeRotationZYX( VmathMatrix3 *result, const VmathVector3 *radiansXYZ );
+
+/*
+ * Construct a 3x3 matrix to rotate around a unit-length 3-D vector
+ */
+static inline void vmathM3MakeRotationAxis( VmathMatrix3 *result, float radians, const VmathVector3 *unitVec );
+
+/*
+ * Construct a 3x3 rotation matrix from a unit-length quaternion
+ */
+static inline void vmathM3MakeRotationQ( VmathMatrix3 *result, const VmathQuat *unitQuat );
+
+/*
+ * Construct a 3x3 matrix to perform scaling
+ */
+static inline void vmathM3MakeScale( VmathMatrix3 *result, const VmathVector3 *scaleVec );
+
+/*
+ * Append (post-multiply) a scale transformation to a 3x3 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline void vmathM3AppendScale( VmathMatrix3 *result, const VmathMatrix3 *mat, const VmathVector3 *scaleVec );
+
+/*
+ * Prepend (pre-multiply) a scale transformation to a 3x3 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline void vmathM3PrependScale( VmathMatrix3 *result, const VmathVector3 *scaleVec, const VmathMatrix3 *mat );
+
+/*
+ * Multiply two 3x3 matrices per element
+ */
+static inline void vmathM3MulPerElem( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1 );
+
+/*
+ * Compute the absolute value of a 3x3 matrix per element
+ */
+static inline void vmathM3AbsPerElem( VmathMatrix3 *result, const VmathMatrix3 *mat );
+
+/*
+ * Transpose of a 3x3 matrix
+ */
+static inline void vmathM3Transpose( VmathMatrix3 *result, const VmathMatrix3 *mat );
+
+/*
+ * Compute the inverse of a 3x3 matrix
+ * NOTE: 
+ * Result is unpredictable when the determinant of mat is equal to or near 0.
+ */
+static inline void vmathM3Inverse( VmathMatrix3 *result, const VmathMatrix3 *mat );
+
+/*
+ * Determinant of a 3x3 matrix
+ */
+static inline float vmathM3Determinant( const VmathMatrix3 *mat );
+
+/*
+ * Conditionally select between two 3x3 matrices
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline void vmathM3Select( VmathMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1, unsigned int select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3x3 matrix
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathM3Print( const VmathMatrix3 *mat );
+
+/*
+ * Print a 3x3 matrix and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathM3Prints( const VmathMatrix3 *mat, const char *name );
+
+#endif
+
+/*
+ * Copy a 4x4 matrix
+ */
+static inline void vmathM4Copy( VmathMatrix4 *result, const VmathMatrix4 *mat );
+
+/*
+ * Construct a 4x4 matrix containing the specified columns
+ */
+static inline void vmathM4MakeFromCols( VmathMatrix4 *result, const VmathVector4 *col0, const VmathVector4 *col1, const VmathVector4 *col2, const VmathVector4 *col3 );
+
+/*
+ * Construct a 4x4 matrix from a 3x4 transformation matrix
+ */
+static inline void vmathM4MakeFromT3( VmathMatrix4 *result, const VmathTransform3 *mat );
+
+/*
+ * Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector
+ */
+static inline void vmathM4MakeFromM3V3( VmathMatrix4 *result, const VmathMatrix3 *mat, const VmathVector3 *translateVec );
+
+/*
+ * Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector
+ */
+static inline void vmathM4MakeFromQV3( VmathMatrix4 *result, const VmathQuat *unitQuat, const VmathVector3 *translateVec );
+
+/*
+ * Set all elements of a 4x4 matrix to the same scalar value
+ */
+static inline void vmathM4MakeFromScalar( VmathMatrix4 *result, float scalar );
+
+/*
+ * Set the upper-left 3x3 submatrix of a 4x4 matrix
+ * NOTE: 
+ * This function does not change the bottom row elements.
+ */
+static inline void vmathM4SetUpper3x3( VmathMatrix4 *result, const VmathMatrix3 *mat3 );
+
+/*
+ * Get the upper-left 3x3 submatrix of a 4x4 matrix
+ */
+static inline void vmathM4GetUpper3x3( VmathMatrix3 *result, const VmathMatrix4 *mat );
+
+/*
+ * Set the translation component of a 4x4 matrix
+ * NOTE: 
+ * This function does not change the bottom row elements.
+ */
+static inline void vmathM4SetTranslation( VmathMatrix4 *result, const VmathVector3 *translateVec );
+
+/*
+ * Get the translation component of a 4x4 matrix
+ */
+static inline void vmathM4GetTranslation( VmathVector3 *result, const VmathMatrix4 *mat );
+
+/*
+ * Set column 0 of a 4x4 matrix
+ */
+static inline void vmathM4SetCol0( VmathMatrix4 *result, const VmathVector4 *col0 );
+
+/*
+ * Set column 1 of a 4x4 matrix
+ */
+static inline void vmathM4SetCol1( VmathMatrix4 *result, const VmathVector4 *col1 );
+
+/*
+ * Set column 2 of a 4x4 matrix
+ */
+static inline void vmathM4SetCol2( VmathMatrix4 *result, const VmathVector4 *col2 );
+
+/*
+ * Set column 3 of a 4x4 matrix
+ */
+static inline void vmathM4SetCol3( VmathMatrix4 *result, const VmathVector4 *col3 );
+
+/*
+ * Get column 0 of a 4x4 matrix
+ */
+static inline void vmathM4GetCol0( VmathVector4 *result, const VmathMatrix4 *mat );
+
+/*
+ * Get column 1 of a 4x4 matrix
+ */
+static inline void vmathM4GetCol1( VmathVector4 *result, const VmathMatrix4 *mat );
+
+/*
+ * Get column 2 of a 4x4 matrix
+ */
+static inline void vmathM4GetCol2( VmathVector4 *result, const VmathMatrix4 *mat );
+
+/*
+ * Get column 3 of a 4x4 matrix
+ */
+static inline void vmathM4GetCol3( VmathVector4 *result, const VmathMatrix4 *mat );
+
+/*
+ * Set the column of a 4x4 matrix referred to by the specified index
+ */
+static inline void vmathM4SetCol( VmathMatrix4 *result, int col, const VmathVector4 *vec );
+
+/*
+ * Set the row of a 4x4 matrix referred to by the specified index
+ */
+static inline void vmathM4SetRow( VmathMatrix4 *result, int row, const VmathVector4 *vec );
+
+/*
+ * Get the column of a 4x4 matrix referred to by the specified index
+ */
+static inline void vmathM4GetCol( VmathVector4 *result, const VmathMatrix4 *mat, int col );
+
+/*
+ * Get the row of a 4x4 matrix referred to by the specified index
+ */
+static inline void vmathM4GetRow( VmathVector4 *result, const VmathMatrix4 *mat, int row );
+
+/*
+ * Set the element of a 4x4 matrix referred to by column and row indices
+ */
+static inline void vmathM4SetElem( VmathMatrix4 *result, int col, int row, float val );
+
+/*
+ * Get the element of a 4x4 matrix referred to by column and row indices
+ */
+static inline float vmathM4GetElem( const VmathMatrix4 *mat, int col, int row );
+
+/*
+ * Add two 4x4 matrices
+ */
+static inline void vmathM4Add( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 );
+
+/*
+ * Subtract a 4x4 matrix from another 4x4 matrix
+ */
+static inline void vmathM4Sub( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 );
+
+/*
+ * Negate all elements of a 4x4 matrix
+ */
+static inline void vmathM4Neg( VmathMatrix4 *result, const VmathMatrix4 *mat );
+
+/*
+ * Multiply a 4x4 matrix by a scalar
+ */
+static inline void vmathM4ScalarMul( VmathMatrix4 *result, const VmathMatrix4 *mat, float scalar );
+
+/*
+ * Multiply a 4x4 matrix by a 4-D vector
+ */
+static inline void vmathM4MulV4( VmathVector4 *result, const VmathMatrix4 *mat, const VmathVector4 *vec );
+
+/*
+ * Multiply a 4x4 matrix by a 3-D vector, producing a 4-D vector
+ */
+static inline void vmathM4MulV3( VmathVector4 *result, const VmathMatrix4 *mat, const VmathVector3 *vec );
+
+/*
+ * Multiply a 4x4 matrix by a 3-D point, producing a 4-D vector
+ */
+static inline void vmathM4MulP3( VmathVector4 *result, const VmathMatrix4 *mat, const VmathPoint3 *pnt );
+
+/*
+ * Multiply two 4x4 matrices
+ */
+static inline void vmathM4Mul( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 );
+
+/*
+ * Multiply a 4x4 matrix by a 3x4 transformation matrix
+ */
+static inline void vmathM4MulT3( VmathMatrix4 *result, const VmathMatrix4 *mat, const VmathTransform3 *tfrm );
+
+/*
+ * Construct an identity 4x4 matrix
+ */
+static inline void vmathM4MakeIdentity( VmathMatrix4 *result );
+
+/*
+ * Construct a 4x4 matrix to rotate around the x axis
+ */
+static inline void vmathM4MakeRotationX( VmathMatrix4 *result, float radians );
+
+/*
+ * Construct a 4x4 matrix to rotate around the y axis
+ */
+static inline void vmathM4MakeRotationY( VmathMatrix4 *result, float radians );
+
+/*
+ * Construct a 4x4 matrix to rotate around the z axis
+ */
+static inline void vmathM4MakeRotationZ( VmathMatrix4 *result, float radians );
+
+/*
+ * Construct a 4x4 matrix to rotate around the x, y, and z axes
+ */
+static inline void vmathM4MakeRotationZYX( VmathMatrix4 *result, const VmathVector3 *radiansXYZ );
+
+/*
+ * Construct a 4x4 matrix to rotate around a unit-length 3-D vector
+ */
+static inline void vmathM4MakeRotationAxis( VmathMatrix4 *result, float radians, const VmathVector3 *unitVec );
+
+/*
+ * Construct a 4x4 rotation matrix from a unit-length quaternion
+ */
+static inline void vmathM4MakeRotationQ( VmathMatrix4 *result, const VmathQuat *unitQuat );
+
+/*
+ * Construct a 4x4 matrix to perform scaling
+ */
+static inline void vmathM4MakeScale( VmathMatrix4 *result, const VmathVector3 *scaleVec );
+
+/*
+ * Construct a 4x4 matrix to perform translation
+ */
+static inline void vmathM4MakeTranslation( VmathMatrix4 *result, const VmathVector3 *translateVec );
+
+/*
+ * Construct viewing matrix based on eye position, position looked at, and up direction
+ */
+static inline void vmathM4MakeLookAt( VmathMatrix4 *result, const VmathPoint3 *eyePos, const VmathPoint3 *lookAtPos, const VmathVector3 *upVec );
+
+/*
+ * Construct a perspective projection matrix
+ */
+static inline void vmathM4MakePerspective( VmathMatrix4 *result, float fovyRadians, float aspect, float zNear, float zFar );
+
+/*
+ * Construct a perspective projection matrix based on frustum
+ */
+static inline void vmathM4MakeFrustum( VmathMatrix4 *result, float left, float right, float bottom, float top, float zNear, float zFar );
+
+/*
+ * Construct an orthographic projection matrix
+ */
+static inline void vmathM4MakeOrthographic( VmathMatrix4 *result, float left, float right, float bottom, float top, float zNear, float zFar );
+
+/*
+ * Append (post-multiply) a scale transformation to a 4x4 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline void vmathM4AppendScale( VmathMatrix4 *result, const VmathMatrix4 *mat, const VmathVector3 *scaleVec );
+
+/*
+ * Prepend (pre-multiply) a scale transformation to a 4x4 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline void vmathM4PrependScale( VmathMatrix4 *result, const VmathVector3 *scaleVec, const VmathMatrix4 *mat );
+
+/*
+ * Multiply two 4x4 matrices per element
+ */
+static inline void vmathM4MulPerElem( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1 );
+
+/*
+ * Compute the absolute value of a 4x4 matrix per element
+ */
+static inline void vmathM4AbsPerElem( VmathMatrix4 *result, const VmathMatrix4 *mat );
+
+/*
+ * Transpose of a 4x4 matrix
+ */
+static inline void vmathM4Transpose( VmathMatrix4 *result, const VmathMatrix4 *mat );
+
+/*
+ * Compute the inverse of a 4x4 matrix
+ * NOTE: 
+ * Result is unpredictable when the determinant of mat is equal to or near 0.
+ */
+static inline void vmathM4Inverse( VmathMatrix4 *result, const VmathMatrix4 *mat );
+
+/*
+ * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix
+ * NOTE: 
+ * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix is affine. The result is unpredictable when the determinant of mat is equal to or near 0.
+ */
+static inline void vmathM4AffineInverse( VmathMatrix4 *result, const VmathMatrix4 *mat );
+
+/*
+ * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix
+ * NOTE: 
+ * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.
+ */
+static inline void vmathM4OrthoInverse( VmathMatrix4 *result, const VmathMatrix4 *mat );
+
+/*
+ * Determinant of a 4x4 matrix
+ */
+static inline float vmathM4Determinant( const VmathMatrix4 *mat );
+
+/*
+ * Conditionally select between two 4x4 matrices
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline void vmathM4Select( VmathMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1, unsigned int select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 4x4 matrix
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathM4Print( const VmathMatrix4 *mat );
+
+/*
+ * Print a 4x4 matrix and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathM4Prints( const VmathMatrix4 *mat, const char *name );
+
+#endif
+
+/*
+ * Copy a 3x4 transformation matrix
+ */
+static inline void vmathT3Copy( VmathTransform3 *result, const VmathTransform3 *tfrm );
+
+/*
+ * Construct a 3x4 transformation matrix containing the specified columns
+ */
+static inline void vmathT3MakeFromCols( VmathTransform3 *result, const VmathVector3 *col0, const VmathVector3 *col1, const VmathVector3 *col2, const VmathVector3 *col3 );
+
+/*
+ * Construct a 3x4 transformation matrix from a 3x3 matrix (the tfrm argument) and a 3-D vector
+ */
+static inline void vmathT3MakeFromM3V3( VmathTransform3 *result, const VmathMatrix3 *tfrm, const VmathVector3 *translateVec );
+
+/*
+ * Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector
+ */
+static inline void vmathT3MakeFromQV3( VmathTransform3 *result, const VmathQuat *unitQuat, const VmathVector3 *translateVec );
+
+/*
+ * Set all elements of a 3x4 transformation matrix to the same scalar value
+ */
+static inline void vmathT3MakeFromScalar( VmathTransform3 *result, float scalar );
+
+/*
+ * Set the upper-left 3x3 submatrix of a 3x4 transformation matrix
+ */
+static inline void vmathT3SetUpper3x3( VmathTransform3 *result, const VmathMatrix3 *mat3 );
+
+/*
+ * Get the upper-left 3x3 submatrix of a 3x4 transformation matrix
+ */
+static inline void vmathT3GetUpper3x3( VmathMatrix3 *result, const VmathTransform3 *tfrm );
+
+/*
+ * Set the translation component of a 3x4 transformation matrix
+ */
+static inline void vmathT3SetTranslation( VmathTransform3 *result, const VmathVector3 *translateVec );
+
+/*
+ * Get the translation component of a 3x4 transformation matrix
+ */
+static inline void vmathT3GetTranslation( VmathVector3 *result, const VmathTransform3 *tfrm );
+
+/*
+ * Set column 0 of a 3x4 transformation matrix
+ */
+static inline void vmathT3SetCol0( VmathTransform3 *result, const VmathVector3 *col0 );
+
+/*
+ * Set column 1 of a 3x4 transformation matrix
+ */
+static inline void vmathT3SetCol1( VmathTransform3 *result, const VmathVector3 *col1 );
+
+/*
+ * Set column 2 of a 3x4 transformation matrix
+ */
+static inline void vmathT3SetCol2( VmathTransform3 *result, const VmathVector3 *col2 );
+
+/*
+ * Set column 3 of a 3x4 transformation matrix
+ */
+static inline void vmathT3SetCol3( VmathTransform3 *result, const VmathVector3 *col3 );
+
+/*
+ * Get column 0 of a 3x4 transformation matrix
+ */
+static inline void vmathT3GetCol0( VmathVector3 *result, const VmathTransform3 *tfrm );
+
+/*
+ * Get column 1 of a 3x4 transformation matrix
+ */
+static inline void vmathT3GetCol1( VmathVector3 *result, const VmathTransform3 *tfrm );
+
+/*
+ * Get column 2 of a 3x4 transformation matrix
+ */
+static inline void vmathT3GetCol2( VmathVector3 *result, const VmathTransform3 *tfrm );
+
+/*
+ * Get column 3 of a 3x4 transformation matrix
+ */
+static inline void vmathT3GetCol3( VmathVector3 *result, const VmathTransform3 *tfrm );
+
+/*
+ * Set the column of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline void vmathT3SetCol( VmathTransform3 *result, int col, const VmathVector3 *vec );
+
+/*
+ * Set the row of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline void vmathT3SetRow( VmathTransform3 *result, int row, const VmathVector4 *vec );
+
+/*
+ * Get the column of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline void vmathT3GetCol( VmathVector3 *result, const VmathTransform3 *tfrm, int col );
+
+/*
+ * Get the row of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline void vmathT3GetRow( VmathVector4 *result, const VmathTransform3 *tfrm, int row );
+
+/*
+ * Set the element of a 3x4 transformation matrix referred to by column and row indices
+ */
+static inline void vmathT3SetElem( VmathTransform3 *result, int col, int row, float val );
+
+/*
+ * Get the element of a 3x4 transformation matrix referred to by column and row indices
+ */
+static inline float vmathT3GetElem( const VmathTransform3 *tfrm, int col, int row );
+
+/*
+ * Multiply a 3x4 transformation matrix by a 3-D vector
+ */
+static inline void vmathT3MulV3( VmathVector3 *result, const VmathTransform3 *tfrm, const VmathVector3 *vec );
+
+/*
+ * Multiply a 3x4 transformation matrix by a 3-D point
+ */
+static inline void vmathT3MulP3( VmathPoint3 *result, const VmathTransform3 *tfrm, const VmathPoint3 *pnt );
+
+/*
+ * Multiply two 3x4 transformation matrices
+ */
+static inline void vmathT3Mul( VmathTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1 );
+
+/*
+ * Construct an identity 3x4 transformation matrix
+ */
+static inline void vmathT3MakeIdentity( VmathTransform3 *result );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the x axis
+ */
+static inline void vmathT3MakeRotationX( VmathTransform3 *result, float radians );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the y axis
+ */
+static inline void vmathT3MakeRotationY( VmathTransform3 *result, float radians );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the z axis
+ */
+static inline void vmathT3MakeRotationZ( VmathTransform3 *result, float radians );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the x, y, and z axes
+ */
+static inline void vmathT3MakeRotationZYX( VmathTransform3 *result, const VmathVector3 *radiansXYZ );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector
+ */
+static inline void vmathT3MakeRotationAxis( VmathTransform3 *result, float radians, const VmathVector3 *unitVec );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate by a unit-length quaternion
+ */
+static inline void vmathT3MakeRotationQ( VmathTransform3 *result, const VmathQuat *unitQuat );
+
+/*
+ * Construct a 3x4 transformation matrix to perform scaling
+ */
+static inline void vmathT3MakeScale( VmathTransform3 *result, const VmathVector3 *scaleVec );
+
+/*
+ * Construct a 3x4 transformation matrix to perform translation
+ */
+static inline void vmathT3MakeTranslation( VmathTransform3 *result, const VmathVector3 *translateVec );
+
+/*
+ * Append (post-multiply) a scale transformation to a 3x4 transformation matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline void vmathT3AppendScale( VmathTransform3 *result, const VmathTransform3 *tfrm, const VmathVector3 *scaleVec );
+
+/*
+ * Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline void vmathT3PrependScale( VmathTransform3 *result, const VmathVector3 *scaleVec, const VmathTransform3 *tfrm );
+
+/*
+ * Multiply two 3x4 transformation matrices per element
+ */
+static inline void vmathT3MulPerElem( VmathTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1 );
+
+/*
+ * Compute the absolute value of a 3x4 transformation matrix per element
+ */
+static inline void vmathT3AbsPerElem( VmathTransform3 *result, const VmathTransform3 *tfrm );
+
+/*
+ * Inverse of a 3x4 transformation matrix
+ * NOTE: 
+ * Result is unpredictable when the determinant of the left 3x3 submatrix is equal to or near 0.
+ */
+static inline void vmathT3Inverse( VmathTransform3 *result, const VmathTransform3 *tfrm );
+
+/*
+ * Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix
+ * NOTE: 
+ * This can be used to achieve better performance than a general inverse when the specified 3x4 transformation matrix meets the given restrictions.
+ */
+static inline void vmathT3OrthoInverse( VmathTransform3 *result, const VmathTransform3 *tfrm );
+
+/*
+ * Conditionally select between two 3x4 transformation matrices
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline void vmathT3Select( VmathTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1, unsigned int select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3x4 transformation matrix
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathT3Print( const VmathTransform3 *tfrm );
+
+/*
+ * Print a 3x4 transformation matrix and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathT3Prints( const VmathTransform3 *tfrm, const char *name );
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#include "vec_aos.h"
+#include "quat_aos.h"
+#include "mat_aos.h"
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/spu/c/vectormath_aos_v.h b/Extras/vectormathlibrary/include/vectormath/spu/c/vectormath_aos_v.h
index eb6912557..242d938a0 100644
--- a/Extras/vectormathlibrary/include/vectormath/spu/c/vectormath_aos_v.h
+++ b/Extras/vectormathlibrary/include/vectormath/spu/c/vectormath_aos_v.h
@@ -1,1916 +1,1916 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_AOS_C_V_H
-#define _VECTORMATH_AOS_C_V_H
-
-#include <math.h>
-#include <spu_intrinsics.h>
-
-#ifdef _VECTORMATH_DEBUG
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-#ifndef _VECTORMATH_AOS_C_TYPES_H
-#define _VECTORMATH_AOS_C_TYPES_H
-
-/* A 3-D vector in array-of-structures format
- */
-typedef struct _VmathVector3
-{
-    vec_float4 vec128;
-} VmathVector3;
-
-/* A 4-D vector in array-of-structures format
- */
-typedef struct _VmathVector4
-{
-    vec_float4 vec128;
-} VmathVector4;
-
-/* A 3-D point in array-of-structures format
- */
-typedef struct _VmathPoint3
-{
-    vec_float4 vec128;
-} VmathPoint3;
-
-/* A quaternion in array-of-structures format
- */
-typedef struct _VmathQuat
-{
-    vec_float4 vec128;
-} VmathQuat;
-
-/* A 3x3 matrix in array-of-structures format
- */
-typedef struct _VmathMatrix3
-{
-    VmathVector3 col0;
-    VmathVector3 col1;
-    VmathVector3 col2;
-} VmathMatrix3;
-
-/* A 4x4 matrix in array-of-structures format
- */
-typedef struct _VmathMatrix4
-{
-    VmathVector4 col0;
-    VmathVector4 col1;
-    VmathVector4 col2;
-    VmathVector4 col3;
-} VmathMatrix4;
-
-/* A 3x4 transformation matrix in array-of-structures format
- */
-typedef struct _VmathTransform3
-{
-    VmathVector3 col0;
-    VmathVector3 col1;
-    VmathVector3 col2;
-    VmathVector3 col3;
-} VmathTransform3;
-
-#endif
-
-/*
- * Construct a 3-D vector from x, y, and z elements
- */
-static inline VmathVector3 vmathV3MakeFromElems_V( float x, float y, float z );
-
-/*
- * Copy elements from a 3-D point into a 3-D vector
- */
-static inline VmathVector3 vmathV3MakeFromP3_V( VmathPoint3 pnt );
-
-/*
- * Set all elements of a 3-D vector to the same scalar value
- */
-static inline VmathVector3 vmathV3MakeFromScalar_V( float scalar );
-
-/*
- * Set vector float data in a 3-D vector
- */
-static inline VmathVector3 vmathV3MakeFrom128_V( vec_float4 vf4 );
-
-/*
- * Get vector float data from a 3-D vector
- */
-static inline vec_float4 vmathV3Get128_V( VmathVector3 vec );
-
-/*
- * Set the x element of a 3-D vector
- */
-static inline void vmathV3SetX_V( VmathVector3 *result, float x );
-
-/*
- * Set the y element of a 3-D vector
- */
-static inline void vmathV3SetY_V( VmathVector3 *result, float y );
-
-/*
- * Set the z element of a 3-D vector
- */
-static inline void vmathV3SetZ_V( VmathVector3 *result, float z );
-
-/*
- * Get the x element of a 3-D vector
- */
-static inline float vmathV3GetX_V( VmathVector3 vec );
-
-/*
- * Get the y element of a 3-D vector
- */
-static inline float vmathV3GetY_V( VmathVector3 vec );
-
-/*
- * Get the z element of a 3-D vector
- */
-static inline float vmathV3GetZ_V( VmathVector3 vec );
-
-/*
- * Set an x, y, or z element of a 3-D vector by index
- */
-static inline void vmathV3SetElem_V( VmathVector3 *result, int idx, float value );
-
-/*
- * Get an x, y, or z element of a 3-D vector by index
- */
-static inline float vmathV3GetElem_V( VmathVector3 vec, int idx );
-
-/*
- * Add two 3-D vectors
- */
-static inline VmathVector3 vmathV3Add_V( VmathVector3 vec0, VmathVector3 vec1 );
-
-/*
- * Subtract a 3-D vector from another 3-D vector
- */
-static inline VmathVector3 vmathV3Sub_V( VmathVector3 vec0, VmathVector3 vec1 );
-
-/*
- * Add a 3-D vector to a 3-D point
- */
-static inline VmathPoint3 vmathV3AddP3_V( VmathVector3 vec, VmathPoint3 pnt );
-
-/*
- * Multiply a 3-D vector by a scalar
- */
-static inline VmathVector3 vmathV3ScalarMul_V( VmathVector3 vec, float scalar );
-
-/*
- * Divide a 3-D vector by a scalar
- */
-static inline VmathVector3 vmathV3ScalarDiv_V( VmathVector3 vec, float scalar );
-
-/*
- * Negate all elements of a 3-D vector
- */
-static inline VmathVector3 vmathV3Neg_V( VmathVector3 vec );
-
-/*
- * Construct x axis
- */
-static inline VmathVector3 vmathV3MakeXAxis_V( );
-
-/*
- * Construct y axis
- */
-static inline VmathVector3 vmathV3MakeYAxis_V( );
-
-/*
- * Construct z axis
- */
-static inline VmathVector3 vmathV3MakeZAxis_V( );
-
-/*
- * Multiply two 3-D vectors per element
- */
-static inline VmathVector3 vmathV3MulPerElem_V( VmathVector3 vec0, VmathVector3 vec1 );
-
-/*
- * Divide two 3-D vectors per element
- * NOTE: 
- * Floating-point behavior matches standard library function divf4.
- */
-static inline VmathVector3 vmathV3DivPerElem_V( VmathVector3 vec0, VmathVector3 vec1 );
-
-/*
- * Compute the reciprocal of a 3-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function recipf4.
- */
-static inline VmathVector3 vmathV3RecipPerElem_V( VmathVector3 vec );
-
-/*
- * Compute the square root of a 3-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function sqrtf4.
- */
-static inline VmathVector3 vmathV3SqrtPerElem_V( VmathVector3 vec );
-
-/*
- * Compute the reciprocal square root of a 3-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function rsqrtf4.
- */
-static inline VmathVector3 vmathV3RsqrtPerElem_V( VmathVector3 vec );
-
-/*
- * Compute the absolute value of a 3-D vector per element
- */
-static inline VmathVector3 vmathV3AbsPerElem_V( VmathVector3 vec );
-
-/*
- * Copy sign from one 3-D vector to another, per element
- */
-static inline VmathVector3 vmathV3CopySignPerElem_V( VmathVector3 vec0, VmathVector3 vec1 );
-
-/*
- * Maximum of two 3-D vectors per element
- */
-static inline VmathVector3 vmathV3MaxPerElem_V( VmathVector3 vec0, VmathVector3 vec1 );
-
-/*
- * Minimum of two 3-D vectors per element
- */
-static inline VmathVector3 vmathV3MinPerElem_V( VmathVector3 vec0, VmathVector3 vec1 );
-
-/*
- * Maximum element of a 3-D vector
- */
-static inline float vmathV3MaxElem_V( VmathVector3 vec );
-
-/*
- * Minimum element of a 3-D vector
- */
-static inline float vmathV3MinElem_V( VmathVector3 vec );
-
-/*
- * Compute the sum of all elements of a 3-D vector
- */
-static inline float vmathV3Sum_V( VmathVector3 vec );
-
-/*
- * Compute the dot product of two 3-D vectors
- */
-static inline float vmathV3Dot_V( VmathVector3 vec0, VmathVector3 vec1 );
-
-/*
- * Compute the square of the length of a 3-D vector
- */
-static inline float vmathV3LengthSqr_V( VmathVector3 vec );
-
-/*
- * Compute the length of a 3-D vector
- */
-static inline float vmathV3Length_V( VmathVector3 vec );
-
-/*
- * Normalize a 3-D vector
- * NOTE: 
- * The result is unpredictable when all elements of vec are at or near zero.
- */
-static inline VmathVector3 vmathV3Normalize_V( VmathVector3 vec );
-
-/*
- * Compute cross product of two 3-D vectors
- */
-static inline VmathVector3 vmathV3Cross_V( VmathVector3 vec0, VmathVector3 vec1 );
-
-/*
- * Outer product of two 3-D vectors
- */
-static inline VmathMatrix3 vmathV3Outer_V( VmathVector3 vec0, VmathVector3 vec1 );
-
-/*
- * Pre-multiply a row vector by a 3x3 matrix
- * NOTE: 
- * Slower than column post-multiply.
- */
-static inline VmathVector3 vmathV3RowMul_V( VmathVector3 vec, VmathMatrix3 mat );
-
-/*
- * Cross-product matrix of a 3-D vector
- */
-static inline VmathMatrix3 vmathV3CrossMatrix_V( VmathVector3 vec );
-
-/*
- * Create cross-product matrix and multiply
- * NOTE: 
- * Faster than separately creating a cross-product matrix and multiplying.
- */
-static inline VmathMatrix3 vmathV3CrossMatrixMul_V( VmathVector3 vec, VmathMatrix3 mat );
-
-/*
- * Linear interpolation between two 3-D vectors
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline VmathVector3 vmathV3Lerp_V( float t, VmathVector3 vec0, VmathVector3 vec1 );
-
-/*
- * Spherical linear interpolation between two 3-D vectors
- * NOTE: 
- * The result is unpredictable if the vectors point in opposite directions.
- * Does not clamp t between 0 and 1.
- */
-static inline VmathVector3 vmathV3Slerp_V( float t, VmathVector3 unitVec0, VmathVector3 unitVec1 );
-
-/*
- * Conditionally select between two 3-D vectors
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline VmathVector3 vmathV3Select_V( VmathVector3 vec0, VmathVector3 vec1, unsigned int select1 );
-
-/*
- * Store x, y, and z elements of a 3-D vector in the first three words of a quadword.
- * The value of the fourth word (the word with the highest address) remains unchanged
- */
-static inline void vmathV3StoreXYZ_V( VmathVector3 vec, vec_float4 *quad );
-
-/*
- * Load four three-float 3-D vectors, stored in three quadwords
- */
-static inline void vmathV3LoadXYZArray_V( VmathVector3 *vec0, VmathVector3 *vec1, VmathVector3 *vec2, VmathVector3 *vec3, const vec_float4 *threeQuads );
-
-/*
- * Store four 3-D vectors in three quadwords
- */
-static inline void vmathV3StoreXYZArray_V( VmathVector3 vec0, VmathVector3 vec1, VmathVector3 vec2, VmathVector3 vec3, vec_float4 *threeQuads );
-
-/*
- * Store eight 3-D vectors as half-floats
- */
-static inline void vmathV3StoreHalfFloats_V( VmathVector3 vec0, VmathVector3 vec1, VmathVector3 vec2, VmathVector3 vec3, VmathVector3 vec4, VmathVector3 vec5, VmathVector3 vec6, VmathVector3 vec7, vec_ushort8 *threeQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3-D vector
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathV3Print_V( VmathVector3 vec );
-
-/*
- * Print a 3-D vector and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathV3Prints_V( VmathVector3 vec, const char *name );
-
-#endif
-
-/*
- * Construct a 4-D vector from x, y, z, and w elements
- */
-static inline VmathVector4 vmathV4MakeFromElems_V( float x, float y, float z, float w );
-
-/*
- * Construct a 4-D vector from a 3-D vector and a scalar
- */
-static inline VmathVector4 vmathV4MakeFromV3Scalar_V( VmathVector3 xyz, float w );
-
-/*
- * Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0
- */
-static inline VmathVector4 vmathV4MakeFromV3_V( VmathVector3 vec );
-
-/*
- * Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1
- */
-static inline VmathVector4 vmathV4MakeFromP3_V( VmathPoint3 pnt );
-
-/*
- * Copy elements from a quaternion into a 4-D vector
- */
-static inline VmathVector4 vmathV4MakeFromQ_V( VmathQuat quat );
-
-/*
- * Set all elements of a 4-D vector to the same scalar value
- */
-static inline VmathVector4 vmathV4MakeFromScalar_V( float scalar );
-
-/*
- * Set vector float data in a 4-D vector
- */
-static inline VmathVector4 vmathV4MakeFrom128_V( vec_float4 vf4 );
-
-/*
- * Get vector float data from a 4-D vector
- */
-static inline vec_float4 vmathV4Get128_V( VmathVector4 vec );
-
-/*
- * Set the x, y, and z elements of a 4-D vector
- * NOTE: 
- * This function does not change the w element.
- */
-static inline void vmathV4SetXYZ_V( VmathVector4 *result, VmathVector3 vec );
-
-/*
- * Get the x, y, and z elements of a 4-D vector
- */
-static inline VmathVector3 vmathV4GetXYZ_V( VmathVector4 vec );
-
-/*
- * Set the x element of a 4-D vector
- */
-static inline void vmathV4SetX_V( VmathVector4 *result, float x );
-
-/*
- * Set the y element of a 4-D vector
- */
-static inline void vmathV4SetY_V( VmathVector4 *result, float y );
-
-/*
- * Set the z element of a 4-D vector
- */
-static inline void vmathV4SetZ_V( VmathVector4 *result, float z );
-
-/*
- * Set the w element of a 4-D vector
- */
-static inline void vmathV4SetW_V( VmathVector4 *result, float w );
-
-/*
- * Get the x element of a 4-D vector
- */
-static inline float vmathV4GetX_V( VmathVector4 vec );
-
-/*
- * Get the y element of a 4-D vector
- */
-static inline float vmathV4GetY_V( VmathVector4 vec );
-
-/*
- * Get the z element of a 4-D vector
- */
-static inline float vmathV4GetZ_V( VmathVector4 vec );
-
-/*
- * Get the w element of a 4-D vector
- */
-static inline float vmathV4GetW_V( VmathVector4 vec );
-
-/*
- * Set an x, y, z, or w element of a 4-D vector by index
- */
-static inline void vmathV4SetElem_V( VmathVector4 *result, int idx, float value );
-
-/*
- * Get an x, y, z, or w element of a 4-D vector by index
- */
-static inline float vmathV4GetElem_V( VmathVector4 vec, int idx );
-
-/*
- * Add two 4-D vectors
- */
-static inline VmathVector4 vmathV4Add_V( VmathVector4 vec0, VmathVector4 vec1 );
-
-/*
- * Subtract a 4-D vector from another 4-D vector
- */
-static inline VmathVector4 vmathV4Sub_V( VmathVector4 vec0, VmathVector4 vec1 );
-
-/*
- * Multiply a 4-D vector by a scalar
- */
-static inline VmathVector4 vmathV4ScalarMul_V( VmathVector4 vec, float scalar );
-
-/*
- * Divide a 4-D vector by a scalar
- */
-static inline VmathVector4 vmathV4ScalarDiv_V( VmathVector4 vec, float scalar );
-
-/*
- * Negate all elements of a 4-D vector
- */
-static inline VmathVector4 vmathV4Neg_V( VmathVector4 vec );
-
-/*
- * Construct x axis
- */
-static inline VmathVector4 vmathV4MakeXAxis_V( );
-
-/*
- * Construct y axis
- */
-static inline VmathVector4 vmathV4MakeYAxis_V( );
-
-/*
- * Construct z axis
- */
-static inline VmathVector4 vmathV4MakeZAxis_V( );
-
-/*
- * Construct w axis
- */
-static inline VmathVector4 vmathV4MakeWAxis_V( );
-
-/*
- * Multiply two 4-D vectors per element
- */
-static inline VmathVector4 vmathV4MulPerElem_V( VmathVector4 vec0, VmathVector4 vec1 );
-
-/*
- * Divide two 4-D vectors per element
- * NOTE: 
- * Floating-point behavior matches standard library function divf4.
- */
-static inline VmathVector4 vmathV4DivPerElem_V( VmathVector4 vec0, VmathVector4 vec1 );
-
-/*
- * Compute the reciprocal of a 4-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function recipf4.
- */
-static inline VmathVector4 vmathV4RecipPerElem_V( VmathVector4 vec );
-
-/*
- * Compute the square root of a 4-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function sqrtf4.
- */
-static inline VmathVector4 vmathV4SqrtPerElem_V( VmathVector4 vec );
-
-/*
- * Compute the reciprocal square root of a 4-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function rsqrtf4.
- */
-static inline VmathVector4 vmathV4RsqrtPerElem_V( VmathVector4 vec );
-
-/*
- * Compute the absolute value of a 4-D vector per element
- */
-static inline VmathVector4 vmathV4AbsPerElem_V( VmathVector4 vec );
-
-/*
- * Copy sign from one 4-D vector to another, per element
- */
-static inline VmathVector4 vmathV4CopySignPerElem_V( VmathVector4 vec0, VmathVector4 vec1 );
-
-/*
- * Maximum of two 4-D vectors per element
- */
-static inline VmathVector4 vmathV4MaxPerElem_V( VmathVector4 vec0, VmathVector4 vec1 );
-
-/*
- * Minimum of two 4-D vectors per element
- */
-static inline VmathVector4 vmathV4MinPerElem_V( VmathVector4 vec0, VmathVector4 vec1 );
-
-/*
- * Maximum element of a 4-D vector
- */
-static inline float vmathV4MaxElem_V( VmathVector4 vec );
-
-/*
- * Minimum element of a 4-D vector
- */
-static inline float vmathV4MinElem_V( VmathVector4 vec );
-
-/*
- * Compute the sum of all elements of a 4-D vector
- */
-static inline float vmathV4Sum_V( VmathVector4 vec );
-
-/*
- * Compute the dot product of two 4-D vectors
- */
-static inline float vmathV4Dot_V( VmathVector4 vec0, VmathVector4 vec1 );
-
-/*
- * Compute the square of the length of a 4-D vector
- */
-static inline float vmathV4LengthSqr_V( VmathVector4 vec );
-
-/*
- * Compute the length of a 4-D vector
- */
-static inline float vmathV4Length_V( VmathVector4 vec );
-
-/*
- * Normalize a 4-D vector
- * NOTE: 
- * The result is unpredictable when all elements of vec are at or near zero.
- */
-static inline VmathVector4 vmathV4Normalize_V( VmathVector4 vec );
-
-/*
- * Outer product of two 4-D vectors
- */
-static inline VmathMatrix4 vmathV4Outer_V( VmathVector4 vec0, VmathVector4 vec1 );
-
-/*
- * Linear interpolation between two 4-D vectors
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline VmathVector4 vmathV4Lerp_V( float t, VmathVector4 vec0, VmathVector4 vec1 );
-
-/*
- * Spherical linear interpolation between two 4-D vectors
- * NOTE: 
- * The result is unpredictable if the vectors point in opposite directions.
- * Does not clamp t between 0 and 1.
- */
-static inline VmathVector4 vmathV4Slerp_V( float t, VmathVector4 unitVec0, VmathVector4 unitVec1 );
-
-/*
- * Conditionally select between two 4-D vectors
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline VmathVector4 vmathV4Select_V( VmathVector4 vec0, VmathVector4 vec1, unsigned int select1 );
-
-/*
- * Store four 4-D vectors as half-floats
- */
-static inline void vmathV4StoreHalfFloats_V( VmathVector4 vec0, VmathVector4 vec1, VmathVector4 vec2, VmathVector4 vec3, vec_ushort8 *twoQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 4-D vector
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathV4Print_V( VmathVector4 vec );
-
-/*
- * Print a 4-D vector and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathV4Prints_V( VmathVector4 vec, const char *name );
-
-#endif
-
-/*
- * Construct a 3-D point from x, y, and z elements
- */
-static inline VmathPoint3 vmathP3MakeFromElems_V( float x, float y, float z );
-
-/*
- * Copy elements from a 3-D vector into a 3-D point
- */
-static inline VmathPoint3 vmathP3MakeFromV3_V( VmathVector3 vec );
-
-/*
- * Set all elements of a 3-D point to the same scalar value
- */
-static inline VmathPoint3 vmathP3MakeFromScalar_V( float scalar );
-
-/*
- * Set vector float data in a 3-D point
- */
-static inline VmathPoint3 vmathP3MakeFrom128_V( vec_float4 vf4 );
-
-/*
- * Get vector float data from a 3-D point
- */
-static inline vec_float4 vmathP3Get128_V( VmathPoint3 pnt );
-
-/*
- * Set the x element of a 3-D point
- */
-static inline void vmathP3SetX_V( VmathPoint3 *result, float x );
-
-/*
- * Set the y element of a 3-D point
- */
-static inline void vmathP3SetY_V( VmathPoint3 *result, float y );
-
-/*
- * Set the z element of a 3-D point
- */
-static inline void vmathP3SetZ_V( VmathPoint3 *result, float z );
-
-/*
- * Get the x element of a 3-D point
- */
-static inline float vmathP3GetX_V( VmathPoint3 pnt );
-
-/*
- * Get the y element of a 3-D point
- */
-static inline float vmathP3GetY_V( VmathPoint3 pnt );
-
-/*
- * Get the z element of a 3-D point
- */
-static inline float vmathP3GetZ_V( VmathPoint3 pnt );
-
-/*
- * Set an x, y, or z element of a 3-D point by index
- */
-static inline void vmathP3SetElem_V( VmathPoint3 *result, int idx, float value );
-
-/*
- * Get an x, y, or z element of a 3-D point by index
- */
-static inline float vmathP3GetElem_V( VmathPoint3 pnt, int idx );
-
-/*
- * Subtract a 3-D point from another 3-D point
- */
-static inline VmathVector3 vmathP3Sub_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
-
-/*
- * Add a 3-D point to a 3-D vector
- */
-static inline VmathPoint3 vmathP3AddV3_V( VmathPoint3 pnt, VmathVector3 vec );
-
-/*
- * Subtract a 3-D vector from a 3-D point
- */
-static inline VmathPoint3 vmathP3SubV3_V( VmathPoint3 pnt, VmathVector3 vec );
-
-/*
- * Multiply two 3-D points per element
- */
-static inline VmathPoint3 vmathP3MulPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
-
-/*
- * Divide two 3-D points per element
- * NOTE: 
- * Floating-point behavior matches standard library function divf4.
- */
-static inline VmathPoint3 vmathP3DivPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
-
-/*
- * Compute the reciprocal of a 3-D point per element
- * NOTE: 
- * Floating-point behavior matches standard library function recipf4.
- */
-static inline VmathPoint3 vmathP3RecipPerElem_V( VmathPoint3 pnt );
-
-/*
- * Compute the square root of a 3-D point per element
- * NOTE: 
- * Floating-point behavior matches standard library function sqrtf4.
- */
-static inline VmathPoint3 vmathP3SqrtPerElem_V( VmathPoint3 pnt );
-
-/*
- * Compute the reciprocal square root of a 3-D point per element
- * NOTE: 
- * Floating-point behavior matches standard library function rsqrtf4.
- */
-static inline VmathPoint3 vmathP3RsqrtPerElem_V( VmathPoint3 pnt );
-
-/*
- * Compute the absolute value of a 3-D point per element
- */
-static inline VmathPoint3 vmathP3AbsPerElem_V( VmathPoint3 pnt );
-
-/*
- * Copy sign from one 3-D point to another, per element
- */
-static inline VmathPoint3 vmathP3CopySignPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
-
-/*
- * Maximum of two 3-D points per element
- */
-static inline VmathPoint3 vmathP3MaxPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
-
-/*
- * Minimum of two 3-D points per element
- */
-static inline VmathPoint3 vmathP3MinPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
-
-/*
- * Maximum element of a 3-D point
- */
-static inline float vmathP3MaxElem_V( VmathPoint3 pnt );
-
-/*
- * Minimum element of a 3-D point
- */
-static inline float vmathP3MinElem_V( VmathPoint3 pnt );
-
-/*
- * Compute the sum of all elements of a 3-D point
- */
-static inline float vmathP3Sum_V( VmathPoint3 pnt );
-
-/*
- * Apply uniform scale to a 3-D point
- */
-static inline VmathPoint3 vmathP3Scale_V( VmathPoint3 pnt, float scaleVal );
-
-/*
- * Apply non-uniform scale to a 3-D point
- */
-static inline VmathPoint3 vmathP3NonUniformScale_V( VmathPoint3 pnt, VmathVector3 scaleVec );
-
-/*
- * Scalar projection of a 3-D point on a unit-length 3-D vector
- */
-static inline float vmathP3Projection_V( VmathPoint3 pnt, VmathVector3 unitVec );
-
-/*
- * Compute the square of the distance of a 3-D point from the coordinate-system origin
- */
-static inline float vmathP3DistSqrFromOrigin_V( VmathPoint3 pnt );
-
-/*
- * Compute the distance of a 3-D point from the coordinate-system origin
- */
-static inline float vmathP3DistFromOrigin_V( VmathPoint3 pnt );
-
-/*
- * Compute the square of the distance between two 3-D points
- */
-static inline float vmathP3DistSqr_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
-
-/*
- * Compute the distance between two 3-D points
- */
-static inline float vmathP3Dist_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
-
-/*
- * Linear interpolation between two 3-D points
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline VmathPoint3 vmathP3Lerp_V( float t, VmathPoint3 pnt0, VmathPoint3 pnt1 );
-
-/*
- * Conditionally select between two 3-D points
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline VmathPoint3 vmathP3Select_V( VmathPoint3 pnt0, VmathPoint3 pnt1, unsigned int select1 );
-
-/*
- * Store x, y, and z elements of a 3-D point in the first three words of a quadword.
- * The value of the fourth word (the word with the highest address) remains unchanged
- */
-static inline void vmathP3StoreXYZ_V( VmathPoint3 pnt, vec_float4 *quad );
-
-/*
- * Load four three-float 3-D points, stored in three quadwords
- */
-static inline void vmathP3LoadXYZArray_V( VmathPoint3 *pnt0, VmathPoint3 *pnt1, VmathPoint3 *pnt2, VmathPoint3 *pnt3, const vec_float4 *threeQuads );
-
-/*
- * Store four 3-D points in three quadwords
- */
-static inline void vmathP3StoreXYZArray_V( VmathPoint3 pnt0, VmathPoint3 pnt1, VmathPoint3 pnt2, VmathPoint3 pnt3, vec_float4 *threeQuads );
-
-/*
- * Store eight 3-D points as half-floats
- */
-static inline void vmathP3StoreHalfFloats_V( VmathPoint3 pnt0, VmathPoint3 pnt1, VmathPoint3 pnt2, VmathPoint3 pnt3, VmathPoint3 pnt4, VmathPoint3 pnt5, VmathPoint3 pnt6, VmathPoint3 pnt7, vec_ushort8 *threeQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3-D point
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathP3Print_V( VmathPoint3 pnt );
-
-/*
- * Print a 3-D point and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathP3Prints_V( VmathPoint3 pnt, const char *name );
-
-#endif
-
-/*
- * Construct a quaternion from x, y, z, and w elements
- */
-static inline VmathQuat vmathQMakeFromElems_V( float x, float y, float z, float w );
-
-/*
- * Construct a quaternion from a 3-D vector and a scalar
- */
-static inline VmathQuat vmathQMakeFromV3Scalar_V( VmathVector3 xyz, float w );
-
-/*
- * Copy elements from a 4-D vector into a quaternion
- */
-static inline VmathQuat vmathQMakeFromV4_V( VmathVector4 vec );
-
-/*
- * Convert a rotation matrix to a unit-length quaternion
- */
-static inline VmathQuat vmathQMakeFromM3_V( VmathMatrix3 rotMat );
-
-/*
- * Set all elements of a quaternion to the same scalar value
- */
-static inline VmathQuat vmathQMakeFromScalar_V( float scalar );
-
-/*
- * Set vector float data in a quaternion
- */
-static inline VmathQuat vmathQMakeFrom128_V( vec_float4 vf4 );
-
-/*
- * Get vector float data from a quaternion
- */
-static inline vec_float4 vmathQGet128_V( VmathQuat quat );
-
-/*
- * Set the x, y, and z elements of a quaternion
- * NOTE: 
- * This function does not change the w element.
- */
-static inline void vmathQSetXYZ_V( VmathQuat *result, VmathVector3 vec );
-
-/*
- * Get the x, y, and z elements of a quaternion
- */
-static inline VmathVector3 vmathQGetXYZ_V( VmathQuat quat );
-
-/*
- * Set the x element of a quaternion
- */
-static inline void vmathQSetX_V( VmathQuat *result, float x );
-
-/*
- * Set the y element of a quaternion
- */
-static inline void vmathQSetY_V( VmathQuat *result, float y );
-
-/*
- * Set the z element of a quaternion
- */
-static inline void vmathQSetZ_V( VmathQuat *result, float z );
-
-/*
- * Set the w element of a quaternion
- */
-static inline void vmathQSetW_V( VmathQuat *result, float w );
-
-/*
- * Get the x element of a quaternion
- */
-static inline float vmathQGetX_V( VmathQuat quat );
-
-/*
- * Get the y element of a quaternion
- */
-static inline float vmathQGetY_V( VmathQuat quat );
-
-/*
- * Get the z element of a quaternion
- */
-static inline float vmathQGetZ_V( VmathQuat quat );
-
-/*
- * Get the w element of a quaternion
- */
-static inline float vmathQGetW_V( VmathQuat quat );
-
-/*
- * Set an x, y, z, or w element of a quaternion by index
- */
-static inline void vmathQSetElem_V( VmathQuat *result, int idx, float value );
-
-/*
- * Get an x, y, z, or w element of a quaternion by index
- */
-static inline float vmathQGetElem_V( VmathQuat quat, int idx );
-
-/*
- * Add two quaternions
- */
-static inline VmathQuat vmathQAdd_V( VmathQuat quat0, VmathQuat quat1 );
-
-/*
- * Subtract a quaternion from another quaternion
- */
-static inline VmathQuat vmathQSub_V( VmathQuat quat0, VmathQuat quat1 );
-
-/*
- * Multiply two quaternions
- */
-static inline VmathQuat vmathQMul_V( VmathQuat quat0, VmathQuat quat1 );
-
-/*
- * Multiply a quaternion by a scalar
- */
-static inline VmathQuat vmathQScalarMul_V( VmathQuat quat, float scalar );
-
-/*
- * Divide a quaternion by a scalar
- */
-static inline VmathQuat vmathQScalarDiv_V( VmathQuat quat, float scalar );
-
-/*
- * Negate all elements of a quaternion
- */
-static inline VmathQuat vmathQNeg_V( VmathQuat quat );
-
-/*
- * Construct an identity quaternion
- */
-static inline VmathQuat vmathQMakeIdentity_V( );
-
-/*
- * Construct a quaternion to rotate between two unit-length 3-D vectors
- * NOTE: 
- * The result is unpredictable if unitVec0 and unitVec1 point in opposite directions.
- */
-static inline VmathQuat vmathQMakeRotationArc_V( VmathVector3 unitVec0, VmathVector3 unitVec1 );
-
-/*
- * Construct a quaternion to rotate around a unit-length 3-D vector
- */
-static inline VmathQuat vmathQMakeRotationAxis_V( float radians, VmathVector3 unitVec );
-
-/*
- * Construct a quaternion to rotate around the x axis
- */
-static inline VmathQuat vmathQMakeRotationX_V( float radians );
-
-/*
- * Construct a quaternion to rotate around the y axis
- */
-static inline VmathQuat vmathQMakeRotationY_V( float radians );
-
-/*
- * Construct a quaternion to rotate around the z axis
- */
-static inline VmathQuat vmathQMakeRotationZ_V( float radians );
-
-/*
- * Compute the conjugate of a quaternion
- */
-static inline VmathQuat vmathQConj_V( VmathQuat quat );
-
-/*
- * Use a unit-length quaternion to rotate a 3-D vector
- */
-static inline VmathVector3 vmathQRotate_V( VmathQuat unitQuat, VmathVector3 vec );
-
-/*
- * Compute the dot product of two quaternions
- */
-static inline float vmathQDot_V( VmathQuat quat0, VmathQuat quat1 );
-
-/*
- * Compute the norm of a quaternion
- */
-static inline float vmathQNorm_V( VmathQuat quat );
-
-/*
- * Compute the length of a quaternion
- */
-static inline float vmathQLength_V( VmathQuat quat );
-
-/*
- * Normalize a quaternion
- * NOTE: 
- * The result is unpredictable when all elements of quat are at or near zero.
- */
-static inline VmathQuat vmathQNormalize_V( VmathQuat quat );
-
-/*
- * Linear interpolation between two quaternions
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline VmathQuat vmathQLerp_V( float t, VmathQuat quat0, VmathQuat quat1 );
-
-/*
- * Spherical linear interpolation between two quaternions
- * NOTE: 
- * Interpolates along the shortest path between orientations.
- * Does not clamp t between 0 and 1.
- */
-static inline VmathQuat vmathQSlerp_V( float t, VmathQuat unitQuat0, VmathQuat unitQuat1 );
-
-/*
- * Spherical quadrangle interpolation
- */
-static inline VmathQuat vmathQSquad_V( float t, VmathQuat unitQuat0, VmathQuat unitQuat1, VmathQuat unitQuat2, VmathQuat unitQuat3 );
-
-/*
- * Conditionally select between two quaternions
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline VmathQuat vmathQSelect_V( VmathQuat quat0, VmathQuat quat1, unsigned int select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a quaternion
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathQPrint_V( VmathQuat quat );
-
-/*
- * Print a quaternion and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathQPrints_V( VmathQuat quat, const char *name );
-
-#endif
-
-/*
- * Construct a 3x3 matrix containing the specified columns
- */
-static inline VmathMatrix3 vmathM3MakeFromCols_V( VmathVector3 col0, VmathVector3 col1, VmathVector3 col2 );
-
-/*
- * Construct a 3x3 rotation matrix from a unit-length quaternion
- */
-static inline VmathMatrix3 vmathM3MakeFromQ_V( VmathQuat unitQuat );
-
-/*
- * Set all elements of a 3x3 matrix to the same scalar value
- */
-static inline VmathMatrix3 vmathM3MakeFromScalar_V( float scalar );
-
-/*
- * Set column 0 of a 3x3 matrix
- */
-static inline void vmathM3SetCol0_V( VmathMatrix3 *result, VmathVector3 col0 );
-
-/*
- * Set column 1 of a 3x3 matrix
- */
-static inline void vmathM3SetCol1_V( VmathMatrix3 *result, VmathVector3 col1 );
-
-/*
- * Set column 2 of a 3x3 matrix
- */
-static inline void vmathM3SetCol2_V( VmathMatrix3 *result, VmathVector3 col2 );
-
-/*
- * Get column 0 of a 3x3 matrix
- */
-static inline VmathVector3 vmathM3GetCol0_V( VmathMatrix3 mat );
-
-/*
- * Get column 1 of a 3x3 matrix
- */
-static inline VmathVector3 vmathM3GetCol1_V( VmathMatrix3 mat );
-
-/*
- * Get column 2 of a 3x3 matrix
- */
-static inline VmathVector3 vmathM3GetCol2_V( VmathMatrix3 mat );
-
-/*
- * Set the column of a 3x3 matrix referred to by the specified index
- */
-static inline void vmathM3SetCol_V( VmathMatrix3 *result, int col, VmathVector3 vec );
-
-/*
- * Set the row of a 3x3 matrix referred to by the specified index
- */
-static inline void vmathM3SetRow_V( VmathMatrix3 *result, int row, VmathVector3 vec );
-
-/*
- * Get the column of a 3x3 matrix referred to by the specified index
- */
-static inline VmathVector3 vmathM3GetCol_V( VmathMatrix3 mat, int col );
-
-/*
- * Get the row of a 3x3 matrix referred to by the specified index
- */
-static inline VmathVector3 vmathM3GetRow_V( VmathMatrix3 mat, int row );
-
-/*
- * Set the element of a 3x3 matrix referred to by column and row indices
- */
-static inline void vmathM3SetElem_V( VmathMatrix3 *result, int col, int row, float val );
-
-/*
- * Get the element of a 3x3 matrix referred to by column and row indices
- */
-static inline float vmathM3GetElem_V( VmathMatrix3 mat, int col, int row );
-
-/*
- * Add two 3x3 matrices
- */
-static inline VmathMatrix3 vmathM3Add_V( VmathMatrix3 mat0, VmathMatrix3 mat1 );
-
-/*
- * Subtract a 3x3 matrix from another 3x3 matrix
- */
-static inline VmathMatrix3 vmathM3Sub_V( VmathMatrix3 mat0, VmathMatrix3 mat1 );
-
-/*
- * Negate all elements of a 3x3 matrix
- */
-static inline VmathMatrix3 vmathM3Neg_V( VmathMatrix3 mat );
-
-/*
- * Multiply a 3x3 matrix by a scalar
- */
-static inline VmathMatrix3 vmathM3ScalarMul_V( VmathMatrix3 mat, float scalar );
-
-/*
- * Multiply a 3x3 matrix by a 3-D vector
- */
-static inline VmathVector3 vmathM3MulV3_V( VmathMatrix3 mat, VmathVector3 vec );
-
-/*
- * Multiply two 3x3 matrices
- */
-static inline VmathMatrix3 vmathM3Mul_V( VmathMatrix3 mat0, VmathMatrix3 mat1 );
-
-/*
- * Construct an identity 3x3 matrix
- */
-static inline VmathMatrix3 vmathM3MakeIdentity_V( );
-
-/*
- * Construct a 3x3 matrix to rotate around the x axis
- */
-static inline VmathMatrix3 vmathM3MakeRotationX_V( float radians );
-
-/*
- * Construct a 3x3 matrix to rotate around the y axis
- */
-static inline VmathMatrix3 vmathM3MakeRotationY_V( float radians );
-
-/*
- * Construct a 3x3 matrix to rotate around the z axis
- */
-static inline VmathMatrix3 vmathM3MakeRotationZ_V( float radians );
-
-/*
- * Construct a 3x3 matrix to rotate around the x, y, and z axes
- */
-static inline VmathMatrix3 vmathM3MakeRotationZYX_V( VmathVector3 radiansXYZ );
-
-/*
- * Construct a 3x3 matrix to rotate around a unit-length 3-D vector
- */
-static inline VmathMatrix3 vmathM3MakeRotationAxis_V( float radians, VmathVector3 unitVec );
-
-/*
- * Construct a rotation matrix from a unit-length quaternion
- */
-static inline VmathMatrix3 vmathM3MakeRotationQ_V( VmathQuat unitQuat );
-
-/*
- * Construct a 3x3 matrix to perform scaling
- */
-static inline VmathMatrix3 vmathM3MakeScale_V( VmathVector3 scaleVec );
-
-/*
- * Append (post-multiply) a scale transformation to a 3x3 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline VmathMatrix3 vmathM3AppendScale_V( VmathMatrix3 mat, VmathVector3 scaleVec );
-
-/*
- * Prepend (pre-multiply) a scale transformation to a 3x3 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline VmathMatrix3 vmathM3PrependScale_V( VmathVector3 scaleVec, VmathMatrix3 mat );
-
-/*
- * Multiply two 3x3 matrices per element
- */
-static inline VmathMatrix3 vmathM3MulPerElem_V( VmathMatrix3 mat0, VmathMatrix3 mat1 );
-
-/*
- * Compute the absolute value of a 3x3 matrix per element
- */
-static inline VmathMatrix3 vmathM3AbsPerElem_V( VmathMatrix3 mat );
-
-/*
- * Transpose of a 3x3 matrix
- */
-static inline VmathMatrix3 vmathM3Transpose_V( VmathMatrix3 mat );
-
-/*
- * Compute the inverse of a 3x3 matrix
- * NOTE: 
- * Result is unpredictable when the determinant of mat is equal to or near 0.
- */
-static inline VmathMatrix3 vmathM3Inverse_V( VmathMatrix3 mat );
-
-/*
- * Determinant of a 3x3 matrix
- */
-static inline float vmathM3Determinant_V( VmathMatrix3 mat );
-
-/*
- * Conditionally select between two 3x3 matrices
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline VmathMatrix3 vmathM3Select_V( VmathMatrix3 mat0, VmathMatrix3 mat1, unsigned int select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3x3 matrix
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathM3Print_V( VmathMatrix3 mat );
-
-/*
- * Print a 3x3 matrix and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathM3Prints_V( VmathMatrix3 mat, const char *name );
-
-#endif
-
-/*
- * Construct a 4x4 matrix containing the specified columns
- */
-static inline VmathMatrix4 vmathM4MakeFromCols_V( VmathVector4 col0, VmathVector4 col1, VmathVector4 col2, VmathVector4 col3 );
-
-/*
- * Construct a 4x4 matrix from a 3x4 transformation matrix
- */
-static inline VmathMatrix4 vmathM4MakeFromT3_V( VmathTransform3 mat );
-
-/*
- * Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector
- */
-static inline VmathMatrix4 vmathM4MakeFromM3V3_V( VmathMatrix3 mat, VmathVector3 translateVec );
-
-/*
- * Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector
- */
-static inline VmathMatrix4 vmathM4MakeFromQV3_V( VmathQuat unitQuat, VmathVector3 translateVec );
-
-/*
- * Set all elements of a 4x4 matrix to the same scalar value
- */
-static inline VmathMatrix4 vmathM4MakeFromScalar_V( float scalar );
-
-/*
- * Set the upper-left 3x3 submatrix
- * NOTE: 
- * This function does not change the bottom row elements.
- */
-static inline void vmathM4SetUpper3x3_V( VmathMatrix4 *result, VmathMatrix3 mat3 );
-
-/*
- * Get the upper-left 3x3 submatrix of a 4x4 matrix
- */
-static inline VmathMatrix3 vmathM4GetUpper3x3_V( VmathMatrix4 mat );
-
-/*
- * Set translation component
- * NOTE: 
- * This function does not change the bottom row elements.
- */
-static inline void vmathM4SetTranslation_V( VmathMatrix4 *result, VmathVector3 translateVec );
-
-/*
- * Get the translation component of a 4x4 matrix
- */
-static inline VmathVector3 vmathM4GetTranslation_V( VmathMatrix4 mat );
-
-/*
- * Set column 0 of a 4x4 matrix
- */
-static inline void vmathM4SetCol0_V( VmathMatrix4 *result, VmathVector4 col0 );
-
-/*
- * Set column 1 of a 4x4 matrix
- */
-static inline void vmathM4SetCol1_V( VmathMatrix4 *result, VmathVector4 col1 );
-
-/*
- * Set column 2 of a 4x4 matrix
- */
-static inline void vmathM4SetCol2_V( VmathMatrix4 *result, VmathVector4 col2 );
-
-/*
- * Set column 3 of a 4x4 matrix
- */
-static inline void vmathM4SetCol3_V( VmathMatrix4 *result, VmathVector4 col3 );
-
-/*
- * Get column 0 of a 4x4 matrix
- */
-static inline VmathVector4 vmathM4GetCol0_V( VmathMatrix4 mat );
-
-/*
- * Get column 1 of a 4x4 matrix
- */
-static inline VmathVector4 vmathM4GetCol1_V( VmathMatrix4 mat );
-
-/*
- * Get column 2 of a 4x4 matrix
- */
-static inline VmathVector4 vmathM4GetCol2_V( VmathMatrix4 mat );
-
-/*
- * Get column 3 of a 4x4 matrix
- */
-static inline VmathVector4 vmathM4GetCol3_V( VmathMatrix4 mat );
-
-/*
- * Set the column of a 4x4 matrix referred to by the specified index
- */
-static inline void vmathM4SetCol_V( VmathMatrix4 *result, int col, VmathVector4 vec );
-
-/*
- * Set the row of a 4x4 matrix referred to by the specified index
- */
-static inline void vmathM4SetRow_V( VmathMatrix4 *result, int row, VmathVector4 vec );
-
-/*
- * Get the column of a 4x4 matrix referred to by the specified index
- */
-static inline VmathVector4 vmathM4GetCol_V( VmathMatrix4 mat, int col );
-
-/*
- * Get the row of a 4x4 matrix referred to by the specified index
- */
-static inline VmathVector4 vmathM4GetRow_V( VmathMatrix4 mat, int row );
-
-/*
- * Set the element of a 4x4 matrix referred to by column and row indices
- */
-static inline void vmathM4SetElem_V( VmathMatrix4 *result, int col, int row, float val );
-
-/*
- * Get the element of a 4x4 matrix referred to by column and row indices
- */
-static inline float vmathM4GetElem_V( VmathMatrix4 mat, int col, int row );
-
-/*
- * Add two 4x4 matrices
- */
-static inline VmathMatrix4 vmathM4Add_V( VmathMatrix4 mat0, VmathMatrix4 mat1 );
-
-/*
- * Subtract a 4x4 matrix from another 4x4 matrix
- */
-static inline VmathMatrix4 vmathM4Sub_V( VmathMatrix4 mat0, VmathMatrix4 mat1 );
-
-/*
- * Negate all elements of a 4x4 matrix
- */
-static inline VmathMatrix4 vmathM4Neg_V( VmathMatrix4 mat );
-
-/*
- * Multiply a 4x4 matrix by a scalar
- */
-static inline VmathMatrix4 vmathM4ScalarMul_V( VmathMatrix4 mat, float scalar );
-
-/*
- * Multiply a 4x4 matrix by a 4-D vector
- */
-static inline VmathVector4 vmathM4MulV4_V( VmathMatrix4 mat, VmathVector4 vec );
-
-/*
- * Multiply a 4x4 matrix by a 3-D vector
- */
-static inline VmathVector4 vmathM4MulV3_V( VmathMatrix4 mat, VmathVector3 vec );
-
-/*
- * Multiply a 4x4 matrix by a 3-D point
- */
-static inline VmathVector4 vmathM4MulP3_V( VmathMatrix4 mat, VmathPoint3 pnt );
-
-/*
- * Multiply two 4x4 matrices
- */
-static inline VmathMatrix4 vmathM4Mul_V( VmathMatrix4 mat0, VmathMatrix4 mat1 );
-
-/*
- * Multiply a 4x4 matrix by a 3x4 transformation matrix
- */
-static inline VmathMatrix4 vmathM4MulT3_V( VmathMatrix4 mat, VmathTransform3 tfrm );
-
-/*
- * Construct an identity 4x4 matrix
- */
-static inline VmathMatrix4 vmathM4MakeIdentity_V( );
-
-/*
- * Construct a 4x4 matrix to rotate around the x axis
- */
-static inline VmathMatrix4 vmathM4MakeRotationX_V( float radians );
-
-/*
- * Construct a 4x4 matrix to rotate around the y axis
- */
-static inline VmathMatrix4 vmathM4MakeRotationY_V( float radians );
-
-/*
- * Construct a 4x4 matrix to rotate around the z axis
- */
-static inline VmathMatrix4 vmathM4MakeRotationZ_V( float radians );
-
-/*
- * Construct a 4x4 matrix to rotate around the x, y, and z axes
- */
-static inline VmathMatrix4 vmathM4MakeRotationZYX_V( VmathVector3 radiansXYZ );
-
-/*
- * Construct a 4x4 matrix to rotate around a unit-length 3-D vector
- */
-static inline VmathMatrix4 vmathM4MakeRotationAxis_V( float radians, VmathVector3 unitVec );
-
-/*
- * Construct a rotation matrix from a unit-length quaternion
- */
-static inline VmathMatrix4 vmathM4MakeRotationQ_V( VmathQuat unitQuat );
-
-/*
- * Construct a 4x4 matrix to perform scaling
- */
-static inline VmathMatrix4 vmathM4MakeScale_V( VmathVector3 scaleVec );
-
-/*
- * Construct a 4x4 matrix to perform translation
- */
-static inline VmathMatrix4 vmathM4MakeTranslation_V( VmathVector3 translateVec );
-
-/*
- * Construct viewing matrix based on eye position, position looked at, and up direction
- */
-static inline VmathMatrix4 vmathM4MakeLookAt_V( VmathPoint3 eyePos, VmathPoint3 lookAtPos, VmathVector3 upVec );
-
-/*
- * Construct a perspective projection matrix
- */
-static inline VmathMatrix4 vmathM4MakePerspective_V( float fovyRadians, float aspect, float zNear, float zFar );
-
-/*
- * Construct a perspective projection matrix based on frustum
- */
-static inline VmathMatrix4 vmathM4MakeFrustum_V( float left, float right, float bottom, float top, float zNear, float zFar );
-
-/*
- * Construct an orthographic projection matrix
- */
-static inline VmathMatrix4 vmathM4MakeOrthographic_V( float left, float right, float bottom, float top, float zNear, float zFar );
-
-/*
- * Append (post-multiply) a scale transformation to a 4x4 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline VmathMatrix4 vmathM4AppendScale_V( VmathMatrix4 mat, VmathVector3 scaleVec );
-
-/*
- * Prepend (pre-multiply) a scale transformation to a 4x4 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline VmathMatrix4 vmathM4PrependScale_V( VmathVector3 scaleVec, VmathMatrix4 mat );
-
-/*
- * Multiply two 4x4 matrices per element
- */
-static inline VmathMatrix4 vmathM4MulPerElem_V( VmathMatrix4 mat0, VmathMatrix4 mat1 );
-
-/*
- * Compute the absolute value of a 4x4 matrix per element
- */
-static inline VmathMatrix4 vmathM4AbsPerElem_V( VmathMatrix4 mat );
-
-/*
- * Transpose of a 4x4 matrix
- */
-static inline VmathMatrix4 vmathM4Transpose_V( VmathMatrix4 mat );
-
-/*
- * Compute the inverse of a 4x4 matrix
- * NOTE: 
- * Result is unpredictable when the determinant of mat is equal to or near 0.
- */
-static inline VmathMatrix4 vmathM4Inverse_V( VmathMatrix4 mat );
-
-/*
- * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix
- * NOTE: 
- * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.  The result is unpredictable when the determinant of mat is equal to or near 0.
- */
-static inline VmathMatrix4 vmathM4AffineInverse_V( VmathMatrix4 mat );
-
-/*
- * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix
- * NOTE: 
- * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.
- */
-static inline VmathMatrix4 vmathM4OrthoInverse_V( VmathMatrix4 mat );
-
-/*
- * Determinant of a 4x4 matrix
- */
-static inline float vmathM4Determinant_V( VmathMatrix4 mat );
-
-/*
- * Conditionally select between two 4x4 matrices
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline VmathMatrix4 vmathM4Select_V( VmathMatrix4 mat0, VmathMatrix4 mat1, unsigned int select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 4x4 matrix
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathM4Print_V( VmathMatrix4 mat );
-
-/*
- * Print a 4x4 matrix and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathM4Prints_V( VmathMatrix4 mat, const char *name );
-
-#endif
-
-/*
- * Construct a 3x4 transformation matrix containing the specified columns
- */
-static inline VmathTransform3 vmathT3MakeFromCols_V( VmathVector3 col0, VmathVector3 col1, VmathVector3 col2, VmathVector3 col3 );
-
-/*
- * Construct a 3x4 transformation matrix from a 3x3 matrix and a 3-D vector
- */
-static inline VmathTransform3 vmathT3MakeFromM3V3_V( VmathMatrix3 tfrm, VmathVector3 translateVec );
-
-/*
- * Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector
- */
-static inline VmathTransform3 vmathT3MakeFromQV3_V( VmathQuat unitQuat, VmathVector3 translateVec );
-
-/*
- * Set all elements of a 3x4 transformation matrix to the same scalar value
- */
-static inline VmathTransform3 vmathT3MakeFromScalar_V( float scalar );
-
-/*
- * Set the upper-left 3x3 submatrix
- */
-static inline void vmathT3SetUpper3x3_V( VmathTransform3 *result, VmathMatrix3 mat3 );
-
-/*
- * Get the upper-left 3x3 submatrix of a 3x4 transformation matrix
- */
-static inline VmathMatrix3 vmathT3GetUpper3x3_V( VmathTransform3 tfrm );
-
-/*
- * Set translation component
- */
-static inline void vmathT3SetTranslation_V( VmathTransform3 *result, VmathVector3 translateVec );
-
-/*
- * Get the translation component of a 3x4 transformation matrix
- */
-static inline VmathVector3 vmathT3GetTranslation_V( VmathTransform3 tfrm );
-
-/*
- * Set column 0 of a 3x4 transformation matrix
- */
-static inline void vmathT3SetCol0_V( VmathTransform3 *result, VmathVector3 col0 );
-
-/*
- * Set column 1 of a 3x4 transformation matrix
- */
-static inline void vmathT3SetCol1_V( VmathTransform3 *result, VmathVector3 col1 );
-
-/*
- * Set column 2 of a 3x4 transformation matrix
- */
-static inline void vmathT3SetCol2_V( VmathTransform3 *result, VmathVector3 col2 );
-
-/*
- * Set column 3 of a 3x4 transformation matrix
- */
-static inline void vmathT3SetCol3_V( VmathTransform3 *result, VmathVector3 col3 );
-
-/*
- * Get column 0 of a 3x4 transformation matrix
- */
-static inline VmathVector3 vmathT3GetCol0_V( VmathTransform3 tfrm );
-
-/*
- * Get column 1 of a 3x4 transformation matrix
- */
-static inline VmathVector3 vmathT3GetCol1_V( VmathTransform3 tfrm );
-
-/*
- * Get column 2 of a 3x4 transformation matrix
- */
-static inline VmathVector3 vmathT3GetCol2_V( VmathTransform3 tfrm );
-
-/*
- * Get column 3 of a 3x4 transformation matrix
- */
-static inline VmathVector3 vmathT3GetCol3_V( VmathTransform3 tfrm );
-
-/*
- * Set the column of a 3x4 transformation matrix referred to by the specified index
- */
-static inline void vmathT3SetCol_V( VmathTransform3 *result, int col, VmathVector3 vec );
-
-/*
- * Set the row of a 3x4 transformation matrix referred to by the specified index
- */
-static inline void vmathT3SetRow_V( VmathTransform3 *result, int row, VmathVector4 vec );
-
-/*
- * Get the column of a 3x4 transformation matrix referred to by the specified index
- */
-static inline VmathVector3 vmathT3GetCol_V( VmathTransform3 tfrm, int col );
-
-/*
- * Get the row of a 3x4 transformation matrix referred to by the specified index
- */
-static inline VmathVector4 vmathT3GetRow_V( VmathTransform3 tfrm, int row );
-
-/*
- * Set the element of a 3x4 transformation matrix referred to by column and row indices
- */
-static inline void vmathT3SetElem_V( VmathTransform3 *result, int col, int row, float val );
-
-/*
- * Get the element of a 3x4 transformation matrix referred to by column and row indices
- */
-static inline float vmathT3GetElem_V( VmathTransform3 tfrm, int col, int row );
-
-/*
- * Multiply a 3x4 transformation matrix by a 3-D vector
- */
-static inline VmathVector3 vmathT3MulV3_V( VmathTransform3 tfrm, VmathVector3 vec );
-
-/*
- * Multiply a 3x4 transformation matrix by a 3-D point
- */
-static inline VmathPoint3 vmathT3MulP3_V( VmathTransform3 tfrm, VmathPoint3 pnt );
-
-/*
- * Multiply two 3x4 transformation matrices
- */
-static inline VmathTransform3 vmathT3Mul_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1 );
-
-/*
- * Construct an identity 3x4 transformation matrix
- */
-static inline VmathTransform3 vmathT3MakeIdentity_V( );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the x axis
- */
-static inline VmathTransform3 vmathT3MakeRotationX_V( float radians );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the y axis
- */
-static inline VmathTransform3 vmathT3MakeRotationY_V( float radians );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the z axis
- */
-static inline VmathTransform3 vmathT3MakeRotationZ_V( float radians );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the x, y, and z axes
- */
-static inline VmathTransform3 vmathT3MakeRotationZYX_V( VmathVector3 radiansXYZ );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector
- */
-static inline VmathTransform3 vmathT3MakeRotationAxis_V( float radians, VmathVector3 unitVec );
-
-/*
- * Construct a rotation matrix from a unit-length quaternion
- */
-static inline VmathTransform3 vmathT3MakeRotationQ_V( VmathQuat unitQuat );
-
-/*
- * Construct a 3x4 transformation matrix to perform scaling
- */
-static inline VmathTransform3 vmathT3MakeScale_V( VmathVector3 scaleVec );
-
-/*
- * Construct a 3x4 transformation matrix to perform translation
- */
-static inline VmathTransform3 vmathT3MakeTranslation_V( VmathVector3 translateVec );
-
-/*
- * Append (post-multiply) a scale transformation to a 3x4 transformation matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline VmathTransform3 vmathT3AppendScale_V( VmathTransform3 tfrm, VmathVector3 scaleVec );
-
-/*
- * Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline VmathTransform3 vmathT3PrependScale_V( VmathVector3 scaleVec, VmathTransform3 tfrm );
-
-/*
- * Multiply two 3x4 transformation matrices per element
- */
-static inline VmathTransform3 vmathT3MulPerElem_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1 );
-
-/*
- * Compute the absolute value of a 3x4 transformation matrix per element
- */
-static inline VmathTransform3 vmathT3AbsPerElem_V( VmathTransform3 tfrm );
-
-/*
- * Inverse of a 3x4 transformation matrix
- * NOTE: 
- * Result is unpredictable when the determinant of the left 3x3 submatrix is equal to or near 0.
- */
-static inline VmathTransform3 vmathT3Inverse_V( VmathTransform3 tfrm );
-
-/*
- * Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix
- * NOTE: 
- * This can be used to achieve better performance than a general inverse when the specified 3x4 transformation matrix meets the given restrictions.
- */
-static inline VmathTransform3 vmathT3OrthoInverse_V( VmathTransform3 tfrm );
-
-/*
- * Conditionally select between two 3x4 transformation matrices
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline VmathTransform3 vmathT3Select_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1, unsigned int select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3x4 transformation matrix
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathT3Print_V( VmathTransform3 tfrm );
-
-/*
- * Print a 3x4 transformation matrix and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathT3Prints_V( VmathTransform3 tfrm, const char *name );
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#include "vectormath_aos.h"
-#include "vec_aos_v.h"
-#include "quat_aos_v.h"
-#include "mat_aos_v.h"
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_AOS_C_V_SPU_H
+#define _VECTORMATH_AOS_C_V_SPU_H
+
+#include <math.h>
+#include <spu_intrinsics.h>
+
+#ifdef _VECTORMATH_DEBUG
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+#ifndef _VECTORMATH_AOS_C_TYPES_H
+#define _VECTORMATH_AOS_C_TYPES_H
+
+/* A 3-D vector in array-of-structures format
+ */
+typedef struct _VmathVector3
+{
+    vec_float4 vec128; /* the vector float data of this 3-D vector, held as one vec_float4 */
+} VmathVector3;
+
+/* A 4-D vector in array-of-structures format
+ */
+typedef struct _VmathVector4
+{
+    vec_float4 vec128; /* sole member: all data of this 4-D vector, held as one vec_float4 */
+} VmathVector4;
+
+/* A 3-D point in array-of-structures format
+ */
+typedef struct _VmathPoint3
+{
+    vec_float4 vec128; /* sole member: all data of this 3-D point, held as one vec_float4 */
+} VmathPoint3;
+
+/* A quaternion in array-of-structures format
+ */
+typedef struct _VmathQuat
+{
+    vec_float4 vec128; /* sole member: all data of this quaternion, held as one vec_float4 */
+} VmathQuat;
+
+/* A 3x3 matrix in array-of-structures format
+ */
+typedef struct _VmathMatrix3
+{
+    VmathVector3 col0; /* column 0, stored as a 3-D vector */
+    VmathVector3 col1; /* column 1, stored as a 3-D vector */
+    VmathVector3 col2; /* column 2, stored as a 3-D vector */
+} VmathMatrix3;
+
+/* A 4x4 matrix in array-of-structures format
+ */
+typedef struct _VmathMatrix4
+{
+    VmathVector4 col0; /* column 0, stored as a 4-D vector */
+    VmathVector4 col1; /* column 1, stored as a 4-D vector */
+    VmathVector4 col2; /* column 2, stored as a 4-D vector */
+    VmathVector4 col3; /* column 3, stored as a 4-D vector */
+} VmathMatrix4;
+
+/* A 3x4 transformation matrix in array-of-structures format
+ */
+typedef struct _VmathTransform3
+{
+    VmathVector3 col0; /* column 0, stored as a 3-D vector */
+    VmathVector3 col1; /* column 1, stored as a 3-D vector */
+    VmathVector3 col2; /* column 2, stored as a 3-D vector */
+    VmathVector3 col3; /* column 3, stored as a 3-D vector */
+} VmathTransform3;
+
+#endif
+
+/*
+ * Construct a 3-D vector from x, y, and z elements
+ */
+static inline VmathVector3 vmathV3MakeFromElems_V( float x, float y, float z );
+
+/*
+ * Copy elements from a 3-D point into a 3-D vector
+ */
+static inline VmathVector3 vmathV3MakeFromP3_V( VmathPoint3 pnt );
+
+/*
+ * Set all elements of a 3-D vector to the same scalar value
+ */
+static inline VmathVector3 vmathV3MakeFromScalar_V( float scalar );
+
+/*
+ * Set vector float data in a 3-D vector
+ */
+static inline VmathVector3 vmathV3MakeFrom128_V( vec_float4 vf4 );
+
+/*
+ * Get vector float data from a 3-D vector
+ */
+static inline vec_float4 vmathV3Get128_V( VmathVector3 vec );
+
+/*
+ * Set the x element of a 3-D vector
+ */
+static inline void vmathV3SetX_V( VmathVector3 *result, float x );
+
+/*
+ * Set the y element of a 3-D vector
+ */
+static inline void vmathV3SetY_V( VmathVector3 *result, float y );
+
+/*
+ * Set the z element of a 3-D vector
+ */
+static inline void vmathV3SetZ_V( VmathVector3 *result, float z );
+
+/*
+ * Get the x element of a 3-D vector
+ */
+static inline float vmathV3GetX_V( VmathVector3 vec );
+
+/*
+ * Get the y element of a 3-D vector
+ */
+static inline float vmathV3GetY_V( VmathVector3 vec );
+
+/*
+ * Get the z element of a 3-D vector
+ */
+static inline float vmathV3GetZ_V( VmathVector3 vec );
+
+/*
+ * Set an x, y, or z element of a 3-D vector by index
+ */
+static inline void vmathV3SetElem_V( VmathVector3 *result, int idx, float value );
+
+/*
+ * Get an x, y, or z element of a 3-D vector by index
+ */
+static inline float vmathV3GetElem_V( VmathVector3 vec, int idx );
+
+/*
+ * Add two 3-D vectors
+ */
+static inline VmathVector3 vmathV3Add_V( VmathVector3 vec0, VmathVector3 vec1 );
+
+/*
+ * Subtract a 3-D vector from another 3-D vector
+ */
+static inline VmathVector3 vmathV3Sub_V( VmathVector3 vec0, VmathVector3 vec1 );
+
+/*
+ * Add a 3-D vector to a 3-D point
+ */
+static inline VmathPoint3 vmathV3AddP3_V( VmathVector3 vec, VmathPoint3 pnt );
+
+/*
+ * Multiply a 3-D vector by a scalar
+ */
+static inline VmathVector3 vmathV3ScalarMul_V( VmathVector3 vec, float scalar );
+
+/*
+ * Divide a 3-D vector by a scalar
+ */
+static inline VmathVector3 vmathV3ScalarDiv_V( VmathVector3 vec, float scalar );
+
+/*
+ * Negate all elements of a 3-D vector
+ */
+static inline VmathVector3 vmathV3Neg_V( VmathVector3 vec );
+
+/*
+ * Construct the x axis as a 3-D vector; takes no arguments (declared with a void prototype)
+ */
+static inline VmathVector3 vmathV3MakeXAxis_V( void );
+
+/*
+ * Construct the y axis as a 3-D vector; takes no arguments (declared with a void prototype)
+ */
+static inline VmathVector3 vmathV3MakeYAxis_V( void );
+
+/*
+ * Construct the z axis as a 3-D vector; takes no arguments (declared with a void prototype)
+ */
+static inline VmathVector3 vmathV3MakeZAxis_V( void );
+
+/*
+ * Multiply two 3-D vectors per element
+ */
+static inline VmathVector3 vmathV3MulPerElem_V( VmathVector3 vec0, VmathVector3 vec1 );
+
+/*
+ * Divide two 3-D vectors per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function divf4.
+ */
+static inline VmathVector3 vmathV3DivPerElem_V( VmathVector3 vec0, VmathVector3 vec1 );
+
+/*
+ * Compute the reciprocal of a 3-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function recipf4.
+ */
+static inline VmathVector3 vmathV3RecipPerElem_V( VmathVector3 vec );
+
+/*
+ * Compute the square root of a 3-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function sqrtf4.
+ */
+static inline VmathVector3 vmathV3SqrtPerElem_V( VmathVector3 vec );
+
+/*
+ * Compute the reciprocal square root of a 3-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function rsqrtf4.
+ */
+static inline VmathVector3 vmathV3RsqrtPerElem_V( VmathVector3 vec );
+
+/*
+ * Compute the absolute value of a 3-D vector per element
+ */
+static inline VmathVector3 vmathV3AbsPerElem_V( VmathVector3 vec );
+
+/*
+ * Copy sign from one 3-D vector to another, per element
+ */
+static inline VmathVector3 vmathV3CopySignPerElem_V( VmathVector3 vec0, VmathVector3 vec1 );
+
+/*
+ * Maximum of two 3-D vectors per element
+ */
+static inline VmathVector3 vmathV3MaxPerElem_V( VmathVector3 vec0, VmathVector3 vec1 );
+
+/*
+ * Minimum of two 3-D vectors per element
+ */
+static inline VmathVector3 vmathV3MinPerElem_V( VmathVector3 vec0, VmathVector3 vec1 );
+
+/*
+ * Maximum element of a 3-D vector
+ */
+static inline float vmathV3MaxElem_V( VmathVector3 vec );
+
+/*
+ * Minimum element of a 3-D vector
+ */
+static inline float vmathV3MinElem_V( VmathVector3 vec );
+
+/*
+ * Compute the sum of all elements of a 3-D vector
+ */
+static inline float vmathV3Sum_V( VmathVector3 vec );
+
+/*
+ * Compute the dot product of two 3-D vectors
+ */
+static inline float vmathV3Dot_V( VmathVector3 vec0, VmathVector3 vec1 );
+
+/*
+ * Compute the square of the length of a 3-D vector
+ */
+static inline float vmathV3LengthSqr_V( VmathVector3 vec );
+
+/*
+ * Compute the length of a 3-D vector
+ */
+static inline float vmathV3Length_V( VmathVector3 vec );
+
+/*
+ * Normalize a 3-D vector
+ * NOTE: 
+ * The result is unpredictable when all elements of vec are at or near zero.
+ */
+static inline VmathVector3 vmathV3Normalize_V( VmathVector3 vec );
+
+/*
+ * Compute cross product of two 3-D vectors
+ */
+static inline VmathVector3 vmathV3Cross_V( VmathVector3 vec0, VmathVector3 vec1 );
+
+/*
+ * Outer product of two 3-D vectors
+ */
+static inline VmathMatrix3 vmathV3Outer_V( VmathVector3 vec0, VmathVector3 vec1 );
+
+/*
+ * Pre-multiply a row vector by a 3x3 matrix
+ * NOTE: 
+ * Slower than column post-multiply.
+ */
+static inline VmathVector3 vmathV3RowMul_V( VmathVector3 vec, VmathMatrix3 mat );
+
+/*
+ * Cross-product matrix of a 3-D vector
+ */
+static inline VmathMatrix3 vmathV3CrossMatrix_V( VmathVector3 vec );
+
+/*
+ * Create cross-product matrix and multiply
+ * NOTE: 
+ * Faster than separately creating a cross-product matrix and multiplying.
+ */
+static inline VmathMatrix3 vmathV3CrossMatrixMul_V( VmathVector3 vec, VmathMatrix3 mat );
+
+/*
+ * Linear interpolation between two 3-D vectors
+ * NOTE: 
+ * Does not clamp t between 0 and 1.
+ */
+static inline VmathVector3 vmathV3Lerp_V( float t, VmathVector3 vec0, VmathVector3 vec1 );
+
+/*
+ * Spherical linear interpolation between two 3-D vectors
+ * NOTE: 
+ * The result is unpredictable if the vectors point in opposite directions.
+ * Does not clamp t between 0 and 1.
+ */
+static inline VmathVector3 vmathV3Slerp_V( float t, VmathVector3 unitVec0, VmathVector3 unitVec1 );
+
+/*
+ * Conditionally select between two 3-D vectors
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline VmathVector3 vmathV3Select_V( VmathVector3 vec0, VmathVector3 vec1, unsigned int select1 );
+
+/*
+ * Store x, y, and z elements of a 3-D vector in the first three words of a quadword.
+ * The value of the fourth word (the word with the highest address) remains unchanged.
+ */
+static inline void vmathV3StoreXYZ_V( VmathVector3 vec, vec_float4 *quad );
+
+/*
+ * Load four three-float 3-D vectors, stored in three quadwords
+ */
+static inline void vmathV3LoadXYZArray_V( VmathVector3 *vec0, VmathVector3 *vec1, VmathVector3 *vec2, VmathVector3 *vec3, const vec_float4 *threeQuads );
+
+/*
+ * Store four 3-D vectors in three quadwords
+ */
+static inline void vmathV3StoreXYZArray_V( VmathVector3 vec0, VmathVector3 vec1, VmathVector3 vec2, VmathVector3 vec3, vec_float4 *threeQuads );
+
+/*
+ * Store eight 3-D vectors as half-floats
+ */
+static inline void vmathV3StoreHalfFloats_V( VmathVector3 vec0, VmathVector3 vec1, VmathVector3 vec2, VmathVector3 vec3, VmathVector3 vec4, VmathVector3 vec5, VmathVector3 vec6, VmathVector3 vec7, vec_ushort8 *threeQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3-D vector
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathV3Print_V( VmathVector3 vec );
+
+/*
+ * Print a 3-D vector and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathV3Prints_V( VmathVector3 vec, const char *name );
+
+#endif
+
+/*
+ * Construct a 4-D vector from x, y, z, and w elements
+ */
+static inline VmathVector4 vmathV4MakeFromElems_V( float x, float y, float z, float w );
+
+/*
+ * Construct a 4-D vector from a 3-D vector and a scalar
+ */
+static inline VmathVector4 vmathV4MakeFromV3Scalar_V( VmathVector3 xyz, float w );
+
+/*
+ * Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0
+ */
+static inline VmathVector4 vmathV4MakeFromV3_V( VmathVector3 vec );
+
+/*
+ * Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1
+ */
+static inline VmathVector4 vmathV4MakeFromP3_V( VmathPoint3 pnt );
+
+/*
+ * Copy elements from a quaternion into a 4-D vector
+ */
+static inline VmathVector4 vmathV4MakeFromQ_V( VmathQuat quat );
+
+/*
+ * Set all elements of a 4-D vector to the same scalar value
+ */
+static inline VmathVector4 vmathV4MakeFromScalar_V( float scalar );
+
+/*
+ * Set vector float data in a 4-D vector
+ */
+static inline VmathVector4 vmathV4MakeFrom128_V( vec_float4 vf4 );
+
+/*
+ * Get vector float data from a 4-D vector
+ */
+static inline vec_float4 vmathV4Get128_V( VmathVector4 vec );
+
+/*
+ * Set the x, y, and z elements of a 4-D vector
+ * NOTE: 
+ * This function does not change the w element.
+ */
+static inline void vmathV4SetXYZ_V( VmathVector4 *result, VmathVector3 vec );
+
+/*
+ * Get the x, y, and z elements of a 4-D vector
+ */
+static inline VmathVector3 vmathV4GetXYZ_V( VmathVector4 vec );
+
+/*
+ * Set the x element of a 4-D vector
+ */
+static inline void vmathV4SetX_V( VmathVector4 *result, float x );
+
+/*
+ * Set the y element of a 4-D vector
+ */
+static inline void vmathV4SetY_V( VmathVector4 *result, float y );
+
+/*
+ * Set the z element of a 4-D vector
+ */
+static inline void vmathV4SetZ_V( VmathVector4 *result, float z );
+
+/*
+ * Set the w element of a 4-D vector
+ */
+static inline void vmathV4SetW_V( VmathVector4 *result, float w );
+
+/*
+ * Get the x element of a 4-D vector
+ */
+static inline float vmathV4GetX_V( VmathVector4 vec );
+
+/*
+ * Get the y element of a 4-D vector
+ */
+static inline float vmathV4GetY_V( VmathVector4 vec );
+
+/*
+ * Get the z element of a 4-D vector
+ */
+static inline float vmathV4GetZ_V( VmathVector4 vec );
+
+/*
+ * Get the w element of a 4-D vector
+ */
+static inline float vmathV4GetW_V( VmathVector4 vec );
+
+/*
+ * Set an x, y, z, or w element of a 4-D vector by index
+ */
+static inline void vmathV4SetElem_V( VmathVector4 *result, int idx, float value );
+
+/*
+ * Get an x, y, z, or w element of a 4-D vector by index
+ */
+static inline float vmathV4GetElem_V( VmathVector4 vec, int idx );
+
+/*
+ * Add two 4-D vectors
+ */
+static inline VmathVector4 vmathV4Add_V( VmathVector4 vec0, VmathVector4 vec1 );
+
+/*
+ * Subtract a 4-D vector from another 4-D vector
+ */
+static inline VmathVector4 vmathV4Sub_V( VmathVector4 vec0, VmathVector4 vec1 );
+
+/*
+ * Multiply a 4-D vector by a scalar
+ */
+static inline VmathVector4 vmathV4ScalarMul_V( VmathVector4 vec, float scalar );
+
+/*
+ * Divide a 4-D vector by a scalar
+ */
+static inline VmathVector4 vmathV4ScalarDiv_V( VmathVector4 vec, float scalar );
+
+/*
+ * Negate all elements of a 4-D vector
+ */
+static inline VmathVector4 vmathV4Neg_V( VmathVector4 vec );
+
+/*
+ * Construct x axis as a 4-D vector (takes no arguments)
+ */
+static inline VmathVector4 vmathV4MakeXAxis_V( void );
+
+/*
+ * Construct y axis as a 4-D vector (takes no arguments)
+ */
+static inline VmathVector4 vmathV4MakeYAxis_V( void );
+
+/*
+ * Construct z axis as a 4-D vector (takes no arguments)
+ */
+static inline VmathVector4 vmathV4MakeZAxis_V( void );
+
+/*
+ * Construct w axis as a 4-D vector (takes no arguments)
+ */
+static inline VmathVector4 vmathV4MakeWAxis_V( void );
+
+/*
+ * Multiply two 4-D vectors per element
+ */
+static inline VmathVector4 vmathV4MulPerElem_V( VmathVector4 vec0, VmathVector4 vec1 );
+
+/*
+ * Divide two 4-D vectors per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function divf4.
+ */
+static inline VmathVector4 vmathV4DivPerElem_V( VmathVector4 vec0, VmathVector4 vec1 );
+
+/*
+ * Compute the reciprocal of a 4-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function recipf4.
+ */
+static inline VmathVector4 vmathV4RecipPerElem_V( VmathVector4 vec );
+
+/*
+ * Compute the square root of a 4-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function sqrtf4.
+ */
+static inline VmathVector4 vmathV4SqrtPerElem_V( VmathVector4 vec );
+
+/*
+ * Compute the reciprocal square root of a 4-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function rsqrtf4.
+ */
+static inline VmathVector4 vmathV4RsqrtPerElem_V( VmathVector4 vec );
+
+/*
+ * Compute the absolute value of a 4-D vector per element
+ */
+static inline VmathVector4 vmathV4AbsPerElem_V( VmathVector4 vec );
+
+/*
+ * Copy sign from one 4-D vector to another, per element
+ */
+static inline VmathVector4 vmathV4CopySignPerElem_V( VmathVector4 vec0, VmathVector4 vec1 );
+
+/*
+ * Maximum of two 4-D vectors per element
+ */
+static inline VmathVector4 vmathV4MaxPerElem_V( VmathVector4 vec0, VmathVector4 vec1 );
+
+/*
+ * Minimum of two 4-D vectors per element
+ */
+static inline VmathVector4 vmathV4MinPerElem_V( VmathVector4 vec0, VmathVector4 vec1 );
+
+/*
+ * Maximum element of a 4-D vector
+ */
+static inline float vmathV4MaxElem_V( VmathVector4 vec );
+
+/*
+ * Minimum element of a 4-D vector
+ */
+static inline float vmathV4MinElem_V( VmathVector4 vec );
+
+/*
+ * Compute the sum of all elements of a 4-D vector
+ */
+static inline float vmathV4Sum_V( VmathVector4 vec );
+
+/*
+ * Compute the dot product of two 4-D vectors
+ */
+static inline float vmathV4Dot_V( VmathVector4 vec0, VmathVector4 vec1 );
+
+/*
+ * Compute the square of the length of a 4-D vector
+ */
+static inline float vmathV4LengthSqr_V( VmathVector4 vec );
+
+/*
+ * Compute the length of a 4-D vector
+ */
+static inline float vmathV4Length_V( VmathVector4 vec );
+
+/*
+ * Normalize a 4-D vector
+ * NOTE: 
+ * The result is unpredictable when all elements of vec are at or near zero.
+ */
+static inline VmathVector4 vmathV4Normalize_V( VmathVector4 vec );
+
+/*
+ * Outer product of two 4-D vectors
+ */
+static inline VmathMatrix4 vmathV4Outer_V( VmathVector4 vec0, VmathVector4 vec1 );
+
+/*
+ * Linear interpolation between two 4-D vectors
+ * NOTE: 
+ * Does not clamp t between 0 and 1.
+ */
+static inline VmathVector4 vmathV4Lerp_V( float t, VmathVector4 vec0, VmathVector4 vec1 );
+
+/*
+ * Spherical linear interpolation between two 4-D vectors
+ * NOTE: 
+ * The result is unpredictable if the vectors point in opposite directions.
+ * Does not clamp t between 0 and 1.
+ */
+static inline VmathVector4 vmathV4Slerp_V( float t, VmathVector4 unitVec0, VmathVector4 unitVec1 );
+
+/*
+ * Conditionally select between two 4-D vectors
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline VmathVector4 vmathV4Select_V( VmathVector4 vec0, VmathVector4 vec1, unsigned int select1 );
+
+/*
+ * Store four 4-D vectors as half-floats
+ */
+static inline void vmathV4StoreHalfFloats_V( VmathVector4 vec0, VmathVector4 vec1, VmathVector4 vec2, VmathVector4 vec3, vec_ushort8 *twoQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 4-D vector
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathV4Print_V( VmathVector4 vec );
+
+/*
+ * Print a 4-D vector and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathV4Prints_V( VmathVector4 vec, const char *name );
+
+#endif
+
+/*
+ * Construct a 3-D point from x, y, and z elements
+ */
+static inline VmathPoint3 vmathP3MakeFromElems_V( float x, float y, float z );
+
+/*
+ * Copy elements from a 3-D vector into a 3-D point
+ */
+static inline VmathPoint3 vmathP3MakeFromV3_V( VmathVector3 vec );
+
+/*
+ * Set all elements of a 3-D point to the same scalar value
+ */
+static inline VmathPoint3 vmathP3MakeFromScalar_V( float scalar );
+
+/*
+ * Set vector float data in a 3-D point
+ */
+static inline VmathPoint3 vmathP3MakeFrom128_V( vec_float4 vf4 );
+
+/*
+ * Get vector float data from a 3-D point
+ */
+static inline vec_float4 vmathP3Get128_V( VmathPoint3 pnt );
+
+/*
+ * Set the x element of a 3-D point
+ */
+static inline void vmathP3SetX_V( VmathPoint3 *result, float x );
+
+/*
+ * Set the y element of a 3-D point
+ */
+static inline void vmathP3SetY_V( VmathPoint3 *result, float y );
+
+/*
+ * Set the z element of a 3-D point
+ */
+static inline void vmathP3SetZ_V( VmathPoint3 *result, float z );
+
+/*
+ * Get the x element of a 3-D point
+ */
+static inline float vmathP3GetX_V( VmathPoint3 pnt );
+
+/*
+ * Get the y element of a 3-D point
+ */
+static inline float vmathP3GetY_V( VmathPoint3 pnt );
+
+/*
+ * Get the z element of a 3-D point
+ */
+static inline float vmathP3GetZ_V( VmathPoint3 pnt );
+
+/*
+ * Set an x, y, or z element of a 3-D point by index
+ */
+static inline void vmathP3SetElem_V( VmathPoint3 *result, int idx, float value );
+
+/*
+ * Get an x, y, or z element of a 3-D point by index
+ */
+static inline float vmathP3GetElem_V( VmathPoint3 pnt, int idx );
+
+/*
+ * Subtract a 3-D point from another 3-D point
+ */
+static inline VmathVector3 vmathP3Sub_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
+
+/*
+ * Add a 3-D vector to a 3-D point
+ */
+static inline VmathPoint3 vmathP3AddV3_V( VmathPoint3 pnt, VmathVector3 vec );
+
+/*
+ * Subtract a 3-D vector from a 3-D point
+ */
+static inline VmathPoint3 vmathP3SubV3_V( VmathPoint3 pnt, VmathVector3 vec );
+
+/*
+ * Multiply two 3-D points per element
+ */
+static inline VmathPoint3 vmathP3MulPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
+
+/*
+ * Divide two 3-D points per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function divf4.
+ */
+static inline VmathPoint3 vmathP3DivPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
+
+/*
+ * Compute the reciprocal of a 3-D point per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function recipf4.
+ */
+static inline VmathPoint3 vmathP3RecipPerElem_V( VmathPoint3 pnt );
+
+/*
+ * Compute the square root of a 3-D point per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function sqrtf4.
+ */
+static inline VmathPoint3 vmathP3SqrtPerElem_V( VmathPoint3 pnt );
+
+/*
+ * Compute the reciprocal square root of a 3-D point per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function rsqrtf4.
+ */
+static inline VmathPoint3 vmathP3RsqrtPerElem_V( VmathPoint3 pnt );
+
+/*
+ * Compute the absolute value of a 3-D point per element
+ */
+static inline VmathPoint3 vmathP3AbsPerElem_V( VmathPoint3 pnt );
+
+/*
+ * Copy sign from one 3-D point to another, per element
+ */
+static inline VmathPoint3 vmathP3CopySignPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
+
+/*
+ * Maximum of two 3-D points per element
+ */
+static inline VmathPoint3 vmathP3MaxPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
+
+/*
+ * Minimum of two 3-D points per element
+ */
+static inline VmathPoint3 vmathP3MinPerElem_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
+
+/*
+ * Maximum element of a 3-D point
+ */
+static inline float vmathP3MaxElem_V( VmathPoint3 pnt );
+
+/*
+ * Minimum element of a 3-D point
+ */
+static inline float vmathP3MinElem_V( VmathPoint3 pnt );
+
+/*
+ * Compute the sum of all elements of a 3-D point
+ */
+static inline float vmathP3Sum_V( VmathPoint3 pnt );
+
+/*
+ * Apply uniform scale to a 3-D point
+ */
+static inline VmathPoint3 vmathP3Scale_V( VmathPoint3 pnt, float scaleVal );
+
+/*
+ * Apply non-uniform scale to a 3-D point
+ */
+static inline VmathPoint3 vmathP3NonUniformScale_V( VmathPoint3 pnt, VmathVector3 scaleVec );
+
+/*
+ * Scalar projection of a 3-D point on a unit-length 3-D vector
+ */
+static inline float vmathP3Projection_V( VmathPoint3 pnt, VmathVector3 unitVec );
+
+/*
+ * Compute the square of the distance of a 3-D point from the coordinate-system origin
+ */
+static inline float vmathP3DistSqrFromOrigin_V( VmathPoint3 pnt );
+
+/*
+ * Compute the distance of a 3-D point from the coordinate-system origin
+ */
+static inline float vmathP3DistFromOrigin_V( VmathPoint3 pnt );
+
+/*
+ * Compute the square of the distance between two 3-D points
+ */
+static inline float vmathP3DistSqr_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
+
+/*
+ * Compute the distance between two 3-D points
+ */
+static inline float vmathP3Dist_V( VmathPoint3 pnt0, VmathPoint3 pnt1 );
+
+/*
+ * Linear interpolation between two 3-D points
+ * NOTE: 
+ * Does not clamp t between 0 and 1.
+ */
+static inline VmathPoint3 vmathP3Lerp_V( float t, VmathPoint3 pnt0, VmathPoint3 pnt1 );
+
+/*
+ * Conditionally select between two 3-D points
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline VmathPoint3 vmathP3Select_V( VmathPoint3 pnt0, VmathPoint3 pnt1, unsigned int select1 );
+
+/*
+ * Store x, y, and z elements of a 3-D point in the first three words of a quadword.
+ * The value of the fourth word (the word with the highest address) remains unchanged.
+ */
+static inline void vmathP3StoreXYZ_V( VmathPoint3 pnt, vec_float4 *quad );
+
+/*
+ * Load four three-float 3-D points, stored in three quadwords
+ */
+static inline void vmathP3LoadXYZArray_V( VmathPoint3 *pnt0, VmathPoint3 *pnt1, VmathPoint3 *pnt2, VmathPoint3 *pnt3, const vec_float4 *threeQuads );
+
+/*
+ * Store four 3-D points in three quadwords
+ */
+static inline void vmathP3StoreXYZArray_V( VmathPoint3 pnt0, VmathPoint3 pnt1, VmathPoint3 pnt2, VmathPoint3 pnt3, vec_float4 *threeQuads );
+
+/*
+ * Store eight 3-D points as half-floats
+ */
+static inline void vmathP3StoreHalfFloats_V( VmathPoint3 pnt0, VmathPoint3 pnt1, VmathPoint3 pnt2, VmathPoint3 pnt3, VmathPoint3 pnt4, VmathPoint3 pnt5, VmathPoint3 pnt6, VmathPoint3 pnt7, vec_ushort8 *threeQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3-D point
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathP3Print_V( VmathPoint3 pnt );
+
+/*
+ * Print a 3-D point and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathP3Prints_V( VmathPoint3 pnt, const char *name );
+
+#endif
+
+/*
+ * Construct a quaternion from x, y, z, and w elements
+ */
+static inline VmathQuat vmathQMakeFromElems_V( float x, float y, float z, float w );
+
+/*
+ * Construct a quaternion from a 3-D vector and a scalar
+ */
+static inline VmathQuat vmathQMakeFromV3Scalar_V( VmathVector3 xyz, float w );
+
+/*
+ * Copy elements from a 4-D vector into a quaternion
+ */
+static inline VmathQuat vmathQMakeFromV4_V( VmathVector4 vec );
+
+/*
+ * Convert a rotation matrix to a unit-length quaternion
+ */
+static inline VmathQuat vmathQMakeFromM3_V( VmathMatrix3 rotMat );
+
+/*
+ * Set all elements of a quaternion to the same scalar value
+ */
+static inline VmathQuat vmathQMakeFromScalar_V( float scalar );
+
+/*
+ * Set vector float data in a quaternion
+ */
+static inline VmathQuat vmathQMakeFrom128_V( vec_float4 vf4 );
+
+/*
+ * Get vector float data from a quaternion
+ */
+static inline vec_float4 vmathQGet128_V( VmathQuat quat );
+
+/*
+ * Set the x, y, and z elements of a quaternion
+ * NOTE: 
+ * This function does not change the w element.
+ */
+static inline void vmathQSetXYZ_V( VmathQuat *result, VmathVector3 vec );
+
+/*
+ * Get the x, y, and z elements of a quaternion
+ */
+static inline VmathVector3 vmathQGetXYZ_V( VmathQuat quat );
+
+/*
+ * Set the x element of a quaternion
+ */
+static inline void vmathQSetX_V( VmathQuat *result, float x );
+
+/*
+ * Set the y element of a quaternion
+ */
+static inline void vmathQSetY_V( VmathQuat *result, float y );
+
+/*
+ * Set the z element of a quaternion
+ */
+static inline void vmathQSetZ_V( VmathQuat *result, float z );
+
+/*
+ * Set the w element of a quaternion
+ */
+static inline void vmathQSetW_V( VmathQuat *result, float w );
+
+/*
+ * Get the x element of a quaternion
+ */
+static inline float vmathQGetX_V( VmathQuat quat );
+
+/*
+ * Get the y element of a quaternion
+ */
+static inline float vmathQGetY_V( VmathQuat quat );
+
+/*
+ * Get the z element of a quaternion
+ */
+static inline float vmathQGetZ_V( VmathQuat quat );
+
+/*
+ * Get the w element of a quaternion
+ */
+static inline float vmathQGetW_V( VmathQuat quat );
+
+/*
+ * Set an x, y, z, or w element of a quaternion by index
+ */
+static inline void vmathQSetElem_V( VmathQuat *result, int idx, float value );
+
+/*
+ * Get an x, y, z, or w element of a quaternion by index
+ */
+static inline float vmathQGetElem_V( VmathQuat quat, int idx );
+
+/*
+ * Add two quaternions
+ */
+static inline VmathQuat vmathQAdd_V( VmathQuat quat0, VmathQuat quat1 );
+
+/*
+ * Subtract a quaternion from another quaternion
+ */
+static inline VmathQuat vmathQSub_V( VmathQuat quat0, VmathQuat quat1 );
+
+/*
+ * Multiply two quaternions
+ */
+static inline VmathQuat vmathQMul_V( VmathQuat quat0, VmathQuat quat1 );
+
+/*
+ * Multiply a quaternion by a scalar
+ */
+static inline VmathQuat vmathQScalarMul_V( VmathQuat quat, float scalar );
+
+/*
+ * Divide a quaternion by a scalar
+ */
+static inline VmathQuat vmathQScalarDiv_V( VmathQuat quat, float scalar );
+
+/*
+ * Negate all elements of a quaternion
+ */
+static inline VmathQuat vmathQNeg_V( VmathQuat quat );
+
+/*
+ * Construct an identity quaternion (takes no arguments)
+ */
+static inline VmathQuat vmathQMakeIdentity_V( void );
+
+/*
+ * Construct a quaternion to rotate between two unit-length 3-D vectors
+ * NOTE: 
+ * The result is unpredictable if unitVec0 and unitVec1 point in opposite directions.
+ */
+static inline VmathQuat vmathQMakeRotationArc_V( VmathVector3 unitVec0, VmathVector3 unitVec1 );
+
+/*
+ * Construct a quaternion to rotate around a unit-length 3-D vector
+ */
+static inline VmathQuat vmathQMakeRotationAxis_V( float radians, VmathVector3 unitVec );
+
+/*
+ * Construct a quaternion to rotate around the x axis
+ */
+static inline VmathQuat vmathQMakeRotationX_V( float radians );
+
+/*
+ * Construct a quaternion to rotate around the y axis
+ */
+static inline VmathQuat vmathQMakeRotationY_V( float radians );
+
+/*
+ * Construct a quaternion to rotate around the z axis
+ */
+static inline VmathQuat vmathQMakeRotationZ_V( float radians );
+
+/*
+ * Compute the conjugate of a quaternion
+ */
+static inline VmathQuat vmathQConj_V( VmathQuat quat );
+
+/*
+ * Use a unit-length quaternion to rotate a 3-D vector
+ */
+static inline VmathVector3 vmathQRotate_V( VmathQuat unitQuat, VmathVector3 vec );
+
+/*
+ * Compute the dot product of two quaternions
+ */
+static inline float vmathQDot_V( VmathQuat quat0, VmathQuat quat1 );
+
+/*
+ * Compute the norm of a quaternion
+ */
+static inline float vmathQNorm_V( VmathQuat quat );
+
+/*
+ * Compute the length of a quaternion
+ */
+static inline float vmathQLength_V( VmathQuat quat );
+
+/*
+ * Normalize a quaternion
+ * NOTE: 
+ * The result is unpredictable when all elements of quat are at or near zero.
+ */
+static inline VmathQuat vmathQNormalize_V( VmathQuat quat );
+
+/*
+ * Linear interpolation between two quaternions
+ * NOTE: 
+ * Does not clamp t between 0 and 1.
+ */
+static inline VmathQuat vmathQLerp_V( float t, VmathQuat quat0, VmathQuat quat1 );
+
+/*
+ * Spherical linear interpolation between two quaternions
+ * NOTE: 
+ * Interpolates along the shortest path between orientations.
+ * Does not clamp t between 0 and 1.
+ */
+static inline VmathQuat vmathQSlerp_V( float t, VmathQuat unitQuat0, VmathQuat unitQuat1 );
+
+/*
+ * Spherical quadrangle interpolation
+ */
+static inline VmathQuat vmathQSquad_V( float t, VmathQuat unitQuat0, VmathQuat unitQuat1, VmathQuat unitQuat2, VmathQuat unitQuat3 );
+
+/*
+ * Conditionally select between two quaternions
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline VmathQuat vmathQSelect_V( VmathQuat quat0, VmathQuat quat1, unsigned int select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a quaternion
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathQPrint_V( VmathQuat quat );
+
+/*
+ * Print a quaternion and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathQPrints_V( VmathQuat quat, const char *name );
+
+#endif
+
+/*
+ * Construct a 3x3 matrix containing the specified columns
+ */
+static inline VmathMatrix3 vmathM3MakeFromCols_V( VmathVector3 col0, VmathVector3 col1, VmathVector3 col2 );
+
+/*
+ * Construct a 3x3 rotation matrix from a unit-length quaternion
+ */
+static inline VmathMatrix3 vmathM3MakeFromQ_V( VmathQuat unitQuat );
+
+/*
+ * Set all elements of a 3x3 matrix to the same scalar value
+ */
+static inline VmathMatrix3 vmathM3MakeFromScalar_V( float scalar );
+
+/*
+ * Set column 0 of a 3x3 matrix
+ */
+static inline void vmathM3SetCol0_V( VmathMatrix3 *result, VmathVector3 col0 );
+
+/*
+ * Set column 1 of a 3x3 matrix
+ */
+static inline void vmathM3SetCol1_V( VmathMatrix3 *result, VmathVector3 col1 );
+
+/*
+ * Set column 2 of a 3x3 matrix
+ */
+static inline void vmathM3SetCol2_V( VmathMatrix3 *result, VmathVector3 col2 );
+
+/*
+ * Get column 0 of a 3x3 matrix
+ */
+static inline VmathVector3 vmathM3GetCol0_V( VmathMatrix3 mat );
+
+/*
+ * Get column 1 of a 3x3 matrix
+ */
+static inline VmathVector3 vmathM3GetCol1_V( VmathMatrix3 mat );
+
+/*
+ * Get column 2 of a 3x3 matrix
+ */
+static inline VmathVector3 vmathM3GetCol2_V( VmathMatrix3 mat );
+
+/*
+ * Set the column of a 3x3 matrix referred to by the specified index
+ */
+static inline void vmathM3SetCol_V( VmathMatrix3 *result, int col, VmathVector3 vec );
+
+/*
+ * Set the row of a 3x3 matrix referred to by the specified index
+ */
+static inline void vmathM3SetRow_V( VmathMatrix3 *result, int row, VmathVector3 vec );
+
+/*
+ * Get the column of a 3x3 matrix referred to by the specified index
+ */
+static inline VmathVector3 vmathM3GetCol_V( VmathMatrix3 mat, int col );
+
+/*
+ * Get the row of a 3x3 matrix referred to by the specified index
+ */
+static inline VmathVector3 vmathM3GetRow_V( VmathMatrix3 mat, int row );
+
+/*
+ * Set the element of a 3x3 matrix referred to by column and row indices
+ */
+static inline void vmathM3SetElem_V( VmathMatrix3 *result, int col, int row, float val );
+
+/*
+ * Get the element of a 3x3 matrix referred to by column and row indices
+ */
+static inline float vmathM3GetElem_V( VmathMatrix3 mat, int col, int row );
+
+/*
+ * Add two 3x3 matrices
+ */
+static inline VmathMatrix3 vmathM3Add_V( VmathMatrix3 mat0, VmathMatrix3 mat1 );
+
+/*
+ * Subtract a 3x3 matrix from another 3x3 matrix
+ */
+static inline VmathMatrix3 vmathM3Sub_V( VmathMatrix3 mat0, VmathMatrix3 mat1 );
+
+/*
+ * Negate all elements of a 3x3 matrix
+ */
+static inline VmathMatrix3 vmathM3Neg_V( VmathMatrix3 mat );
+
+/*
+ * Multiply a 3x3 matrix by a scalar
+ */
+static inline VmathMatrix3 vmathM3ScalarMul_V( VmathMatrix3 mat, float scalar );
+
+/*
+ * Multiply a 3x3 matrix by a 3-D vector
+ */
+static inline VmathVector3 vmathM3MulV3_V( VmathMatrix3 mat, VmathVector3 vec );
+
+/*
+ * Multiply two 3x3 matrices
+ */
+static inline VmathMatrix3 vmathM3Mul_V( VmathMatrix3 mat0, VmathMatrix3 mat1 );
+
+/*
+ * Construct an identity 3x3 matrix (takes no arguments)
+ */
+static inline VmathMatrix3 vmathM3MakeIdentity_V( void );
+
+/*
+ * Construct a 3x3 matrix to rotate around the x axis
+ */
+static inline VmathMatrix3 vmathM3MakeRotationX_V( float radians );
+
+/*
+ * Construct a 3x3 matrix to rotate around the y axis
+ */
+static inline VmathMatrix3 vmathM3MakeRotationY_V( float radians );
+
+/*
+ * Construct a 3x3 matrix to rotate around the z axis
+ */
+static inline VmathMatrix3 vmathM3MakeRotationZ_V( float radians );
+
+/*
+ * Construct a 3x3 matrix to rotate around the x, y, and z axes
+ */
+static inline VmathMatrix3 vmathM3MakeRotationZYX_V( VmathVector3 radiansXYZ );
+
+/*
+ * Construct a 3x3 matrix to rotate around a unit-length 3-D vector
+ */
+static inline VmathMatrix3 vmathM3MakeRotationAxis_V( float radians, VmathVector3 unitVec );
+
+/*
+ * Construct a rotation matrix from a unit-length quaternion
+ */
+static inline VmathMatrix3 vmathM3MakeRotationQ_V( VmathQuat unitQuat );
+
+/*
+ * Construct a 3x3 matrix to perform scaling
+ */
+static inline VmathMatrix3 vmathM3MakeScale_V( VmathVector3 scaleVec );
+
+/*
+ * Append (post-multiply) a scale transformation to a 3x3 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline VmathMatrix3 vmathM3AppendScale_V( VmathMatrix3 mat, VmathVector3 scaleVec );
+
+/*
+ * Prepend (pre-multiply) a scale transformation to a 3x3 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline VmathMatrix3 vmathM3PrependScale_V( VmathVector3 scaleVec, VmathMatrix3 mat );
+
+/*
+ * Multiply two 3x3 matrices per element
+ */
+static inline VmathMatrix3 vmathM3MulPerElem_V( VmathMatrix3 mat0, VmathMatrix3 mat1 );
+
+/*
+ * Compute the absolute value of a 3x3 matrix per element
+ */
+static inline VmathMatrix3 vmathM3AbsPerElem_V( VmathMatrix3 mat );
+
+/*
+ * Transpose of a 3x3 matrix
+ */
+static inline VmathMatrix3 vmathM3Transpose_V( VmathMatrix3 mat );
+
+/*
+ * Compute the inverse of a 3x3 matrix
+ * NOTE: 
+ * Result is unpredictable when the determinant of mat is equal to or near 0.
+ */
+static inline VmathMatrix3 vmathM3Inverse_V( VmathMatrix3 mat );
+
+/*
+ * Determinant of a 3x3 matrix
+ */
+static inline float vmathM3Determinant_V( VmathMatrix3 mat );
+
+/*
+ * Conditionally select between two 3x3 matrices
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline VmathMatrix3 vmathM3Select_V( VmathMatrix3 mat0, VmathMatrix3 mat1, unsigned int select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3x3 matrix
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathM3Print_V( VmathMatrix3 mat );
+
+/*
+ * Print a 3x3 matrix and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathM3Prints_V( VmathMatrix3 mat, const char *name );
+
+#endif
+
+/*
+ * Construct a 4x4 matrix containing the specified columns
+ */
+static inline VmathMatrix4 vmathM4MakeFromCols_V( VmathVector4 col0, VmathVector4 col1, VmathVector4 col2, VmathVector4 col3 );
+
+/*
+ * Construct a 4x4 matrix from a 3x4 transformation matrix
+ */
+static inline VmathMatrix4 vmathM4MakeFromT3_V( VmathTransform3 mat );
+
+/*
+ * Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector
+ */
+static inline VmathMatrix4 vmathM4MakeFromM3V3_V( VmathMatrix3 mat, VmathVector3 translateVec );
+
+/*
+ * Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector
+ */
+static inline VmathMatrix4 vmathM4MakeFromQV3_V( VmathQuat unitQuat, VmathVector3 translateVec );
+
+/*
+ * Set all elements of a 4x4 matrix to the same scalar value
+ */
+static inline VmathMatrix4 vmathM4MakeFromScalar_V( float scalar );
+
+/*
+ * Set the upper-left 3x3 submatrix of a 4x4 matrix
+ * NOTE: 
+ * This function does not change the bottom row elements.
+ */
+static inline void vmathM4SetUpper3x3_V( VmathMatrix4 *result, VmathMatrix3 mat3 );
+
+/*
+ * Get the upper-left 3x3 submatrix of a 4x4 matrix
+ */
+static inline VmathMatrix3 vmathM4GetUpper3x3_V( VmathMatrix4 mat );
+
+/*
+ * Set the translation component of a 4x4 matrix
+ * NOTE: 
+ * This function does not change the bottom row elements.
+ */
+static inline void vmathM4SetTranslation_V( VmathMatrix4 *result, VmathVector3 translateVec );
+
+/*
+ * Get the translation component of a 4x4 matrix
+ */
+static inline VmathVector3 vmathM4GetTranslation_V( VmathMatrix4 mat );
+
+/*
+ * Set column 0 of a 4x4 matrix
+ */
+static inline void vmathM4SetCol0_V( VmathMatrix4 *result, VmathVector4 col0 );
+
+/*
+ * Set column 1 of a 4x4 matrix
+ */
+static inline void vmathM4SetCol1_V( VmathMatrix4 *result, VmathVector4 col1 );
+
+/*
+ * Set column 2 of a 4x4 matrix
+ */
+static inline void vmathM4SetCol2_V( VmathMatrix4 *result, VmathVector4 col2 );
+
+/*
+ * Set column 3 of a 4x4 matrix
+ */
+static inline void vmathM4SetCol3_V( VmathMatrix4 *result, VmathVector4 col3 );
+
+/*
+ * Get column 0 of a 4x4 matrix
+ */
+static inline VmathVector4 vmathM4GetCol0_V( VmathMatrix4 mat );
+
+/*
+ * Get column 1 of a 4x4 matrix
+ */
+static inline VmathVector4 vmathM4GetCol1_V( VmathMatrix4 mat );
+
+/*
+ * Get column 2 of a 4x4 matrix
+ */
+static inline VmathVector4 vmathM4GetCol2_V( VmathMatrix4 mat );
+
+/*
+ * Get column 3 of a 4x4 matrix
+ */
+static inline VmathVector4 vmathM4GetCol3_V( VmathMatrix4 mat );
+
+/*
+ * Set the column of a 4x4 matrix referred to by the specified index
+ */
+static inline void vmathM4SetCol_V( VmathMatrix4 *result, int col, VmathVector4 vec );
+
+/*
+ * Set the row of a 4x4 matrix referred to by the specified index
+ */
+static inline void vmathM4SetRow_V( VmathMatrix4 *result, int row, VmathVector4 vec );
+
+/*
+ * Get the column of a 4x4 matrix referred to by the specified index
+ */
+static inline VmathVector4 vmathM4GetCol_V( VmathMatrix4 mat, int col );
+
+/*
+ * Get the row of a 4x4 matrix referred to by the specified index
+ */
+static inline VmathVector4 vmathM4GetRow_V( VmathMatrix4 mat, int row );
+
+/*
+ * Set the element of a 4x4 matrix referred to by column and row indices
+ */
+static inline void vmathM4SetElem_V( VmathMatrix4 *result, int col, int row, float val );
+
+/*
+ * Get the element of a 4x4 matrix referred to by column and row indices
+ */
+static inline float vmathM4GetElem_V( VmathMatrix4 mat, int col, int row );
+
+/*
+ * Add two 4x4 matrices
+ */
+static inline VmathMatrix4 vmathM4Add_V( VmathMatrix4 mat0, VmathMatrix4 mat1 );
+
+/*
+ * Subtract a 4x4 matrix from another 4x4 matrix
+ */
+static inline VmathMatrix4 vmathM4Sub_V( VmathMatrix4 mat0, VmathMatrix4 mat1 );
+
+/*
+ * Negate all elements of a 4x4 matrix
+ */
+static inline VmathMatrix4 vmathM4Neg_V( VmathMatrix4 mat );
+
+/*
+ * Multiply a 4x4 matrix by a scalar
+ */
+static inline VmathMatrix4 vmathM4ScalarMul_V( VmathMatrix4 mat, float scalar );
+
+/*
+ * Multiply a 4x4 matrix by a 4-D vector
+ */
+static inline VmathVector4 vmathM4MulV4_V( VmathMatrix4 mat, VmathVector4 vec );
+
+/*
+ * Multiply a 4x4 matrix by a 3-D vector
+ */
+static inline VmathVector4 vmathM4MulV3_V( VmathMatrix4 mat, VmathVector3 vec );
+
+/*
+ * Multiply a 4x4 matrix by a 3-D point
+ */
+static inline VmathVector4 vmathM4MulP3_V( VmathMatrix4 mat, VmathPoint3 pnt );
+
+/*
+ * Multiply two 4x4 matrices
+ */
+static inline VmathMatrix4 vmathM4Mul_V( VmathMatrix4 mat0, VmathMatrix4 mat1 );
+
+/*
+ * Multiply a 4x4 matrix by a 3x4 transformation matrix
+ */
+static inline VmathMatrix4 vmathM4MulT3_V( VmathMatrix4 mat, VmathTransform3 tfrm );
+
+/*
+ * Construct an identity 4x4 matrix (takes no arguments; returned by value)
+ */
+static inline VmathMatrix4 vmathM4MakeIdentity_V( void );
+
+/*
+ * Construct a 4x4 matrix to rotate around the x axis
+ */
+static inline VmathMatrix4 vmathM4MakeRotationX_V( float radians );
+
+/*
+ * Construct a 4x4 matrix to rotate around the y axis
+ */
+static inline VmathMatrix4 vmathM4MakeRotationY_V( float radians );
+
+/*
+ * Construct a 4x4 matrix to rotate around the z axis
+ */
+static inline VmathMatrix4 vmathM4MakeRotationZ_V( float radians );
+
+/*
+ * Construct a 4x4 matrix to rotate around the x, y, and z axes
+ */
+static inline VmathMatrix4 vmathM4MakeRotationZYX_V( VmathVector3 radiansXYZ );
+
+/*
+ * Construct a 4x4 matrix to rotate around a unit-length 3-D vector
+ */
+static inline VmathMatrix4 vmathM4MakeRotationAxis_V( float radians, VmathVector3 unitVec );
+
+/*
+ * Construct a 4x4 rotation matrix from a unit-length quaternion
+ */
+static inline VmathMatrix4 vmathM4MakeRotationQ_V( VmathQuat unitQuat );
+
+/*
+ * Construct a 4x4 matrix to perform scaling
+ */
+static inline VmathMatrix4 vmathM4MakeScale_V( VmathVector3 scaleVec );
+
+/*
+ * Construct a 4x4 matrix to perform translation
+ */
+static inline VmathMatrix4 vmathM4MakeTranslation_V( VmathVector3 translateVec );
+
+/*
+ * Construct a viewing matrix based on eye position, position looked at, and up direction
+ */
+static inline VmathMatrix4 vmathM4MakeLookAt_V( VmathPoint3 eyePos, VmathPoint3 lookAtPos, VmathVector3 upVec );
+
+/*
+ * Construct a perspective projection matrix
+ */
+static inline VmathMatrix4 vmathM4MakePerspective_V( float fovyRadians, float aspect, float zNear, float zFar );
+
+/*
+ * Construct a perspective projection matrix based on frustum
+ */
+static inline VmathMatrix4 vmathM4MakeFrustum_V( float left, float right, float bottom, float top, float zNear, float zFar );
+
+/*
+ * Construct an orthographic projection matrix
+ */
+static inline VmathMatrix4 vmathM4MakeOrthographic_V( float left, float right, float bottom, float top, float zNear, float zFar );
+
+/*
+ * Append (post-multiply) a scale transformation to a 4x4 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline VmathMatrix4 vmathM4AppendScale_V( VmathMatrix4 mat, VmathVector3 scaleVec );
+
+/*
+ * Prepend (pre-multiply) a scale transformation to a 4x4 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline VmathMatrix4 vmathM4PrependScale_V( VmathVector3 scaleVec, VmathMatrix4 mat );
+
+/*
+ * Multiply two 4x4 matrices per element
+ */
+static inline VmathMatrix4 vmathM4MulPerElem_V( VmathMatrix4 mat0, VmathMatrix4 mat1 );
+
+/*
+ * Compute the absolute value of a 4x4 matrix per element
+ */
+static inline VmathMatrix4 vmathM4AbsPerElem_V( VmathMatrix4 mat );
+
+/*
+ * Compute the transpose of a 4x4 matrix
+ */
+static inline VmathMatrix4 vmathM4Transpose_V( VmathMatrix4 mat );
+
+/*
+ * Compute the inverse of a 4x4 matrix
+ * NOTE: 
+ * Result is unpredictable when the determinant of mat is equal to or near 0.
+ */
+static inline VmathMatrix4 vmathM4Inverse_V( VmathMatrix4 mat );
+
+/*
+ * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix
+ * NOTE: 
+ * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.  The result is unpredictable when the determinant of mat is equal to or near 0.
+ */
+static inline VmathMatrix4 vmathM4AffineInverse_V( VmathMatrix4 mat );
+
+/*
+ * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix
+ * NOTE: 
+ * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.
+ */
+static inline VmathMatrix4 vmathM4OrthoInverse_V( VmathMatrix4 mat );
+
+/*
+ * Compute the determinant of a 4x4 matrix
+ */
+static inline float vmathM4Determinant_V( VmathMatrix4 mat );
+
+/*
+ * Conditionally select between two 4x4 matrices
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline VmathMatrix4 vmathM4Select_V( VmathMatrix4 mat0, VmathMatrix4 mat1, unsigned int select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 4x4 matrix
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathM4Print_V( VmathMatrix4 mat );
+
+/*
+ * Print a 4x4 matrix and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathM4Prints_V( VmathMatrix4 mat, const char *name );
+
+#endif
+
+/*
+ * Construct a 3x4 transformation matrix containing the specified columns
+ */
+static inline VmathTransform3 vmathT3MakeFromCols_V( VmathVector3 col0, VmathVector3 col1, VmathVector3 col2, VmathVector3 col3 );
+
+/*
+ * Construct a 3x4 transformation matrix from a 3x3 matrix and a 3-D vector
+ */
+static inline VmathTransform3 vmathT3MakeFromM3V3_V( VmathMatrix3 tfrm, VmathVector3 translateVec );
+
+/*
+ * Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector
+ */
+static inline VmathTransform3 vmathT3MakeFromQV3_V( VmathQuat unitQuat, VmathVector3 translateVec );
+
+/*
+ * Construct a 3x4 transformation matrix with all elements set to the same scalar value
+ */
+static inline VmathTransform3 vmathT3MakeFromScalar_V( float scalar );
+
+/*
+ * Set the upper-left 3x3 submatrix of a 3x4 transformation matrix
+ */
+static inline void vmathT3SetUpper3x3_V( VmathTransform3 *result, VmathMatrix3 mat3 );
+
+/*
+ * Get the upper-left 3x3 submatrix of a 3x4 transformation matrix
+ */
+static inline VmathMatrix3 vmathT3GetUpper3x3_V( VmathTransform3 tfrm );
+
+/*
+ * Set the translation component of a 3x4 transformation matrix
+ */
+static inline void vmathT3SetTranslation_V( VmathTransform3 *result, VmathVector3 translateVec );
+
+/*
+ * Get the translation component of a 3x4 transformation matrix
+ */
+static inline VmathVector3 vmathT3GetTranslation_V( VmathTransform3 tfrm );
+
+/*
+ * Set column 0 of a 3x4 transformation matrix
+ */
+static inline void vmathT3SetCol0_V( VmathTransform3 *result, VmathVector3 col0 );
+
+/*
+ * Set column 1 of a 3x4 transformation matrix
+ */
+static inline void vmathT3SetCol1_V( VmathTransform3 *result, VmathVector3 col1 );
+
+/*
+ * Set column 2 of a 3x4 transformation matrix
+ */
+static inline void vmathT3SetCol2_V( VmathTransform3 *result, VmathVector3 col2 );
+
+/*
+ * Set column 3 of a 3x4 transformation matrix
+ */
+static inline void vmathT3SetCol3_V( VmathTransform3 *result, VmathVector3 col3 );
+
+/*
+ * Get column 0 of a 3x4 transformation matrix
+ */
+static inline VmathVector3 vmathT3GetCol0_V( VmathTransform3 tfrm );
+
+/*
+ * Get column 1 of a 3x4 transformation matrix
+ */
+static inline VmathVector3 vmathT3GetCol1_V( VmathTransform3 tfrm );
+
+/*
+ * Get column 2 of a 3x4 transformation matrix
+ */
+static inline VmathVector3 vmathT3GetCol2_V( VmathTransform3 tfrm );
+
+/*
+ * Get column 3 of a 3x4 transformation matrix
+ */
+static inline VmathVector3 vmathT3GetCol3_V( VmathTransform3 tfrm );
+
+/*
+ * Set the column of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline void vmathT3SetCol_V( VmathTransform3 *result, int col, VmathVector3 vec );
+
+/*
+ * Set the row of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline void vmathT3SetRow_V( VmathTransform3 *result, int row, VmathVector4 vec );
+
+/*
+ * Get the column of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline VmathVector3 vmathT3GetCol_V( VmathTransform3 tfrm, int col );
+
+/*
+ * Get the row of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline VmathVector4 vmathT3GetRow_V( VmathTransform3 tfrm, int row );
+
+/*
+ * Set the element of a 3x4 transformation matrix referred to by column and row indices
+ */
+static inline void vmathT3SetElem_V( VmathTransform3 *result, int col, int row, float val );
+
+/*
+ * Get the element of a 3x4 transformation matrix referred to by column and row indices
+ */
+static inline float vmathT3GetElem_V( VmathTransform3 tfrm, int col, int row );
+
+/*
+ * Multiply a 3x4 transformation matrix by a 3-D vector
+ */
+static inline VmathVector3 vmathT3MulV3_V( VmathTransform3 tfrm, VmathVector3 vec );
+
+/*
+ * Multiply a 3x4 transformation matrix by a 3-D point
+ */
+static inline VmathPoint3 vmathT3MulP3_V( VmathTransform3 tfrm, VmathPoint3 pnt );
+
+/*
+ * Multiply two 3x4 transformation matrices
+ */
+static inline VmathTransform3 vmathT3Mul_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1 );
+
+/*
+ * Construct an identity 3x4 transformation matrix (takes no arguments; returned by value)
+ */
+static inline VmathTransform3 vmathT3MakeIdentity_V( void );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the x axis
+ */
+static inline VmathTransform3 vmathT3MakeRotationX_V( float radians );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the y axis
+ */
+static inline VmathTransform3 vmathT3MakeRotationY_V( float radians );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the z axis
+ */
+static inline VmathTransform3 vmathT3MakeRotationZ_V( float radians );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the x, y, and z axes
+ */
+static inline VmathTransform3 vmathT3MakeRotationZYX_V( VmathVector3 radiansXYZ );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector
+ */
+static inline VmathTransform3 vmathT3MakeRotationAxis_V( float radians, VmathVector3 unitVec );
+
+/*
+ * Construct a 3x4 transformation matrix for the rotation given by a unit-length quaternion
+ */
+static inline VmathTransform3 vmathT3MakeRotationQ_V( VmathQuat unitQuat );
+
+/*
+ * Construct a 3x4 transformation matrix to perform scaling
+ */
+static inline VmathTransform3 vmathT3MakeScale_V( VmathVector3 scaleVec );
+
+/*
+ * Construct a 3x4 transformation matrix to perform translation
+ */
+static inline VmathTransform3 vmathT3MakeTranslation_V( VmathVector3 translateVec );
+
+/*
+ * Append (post-multiply) a scale transformation to a 3x4 transformation matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline VmathTransform3 vmathT3AppendScale_V( VmathTransform3 tfrm, VmathVector3 scaleVec );
+
+/*
+ * Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline VmathTransform3 vmathT3PrependScale_V( VmathVector3 scaleVec, VmathTransform3 tfrm );
+
+/*
+ * Multiply two 3x4 transformation matrices per element
+ */
+static inline VmathTransform3 vmathT3MulPerElem_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1 );
+
+/*
+ * Compute the absolute value of a 3x4 transformation matrix per element
+ */
+static inline VmathTransform3 vmathT3AbsPerElem_V( VmathTransform3 tfrm );
+
+/*
+ * Compute the inverse of a 3x4 transformation matrix
+ * NOTE: 
+ * Result is unpredictable when the determinant of the upper-left 3x3 submatrix is equal to or near 0.
+ */
+static inline VmathTransform3 vmathT3Inverse_V( VmathTransform3 tfrm );
+
+/*
+ * Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix
+ * NOTE: 
+ * This can be used to achieve better performance than a general inverse when the specified 3x4 transformation matrix meets the given restrictions.
+ */
+static inline VmathTransform3 vmathT3OrthoInverse_V( VmathTransform3 tfrm );
+
+/*
+ * Conditionally select between two 3x4 transformation matrices
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline VmathTransform3 vmathT3Select_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1, unsigned int select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3x4 transformation matrix
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathT3Print_V( VmathTransform3 tfrm );
+
+/*
+ * Print a 3x4 transformation matrix and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathT3Prints_V( VmathTransform3 tfrm, const char *name );
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#include "vectormath_aos.h"
+#include "vec_aos_v.h"
+#include "quat_aos_v.h"
+#include "mat_aos_v.h"
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/spu/c/vectormath_soa.h b/Extras/vectormathlibrary/include/vectormath/spu/c/vectormath_soa.h
index c82afa234..ad1e841d3 100644
--- a/Extras/vectormathlibrary/include/vectormath/spu/c/vectormath_soa.h
+++ b/Extras/vectormathlibrary/include/vectormath/spu/c/vectormath_soa.h
@@ -1,2012 +1,2012 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_SOA_C_H
-#define _VECTORMATH_SOA_C_H
-
-#include <math.h>
-#include <spu_intrinsics.h>
-#include "vectormath_aos.h"
-
-#ifdef _VECTORMATH_DEBUG
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-#ifndef _VECTORMATH_SOA_C_TYPES_H
-#define _VECTORMATH_SOA_C_TYPES_H
-
-/* A set of four 3-D vectors in structure-of-arrays format
- */
-typedef struct _VmathSoaVector3
-{
-    vec_float4 x;
-    vec_float4 y;
-    vec_float4 z;
-} VmathSoaVector3;
-
-/* A set of four 4-D vectors in structure-of-arrays format
- */
-typedef struct _VmathSoaVector4
-{
-    vec_float4 x;
-    vec_float4 y;
-    vec_float4 z;
-    vec_float4 w;
-} VmathSoaVector4;
-
-/* A set of four 3-D points in structure-of-arrays format
- */
-typedef struct _VmathSoaPoint3
-{
-    vec_float4 x;
-    vec_float4 y;
-    vec_float4 z;
-} VmathSoaPoint3;
-
-/* A set of four quaternions in structure-of-arrays format
- */
-typedef struct _VmathSoaQuat
-{
-    vec_float4 x;
-    vec_float4 y;
-    vec_float4 z;
-    vec_float4 w;
-} VmathSoaQuat;
-
-/* A set of four 3x3 matrices in structure-of-arrays format
- */
-typedef struct _VmathSoaMatrix3
-{
-    VmathSoaVector3 col0;
-    VmathSoaVector3 col1;
-    VmathSoaVector3 col2;
-} VmathSoaMatrix3;
-
-/* A set of four 4x4 matrices in structure-of-arrays format
- */
-typedef struct _VmathSoaMatrix4
-{
-    VmathSoaVector4 col0;
-    VmathSoaVector4 col1;
-    VmathSoaVector4 col2;
-    VmathSoaVector4 col3;
-} VmathSoaMatrix4;
-
-/* A set of four 3x4 transformation matrices in structure-of-arrays format
- */
-typedef struct _VmathSoaTransform3
-{
-    VmathSoaVector3 col0;
-    VmathSoaVector3 col1;
-    VmathSoaVector3 col2;
-    VmathSoaVector3 col3;
-} VmathSoaTransform3;
-
-#endif
-
-/*
- * Copy a 3-D vector
- */
-static inline void vmathSoaV3Copy( VmathSoaVector3 *result, const VmathSoaVector3 *vec );
-
-/*
- * Construct a 3-D vector from x, y, and z elements
- */
-static inline void vmathSoaV3MakeFromElems( VmathSoaVector3 *result, vec_float4 x, vec_float4 y, vec_float4 z );
-
-/*
- * Copy elements from a 3-D point into a 3-D vector
- */
-static inline void vmathSoaV3MakeFromP3( VmathSoaVector3 *result, const VmathSoaPoint3 *pnt );
-
-/*
- * Set all elements of a 3-D vector to the same scalar value
- */
-static inline void vmathSoaV3MakeFromScalar( VmathSoaVector3 *result, vec_float4 scalar );
-
-/*
- * Replicate an AoS 3-D vector
- */
-static inline void vmathSoaV3MakeFromAos( VmathSoaVector3 *result, const VmathVector3 *vec );
-
-/*
- * Insert four AoS 3-D vectors
- */
-static inline void vmathSoaV3MakeFrom4Aos( VmathSoaVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1, const VmathVector3 *vec2, const VmathVector3 *vec3 );
-
-/*
- * Extract four AoS 3-D vectors
- */
-static inline void vmathSoaV3Get4Aos( const VmathSoaVector3 *vec, VmathVector3 *result0, VmathVector3 *result1, VmathVector3 *result2, VmathVector3 *result3 );
-
-/*
- * Set the x element of a 3-D vector
- */
-static inline void vmathSoaV3SetX( VmathSoaVector3 *result, vec_float4 x );
-
-/*
- * Set the y element of a 3-D vector
- */
-static inline void vmathSoaV3SetY( VmathSoaVector3 *result, vec_float4 y );
-
-/*
- * Set the z element of a 3-D vector
- */
-static inline void vmathSoaV3SetZ( VmathSoaVector3 *result, vec_float4 z );
-
-/*
- * Get the x element of a 3-D vector
- */
-static inline vec_float4 vmathSoaV3GetX( const VmathSoaVector3 *vec );
-
-/*
- * Get the y element of a 3-D vector
- */
-static inline vec_float4 vmathSoaV3GetY( const VmathSoaVector3 *vec );
-
-/*
- * Get the z element of a 3-D vector
- */
-static inline vec_float4 vmathSoaV3GetZ( const VmathSoaVector3 *vec );
-
-/*
- * Set an x, y, or z element of a 3-D vector by index
- */
-static inline void vmathSoaV3SetElem( VmathSoaVector3 *result, int idx, vec_float4 value );
-
-/*
- * Get an x, y, or z element of a 3-D vector by index
- */
-static inline vec_float4 vmathSoaV3GetElem( const VmathSoaVector3 *vec, int idx );
-
-/*
- * Add two 3-D vectors
- */
-static inline void vmathSoaV3Add( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
-
-/*
- * Subtract a 3-D vector from another 3-D vector
- */
-static inline void vmathSoaV3Sub( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
-
-/*
- * Add a 3-D vector to a 3-D point
- */
-static inline void vmathSoaV3AddP3( VmathSoaPoint3 *result, const VmathSoaVector3 *vec, const VmathSoaPoint3 *pnt );
-
-/*
- * Multiply a 3-D vector by a scalar
- */
-static inline void vmathSoaV3ScalarMul( VmathSoaVector3 *result, const VmathSoaVector3 *vec, vec_float4 scalar );
-
-/*
- * Divide a 3-D vector by a scalar
- */
-static inline void vmathSoaV3ScalarDiv( VmathSoaVector3 *result, const VmathSoaVector3 *vec, vec_float4 scalar );
-
-/*
- * Negate all elements of a 3-D vector
- */
-static inline void vmathSoaV3Neg( VmathSoaVector3 *result, const VmathSoaVector3 *vec );
-
-/*
- * Construct x axis
- */
-static inline void vmathSoaV3MakeXAxis( VmathSoaVector3 *result );
-
-/*
- * Construct y axis
- */
-static inline void vmathSoaV3MakeYAxis( VmathSoaVector3 *result );
-
-/*
- * Construct z axis
- */
-static inline void vmathSoaV3MakeZAxis( VmathSoaVector3 *result );
-
-/*
- * Multiply two 3-D vectors per element
- */
-static inline void vmathSoaV3MulPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
-
-/*
- * Divide two 3-D vectors per element
- * NOTE: 
- * Floating-point behavior matches standard library function divf4.
- */
-static inline void vmathSoaV3DivPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
-
-/*
- * Compute the reciprocal of a 3-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function recipf4.
- */
-static inline void vmathSoaV3RecipPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec );
-
-/*
- * Compute the square root of a 3-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function sqrtf4.
- */
-static inline void vmathSoaV3SqrtPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec );
-
-/*
- * Compute the reciprocal square root of a 3-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function rsqrtf4.
- */
-static inline void vmathSoaV3RsqrtPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec );
-
-/*
- * Compute the absolute value of a 3-D vector per element
- */
-static inline void vmathSoaV3AbsPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec );
-
-/*
- * Copy sign from one 3-D vector to another, per element
- */
-static inline void vmathSoaV3CopySignPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
-
-/*
- * Maximum of two 3-D vectors per element
- */
-static inline void vmathSoaV3MaxPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
-
-/*
- * Minimum of two 3-D vectors per element
- */
-static inline void vmathSoaV3MinPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
-
-/*
- * Maximum element of a 3-D vector
- */
-static inline vec_float4 vmathSoaV3MaxElem( const VmathSoaVector3 *vec );
-
-/*
- * Minimum element of a 3-D vector
- */
-static inline vec_float4 vmathSoaV3MinElem( const VmathSoaVector3 *vec );
-
-/*
- * Compute the sum of all elements of a 3-D vector
- */
-static inline vec_float4 vmathSoaV3Sum( const VmathSoaVector3 *vec );
-
-/*
- * Compute the dot product of two 3-D vectors
- */
-static inline vec_float4 vmathSoaV3Dot( const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
-
-/*
- * Compute the square of the length of a 3-D vector
- */
-static inline vec_float4 vmathSoaV3LengthSqr( const VmathSoaVector3 *vec );
-
-/*
- * Compute the length of a 3-D vector
- */
-static inline vec_float4 vmathSoaV3Length( const VmathSoaVector3 *vec );
-
-/*
- * Normalize a 3-D vector
- * NOTE: 
- * The result is unpredictable when all elements of vec are at or near zero.
- */
-static inline void vmathSoaV3Normalize( VmathSoaVector3 *result, const VmathSoaVector3 *vec );
-
-/*
- * Compute cross product of two 3-D vectors
- */
-static inline void vmathSoaV3Cross( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
-
-/*
- * Outer product of two 3-D vectors
- */
-static inline void vmathSoaV3Outer( VmathSoaMatrix3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
-
-/*
- * Pre-multiply a row vector by a 3x3 matrix
- */
-static inline void vmathSoaV3RowMul( VmathSoaVector3 *result, const VmathSoaVector3 *vec, const VmathSoaMatrix3 *mat );
-
-/*
- * Cross-product matrix of a 3-D vector
- */
-static inline void vmathSoaV3CrossMatrix( VmathSoaMatrix3 *result, const VmathSoaVector3 *vec );
-
-/*
- * Create cross-product matrix and multiply
- * NOTE: 
- * Faster than separately creating a cross-product matrix and multiplying.
- */
-static inline void vmathSoaV3CrossMatrixMul( VmathSoaMatrix3 *result, const VmathSoaVector3 *vec, const VmathSoaMatrix3 *mat );
-
-/*
- * Linear interpolation between two 3-D vectors
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline void vmathSoaV3Lerp( VmathSoaVector3 *result, vec_float4 t, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
-
-/*
- * Spherical linear interpolation between two 3-D vectors
- * NOTE: 
- * The result is unpredictable if the vectors point in opposite directions.
- * Does not clamp t between 0 and 1.
- */
-static inline void vmathSoaV3Slerp( VmathSoaVector3 *result, vec_float4 t, const VmathSoaVector3 *unitVec0, const VmathSoaVector3 *unitVec1 );
-
-/*
- * Conditionally select between two 3-D vectors
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline void vmathSoaV3Select( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1, vec_uint4 select1 );
-
-/*
- * Load four three-float 3-D vectors, stored in three quadwords
- */
-static inline void vmathSoaV3LoadXYZArray( VmathSoaVector3 *vec, const vec_float4 *threeQuads );
-
-/*
- * Store four slots of an SoA 3-D vector in three quadwords
- */
-static inline void vmathSoaV3StoreXYZArray( const VmathSoaVector3 *vec, vec_float4 *threeQuads );
-
-/*
- * Store eight slots of two SoA 3-D vectors as half-floats
- */
-static inline void vmathSoaV3StoreHalfFloats( const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1, vec_ushort8 *threeQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3-D vector
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaV3Print( const VmathSoaVector3 *vec );
-
-/*
- * Print a 3-D vector and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaV3Prints( const VmathSoaVector3 *vec, const char *name );
-
-#endif
-
-/*
- * Copy a 4-D vector
- */
-static inline void vmathSoaV4Copy( VmathSoaVector4 *result, const VmathSoaVector4 *vec );
-
-/*
- * Construct a 4-D vector from x, y, z, and w elements
- */
-static inline void vmathSoaV4MakeFromElems( VmathSoaVector4 *result, vec_float4 x, vec_float4 y, vec_float4 z, vec_float4 w );
-
-/*
- * Construct a 4-D vector from a 3-D vector and a scalar
- */
-static inline void vmathSoaV4MakeFromV3Scalar( VmathSoaVector4 *result, const VmathSoaVector3 *xyz, vec_float4 w );
-
-/*
- * Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0
- */
-static inline void vmathSoaV4MakeFromV3( VmathSoaVector4 *result, const VmathSoaVector3 *vec );
-
-/*
- * Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1
- */
-static inline void vmathSoaV4MakeFromP3( VmathSoaVector4 *result, const VmathSoaPoint3 *pnt );
-
-/*
- * Copy elements from a quaternion into a 4-D vector
- */
-static inline void vmathSoaV4MakeFromQ( VmathSoaVector4 *result, const VmathSoaQuat *quat );
-
-/*
- * Set all elements of a 4-D vector to the same scalar value
- */
-static inline void vmathSoaV4MakeFromScalar( VmathSoaVector4 *result, vec_float4 scalar );
-
-/*
- * Replicate an AoS 4-D vector
- */
-static inline void vmathSoaV4MakeFromAos( VmathSoaVector4 *result, const VmathVector4 *vec );
-
-/*
- * Insert four AoS 4-D vectors
- */
-static inline void vmathSoaV4MakeFrom4Aos( VmathSoaVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1, const VmathVector4 *vec2, const VmathVector4 *vec3 );
-
-/*
- * Extract four AoS 4-D vectors
- */
-static inline void vmathSoaV4Get4Aos( const VmathSoaVector4 *vec, VmathVector4 *result0, VmathVector4 *result1, VmathVector4 *result2, VmathVector4 *result3 );
-
-/*
- * Set the x, y, and z elements of a 4-D vector
- * NOTE: 
- * This function does not change the w element.
- */
-static inline void vmathSoaV4SetXYZ( VmathSoaVector4 *result, const VmathSoaVector3 *vec );
-
-/*
- * Get the x, y, and z elements of a 4-D vector
- */
-static inline void vmathSoaV4GetXYZ( VmathSoaVector3 *result, const VmathSoaVector4 *vec );
-
-/*
- * Set the x element of a 4-D vector
- */
-static inline void vmathSoaV4SetX( VmathSoaVector4 *result, vec_float4 x );
-
-/*
- * Set the y element of a 4-D vector
- */
-static inline void vmathSoaV4SetY( VmathSoaVector4 *result, vec_float4 y );
-
-/*
- * Set the z element of a 4-D vector
- */
-static inline void vmathSoaV4SetZ( VmathSoaVector4 *result, vec_float4 z );
-
-/*
- * Set the w element of a 4-D vector
- */
-static inline void vmathSoaV4SetW( VmathSoaVector4 *result, vec_float4 w );
-
-/*
- * Get the x element of a 4-D vector
- */
-static inline vec_float4 vmathSoaV4GetX( const VmathSoaVector4 *vec );
-
-/*
- * Get the y element of a 4-D vector
- */
-static inline vec_float4 vmathSoaV4GetY( const VmathSoaVector4 *vec );
-
-/*
- * Get the z element of a 4-D vector
- */
-static inline vec_float4 vmathSoaV4GetZ( const VmathSoaVector4 *vec );
-
-/*
- * Get the w element of a 4-D vector
- */
-static inline vec_float4 vmathSoaV4GetW( const VmathSoaVector4 *vec );
-
-/*
- * Set an x, y, z, or w element of a 4-D vector by index
- */
-static inline void vmathSoaV4SetElem( VmathSoaVector4 *result, int idx, vec_float4 value );
-
-/*
- * Get an x, y, z, or w element of a 4-D vector by index
- */
-static inline vec_float4 vmathSoaV4GetElem( const VmathSoaVector4 *vec, int idx );
-
-/*
- * Add two 4-D vectors
- */
-static inline void vmathSoaV4Add( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
-
-/*
- * Subtract a 4-D vector from another 4-D vector
- */
-static inline void vmathSoaV4Sub( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
-
-/*
- * Multiply a 4-D vector by a scalar
- */
-static inline void vmathSoaV4ScalarMul( VmathSoaVector4 *result, const VmathSoaVector4 *vec, vec_float4 scalar );
-
-/*
- * Divide a 4-D vector by a scalar
- */
-static inline void vmathSoaV4ScalarDiv( VmathSoaVector4 *result, const VmathSoaVector4 *vec, vec_float4 scalar );
-
-/*
- * Negate all elements of a 4-D vector
- */
-static inline void vmathSoaV4Neg( VmathSoaVector4 *result, const VmathSoaVector4 *vec );
-
-/*
- * Construct x axis
- */
-static inline void vmathSoaV4MakeXAxis( VmathSoaVector4 *result );
-
-/*
- * Construct y axis
- */
-static inline void vmathSoaV4MakeYAxis( VmathSoaVector4 *result );
-
-/*
- * Construct z axis
- */
-static inline void vmathSoaV4MakeZAxis( VmathSoaVector4 *result );
-
-/*
- * Construct w axis
- */
-static inline void vmathSoaV4MakeWAxis( VmathSoaVector4 *result );
-
-/*
- * Multiply two 4-D vectors per element
- */
-static inline void vmathSoaV4MulPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
-
-/*
- * Divide two 4-D vectors per element
- * NOTE: 
- * Floating-point behavior matches standard library function divf4.
- */
-static inline void vmathSoaV4DivPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
-
-/*
- * Compute the reciprocal of a 4-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function recipf4.
- */
-static inline void vmathSoaV4RecipPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec );
-
-/*
- * Compute the square root of a 4-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function sqrtf4.
- */
-static inline void vmathSoaV4SqrtPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec );
-
-/*
- * Compute the reciprocal square root of a 4-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function rsqrtf4.
- */
-static inline void vmathSoaV4RsqrtPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec );
-
-/*
- * Compute the absolute value of a 4-D vector per element
- */
-static inline void vmathSoaV4AbsPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec );
-
-/*
- * Copy sign from one 4-D vector to another, per element
- */
-static inline void vmathSoaV4CopySignPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
-
-/*
- * Maximum of two 4-D vectors per element
- */
-static inline void vmathSoaV4MaxPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
-
-/*
- * Minimum of two 4-D vectors per element
- */
-static inline void vmathSoaV4MinPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
-
-/*
- * Maximum element of a 4-D vector
- */
-static inline vec_float4 vmathSoaV4MaxElem( const VmathSoaVector4 *vec );
-
-/*
- * Minimum element of a 4-D vector
- */
-static inline vec_float4 vmathSoaV4MinElem( const VmathSoaVector4 *vec );
-
-/*
- * Compute the sum of all elements of a 4-D vector
- */
-static inline vec_float4 vmathSoaV4Sum( const VmathSoaVector4 *vec );
-
-/*
- * Compute the dot product of two 4-D vectors
- */
-static inline vec_float4 vmathSoaV4Dot( const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
-
-/*
- * Compute the square of the length of a 4-D vector
- */
-static inline vec_float4 vmathSoaV4LengthSqr( const VmathSoaVector4 *vec );
-
-/*
- * Compute the length of a 4-D vector
- */
-static inline vec_float4 vmathSoaV4Length( const VmathSoaVector4 *vec );
-
-/*
- * Normalize a 4-D vector
- * NOTE: 
- * The result is unpredictable when all elements of vec are at or near zero.
- */
-static inline void vmathSoaV4Normalize( VmathSoaVector4 *result, const VmathSoaVector4 *vec );
-
-/*
- * Outer product of two 4-D vectors
- */
-static inline void vmathSoaV4Outer( VmathSoaMatrix4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
-
-/*
- * Linear interpolation between two 4-D vectors
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline void vmathSoaV4Lerp( VmathSoaVector4 *result, vec_float4 t, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
-
-/*
- * Spherical linear interpolation between two 4-D vectors
- * NOTE: 
- * The result is unpredictable if the vectors point in opposite directions.
- * Does not clamp t between 0 and 1.
- */
-static inline void vmathSoaV4Slerp( VmathSoaVector4 *result, vec_float4 t, const VmathSoaVector4 *unitVec0, const VmathSoaVector4 *unitVec1 );
-
-/*
- * Conditionally select between two 4-D vectors
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline void vmathSoaV4Select( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1, vec_uint4 select1 );
-
-/*
- * Store four slots of an SoA 4-D vector as half-floats
- */
-static inline void vmathSoaV4StoreHalfFloats( const VmathSoaVector4 *vec, vec_ushort8 *twoQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 4-D vector
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaV4Print( const VmathSoaVector4 *vec );
-
-/*
- * Print a 4-D vector and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaV4Prints( const VmathSoaVector4 *vec, const char *name );
-
-#endif
-
-/*
- * Copy a 3-D point
- */
-static inline void vmathSoaP3Copy( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt );
-
-/*
- * Construct a 3-D point from x, y, and z elements
- */
-static inline void vmathSoaP3MakeFromElems( VmathSoaPoint3 *result, vec_float4 x, vec_float4 y, vec_float4 z );
-
-/*
- * Copy elements from a 3-D vector into a 3-D point
- */
-static inline void vmathSoaP3MakeFromV3( VmathSoaPoint3 *result, const VmathSoaVector3 *vec );
-
-/*
- * Set all elements of a 3-D point to the same scalar value
- */
-static inline void vmathSoaP3MakeFromScalar( VmathSoaPoint3 *result, vec_float4 scalar );
-
-/*
- * Replicate an AoS 3-D point
- */
-static inline void vmathSoaP3MakeFromAos( VmathSoaPoint3 *result, const VmathPoint3 *pnt );
-
-/*
- * Insert four AoS 3-D points
- */
-static inline void vmathSoaP3MakeFrom4Aos( VmathSoaPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, const VmathPoint3 *pnt2, const VmathPoint3 *pnt3 );
-
-/*
- * Extract four AoS 3-D points
- */
-static inline void vmathSoaP3Get4Aos( const VmathSoaPoint3 *pnt, VmathPoint3 *result0, VmathPoint3 *result1, VmathPoint3 *result2, VmathPoint3 *result3 );
-
-/*
- * Set the x element of a 3-D point
- */
-static inline void vmathSoaP3SetX( VmathSoaPoint3 *result, vec_float4 x );
-
-/*
- * Set the y element of a 3-D point
- */
-static inline void vmathSoaP3SetY( VmathSoaPoint3 *result, vec_float4 y );
-
-/*
- * Set the z element of a 3-D point
- */
-static inline void vmathSoaP3SetZ( VmathSoaPoint3 *result, vec_float4 z );
-
-/*
- * Get the x element of a 3-D point
- */
-static inline vec_float4 vmathSoaP3GetX( const VmathSoaPoint3 *pnt );
-
-/*
- * Get the y element of a 3-D point
- */
-static inline vec_float4 vmathSoaP3GetY( const VmathSoaPoint3 *pnt );
-
-/*
- * Get the z element of a 3-D point
- */
-static inline vec_float4 vmathSoaP3GetZ( const VmathSoaPoint3 *pnt );
-
-/*
- * Set an x, y, or z element of a 3-D point by index
- */
-static inline void vmathSoaP3SetElem( VmathSoaPoint3 *result, int idx, vec_float4 value );
-
-/*
- * Get an x, y, or z element of a 3-D point by index
- */
-static inline vec_float4 vmathSoaP3GetElem( const VmathSoaPoint3 *pnt, int idx );
-
-/*
- * Subtract a 3-D point from another 3-D point
- */
-static inline void vmathSoaP3Sub( VmathSoaVector3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 );
-
-/*
- * Add a 3-D point to a 3-D vector
- */
-static inline void vmathSoaP3AddV3( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, const VmathSoaVector3 *vec );
-
-/*
- * Subtract a 3-D vector from a 3-D point
- */
-static inline void vmathSoaP3SubV3( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, const VmathSoaVector3 *vec );
-
-/*
- * Multiply two 3-D points per element
- */
-static inline void vmathSoaP3MulPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 );
-
-/*
- * Divide two 3-D points per element
- * NOTE: 
- * Floating-point behavior matches standard library function divf4.
- */
-static inline void vmathSoaP3DivPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 );
-
-/*
- * Compute the reciprocal of a 3-D point per element
- * NOTE: 
- * Floating-point behavior matches standard library function recipf4.
- */
-static inline void vmathSoaP3RecipPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt );
-
-/*
- * Compute the square root of a 3-D point per element
- * NOTE: 
- * Floating-point behavior matches standard library function sqrtf4.
- */
-static inline void vmathSoaP3SqrtPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt );
-
-/*
- * Compute the reciprocal square root of a 3-D point per element
- * NOTE: 
- * Floating-point behavior matches standard library function rsqrtf4.
- */
-static inline void vmathSoaP3RsqrtPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt );
-
-/*
- * Compute the absolute value of a 3-D point per element
- */
-static inline void vmathSoaP3AbsPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt );
-
-/*
- * Copy sign from one 3-D point to another, per element
- */
-static inline void vmathSoaP3CopySignPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 );
-
-/*
- * Maximum of two 3-D points per element
- */
-static inline void vmathSoaP3MaxPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 );
-
-/*
- * Minimum of two 3-D points per element
- */
-static inline void vmathSoaP3MinPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 );
-
-/*
- * Maximum element of a 3-D point
- */
-static inline vec_float4 vmathSoaP3MaxElem( const VmathSoaPoint3 *pnt );
-
-/*
- * Minimum element of a 3-D point
- */
-static inline vec_float4 vmathSoaP3MinElem( const VmathSoaPoint3 *pnt );
-
-/*
- * Compute the sum of all elements of a 3-D point
- */
-static inline vec_float4 vmathSoaP3Sum( const VmathSoaPoint3 *pnt );
-
-/*
- * Apply uniform scale to a 3-D point
- */
-static inline void vmathSoaP3Scale( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, vec_float4 scaleVal );
-
-/*
- * Apply non-uniform scale to a 3-D point
- */
-static inline void vmathSoaP3NonUniformScale( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, const VmathSoaVector3 *scaleVec );
-
-/*
- * Scalar projection of a 3-D point on a unit-length 3-D vector
- */
-static inline vec_float4 vmathSoaP3Projection( const VmathSoaPoint3 *pnt, const VmathSoaVector3 *unitVec );
-
-/*
- * Compute the square of the distance of a 3-D point from the coordinate-system origin
- */
-static inline vec_float4 vmathSoaP3DistSqrFromOrigin( const VmathSoaPoint3 *pnt );
-
-/*
- * Compute the distance of a 3-D point from the coordinate-system origin
- */
-static inline vec_float4 vmathSoaP3DistFromOrigin( const VmathSoaPoint3 *pnt );
-
-/*
- * Compute the square of the distance between two 3-D points
- */
-static inline vec_float4 vmathSoaP3DistSqr( const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 );
-
-/*
- * Compute the distance between two 3-D points
- */
-static inline vec_float4 vmathSoaP3Dist( const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 );
-
-/*
- * Linear interpolation between two 3-D points
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline void vmathSoaP3Lerp( VmathSoaPoint3 *result, vec_float4 t, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 );
-
-/*
- * Conditionally select between two 3-D points
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline void vmathSoaP3Select( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1, vec_uint4 select1 );
-
-/*
- * Load four three-float 3-D points, stored in three quadwords
- */
-static inline void vmathSoaP3LoadXYZArray( VmathSoaPoint3 *pnt, const vec_float4 *threeQuads );
-
-/*
- * Store four slots of an SoA 3-D point in three quadwords
- */
-static inline void vmathSoaP3StoreXYZArray( const VmathSoaPoint3 *pnt, vec_float4 *threeQuads );
-
-/*
- * Store eight slots of two SoA 3-D points as half-floats
- */
-static inline void vmathSoaP3StoreHalfFloats( const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1, vec_ushort8 *threeQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3-D point
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaP3Print( const VmathSoaPoint3 *pnt );
-
-/*
- * Print a 3-D point and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaP3Prints( const VmathSoaPoint3 *pnt, const char *name );
-
-#endif
-
-/*
- * Copy a quaternion
- */
-static inline void vmathSoaQCopy( VmathSoaQuat *result, const VmathSoaQuat *quat );
-
-/*
- * Construct a quaternion from x, y, z, and w elements
- */
-static inline void vmathSoaQMakeFromElems( VmathSoaQuat *result, vec_float4 x, vec_float4 y, vec_float4 z, vec_float4 w );
-
-/*
- * Construct a quaternion from a 3-D vector and a scalar
- */
-static inline void vmathSoaQMakeFromV3Scalar( VmathSoaQuat *result, const VmathSoaVector3 *xyz, vec_float4 w );
-
-/*
- * Copy elements from a 4-D vector into a quaternion
- */
-static inline void vmathSoaQMakeFromV4( VmathSoaQuat *result, const VmathSoaVector4 *vec );
-
-/*
- * Convert a rotation matrix to a unit-length quaternion
- */
-static inline void vmathSoaQMakeFromM3( VmathSoaQuat *result, const VmathSoaMatrix3 *rotMat );
-
-/*
- * Set all elements of a quaternion to the same scalar value
- */
-static inline void vmathSoaQMakeFromScalar( VmathSoaQuat *result, vec_float4 scalar );
-
-/*
- * Replicate an AoS quaternion
- */
-static inline void vmathSoaQMakeFromAos( VmathSoaQuat *result, const VmathQuat *quat );
-
-/*
- * Insert four AoS quaternions
- */
-static inline void vmathSoaQMakeFrom4Aos( VmathSoaQuat *result, const VmathQuat *quat0, const VmathQuat *quat1, const VmathQuat *quat2, const VmathQuat *quat3 );
-
-/*
- * Extract four AoS quaternions
- */
-static inline void vmathSoaQGet4Aos( const VmathSoaQuat *quat, VmathQuat *result0, VmathQuat *result1, VmathQuat *result2, VmathQuat *result3 );
-
-/*
- * Set the x, y, and z elements of a quaternion
- * NOTE: 
- * This function does not change the w element.
- */
-static inline void vmathSoaQSetXYZ( VmathSoaQuat *result, const VmathSoaVector3 *vec );
-
-/*
- * Get the x, y, and z elements of a quaternion
- */
-static inline void vmathSoaQGetXYZ( VmathSoaVector3 *result, const VmathSoaQuat *quat );
-
-/*
- * Set the x element of a quaternion
- */
-static inline void vmathSoaQSetX( VmathSoaQuat *result, vec_float4 x );
-
-/*
- * Set the y element of a quaternion
- */
-static inline void vmathSoaQSetY( VmathSoaQuat *result, vec_float4 y );
-
-/*
- * Set the z element of a quaternion
- */
-static inline void vmathSoaQSetZ( VmathSoaQuat *result, vec_float4 z );
-
-/*
- * Set the w element of a quaternion
- */
-static inline void vmathSoaQSetW( VmathSoaQuat *result, vec_float4 w );
-
-/*
- * Get the x element of a quaternion
- */
-static inline vec_float4 vmathSoaQGetX( const VmathSoaQuat *quat );
-
-/*
- * Get the y element of a quaternion
- */
-static inline vec_float4 vmathSoaQGetY( const VmathSoaQuat *quat );
-
-/*
- * Get the z element of a quaternion
- */
-static inline vec_float4 vmathSoaQGetZ( const VmathSoaQuat *quat );
-
-/*
- * Get the w element of a quaternion
- */
-static inline vec_float4 vmathSoaQGetW( const VmathSoaQuat *quat );
-
-/*
- * Set an x, y, z, or w element of a quaternion by index
- */
-static inline void vmathSoaQSetElem( VmathSoaQuat *result, int idx, vec_float4 value );
-
-/*
- * Get an x, y, z, or w element of a quaternion by index
- */
-static inline vec_float4 vmathSoaQGetElem( const VmathSoaQuat *quat, int idx );
-
-/*
- * Add two quaternions
- */
-static inline void vmathSoaQAdd( VmathSoaQuat *result, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 );
-
-/*
- * Subtract a quaternion from another quaternion
- */
-static inline void vmathSoaQSub( VmathSoaQuat *result, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 );
-
-/*
- * Multiply two quaternions
- */
-static inline void vmathSoaQMul( VmathSoaQuat *result, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 );
-
-/*
- * Multiply a quaternion by a scalar
- */
-static inline void vmathSoaQScalarMul( VmathSoaQuat *result, const VmathSoaQuat *quat, vec_float4 scalar );
-
-/*
- * Divide a quaternion by a scalar
- */
-static inline void vmathSoaQScalarDiv( VmathSoaQuat *result, const VmathSoaQuat *quat, vec_float4 scalar );
-
-/*
- * Negate all elements of a quaternion
- */
-static inline void vmathSoaQNeg( VmathSoaQuat *result, const VmathSoaQuat *quat );
-
-/*
- * Construct an identity quaternion
- */
-static inline void vmathSoaQMakeIdentity( VmathSoaQuat *result );
-
-/*
- * Construct a quaternion to rotate between two unit-length 3-D vectors
- * NOTE: 
- * The result is unpredictable if unitVec0 and unitVec1 point in opposite directions.
- */
-static inline void vmathSoaQMakeRotationArc( VmathSoaQuat *result, const VmathSoaVector3 *unitVec0, const VmathSoaVector3 *unitVec1 );
-
-/*
- * Construct a quaternion to rotate around a unit-length 3-D vector
- */
-static inline void vmathSoaQMakeRotationAxis( VmathSoaQuat *result, vec_float4 radians, const VmathSoaVector3 *unitVec );
-
-/*
- * Construct a quaternion to rotate around the x axis
- */
-static inline void vmathSoaQMakeRotationX( VmathSoaQuat *result, vec_float4 radians );
-
-/*
- * Construct a quaternion to rotate around the y axis
- */
-static inline void vmathSoaQMakeRotationY( VmathSoaQuat *result, vec_float4 radians );
-
-/*
- * Construct a quaternion to rotate around the z axis
- */
-static inline void vmathSoaQMakeRotationZ( VmathSoaQuat *result, vec_float4 radians );
-
-/*
- * Compute the conjugate of a quaternion
- */
-static inline void vmathSoaQConj( VmathSoaQuat *result, const VmathSoaQuat *quat );
-
-/*
- * Use a unit-length quaternion to rotate a 3-D vector
- */
-static inline void vmathSoaQRotate( VmathSoaVector3 *result, const VmathSoaQuat *unitQuat, const VmathSoaVector3 *vec );
-
-/*
- * Compute the dot product of two quaternions
- */
-static inline vec_float4 vmathSoaQDot( const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 );
-
-/*
- * Compute the norm of a quaternion
- */
-static inline vec_float4 vmathSoaQNorm( const VmathSoaQuat *quat );
-
-/*
- * Compute the length of a quaternion
- */
-static inline vec_float4 vmathSoaQLength( const VmathSoaQuat *quat );
-
-/*
- * Normalize a quaternion
- * NOTE: 
- * The result is unpredictable when all elements of quat are at or near zero.
- */
-static inline void vmathSoaQNormalize( VmathSoaQuat *result, const VmathSoaQuat *quat );
-
-/*
- * Linear interpolation between two quaternions
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline void vmathSoaQLerp( VmathSoaQuat *result, vec_float4 t, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 );
-
-/*
- * Spherical linear interpolation between two quaternions
- * NOTE: 
- * Interpolates along the shortest path between orientations.
- * Does not clamp t between 0 and 1.
- */
-static inline void vmathSoaQSlerp( VmathSoaQuat *result, vec_float4 t, const VmathSoaQuat *unitQuat0, const VmathSoaQuat *unitQuat1 );
-
-/*
- * Spherical quadrangle interpolation
- */
-static inline void vmathSoaQSquad( VmathSoaQuat *result, vec_float4 t, const VmathSoaQuat *unitQuat0, const VmathSoaQuat *unitQuat1, const VmathSoaQuat *unitQuat2, const VmathSoaQuat *unitQuat3 );
-
-/*
- * Conditionally select between two quaternions
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline void vmathSoaQSelect( VmathSoaQuat *result, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1, vec_uint4 select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a quaternion
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaQPrint( const VmathSoaQuat *quat );
-
-/*
- * Print a quaternion and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaQPrints( const VmathSoaQuat *quat, const char *name );
-
-#endif
-
-/*
- * Copy a 3x3 matrix
- */
-static inline void vmathSoaM3Copy( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat );
-
-/*
- * Construct a 3x3 matrix containing the specified columns
- */
-static inline void vmathSoaM3MakeFromCols( VmathSoaMatrix3 *result, const VmathSoaVector3 *col0, const VmathSoaVector3 *col1, const VmathSoaVector3 *col2 );
-
-/*
- * Construct a 3x3 rotation matrix from a unit-length quaternion
- */
-static inline void vmathSoaM3MakeFromQ( VmathSoaMatrix3 *result, const VmathSoaQuat *unitQuat );
-
-/*
- * Set all elements of a 3x3 matrix to the same scalar value
- */
-static inline void vmathSoaM3MakeFromScalar( VmathSoaMatrix3 *result, vec_float4 scalar );
-
-/*
- * Replicate an AoS 3x3 matrix
- */
-static inline void vmathSoaM3MakeFromAos( VmathSoaMatrix3 *result, const VmathMatrix3 *mat );
-
-/*
- * Insert four AoS 3x3 matrices
- */
-static inline void vmathSoaM3MakeFrom4Aos( VmathSoaMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1, const VmathMatrix3 *mat2, const VmathMatrix3 *mat3 );
-
-/*
- * Extract four AoS 3x3 matrices
- */
-static inline void vmathSoaM3Get4Aos( const VmathSoaMatrix3 *mat, VmathMatrix3 *result0, VmathMatrix3 *result1, VmathMatrix3 *result2, VmathMatrix3 *result3 );
-
-/*
- * Set column 0 of a 3x3 matrix
- */
-static inline void vmathSoaM3SetCol0( VmathSoaMatrix3 *result, const VmathSoaVector3 *col0 );
-
-/*
- * Set column 1 of a 3x3 matrix
- */
-static inline void vmathSoaM3SetCol1( VmathSoaMatrix3 *result, const VmathSoaVector3 *col1 );
-
-/*
- * Set column 2 of a 3x3 matrix
- */
-static inline void vmathSoaM3SetCol2( VmathSoaMatrix3 *result, const VmathSoaVector3 *col2 );
-
-/*
- * Get column 0 of a 3x3 matrix
- */
-static inline void vmathSoaM3GetCol0( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat );
-
-/*
- * Get column 1 of a 3x3 matrix
- */
-static inline void vmathSoaM3GetCol1( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat );
-
-/*
- * Get column 2 of a 3x3 matrix
- */
-static inline void vmathSoaM3GetCol2( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat );
-
-/*
- * Set the column of a 3x3 matrix referred to by the specified index
- */
-static inline void vmathSoaM3SetCol( VmathSoaMatrix3 *result, int col, const VmathSoaVector3 *vec );
-
-/*
- * Set the row of a 3x3 matrix referred to by the specified index
- */
-static inline void vmathSoaM3SetRow( VmathSoaMatrix3 *result, int row, const VmathSoaVector3 *vec );
-
-/*
- * Get the column of a 3x3 matrix referred to by the specified index
- */
-static inline void vmathSoaM3GetCol( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat, int col );
-
-/*
- * Get the row of a 3x3 matrix referred to by the specified index
- */
-static inline void vmathSoaM3GetRow( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat, int row );
-
-/*
- * Set the element of a 3x3 matrix referred to by column and row indices
- */
-static inline void vmathSoaM3SetElem( VmathSoaMatrix3 *result, int col, int row, vec_float4 val );
-
-/*
- * Get the element of a 3x3 matrix referred to by column and row indices
- */
-static inline vec_float4 vmathSoaM3GetElem( const VmathSoaMatrix3 *mat, int col, int row );
-
-/*
- * Add two 3x3 matrices
- */
-static inline void vmathSoaM3Add( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1 );
-
-/*
- * Subtract a 3x3 matrix from another 3x3 matrix
- */
-static inline void vmathSoaM3Sub( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1 );
-
-/*
- * Negate all elements of a 3x3 matrix
- */
-static inline void vmathSoaM3Neg( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat );
-
-/*
- * Multiply a 3x3 matrix by a scalar
- */
-static inline void vmathSoaM3ScalarMul( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat, vec_float4 scalar );
-
-/*
- * Multiply a 3x3 matrix by a 3-D vector
- */
-static inline void vmathSoaM3MulV3( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat, const VmathSoaVector3 *vec );
-
-/*
- * Multiply two 3x3 matrices
- */
-static inline void vmathSoaM3Mul( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1 );
-
-/*
- * Construct an identity 3x3 matrix
- */
-static inline void vmathSoaM3MakeIdentity( VmathSoaMatrix3 *result );
-
-/*
- * Construct a 3x3 matrix to rotate around the x axis
- */
-static inline void vmathSoaM3MakeRotationX( VmathSoaMatrix3 *result, vec_float4 radians );
-
-/*
- * Construct a 3x3 matrix to rotate around the y axis
- */
-static inline void vmathSoaM3MakeRotationY( VmathSoaMatrix3 *result, vec_float4 radians );
-
-/*
- * Construct a 3x3 matrix to rotate around the z axis
- */
-static inline void vmathSoaM3MakeRotationZ( VmathSoaMatrix3 *result, vec_float4 radians );
-
-/*
- * Construct a 3x3 matrix to rotate around the x, y, and z axes
- */
-static inline void vmathSoaM3MakeRotationZYX( VmathSoaMatrix3 *result, const VmathSoaVector3 *radiansXYZ );
-
-/*
- * Construct a 3x3 matrix to rotate around a unit-length 3-D vector
- */
-static inline void vmathSoaM3MakeRotationAxis( VmathSoaMatrix3 *result, vec_float4 radians, const VmathSoaVector3 *unitVec );
-
-/*
- * Construct a rotation matrix from a unit-length quaternion
- */
-static inline void vmathSoaM3MakeRotationQ( VmathSoaMatrix3 *result, const VmathSoaQuat *unitQuat );
-
-/*
- * Construct a 3x3 matrix to perform scaling
- */
-static inline void vmathSoaM3MakeScale( VmathSoaMatrix3 *result, const VmathSoaVector3 *scaleVec );
-
-/*
- * Append (post-multiply) a scale transformation to a 3x3 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline void vmathSoaM3AppendScale( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat, const VmathSoaVector3 *scaleVec );
-
-/*
- * Prepend (pre-multiply) a scale transformation to a 3x3 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline void vmathSoaM3PrependScale( VmathSoaMatrix3 *result, const VmathSoaVector3 *scaleVec, const VmathSoaMatrix3 *mat );
-
-/*
- * Multiply two 3x3 matrices per element
- */
-static inline void vmathSoaM3MulPerElem( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1 );
-
-/*
- * Compute the absolute value of a 3x3 matrix per element
- */
-static inline void vmathSoaM3AbsPerElem( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat );
-
-/*
- * Transpose of a 3x3 matrix
- */
-static inline void vmathSoaM3Transpose( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat );
-
-/*
- * Compute the inverse of a 3x3 matrix
- * NOTE: 
- * Result is unpredictable when the determinant of mat is equal to or near 0.
- */
-static inline void vmathSoaM3Inverse( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat );
-
-/*
- * Determinant of a 3x3 matrix
- */
-static inline vec_float4 vmathSoaM3Determinant( const VmathSoaMatrix3 *mat );
-
-/*
- * Conditionally select between two 3x3 matrices
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline void vmathSoaM3Select( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1, vec_uint4 select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3x3 matrix
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaM3Print( const VmathSoaMatrix3 *mat );
-
-/*
- * Print a 3x3 matrix and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaM3Prints( const VmathSoaMatrix3 *mat, const char *name );
-
-#endif
-
-/*
- * Copy a 4x4 matrix
- */
-static inline void vmathSoaM4Copy( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat );
-
-/*
- * Construct a 4x4 matrix containing the specified columns
- */
-static inline void vmathSoaM4MakeFromCols( VmathSoaMatrix4 *result, const VmathSoaVector4 *col0, const VmathSoaVector4 *col1, const VmathSoaVector4 *col2, const VmathSoaVector4 *col3 );
-
-/*
- * Construct a 4x4 matrix from a 3x4 transformation matrix
- */
-static inline void vmathSoaM4MakeFromT3( VmathSoaMatrix4 *result, const VmathSoaTransform3 *mat );
-
-/*
- * Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector
- */
-static inline void vmathSoaM4MakeFromM3V3( VmathSoaMatrix4 *result, const VmathSoaMatrix3 *mat, const VmathSoaVector3 *translateVec );
-
-/*
- * Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector
- */
-static inline void vmathSoaM4MakeFromQV3( VmathSoaMatrix4 *result, const VmathSoaQuat *unitQuat, const VmathSoaVector3 *translateVec );
-
-/*
- * Set all elements of a 4x4 matrix to the same scalar value
- */
-static inline void vmathSoaM4MakeFromScalar( VmathSoaMatrix4 *result, vec_float4 scalar );
-
-/*
- * Replicate an AoS 4x4 matrix
- */
-static inline void vmathSoaM4MakeFromAos( VmathSoaMatrix4 *result, const VmathMatrix4 *mat );
-
-/*
- * Insert four AoS 4x4 matrices
- */
-static inline void vmathSoaM4MakeFrom4Aos( VmathSoaMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1, const VmathMatrix4 *mat2, const VmathMatrix4 *mat3 );
-
-/*
- * Extract four AoS 4x4 matrices
- */
-static inline void vmathSoaM4Get4Aos( const VmathSoaMatrix4 *mat, VmathMatrix4 *result0, VmathMatrix4 *result1, VmathMatrix4 *result2, VmathMatrix4 *result3 );
-
-/*
- * Set the upper-left 3x3 submatrix
- * NOTE: 
- * This function does not change the bottom row elements.
- */
-static inline void vmathSoaM4SetUpper3x3( VmathSoaMatrix4 *result, const VmathSoaMatrix3 *mat3 );
-
-/*
- * Get the upper-left 3x3 submatrix of a 4x4 matrix
- */
-static inline void vmathSoaM4GetUpper3x3( VmathSoaMatrix3 *result, const VmathSoaMatrix4 *mat );
-
-/*
- * Set translation component
- * NOTE: 
- * This function does not change the bottom row elements.
- */
-static inline void vmathSoaM4SetTranslation( VmathSoaMatrix4 *result, const VmathSoaVector3 *translateVec );
-
-/*
- * Get the translation component of a 4x4 matrix
- */
-static inline void vmathSoaM4GetTranslation( VmathSoaVector3 *result, const VmathSoaMatrix4 *mat );
-
-/*
- * Set column 0 of a 4x4 matrix
- */
-static inline void vmathSoaM4SetCol0( VmathSoaMatrix4 *result, const VmathSoaVector4 *col0 );
-
-/*
- * Set column 1 of a 4x4 matrix
- */
-static inline void vmathSoaM4SetCol1( VmathSoaMatrix4 *result, const VmathSoaVector4 *col1 );
-
-/*
- * Set column 2 of a 4x4 matrix
- */
-static inline void vmathSoaM4SetCol2( VmathSoaMatrix4 *result, const VmathSoaVector4 *col2 );
-
-/*
- * Set column 3 of a 4x4 matrix
- */
-static inline void vmathSoaM4SetCol3( VmathSoaMatrix4 *result, const VmathSoaVector4 *col3 );
-
-/*
- * Get column 0 of a 4x4 matrix
- */
-static inline void vmathSoaM4GetCol0( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat );
-
-/*
- * Get column 1 of a 4x4 matrix
- */
-static inline void vmathSoaM4GetCol1( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat );
-
-/*
- * Get column 2 of a 4x4 matrix
- */
-static inline void vmathSoaM4GetCol2( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat );
-
-/*
- * Get column 3 of a 4x4 matrix
- */
-static inline void vmathSoaM4GetCol3( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat );
-
-/*
- * Set the column of a 4x4 matrix referred to by the specified index
- */
-static inline void vmathSoaM4SetCol( VmathSoaMatrix4 *result, int col, const VmathSoaVector4 *vec );
-
-/*
- * Set the row of a 4x4 matrix referred to by the specified index
- */
-static inline void vmathSoaM4SetRow( VmathSoaMatrix4 *result, int row, const VmathSoaVector4 *vec );
-
-/*
- * Get the column of a 4x4 matrix referred to by the specified index
- */
-static inline void vmathSoaM4GetCol( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, int col );
-
-/*
- * Get the row of a 4x4 matrix referred to by the specified index
- */
-static inline void vmathSoaM4GetRow( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, int row );
-
-/*
- * Set the element of a 4x4 matrix referred to by column and row indices
- */
-static inline void vmathSoaM4SetElem( VmathSoaMatrix4 *result, int col, int row, vec_float4 val );
-
-/*
- * Get the element of a 4x4 matrix referred to by column and row indices
- */
-static inline vec_float4 vmathSoaM4GetElem( const VmathSoaMatrix4 *mat, int col, int row );
-
-/*
- * Add two 4x4 matrices
- */
-static inline void vmathSoaM4Add( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1 );
-
-/*
- * Subtract a 4x4 matrix from another 4x4 matrix
- */
-static inline void vmathSoaM4Sub( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1 );
-
-/*
- * Negate all elements of a 4x4 matrix
- */
-static inline void vmathSoaM4Neg( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat );
-
-/*
- * Multiply a 4x4 matrix by a scalar
- */
-static inline void vmathSoaM4ScalarMul( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat, vec_float4 scalar );
-
-/*
- * Multiply a 4x4 matrix by a 4-D vector
- */
-static inline void vmathSoaM4MulV4( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, const VmathSoaVector4 *vec );
-
-/*
- * Multiply a 4x4 matrix by a 3-D vector
- */
-static inline void vmathSoaM4MulV3( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, const VmathSoaVector3 *vec );
-
-/*
- * Multiply a 4x4 matrix by a 3-D point
- */
-static inline void vmathSoaM4MulP3( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, const VmathSoaPoint3 *pnt );
-
-/*
- * Multiply two 4x4 matrices
- */
-static inline void vmathSoaM4Mul( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1 );
-
-/*
- * Multiply a 4x4 matrix by a 3x4 transformation matrix
- */
-static inline void vmathSoaM4MulT3( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat, const VmathSoaTransform3 *tfrm );
-
-/*
- * Construct an identity 4x4 matrix
- */
-static inline void vmathSoaM4MakeIdentity( VmathSoaMatrix4 *result );
-
-/*
- * Construct a 4x4 matrix to rotate around the x axis
- */
-static inline void vmathSoaM4MakeRotationX( VmathSoaMatrix4 *result, vec_float4 radians );
-
-/*
- * Construct a 4x4 matrix to rotate around the y axis
- */
-static inline void vmathSoaM4MakeRotationY( VmathSoaMatrix4 *result, vec_float4 radians );
-
-/*
- * Construct a 4x4 matrix to rotate around the z axis
- */
-static inline void vmathSoaM4MakeRotationZ( VmathSoaMatrix4 *result, vec_float4 radians );
-
-/*
- * Construct a 4x4 matrix to rotate around the x, y, and z axes
- */
-static inline void vmathSoaM4MakeRotationZYX( VmathSoaMatrix4 *result, const VmathSoaVector3 *radiansXYZ );
-
-/*
- * Construct a 4x4 matrix to rotate around a unit-length 3-D vector
- */
-static inline void vmathSoaM4MakeRotationAxis( VmathSoaMatrix4 *result, vec_float4 radians, const VmathSoaVector3 *unitVec );
-
-/*
- * Construct a rotation matrix from a unit-length quaternion
- */
-static inline void vmathSoaM4MakeRotationQ( VmathSoaMatrix4 *result, const VmathSoaQuat *unitQuat );
-
-/*
- * Construct a 4x4 matrix to perform scaling
- */
-static inline void vmathSoaM4MakeScale( VmathSoaMatrix4 *result, const VmathSoaVector3 *scaleVec );
-
-/*
- * Construct a 4x4 matrix to perform translation
- */
-static inline void vmathSoaM4MakeTranslation( VmathSoaMatrix4 *result, const VmathSoaVector3 *translateVec );
-
-/*
- * Construct viewing matrix based on eye position, position looked at, and up direction
- */
-static inline void vmathSoaM4MakeLookAt( VmathSoaMatrix4 *result, const VmathSoaPoint3 *eyePos, const VmathSoaPoint3 *lookAtPos, const VmathSoaVector3 *upVec );
-
-/*
- * Construct a perspective projection matrix
- */
-static inline void vmathSoaM4MakePerspective( VmathSoaMatrix4 *result, vec_float4 fovyRadians, vec_float4 aspect, vec_float4 zNear, vec_float4 zFar );
-
-/*
- * Construct a perspective projection matrix based on frustum
- */
-static inline void vmathSoaM4MakeFrustum( VmathSoaMatrix4 *result, vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar );
-
-/*
- * Construct an orthographic projection matrix
- */
-static inline void vmathSoaM4MakeOrthographic( VmathSoaMatrix4 *result, vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar );
-
-/*
- * Append (post-multiply) a scale transformation to a 4x4 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline void vmathSoaM4AppendScale( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat, const VmathSoaVector3 *scaleVec );
-
-/*
- * Prepend (pre-multiply) a scale transformation to a 4x4 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline void vmathSoaM4PrependScale( VmathSoaMatrix4 *result, const VmathSoaVector3 *scaleVec, const VmathSoaMatrix4 *mat );
-
-/*
- * Multiply two 4x4 matrices per element
- */
-static inline void vmathSoaM4MulPerElem( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1 );
-
-/*
- * Compute the absolute value of a 4x4 matrix per element
- */
-static inline void vmathSoaM4AbsPerElem( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat );
-
-/*
- * Transpose of a 4x4 matrix
- */
-static inline void vmathSoaM4Transpose( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat );
-
-/*
- * Compute the inverse of a 4x4 matrix
- * NOTE: 
- * Result is unpredictable when the determinant of mat is equal to or near 0.
- */
-static inline void vmathSoaM4Inverse( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat );
-
-/*
- * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix
- * NOTE: 
- * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.  The result is unpredictable when the determinant of mat is equal to or near 0.
- */
-static inline void vmathSoaM4AffineInverse( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat );
-
-/*
- * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix
- * NOTE: 
- * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.
- */
-static inline void vmathSoaM4OrthoInverse( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat );
-
-/*
- * Determinant of a 4x4 matrix
- */
-static inline vec_float4 vmathSoaM4Determinant( const VmathSoaMatrix4 *mat );
-
-/*
- * Conditionally select between two 4x4 matrices
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline void vmathSoaM4Select( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1, vec_uint4 select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 4x4 matrix
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaM4Print( const VmathSoaMatrix4 *mat );
-
-/*
- * Print a 4x4 matrix and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaM4Prints( const VmathSoaMatrix4 *mat, const char *name );
-
-#endif
-
-/*
- * Copy a 3x4 transformation matrix
- */
-static inline void vmathSoaT3Copy( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm );
-
-/*
- * Construct a 3x4 transformation matrix containing the specified columns
- */
-static inline void vmathSoaT3MakeFromCols( VmathSoaTransform3 *result, const VmathSoaVector3 *col0, const VmathSoaVector3 *col1, const VmathSoaVector3 *col2, const VmathSoaVector3 *col3 );
-
-/*
- * Construct a 3x4 transformation matrix from a 3x3 matrix and a 3-D vector
- */
-static inline void vmathSoaT3MakeFromM3V3( VmathSoaTransform3 *result, const VmathSoaMatrix3 *tfrm, const VmathSoaVector3 *translateVec );
-
-/*
- * Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector
- */
-static inline void vmathSoaT3MakeFromQV3( VmathSoaTransform3 *result, const VmathSoaQuat *unitQuat, const VmathSoaVector3 *translateVec );
-
-/*
- * Set all elements of a 3x4 transformation matrix to the same scalar value
- */
-static inline void vmathSoaT3MakeFromScalar( VmathSoaTransform3 *result, vec_float4 scalar );
-
-/*
- * Replicate an AoS 3x4 transformation matrix
- */
-static inline void vmathSoaT3MakeFromAos( VmathSoaTransform3 *result, const VmathTransform3 *tfrm );
-
-/*
- * Insert four AoS 3x4 transformation matrices
- */
-static inline void vmathSoaT3MakeFrom4Aos( VmathSoaTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1, const VmathTransform3 *tfrm2, const VmathTransform3 *tfrm3 );
-
-/*
- * Extract four AoS 3x4 transformation matrices
- */
-static inline void vmathSoaT3Get4Aos( const VmathSoaTransform3 *tfrm, VmathTransform3 *result0, VmathTransform3 *result1, VmathTransform3 *result2, VmathTransform3 *result3 );
-
-/*
- * Set the upper-left 3x3 submatrix
- */
-static inline void vmathSoaT3SetUpper3x3( VmathSoaTransform3 *result, const VmathSoaMatrix3 *mat3 );
-
-/*
- * Get the upper-left 3x3 submatrix of a 3x4 transformation matrix
- */
-static inline void vmathSoaT3GetUpper3x3( VmathSoaMatrix3 *result, const VmathSoaTransform3 *tfrm );
-
-/*
- * Set translation component
- */
-static inline void vmathSoaT3SetTranslation( VmathSoaTransform3 *result, const VmathSoaVector3 *translateVec );
-
-/*
- * Get the translation component of a 3x4 transformation matrix
- */
-static inline void vmathSoaT3GetTranslation( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm );
-
-/*
- * Set column 0 of a 3x4 transformation matrix
- */
-static inline void vmathSoaT3SetCol0( VmathSoaTransform3 *result, const VmathSoaVector3 *col0 );
-
-/*
- * Set column 1 of a 3x4 transformation matrix
- */
-static inline void vmathSoaT3SetCol1( VmathSoaTransform3 *result, const VmathSoaVector3 *col1 );
-
-/*
- * Set column 2 of a 3x4 transformation matrix
- */
-static inline void vmathSoaT3SetCol2( VmathSoaTransform3 *result, const VmathSoaVector3 *col2 );
-
-/*
- * Set column 3 of a 3x4 transformation matrix
- */
-static inline void vmathSoaT3SetCol3( VmathSoaTransform3 *result, const VmathSoaVector3 *col3 );
-
-/*
- * Get column 0 of a 3x4 transformation matrix
- */
-static inline void vmathSoaT3GetCol0( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm );
-
-/*
- * Get column 1 of a 3x4 transformation matrix
- */
-static inline void vmathSoaT3GetCol1( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm );
-
-/*
- * Get column 2 of a 3x4 transformation matrix
- */
-static inline void vmathSoaT3GetCol2( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm );
-
-/*
- * Get column 3 of a 3x4 transformation matrix
- */
-static inline void vmathSoaT3GetCol3( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm );
-
-/*
- * Set the column of a 3x4 transformation matrix referred to by the specified index
- */
-static inline void vmathSoaT3SetCol( VmathSoaTransform3 *result, int col, const VmathSoaVector3 *vec );
-
-/*
- * Set the row of a 3x4 transformation matrix referred to by the specified index
- */
-static inline void vmathSoaT3SetRow( VmathSoaTransform3 *result, int row, const VmathSoaVector4 *vec );
-
-/*
- * Get the column of a 3x4 transformation matrix referred to by the specified index
- */
-static inline void vmathSoaT3GetCol( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm, int col );
-
-/*
- * Get the row of a 3x4 transformation matrix referred to by the specified index
- */
-static inline void vmathSoaT3GetRow( VmathSoaVector4 *result, const VmathSoaTransform3 *tfrm, int row );
-
-/*
- * Set the element of a 3x4 transformation matrix referred to by column and row indices
- */
-static inline void vmathSoaT3SetElem( VmathSoaTransform3 *result, int col, int row, vec_float4 val );
-
-/*
- * Get the element of a 3x4 transformation matrix referred to by column and row indices
- */
-static inline vec_float4 vmathSoaT3GetElem( const VmathSoaTransform3 *tfrm, int col, int row );
-
-/*
- * Multiply a 3x4 transformation matrix by a 3-D vector
- */
-static inline void vmathSoaT3MulV3( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm, const VmathSoaVector3 *vec );
-
-/*
- * Multiply a 3x4 transformation matrix by a 3-D point
- */
-static inline void vmathSoaT3MulP3( VmathSoaPoint3 *result, const VmathSoaTransform3 *tfrm, const VmathSoaPoint3 *pnt );
-
-/*
- * Multiply two 3x4 transformation matrices
- */
-static inline void vmathSoaT3Mul( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm0, const VmathSoaTransform3 *tfrm1 );
-
-/*
- * Construct an identity 3x4 transformation matrix
- */
-static inline void vmathSoaT3MakeIdentity( VmathSoaTransform3 *result );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the x axis
- */
-static inline void vmathSoaT3MakeRotationX( VmathSoaTransform3 *result, vec_float4 radians );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the y axis
- */
-static inline void vmathSoaT3MakeRotationY( VmathSoaTransform3 *result, vec_float4 radians );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the z axis
- */
-static inline void vmathSoaT3MakeRotationZ( VmathSoaTransform3 *result, vec_float4 radians );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the x, y, and z axes
- */
-static inline void vmathSoaT3MakeRotationZYX( VmathSoaTransform3 *result, const VmathSoaVector3 *radiansXYZ );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector
- */
-static inline void vmathSoaT3MakeRotationAxis( VmathSoaTransform3 *result, vec_float4 radians, const VmathSoaVector3 *unitVec );
-
-/*
- * Construct a rotation matrix from a unit-length quaternion
- */
-static inline void vmathSoaT3MakeRotationQ( VmathSoaTransform3 *result, const VmathSoaQuat *unitQuat );
-
-/*
- * Construct a 3x4 transformation matrix to perform scaling
- */
-static inline void vmathSoaT3MakeScale( VmathSoaTransform3 *result, const VmathSoaVector3 *scaleVec );
-
-/*
- * Construct a 3x4 transformation matrix to perform translation
- */
-static inline void vmathSoaT3MakeTranslation( VmathSoaTransform3 *result, const VmathSoaVector3 *translateVec );
-
-/*
- * Append (post-multiply) a scale transformation to a 3x4 transformation matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline void vmathSoaT3AppendScale( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm, const VmathSoaVector3 *scaleVec );
-
-/*
- * Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline void vmathSoaT3PrependScale( VmathSoaTransform3 *result, const VmathSoaVector3 *scaleVec, const VmathSoaTransform3 *tfrm );
-
-/*
- * Multiply two 3x4 transformation matrices per element
- */
-static inline void vmathSoaT3MulPerElem( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm0, const VmathSoaTransform3 *tfrm1 );
-
-/*
- * Compute the absolute value of a 3x4 transformation matrix per element
- */
-static inline void vmathSoaT3AbsPerElem( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm );
-
-/*
- * Inverse of a 3x4 transformation matrix
- * NOTE: 
- * Result is unpredictable when the determinant of the left 3x3 submatrix is equal to or near 0.
- */
-static inline void vmathSoaT3Inverse( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm );
-
-/*
- * Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix
- * NOTE: 
- * This can be used to achieve better performance than a general inverse when the specified 3x4 transformation matrix meets the given restrictions.
- */
-static inline void vmathSoaT3OrthoInverse( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm );
-
-/*
- * Conditionally select between two 3x4 transformation matrices
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline void vmathSoaT3Select( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm0, const VmathSoaTransform3 *tfrm1, vec_uint4 select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3x4 transformation matrix
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaT3Print( const VmathSoaTransform3 *tfrm );
-
-/*
- * Print a 3x4 transformation matrix and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaT3Prints( const VmathSoaTransform3 *tfrm, const char *name );
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#include "vec_soa.h"
-#include "quat_soa.h"
-#include "mat_soa.h"
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_SOA_C_SPU_H
+#define _VECTORMATH_SOA_C_SPU_H
+
+#include <math.h>
+#include <spu_intrinsics.h>
+#include "vectormath_aos.h"
+
+#ifdef _VECTORMATH_DEBUG
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+#ifndef _VECTORMATH_SOA_C_TYPES_H
+#define _VECTORMATH_SOA_C_TYPES_H
+
+/* A set of four 3-D vectors in structure-of-arrays format:
+ * x, y and z are each one vec_float4 holding that component for the four vectors. */
+typedef struct _VmathSoaVector3
+{
+    vec_float4 x;
+    vec_float4 y;
+    vec_float4 z;
+} VmathSoaVector3;
+
+/* A set of four 4-D vectors in structure-of-arrays format:
+ * x, y, z and w are each one vec_float4 holding that component for the four vectors. */
+typedef struct _VmathSoaVector4
+{
+    vec_float4 x;
+    vec_float4 y;
+    vec_float4 z;
+    vec_float4 w;
+} VmathSoaVector4;
+
+/* A set of four 3-D points in structure-of-arrays format.
+ * Same x/y/z member layout as VmathSoaVector3, but declared as a distinct type. */
+typedef struct _VmathSoaPoint3
+{
+    vec_float4 x;
+    vec_float4 y;
+    vec_float4 z;
+} VmathSoaPoint3;
+
+/* A set of four quaternions in structure-of-arrays format.
+ * Same x/y/z/w member layout as VmathSoaVector4, but declared as a distinct type. */
+typedef struct _VmathSoaQuat
+{
+    vec_float4 x;
+    vec_float4 y;
+    vec_float4 z;
+    vec_float4 w;
+} VmathSoaQuat;
+
+/* A set of four 3x3 matrices in structure-of-arrays format,
+ * stored as three VmathSoaVector3 columns (col0, col1, col2). */
+typedef struct _VmathSoaMatrix3
+{
+    VmathSoaVector3 col0;
+    VmathSoaVector3 col1;
+    VmathSoaVector3 col2;
+} VmathSoaMatrix3;
+
+/* A set of four 4x4 matrices in structure-of-arrays format,
+ * stored as four VmathSoaVector4 columns (col0 through col3). */
+typedef struct _VmathSoaMatrix4
+{
+    VmathSoaVector4 col0;
+    VmathSoaVector4 col1;
+    VmathSoaVector4 col2;
+    VmathSoaVector4 col3;
+} VmathSoaMatrix4;
+
+/* A set of four 3x4 transformation matrices in structure-of-arrays format,
+ * stored as four VmathSoaVector3 columns; col3 is presumably the translation - TODO confirm. */
+typedef struct _VmathSoaTransform3
+{
+    VmathSoaVector3 col0;
+    VmathSoaVector3 col1;
+    VmathSoaVector3 col2;
+    VmathSoaVector3 col3;
+} VmathSoaTransform3;
+
+#endif
+
+/*
+ * Copy a 3-D vector
+ */
+static inline void vmathSoaV3Copy( VmathSoaVector3 *result, const VmathSoaVector3 *vec );
+
+/*
+ * Construct a 3-D vector from x, y, and z elements
+ */
+static inline void vmathSoaV3MakeFromElems( VmathSoaVector3 *result, vec_float4 x, vec_float4 y, vec_float4 z );
+
+/*
+ * Copy elements from a 3-D point into a 3-D vector
+ */
+static inline void vmathSoaV3MakeFromP3( VmathSoaVector3 *result, const VmathSoaPoint3 *pnt );
+
+/*
+ * Set all elements of a 3-D vector to the same scalar value
+ */
+static inline void vmathSoaV3MakeFromScalar( VmathSoaVector3 *result, vec_float4 scalar );
+
+/*
+ * Replicate an AoS 3-D vector
+ */
+static inline void vmathSoaV3MakeFromAos( VmathSoaVector3 *result, const VmathVector3 *vec );
+
+/*
+ * Insert four AoS 3-D vectors
+ */
+static inline void vmathSoaV3MakeFrom4Aos( VmathSoaVector3 *result, const VmathVector3 *vec0, const VmathVector3 *vec1, const VmathVector3 *vec2, const VmathVector3 *vec3 );
+
+/*
+ * Extract four AoS 3-D vectors
+ */
+static inline void vmathSoaV3Get4Aos( const VmathSoaVector3 *vec, VmathVector3 *result0, VmathVector3 *result1, VmathVector3 *result2, VmathVector3 *result3 );
+
+/*
+ * Set the x element of a 3-D vector
+ */
+static inline void vmathSoaV3SetX( VmathSoaVector3 *result, vec_float4 x );
+
+/*
+ * Set the y element of a 3-D vector
+ */
+static inline void vmathSoaV3SetY( VmathSoaVector3 *result, vec_float4 y );
+
+/*
+ * Set the z element of a 3-D vector
+ */
+static inline void vmathSoaV3SetZ( VmathSoaVector3 *result, vec_float4 z );
+
+/*
+ * Get the x element of a 3-D vector
+ */
+static inline vec_float4 vmathSoaV3GetX( const VmathSoaVector3 *vec );
+
+/*
+ * Get the y element of a 3-D vector
+ */
+static inline vec_float4 vmathSoaV3GetY( const VmathSoaVector3 *vec );
+
+/*
+ * Get the z element of a 3-D vector
+ */
+static inline vec_float4 vmathSoaV3GetZ( const VmathSoaVector3 *vec );
+
+/*
+ * Set an x, y, or z element of a 3-D vector by index
+ */
+static inline void vmathSoaV3SetElem( VmathSoaVector3 *result, int idx, vec_float4 value );
+
+/*
+ * Get an x, y, or z element of a 3-D vector by index
+ */
+static inline vec_float4 vmathSoaV3GetElem( const VmathSoaVector3 *vec, int idx );
+
+/*
+ * Add two 3-D vectors
+ */
+static inline void vmathSoaV3Add( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
+
+/*
+ * Subtract a 3-D vector from another 3-D vector
+ */
+static inline void vmathSoaV3Sub( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
+
+/*
+ * Add a 3-D vector to a 3-D point
+ */
+static inline void vmathSoaV3AddP3( VmathSoaPoint3 *result, const VmathSoaVector3 *vec, const VmathSoaPoint3 *pnt );
+
+/*
+ * Multiply a 3-D vector by a scalar
+ */
+static inline void vmathSoaV3ScalarMul( VmathSoaVector3 *result, const VmathSoaVector3 *vec, vec_float4 scalar );
+
+/*
+ * Divide a 3-D vector by a scalar
+ */
+static inline void vmathSoaV3ScalarDiv( VmathSoaVector3 *result, const VmathSoaVector3 *vec, vec_float4 scalar );
+
+/*
+ * Negate all elements of a 3-D vector
+ */
+static inline void vmathSoaV3Neg( VmathSoaVector3 *result, const VmathSoaVector3 *vec );
+
+/*
+ * Construct x axis
+ */
+static inline void vmathSoaV3MakeXAxis( VmathSoaVector3 *result );
+
+/*
+ * Construct y axis
+ */
+static inline void vmathSoaV3MakeYAxis( VmathSoaVector3 *result );
+
+/*
+ * Construct z axis
+ */
+static inline void vmathSoaV3MakeZAxis( VmathSoaVector3 *result );
+
+/*
+ * Multiply two 3-D vectors per element
+ */
+static inline void vmathSoaV3MulPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
+
+/*
+ * Divide two 3-D vectors per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function divf4.
+ */
+static inline void vmathSoaV3DivPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
+
+/*
+ * Compute the reciprocal of a 3-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function recipf4.
+ */
+static inline void vmathSoaV3RecipPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec );
+
+/*
+ * Compute the square root of a 3-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function sqrtf4.
+ */
+static inline void vmathSoaV3SqrtPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec );
+
+/*
+ * Compute the reciprocal square root of a 3-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function rsqrtf4.
+ */
+static inline void vmathSoaV3RsqrtPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec );
+
+/*
+ * Compute the absolute value of a 3-D vector per element
+ */
+static inline void vmathSoaV3AbsPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec );
+
+/*
+ * Copy sign from one 3-D vector to another, per element
+ */
+static inline void vmathSoaV3CopySignPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
+
+/*
+ * Maximum of two 3-D vectors per element
+ */
+static inline void vmathSoaV3MaxPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
+
+/*
+ * Minimum of two 3-D vectors per element
+ */
+static inline void vmathSoaV3MinPerElem( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
+
+/*
+ * Maximum element of a 3-D vector
+ */
+static inline vec_float4 vmathSoaV3MaxElem( const VmathSoaVector3 *vec );
+
+/*
+ * Minimum element of a 3-D vector
+ */
+static inline vec_float4 vmathSoaV3MinElem( const VmathSoaVector3 *vec );
+
+/*
+ * Compute the sum of all elements of a 3-D vector
+ */
+static inline vec_float4 vmathSoaV3Sum( const VmathSoaVector3 *vec );
+
+/*
+ * Compute the dot product of two 3-D vectors
+ */
+static inline vec_float4 vmathSoaV3Dot( const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
+
+/*
+ * Compute the square of the length of a 3-D vector
+ */
+static inline vec_float4 vmathSoaV3LengthSqr( const VmathSoaVector3 *vec );
+
+/*
+ * Compute the length of a 3-D vector
+ */
+static inline vec_float4 vmathSoaV3Length( const VmathSoaVector3 *vec );
+
+/*
+ * Normalize a 3-D vector
+ * NOTE: 
+ * The result is unpredictable when all elements of vec are at or near zero.
+ */
+static inline void vmathSoaV3Normalize( VmathSoaVector3 *result, const VmathSoaVector3 *vec );
+
+/*
+ * Compute cross product of two 3-D vectors
+ */
+static inline void vmathSoaV3Cross( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
+
+/*
+ * Outer product of two 3-D vectors
+ */
+static inline void vmathSoaV3Outer( VmathSoaMatrix3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
+
+/*
+ * Pre-multiply a row vector by a 3x3 matrix
+ */
+static inline void vmathSoaV3RowMul( VmathSoaVector3 *result, const VmathSoaVector3 *vec, const VmathSoaMatrix3 *mat );
+
+/*
+ * Cross-product matrix of a 3-D vector
+ */
+static inline void vmathSoaV3CrossMatrix( VmathSoaMatrix3 *result, const VmathSoaVector3 *vec );
+
+/*
+ * Create cross-product matrix and multiply
+ * NOTE: 
+ * Faster than separately creating a cross-product matrix and multiplying.
+ */
+static inline void vmathSoaV3CrossMatrixMul( VmathSoaMatrix3 *result, const VmathSoaVector3 *vec, const VmathSoaMatrix3 *mat );
+
+/*
+ * Linear interpolation between two 3-D vectors
+ * NOTE: 
+ * Does not clamp t between 0 and 1.
+ */
+static inline void vmathSoaV3Lerp( VmathSoaVector3 *result, vec_float4 t, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1 );
+
+/*
+ * Spherical linear interpolation between two 3-D vectors
+ * NOTE: 
+ * The result is unpredictable if the vectors point in opposite directions.
+ * Does not clamp t between 0 and 1.
+ */
+static inline void vmathSoaV3Slerp( VmathSoaVector3 *result, vec_float4 t, const VmathSoaVector3 *unitVec0, const VmathSoaVector3 *unitVec1 );
+
+/*
+ * Conditionally select between two 3-D vectors
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline void vmathSoaV3Select( VmathSoaVector3 *result, const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1, vec_uint4 select1 );
+
+/*
+ * Load four three-float 3-D vectors, stored in three quadwords
+ */
+static inline void vmathSoaV3LoadXYZArray( VmathSoaVector3 *vec, const vec_float4 *threeQuads );
+
+/*
+ * Store four slots of an SoA 3-D vector in three quadwords
+ */
+static inline void vmathSoaV3StoreXYZArray( const VmathSoaVector3 *vec, vec_float4 *threeQuads );
+
+/*
+ * Store eight slots of two SoA 3-D vectors as half-floats
+ */
+static inline void vmathSoaV3StoreHalfFloats( const VmathSoaVector3 *vec0, const VmathSoaVector3 *vec1, vec_ushort8 *threeQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3-D vector
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaV3Print( const VmathSoaVector3 *vec );
+
+/*
+ * Print a 3-D vector and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaV3Prints( const VmathSoaVector3 *vec, const char *name );
+
+#endif
+
+/*
+ * Copy a 4-D vector
+ */
+static inline void vmathSoaV4Copy( VmathSoaVector4 *result, const VmathSoaVector4 *vec );
+
+/*
+ * Construct a 4-D vector from x, y, z, and w elements
+ */
+static inline void vmathSoaV4MakeFromElems( VmathSoaVector4 *result, vec_float4 x, vec_float4 y, vec_float4 z, vec_float4 w );
+
+/*
+ * Construct a 4-D vector from a 3-D vector and a scalar
+ */
+static inline void vmathSoaV4MakeFromV3Scalar( VmathSoaVector4 *result, const VmathSoaVector3 *xyz, vec_float4 w );
+
+/*
+ * Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0
+ */
+static inline void vmathSoaV4MakeFromV3( VmathSoaVector4 *result, const VmathSoaVector3 *vec );
+
+/*
+ * Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1
+ */
+static inline void vmathSoaV4MakeFromP3( VmathSoaVector4 *result, const VmathSoaPoint3 *pnt );
+
+/*
+ * Copy elements from a quaternion into a 4-D vector
+ */
+static inline void vmathSoaV4MakeFromQ( VmathSoaVector4 *result, const VmathSoaQuat *quat );
+
+/*
+ * Set all elements of a 4-D vector to the same scalar value
+ */
+static inline void vmathSoaV4MakeFromScalar( VmathSoaVector4 *result, vec_float4 scalar );
+
+/*
+ * Replicate an AoS 4-D vector
+ */
+static inline void vmathSoaV4MakeFromAos( VmathSoaVector4 *result, const VmathVector4 *vec );
+
+/*
+ * Insert four AoS 4-D vectors
+ */
+static inline void vmathSoaV4MakeFrom4Aos( VmathSoaVector4 *result, const VmathVector4 *vec0, const VmathVector4 *vec1, const VmathVector4 *vec2, const VmathVector4 *vec3 );
+
+/*
+ * Extract four AoS 4-D vectors from an SoA 4-D vector
+ */
+static inline void vmathSoaV4Get4Aos( const VmathSoaVector4 *vec, VmathVector4 *result0, VmathVector4 *result1, VmathVector4 *result2, VmathVector4 *result3 );
+
+/*
+ * Set the x, y, and z elements of a 4-D vector
+ * NOTE: 
+ * This function does not change the w element.
+ */
+static inline void vmathSoaV4SetXYZ( VmathSoaVector4 *result, const VmathSoaVector3 *vec );
+
+/*
+ * Get the x, y, and z elements of a 4-D vector
+ */
+static inline void vmathSoaV4GetXYZ( VmathSoaVector3 *result, const VmathSoaVector4 *vec );
+
+/*
+ * Set the x element of a 4-D vector
+ */
+static inline void vmathSoaV4SetX( VmathSoaVector4 *result, vec_float4 x );
+
+/*
+ * Set the y element of a 4-D vector
+ */
+static inline void vmathSoaV4SetY( VmathSoaVector4 *result, vec_float4 y );
+
+/*
+ * Set the z element of a 4-D vector
+ */
+static inline void vmathSoaV4SetZ( VmathSoaVector4 *result, vec_float4 z );
+
+/*
+ * Set the w element of a 4-D vector
+ */
+static inline void vmathSoaV4SetW( VmathSoaVector4 *result, vec_float4 w );
+
+/*
+ * Get the x element of a 4-D vector
+ */
+static inline vec_float4 vmathSoaV4GetX( const VmathSoaVector4 *vec );
+
+/*
+ * Get the y element of a 4-D vector
+ */
+static inline vec_float4 vmathSoaV4GetY( const VmathSoaVector4 *vec );
+
+/*
+ * Get the z element of a 4-D vector
+ */
+static inline vec_float4 vmathSoaV4GetZ( const VmathSoaVector4 *vec );
+
+/*
+ * Get the w element of a 4-D vector
+ */
+static inline vec_float4 vmathSoaV4GetW( const VmathSoaVector4 *vec );
+
+/*
+ * Set an x, y, z, or w element of a 4-D vector by index
+ */
+static inline void vmathSoaV4SetElem( VmathSoaVector4 *result, int idx, vec_float4 value );
+
+/*
+ * Get an x, y, z, or w element of a 4-D vector by index
+ */
+static inline vec_float4 vmathSoaV4GetElem( const VmathSoaVector4 *vec, int idx );
+
+/*
+ * Add two 4-D vectors
+ */
+static inline void vmathSoaV4Add( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
+
+/*
+ * Subtract a 4-D vector from another 4-D vector
+ */
+static inline void vmathSoaV4Sub( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
+
+/*
+ * Multiply a 4-D vector by a scalar
+ */
+static inline void vmathSoaV4ScalarMul( VmathSoaVector4 *result, const VmathSoaVector4 *vec, vec_float4 scalar );
+
+/*
+ * Divide a 4-D vector by a scalar
+ */
+static inline void vmathSoaV4ScalarDiv( VmathSoaVector4 *result, const VmathSoaVector4 *vec, vec_float4 scalar );
+
+/*
+ * Negate all elements of a 4-D vector
+ */
+static inline void vmathSoaV4Neg( VmathSoaVector4 *result, const VmathSoaVector4 *vec );
+
+/*
+ * Construct x axis
+ */
+static inline void vmathSoaV4MakeXAxis( VmathSoaVector4 *result );
+
+/*
+ * Construct y axis
+ */
+static inline void vmathSoaV4MakeYAxis( VmathSoaVector4 *result );
+
+/*
+ * Construct z axis
+ */
+static inline void vmathSoaV4MakeZAxis( VmathSoaVector4 *result );
+
+/*
+ * Construct w axis
+ */
+static inline void vmathSoaV4MakeWAxis( VmathSoaVector4 *result );
+
+/*
+ * Multiply two 4-D vectors per element
+ */
+static inline void vmathSoaV4MulPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
+
+/*
+ * Divide two 4-D vectors per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function divf4.
+ */
+static inline void vmathSoaV4DivPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
+
+/*
+ * Compute the reciprocal of a 4-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function recipf4.
+ */
+static inline void vmathSoaV4RecipPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec );
+
+/*
+ * Compute the square root of a 4-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function sqrtf4.
+ */
+static inline void vmathSoaV4SqrtPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec );
+
+/*
+ * Compute the reciprocal square root of a 4-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function rsqrtf4.
+ */
+static inline void vmathSoaV4RsqrtPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec );
+
+/*
+ * Compute the absolute value of a 4-D vector per element
+ */
+static inline void vmathSoaV4AbsPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec );
+
+/*
+ * Copy sign from one 4-D vector to another, per element
+ */
+static inline void vmathSoaV4CopySignPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
+
+/*
+ * Maximum of two 4-D vectors per element
+ */
+static inline void vmathSoaV4MaxPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
+
+/*
+ * Minimum of two 4-D vectors per element
+ */
+static inline void vmathSoaV4MinPerElem( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
+
+/*
+ * Maximum element of a 4-D vector
+ */
+static inline vec_float4 vmathSoaV4MaxElem( const VmathSoaVector4 *vec );
+
+/*
+ * Minimum element of a 4-D vector
+ */
+static inline vec_float4 vmathSoaV4MinElem( const VmathSoaVector4 *vec );
+
+/*
+ * Compute the sum of all elements of a 4-D vector
+ */
+static inline vec_float4 vmathSoaV4Sum( const VmathSoaVector4 *vec );
+
+/*
+ * Compute the dot product of two 4-D vectors
+ */
+static inline vec_float4 vmathSoaV4Dot( const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
+
+/*
+ * Compute the square of the length of a 4-D vector
+ */
+static inline vec_float4 vmathSoaV4LengthSqr( const VmathSoaVector4 *vec );
+
+/*
+ * Compute the length of a 4-D vector
+ */
+static inline vec_float4 vmathSoaV4Length( const VmathSoaVector4 *vec );
+
+/*
+ * Normalize a 4-D vector
+ * NOTE: 
+ * The result is unpredictable when all elements of vec are at or near zero.
+ */
+static inline void vmathSoaV4Normalize( VmathSoaVector4 *result, const VmathSoaVector4 *vec );
+
+/*
+ * Outer product of two 4-D vectors
+ */
+static inline void vmathSoaV4Outer( VmathSoaMatrix4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
+
+/*
+ * Linear interpolation between two 4-D vectors
+ * NOTE: 
+ * Does not clamp t between 0 and 1.
+ */
+static inline void vmathSoaV4Lerp( VmathSoaVector4 *result, vec_float4 t, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1 );
+
+/*
+ * Spherical linear interpolation between two 4-D vectors
+ * NOTE: 
+ * The result is unpredictable if the vectors point in opposite directions.
+ * Does not clamp t between 0 and 1.
+ */
+static inline void vmathSoaV4Slerp( VmathSoaVector4 *result, vec_float4 t, const VmathSoaVector4 *unitVec0, const VmathSoaVector4 *unitVec1 );
+
+/*
+ * Conditionally select between two 4-D vectors
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline void vmathSoaV4Select( VmathSoaVector4 *result, const VmathSoaVector4 *vec0, const VmathSoaVector4 *vec1, vec_uint4 select1 );
+
+/*
+ * Store four slots of an SoA 4-D vector as half-floats
+ */
+static inline void vmathSoaV4StoreHalfFloats( const VmathSoaVector4 *vec, vec_ushort8 *twoQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 4-D vector
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaV4Print( const VmathSoaVector4 *vec );
+
+/*
+ * Print a 4-D vector and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaV4Prints( const VmathSoaVector4 *vec, const char *name );
+
+#endif
+
+/*
+ * Copy a 3-D point
+ */
+static inline void vmathSoaP3Copy( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt );
+
+/*
+ * Construct a 3-D point from x, y, and z elements
+ */
+static inline void vmathSoaP3MakeFromElems( VmathSoaPoint3 *result, vec_float4 x, vec_float4 y, vec_float4 z );
+
+/*
+ * Copy elements from a 3-D vector into a 3-D point
+ */
+static inline void vmathSoaP3MakeFromV3( VmathSoaPoint3 *result, const VmathSoaVector3 *vec );
+
+/*
+ * Set all elements of a 3-D point to the same scalar value
+ */
+static inline void vmathSoaP3MakeFromScalar( VmathSoaPoint3 *result, vec_float4 scalar );
+
+/*
+ * Replicate an AoS 3-D point into an SoA 3-D point
+ */
+static inline void vmathSoaP3MakeFromAos( VmathSoaPoint3 *result, const VmathPoint3 *pnt );
+
+/*
+ * Insert four AoS 3-D points into an SoA 3-D point
+ */
+static inline void vmathSoaP3MakeFrom4Aos( VmathSoaPoint3 *result, const VmathPoint3 *pnt0, const VmathPoint3 *pnt1, const VmathPoint3 *pnt2, const VmathPoint3 *pnt3 );
+
+/*
+ * Extract four AoS 3-D points from an SoA 3-D point
+ */
+static inline void vmathSoaP3Get4Aos( const VmathSoaPoint3 *pnt, VmathPoint3 *result0, VmathPoint3 *result1, VmathPoint3 *result2, VmathPoint3 *result3 );
+
+/*
+ * Set the x element of a 3-D point
+ */
+static inline void vmathSoaP3SetX( VmathSoaPoint3 *result, vec_float4 x );
+
+/*
+ * Set the y element of a 3-D point
+ */
+static inline void vmathSoaP3SetY( VmathSoaPoint3 *result, vec_float4 y );
+
+/*
+ * Set the z element of a 3-D point
+ */
+static inline void vmathSoaP3SetZ( VmathSoaPoint3 *result, vec_float4 z );
+
+/*
+ * Get the x element of a 3-D point
+ */
+static inline vec_float4 vmathSoaP3GetX( const VmathSoaPoint3 *pnt );
+
+/*
+ * Get the y element of a 3-D point
+ */
+static inline vec_float4 vmathSoaP3GetY( const VmathSoaPoint3 *pnt );
+
+/*
+ * Get the z element of a 3-D point
+ */
+static inline vec_float4 vmathSoaP3GetZ( const VmathSoaPoint3 *pnt );
+
+/*
+ * Set an x, y, or z element of a 3-D point by index
+ */
+static inline void vmathSoaP3SetElem( VmathSoaPoint3 *result, int idx, vec_float4 value );
+
+/*
+ * Get an x, y, or z element of a 3-D point by index
+ */
+static inline vec_float4 vmathSoaP3GetElem( const VmathSoaPoint3 *pnt, int idx );
+
+/*
+ * Subtract a 3-D point from another 3-D point
+ */
+static inline void vmathSoaP3Sub( VmathSoaVector3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 );
+
+/*
+ * Add a 3-D vector to a 3-D point
+ */
+static inline void vmathSoaP3AddV3( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, const VmathSoaVector3 *vec );
+
+/*
+ * Subtract a 3-D vector from a 3-D point
+ */
+static inline void vmathSoaP3SubV3( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, const VmathSoaVector3 *vec );
+
+/*
+ * Multiply two 3-D points per element
+ */
+static inline void vmathSoaP3MulPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 );
+
+/*
+ * Divide two 3-D points per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function divf4.
+ */
+static inline void vmathSoaP3DivPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 );
+
+/*
+ * Compute the reciprocal of a 3-D point per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function recipf4.
+ */
+static inline void vmathSoaP3RecipPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt );
+
+/*
+ * Compute the square root of a 3-D point per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function sqrtf4.
+ */
+static inline void vmathSoaP3SqrtPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt );
+
+/*
+ * Compute the reciprocal square root of a 3-D point per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function rsqrtf4.
+ */
+static inline void vmathSoaP3RsqrtPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt );
+
+/*
+ * Compute the absolute value of a 3-D point per element
+ */
+static inline void vmathSoaP3AbsPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt );
+
+/*
+ * Copy sign from one 3-D point to another, per element
+ */
+static inline void vmathSoaP3CopySignPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 );
+
+/*
+ * Maximum of two 3-D points per element
+ */
+static inline void vmathSoaP3MaxPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 );
+
+/*
+ * Minimum of two 3-D points per element
+ */
+static inline void vmathSoaP3MinPerElem( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 );
+
+/*
+ * Maximum element of a 3-D point
+ */
+static inline vec_float4 vmathSoaP3MaxElem( const VmathSoaPoint3 *pnt );
+
+/*
+ * Minimum element of a 3-D point
+ */
+static inline vec_float4 vmathSoaP3MinElem( const VmathSoaPoint3 *pnt );
+
+/*
+ * Compute the sum of all elements of a 3-D point
+ */
+static inline vec_float4 vmathSoaP3Sum( const VmathSoaPoint3 *pnt );
+
+/*
+ * Apply uniform scale to a 3-D point
+ */
+static inline void vmathSoaP3Scale( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, vec_float4 scaleVal );
+
+/*
+ * Apply non-uniform scale to a 3-D point
+ */
+static inline void vmathSoaP3NonUniformScale( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt, const VmathSoaVector3 *scaleVec );
+
+/*
+ * Scalar projection of a 3-D point on a unit-length 3-D vector
+ */
+static inline vec_float4 vmathSoaP3Projection( const VmathSoaPoint3 *pnt, const VmathSoaVector3 *unitVec );
+
+/*
+ * Compute the square of the distance of a 3-D point from the coordinate-system origin
+ */
+static inline vec_float4 vmathSoaP3DistSqrFromOrigin( const VmathSoaPoint3 *pnt );
+
+/*
+ * Compute the distance of a 3-D point from the coordinate-system origin
+ */
+static inline vec_float4 vmathSoaP3DistFromOrigin( const VmathSoaPoint3 *pnt );
+
+/*
+ * Compute the square of the distance between two 3-D points
+ */
+static inline vec_float4 vmathSoaP3DistSqr( const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 );
+
+/*
+ * Compute the distance between two 3-D points
+ */
+static inline vec_float4 vmathSoaP3Dist( const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 );
+
+/*
+ * Linear interpolation between two 3-D points
+ * NOTE: 
+ * Does not clamp t between 0 and 1.
+ */
+static inline void vmathSoaP3Lerp( VmathSoaPoint3 *result, vec_float4 t, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1 );
+
+/*
+ * Conditionally select between two 3-D points
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline void vmathSoaP3Select( VmathSoaPoint3 *result, const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1, vec_uint4 select1 );
+
+/*
+ * Load four three-float 3-D points, stored in three quadwords
+ */
+static inline void vmathSoaP3LoadXYZArray( VmathSoaPoint3 *pnt, const vec_float4 *threeQuads );
+
+/*
+ * Store four slots of an SoA 3-D point in three quadwords
+ */
+static inline void vmathSoaP3StoreXYZArray( const VmathSoaPoint3 *pnt, vec_float4 *threeQuads );
+
+/*
+ * Store eight slots of two SoA 3-D points as half-floats
+ */
+static inline void vmathSoaP3StoreHalfFloats( const VmathSoaPoint3 *pnt0, const VmathSoaPoint3 *pnt1, vec_ushort8 *threeQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3-D point
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaP3Print( const VmathSoaPoint3 *pnt );
+
+/*
+ * Print a 3-D point and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaP3Prints( const VmathSoaPoint3 *pnt, const char *name );
+
+#endif
+
+/*
+ * Copy a quaternion
+ */
+static inline void vmathSoaQCopy( VmathSoaQuat *result, const VmathSoaQuat *quat );
+
+/*
+ * Construct a quaternion from x, y, z, and w elements
+ */
+static inline void vmathSoaQMakeFromElems( VmathSoaQuat *result, vec_float4 x, vec_float4 y, vec_float4 z, vec_float4 w );
+
+/*
+ * Construct a quaternion from a 3-D vector and a scalar
+ */
+static inline void vmathSoaQMakeFromV3Scalar( VmathSoaQuat *result, const VmathSoaVector3 *xyz, vec_float4 w );
+
+/*
+ * Copy elements from a 4-D vector into a quaternion
+ */
+static inline void vmathSoaQMakeFromV4( VmathSoaQuat *result, const VmathSoaVector4 *vec );
+
+/*
+ * Convert a rotation matrix to a unit-length quaternion
+ */
+static inline void vmathSoaQMakeFromM3( VmathSoaQuat *result, const VmathSoaMatrix3 *rotMat );
+
+/*
+ * Set all elements of a quaternion to the same scalar value
+ */
+static inline void vmathSoaQMakeFromScalar( VmathSoaQuat *result, vec_float4 scalar );
+
+/*
+ * Replicate an AoS quaternion into an SoA quaternion
+ */
+static inline void vmathSoaQMakeFromAos( VmathSoaQuat *result, const VmathQuat *quat );
+
+/*
+ * Insert four AoS quaternions into an SoA quaternion
+ */
+static inline void vmathSoaQMakeFrom4Aos( VmathSoaQuat *result, const VmathQuat *quat0, const VmathQuat *quat1, const VmathQuat *quat2, const VmathQuat *quat3 );
+
+/*
+ * Extract four AoS quaternions from an SoA quaternion
+ */
+static inline void vmathSoaQGet4Aos( const VmathSoaQuat *quat, VmathQuat *result0, VmathQuat *result1, VmathQuat *result2, VmathQuat *result3 );
+
+/*
+ * Set the x, y, and z elements of a quaternion
+ * NOTE: 
+ * This function does not change the w element.
+ */
+static inline void vmathSoaQSetXYZ( VmathSoaQuat *result, const VmathSoaVector3 *vec );
+
+/*
+ * Get the x, y, and z elements of a quaternion
+ */
+static inline void vmathSoaQGetXYZ( VmathSoaVector3 *result, const VmathSoaQuat *quat );
+
+/*
+ * Set the x element of a quaternion
+ */
+static inline void vmathSoaQSetX( VmathSoaQuat *result, vec_float4 x );
+
+/*
+ * Set the y element of a quaternion
+ */
+static inline void vmathSoaQSetY( VmathSoaQuat *result, vec_float4 y );
+
+/*
+ * Set the z element of a quaternion
+ */
+static inline void vmathSoaQSetZ( VmathSoaQuat *result, vec_float4 z );
+
+/*
+ * Set the w element of a quaternion
+ */
+static inline void vmathSoaQSetW( VmathSoaQuat *result, vec_float4 w );
+
+/*
+ * Get the x element of a quaternion
+ */
+static inline vec_float4 vmathSoaQGetX( const VmathSoaQuat *quat );
+
+/*
+ * Get the y element of a quaternion
+ */
+static inline vec_float4 vmathSoaQGetY( const VmathSoaQuat *quat );
+
+/*
+ * Get the z element of a quaternion
+ */
+static inline vec_float4 vmathSoaQGetZ( const VmathSoaQuat *quat );
+
+/*
+ * Get the w element of a quaternion
+ */
+static inline vec_float4 vmathSoaQGetW( const VmathSoaQuat *quat );
+
+/*
+ * Set an x, y, z, or w element of a quaternion by index
+ */
+static inline void vmathSoaQSetElem( VmathSoaQuat *result, int idx, vec_float4 value );
+
+/*
+ * Get an x, y, z, or w element of a quaternion by index
+ */
+static inline vec_float4 vmathSoaQGetElem( const VmathSoaQuat *quat, int idx );
+
+/*
+ * Add two quaternions
+ */
+static inline void vmathSoaQAdd( VmathSoaQuat *result, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 );
+
+/*
+ * Subtract a quaternion from another quaternion
+ */
+static inline void vmathSoaQSub( VmathSoaQuat *result, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 );
+
+/*
+ * Multiply two quaternions
+ */
+static inline void vmathSoaQMul( VmathSoaQuat *result, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 );
+
+/*
+ * Multiply a quaternion by a scalar
+ */
+static inline void vmathSoaQScalarMul( VmathSoaQuat *result, const VmathSoaQuat *quat, vec_float4 scalar );
+
+/*
+ * Divide a quaternion by a scalar
+ */
+static inline void vmathSoaQScalarDiv( VmathSoaQuat *result, const VmathSoaQuat *quat, vec_float4 scalar );
+
+/*
+ * Negate all elements of a quaternion
+ */
+static inline void vmathSoaQNeg( VmathSoaQuat *result, const VmathSoaQuat *quat );
+
+/*
+ * Construct an identity quaternion
+ */
+static inline void vmathSoaQMakeIdentity( VmathSoaQuat *result );
+
+/*
+ * Construct a quaternion to rotate between two unit-length 3-D vectors
+ * NOTE: 
+ * The result is unpredictable if unitVec0 and unitVec1 point in opposite directions.
+ */
+static inline void vmathSoaQMakeRotationArc( VmathSoaQuat *result, const VmathSoaVector3 *unitVec0, const VmathSoaVector3 *unitVec1 );
+
+/*
+ * Construct a quaternion to rotate around a unit-length 3-D vector
+ */
+static inline void vmathSoaQMakeRotationAxis( VmathSoaQuat *result, vec_float4 radians, const VmathSoaVector3 *unitVec );
+
+/*
+ * Construct a quaternion to rotate around the x axis
+ */
+static inline void vmathSoaQMakeRotationX( VmathSoaQuat *result, vec_float4 radians );
+
+/*
+ * Construct a quaternion to rotate around the y axis
+ */
+static inline void vmathSoaQMakeRotationY( VmathSoaQuat *result, vec_float4 radians );
+
+/*
+ * Construct a quaternion to rotate around the z axis
+ */
+static inline void vmathSoaQMakeRotationZ( VmathSoaQuat *result, vec_float4 radians );
+
+/*
+ * Compute the conjugate of a quaternion
+ */
+static inline void vmathSoaQConj( VmathSoaQuat *result, const VmathSoaQuat *quat );
+
+/*
+ * Use a unit-length quaternion to rotate a 3-D vector
+ */
+static inline void vmathSoaQRotate( VmathSoaVector3 *result, const VmathSoaQuat *unitQuat, const VmathSoaVector3 *vec );
+
+/*
+ * Compute the dot product of two quaternions
+ */
+static inline vec_float4 vmathSoaQDot( const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 );
+
+/*
+ * Compute the norm of a quaternion
+ */
+static inline vec_float4 vmathSoaQNorm( const VmathSoaQuat *quat );
+
+/*
+ * Compute the length of a quaternion
+ */
+static inline vec_float4 vmathSoaQLength( const VmathSoaQuat *quat );
+
+/*
+ * Normalize a quaternion
+ * NOTE: 
+ * The result is unpredictable when all elements of quat are at or near zero.
+ */
+static inline void vmathSoaQNormalize( VmathSoaQuat *result, const VmathSoaQuat *quat );
+
+/*
+ * Linear interpolation between two quaternions
+ * NOTE: 
+ * Does not clamp t between 0 and 1.
+ */
+static inline void vmathSoaQLerp( VmathSoaQuat *result, vec_float4 t, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1 );
+
+/*
+ * Spherical linear interpolation between two quaternions
+ * NOTE: 
+ * Interpolates along the shortest path between orientations.
+ * Does not clamp t between 0 and 1.
+ */
+static inline void vmathSoaQSlerp( VmathSoaQuat *result, vec_float4 t, const VmathSoaQuat *unitQuat0, const VmathSoaQuat *unitQuat1 );
+
+/*
+ * Spherical quadrangle interpolation of unit-length quaternions
+ */
+static inline void vmathSoaQSquad( VmathSoaQuat *result, vec_float4 t, const VmathSoaQuat *unitQuat0, const VmathSoaQuat *unitQuat1, const VmathSoaQuat *unitQuat2, const VmathSoaQuat *unitQuat3 );
+
+/*
+ * Conditionally select between two quaternions
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline void vmathSoaQSelect( VmathSoaQuat *result, const VmathSoaQuat *quat0, const VmathSoaQuat *quat1, vec_uint4 select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a quaternion
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaQPrint( const VmathSoaQuat *quat );
+
+/*
+ * Print a quaternion and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaQPrints( const VmathSoaQuat *quat, const char *name );
+
+#endif
+
+/*
+ * Copy a 3x3 matrix
+ */
+static inline void vmathSoaM3Copy( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat );
+
+/*
+ * Construct a 3x3 matrix containing the specified columns
+ */
+static inline void vmathSoaM3MakeFromCols( VmathSoaMatrix3 *result, const VmathSoaVector3 *col0, const VmathSoaVector3 *col1, const VmathSoaVector3 *col2 );
+
+/*
+ * Construct a 3x3 rotation matrix from a unit-length quaternion
+ */
+static inline void vmathSoaM3MakeFromQ( VmathSoaMatrix3 *result, const VmathSoaQuat *unitQuat );
+
+/*
+ * Set all elements of a 3x3 matrix to the same scalar value
+ */
+static inline void vmathSoaM3MakeFromScalar( VmathSoaMatrix3 *result, vec_float4 scalar );
+
+/*
+ * Replicate an AoS 3x3 matrix into an SoA 3x3 matrix
+ */
+static inline void vmathSoaM3MakeFromAos( VmathSoaMatrix3 *result, const VmathMatrix3 *mat );
+
+/*
+ * Insert four AoS 3x3 matrices into an SoA 3x3 matrix
+ */
+static inline void vmathSoaM3MakeFrom4Aos( VmathSoaMatrix3 *result, const VmathMatrix3 *mat0, const VmathMatrix3 *mat1, const VmathMatrix3 *mat2, const VmathMatrix3 *mat3 );
+
+/*
+ * Extract four AoS 3x3 matrices from an SoA 3x3 matrix
+ */
+static inline void vmathSoaM3Get4Aos( const VmathSoaMatrix3 *mat, VmathMatrix3 *result0, VmathMatrix3 *result1, VmathMatrix3 *result2, VmathMatrix3 *result3 );
+
+/*
+ * Set column 0 of a 3x3 matrix
+ */
+static inline void vmathSoaM3SetCol0( VmathSoaMatrix3 *result, const VmathSoaVector3 *col0 );
+
+/*
+ * Set column 1 of a 3x3 matrix
+ */
+static inline void vmathSoaM3SetCol1( VmathSoaMatrix3 *result, const VmathSoaVector3 *col1 );
+
+/*
+ * Set column 2 of a 3x3 matrix
+ */
+static inline void vmathSoaM3SetCol2( VmathSoaMatrix3 *result, const VmathSoaVector3 *col2 );
+
+/*
+ * Get column 0 of a 3x3 matrix
+ */
+static inline void vmathSoaM3GetCol0( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat );
+
+/*
+ * Get column 1 of a 3x3 matrix
+ */
+static inline void vmathSoaM3GetCol1( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat );
+
+/*
+ * Get column 2 of a 3x3 matrix
+ */
+static inline void vmathSoaM3GetCol2( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat );
+
+/*
+ * Set the column of a 3x3 matrix referred to by the specified index
+ */
+static inline void vmathSoaM3SetCol( VmathSoaMatrix3 *result, int col, const VmathSoaVector3 *vec );
+
+/*
+ * Set the row of a 3x3 matrix referred to by the specified index
+ */
+static inline void vmathSoaM3SetRow( VmathSoaMatrix3 *result, int row, const VmathSoaVector3 *vec );
+
+/*
+ * Get the column of a 3x3 matrix referred to by the specified index
+ */
+static inline void vmathSoaM3GetCol( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat, int col );
+
+/*
+ * Get the row of a 3x3 matrix referred to by the specified index
+ */
+static inline void vmathSoaM3GetRow( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat, int row );
+
+/*
+ * Set the element of a 3x3 matrix referred to by column and row indices
+ */
+static inline void vmathSoaM3SetElem( VmathSoaMatrix3 *result, int col, int row, vec_float4 val );
+
+/*
+ * Get the element of a 3x3 matrix referred to by column and row indices
+ */
+static inline vec_float4 vmathSoaM3GetElem( const VmathSoaMatrix3 *mat, int col, int row );
+
+/*
+ * Add two 3x3 matrices
+ */
+static inline void vmathSoaM3Add( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1 );
+
+/*
+ * Subtract a 3x3 matrix from another 3x3 matrix
+ */
+static inline void vmathSoaM3Sub( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1 );
+
+/*
+ * Negate all elements of a 3x3 matrix
+ */
+static inline void vmathSoaM3Neg( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat );
+
+/*
+ * Multiply a 3x3 matrix by a scalar
+ */
+static inline void vmathSoaM3ScalarMul( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat, vec_float4 scalar );
+
+/*
+ * Multiply a 3x3 matrix by a 3-D vector
+ */
+static inline void vmathSoaM3MulV3( VmathSoaVector3 *result, const VmathSoaMatrix3 *mat, const VmathSoaVector3 *vec );
+
+/*
+ * Multiply two 3x3 matrices
+ */
+static inline void vmathSoaM3Mul( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1 );
+
+/*
+ * Construct an identity 3x3 matrix
+ */
+static inline void vmathSoaM3MakeIdentity( VmathSoaMatrix3 *result );
+
+/*
+ * Construct a 3x3 matrix to rotate around the x axis
+ */
+static inline void vmathSoaM3MakeRotationX( VmathSoaMatrix3 *result, vec_float4 radians );
+
+/*
+ * Construct a 3x3 matrix to rotate around the y axis
+ */
+static inline void vmathSoaM3MakeRotationY( VmathSoaMatrix3 *result, vec_float4 radians );
+
+/*
+ * Construct a 3x3 matrix to rotate around the z axis
+ */
+static inline void vmathSoaM3MakeRotationZ( VmathSoaMatrix3 *result, vec_float4 radians );
+
+/*
+ * Construct a 3x3 matrix to rotate around the x, y, and z axes
+ */
+static inline void vmathSoaM3MakeRotationZYX( VmathSoaMatrix3 *result, const VmathSoaVector3 *radiansXYZ );
+
+/*
+ * Construct a 3x3 matrix to rotate around a unit-length 3-D vector
+ */
+static inline void vmathSoaM3MakeRotationAxis( VmathSoaMatrix3 *result, vec_float4 radians, const VmathSoaVector3 *unitVec );
+
+/*
+ * Construct a 3x3 rotation matrix from a unit-length quaternion
+ */
+static inline void vmathSoaM3MakeRotationQ( VmathSoaMatrix3 *result, const VmathSoaQuat *unitQuat );
+
+/*
+ * Construct a 3x3 matrix to perform scaling
+ */
+static inline void vmathSoaM3MakeScale( VmathSoaMatrix3 *result, const VmathSoaVector3 *scaleVec );
+
+/*
+ * Append (post-multiply) a scale transformation to a 3x3 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline void vmathSoaM3AppendScale( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat, const VmathSoaVector3 *scaleVec );
+
+/*
+ * Prepend (pre-multiply) a scale transformation to a 3x3 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline void vmathSoaM3PrependScale( VmathSoaMatrix3 *result, const VmathSoaVector3 *scaleVec, const VmathSoaMatrix3 *mat );
+
+/*
+ * Multiply two 3x3 matrices per element
+ */
+static inline void vmathSoaM3MulPerElem( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1 );
+
+/*
+ * Compute the absolute value of a 3x3 matrix per element
+ */
+static inline void vmathSoaM3AbsPerElem( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat );
+
+/*
+ * Transpose of a 3x3 matrix
+ */
+static inline void vmathSoaM3Transpose( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat );
+
+/*
+ * Compute the inverse of a 3x3 matrix
+ * NOTE: 
+ * Result is unpredictable when the determinant of mat is equal to or near 0.
+ */
+static inline void vmathSoaM3Inverse( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat );
+
+/*
+ * Determinant of a 3x3 matrix
+ */
+static inline vec_float4 vmathSoaM3Determinant( const VmathSoaMatrix3 *mat );
+
+/*
+ * Conditionally select between two 3x3 matrices
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline void vmathSoaM3Select( VmathSoaMatrix3 *result, const VmathSoaMatrix3 *mat0, const VmathSoaMatrix3 *mat1, vec_uint4 select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3x3 matrix
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaM3Print( const VmathSoaMatrix3 *mat );
+
+/*
+ * Print a 3x3 matrix and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaM3Prints( const VmathSoaMatrix3 *mat, const char *name );
+
+#endif
+
+/*
+ * Copy a 4x4 matrix
+ */
+static inline void vmathSoaM4Copy( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat );
+
+/*
+ * Construct a 4x4 matrix containing the specified columns
+ */
+static inline void vmathSoaM4MakeFromCols( VmathSoaMatrix4 *result, const VmathSoaVector4 *col0, const VmathSoaVector4 *col1, const VmathSoaVector4 *col2, const VmathSoaVector4 *col3 );
+
+/*
+ * Construct a 4x4 matrix from a 3x4 transformation matrix
+ */
+static inline void vmathSoaM4MakeFromT3( VmathSoaMatrix4 *result, const VmathSoaTransform3 *mat );
+
+/*
+ * Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector
+ */
+static inline void vmathSoaM4MakeFromM3V3( VmathSoaMatrix4 *result, const VmathSoaMatrix3 *mat, const VmathSoaVector3 *translateVec );
+
+/*
+ * Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector
+ */
+static inline void vmathSoaM4MakeFromQV3( VmathSoaMatrix4 *result, const VmathSoaQuat *unitQuat, const VmathSoaVector3 *translateVec );
+
+/*
+ * Set all elements of a 4x4 matrix to the same scalar value
+ */
+static inline void vmathSoaM4MakeFromScalar( VmathSoaMatrix4 *result, vec_float4 scalar );
+
+/*
+ * Replicate an AoS 4x4 matrix
+ */
+static inline void vmathSoaM4MakeFromAos( VmathSoaMatrix4 *result, const VmathMatrix4 *mat );
+
+/*
+ * Insert four AoS 4x4 matrices
+ */
+static inline void vmathSoaM4MakeFrom4Aos( VmathSoaMatrix4 *result, const VmathMatrix4 *mat0, const VmathMatrix4 *mat1, const VmathMatrix4 *mat2, const VmathMatrix4 *mat3 );
+
+/*
+ * Extract four AoS 4x4 matrices
+ */
+static inline void vmathSoaM4Get4Aos( const VmathSoaMatrix4 *mat, VmathMatrix4 *result0, VmathMatrix4 *result1, VmathMatrix4 *result2, VmathMatrix4 *result3 );
+
+/*
+ * Set the upper-left 3x3 submatrix of a 4x4 matrix
+ * NOTE: 
+ * This function does not change the bottom row elements.
+ */
+static inline void vmathSoaM4SetUpper3x3( VmathSoaMatrix4 *result, const VmathSoaMatrix3 *mat3 );
+
+/*
+ * Get the upper-left 3x3 submatrix of a 4x4 matrix
+ */
+static inline void vmathSoaM4GetUpper3x3( VmathSoaMatrix3 *result, const VmathSoaMatrix4 *mat );
+
+/*
+ * Set the translation component of a 4x4 matrix
+ * NOTE: 
+ * This function does not change the bottom row elements.
+ */
+static inline void vmathSoaM4SetTranslation( VmathSoaMatrix4 *result, const VmathSoaVector3 *translateVec );
+
+/*
+ * Get the translation component of a 4x4 matrix
+ */
+static inline void vmathSoaM4GetTranslation( VmathSoaVector3 *result, const VmathSoaMatrix4 *mat );
+
+/*
+ * Set column 0 of a 4x4 matrix
+ */
+static inline void vmathSoaM4SetCol0( VmathSoaMatrix4 *result, const VmathSoaVector4 *col0 );
+
+/*
+ * Set column 1 of a 4x4 matrix
+ */
+static inline void vmathSoaM4SetCol1( VmathSoaMatrix4 *result, const VmathSoaVector4 *col1 );
+
+/*
+ * Set column 2 of a 4x4 matrix
+ */
+static inline void vmathSoaM4SetCol2( VmathSoaMatrix4 *result, const VmathSoaVector4 *col2 );
+
+/*
+ * Set column 3 of a 4x4 matrix
+ */
+static inline void vmathSoaM4SetCol3( VmathSoaMatrix4 *result, const VmathSoaVector4 *col3 );
+
+/*
+ * Get column 0 of a 4x4 matrix
+ */
+static inline void vmathSoaM4GetCol0( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat );
+
+/*
+ * Get column 1 of a 4x4 matrix
+ */
+static inline void vmathSoaM4GetCol1( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat );
+
+/*
+ * Get column 2 of a 4x4 matrix
+ */
+static inline void vmathSoaM4GetCol2( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat );
+
+/*
+ * Get column 3 of a 4x4 matrix
+ */
+static inline void vmathSoaM4GetCol3( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat );
+
+/*
+ * Set the column of a 4x4 matrix referred to by the specified index
+ */
+static inline void vmathSoaM4SetCol( VmathSoaMatrix4 *result, int col, const VmathSoaVector4 *vec );
+
+/*
+ * Set the row of a 4x4 matrix referred to by the specified index
+ */
+static inline void vmathSoaM4SetRow( VmathSoaMatrix4 *result, int row, const VmathSoaVector4 *vec );
+
+/*
+ * Get the column of a 4x4 matrix referred to by the specified index
+ */
+static inline void vmathSoaM4GetCol( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, int col );
+
+/*
+ * Get the row of a 4x4 matrix referred to by the specified index
+ */
+static inline void vmathSoaM4GetRow( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, int row );
+
+/*
+ * Set the element of a 4x4 matrix referred to by column and row indices
+ */
+static inline void vmathSoaM4SetElem( VmathSoaMatrix4 *result, int col, int row, vec_float4 val );
+
+/*
+ * Get the element of a 4x4 matrix referred to by column and row indices
+ */
+static inline vec_float4 vmathSoaM4GetElem( const VmathSoaMatrix4 *mat, int col, int row );
+
+/*
+ * Add two 4x4 matrices
+ */
+static inline void vmathSoaM4Add( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1 );
+
+/*
+ * Subtract a 4x4 matrix from another 4x4 matrix
+ */
+static inline void vmathSoaM4Sub( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1 );
+
+/*
+ * Negate all elements of a 4x4 matrix
+ */
+static inline void vmathSoaM4Neg( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat );
+
+/*
+ * Multiply a 4x4 matrix by a scalar
+ */
+static inline void vmathSoaM4ScalarMul( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat, vec_float4 scalar );
+
+/*
+ * Multiply a 4x4 matrix by a 4-D vector
+ */
+static inline void vmathSoaM4MulV4( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, const VmathSoaVector4 *vec );
+
+/*
+ * Multiply a 4x4 matrix by a 3-D vector
+ */
+static inline void vmathSoaM4MulV3( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, const VmathSoaVector3 *vec );
+
+/*
+ * Multiply a 4x4 matrix by a 3-D point
+ */
+static inline void vmathSoaM4MulP3( VmathSoaVector4 *result, const VmathSoaMatrix4 *mat, const VmathSoaPoint3 *pnt );
+
+/*
+ * Multiply two 4x4 matrices
+ */
+static inline void vmathSoaM4Mul( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1 );
+
+/*
+ * Multiply a 4x4 matrix by a 3x4 transformation matrix
+ */
+static inline void vmathSoaM4MulT3( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat, const VmathSoaTransform3 *tfrm );
+
+/*
+ * Construct an identity 4x4 matrix
+ */
+static inline void vmathSoaM4MakeIdentity( VmathSoaMatrix4 *result );
+
+/*
+ * Construct a 4x4 matrix to rotate around the x axis
+ */
+static inline void vmathSoaM4MakeRotationX( VmathSoaMatrix4 *result, vec_float4 radians );
+
+/*
+ * Construct a 4x4 matrix to rotate around the y axis
+ */
+static inline void vmathSoaM4MakeRotationY( VmathSoaMatrix4 *result, vec_float4 radians );
+
+/*
+ * Construct a 4x4 matrix to rotate around the z axis
+ */
+static inline void vmathSoaM4MakeRotationZ( VmathSoaMatrix4 *result, vec_float4 radians );
+
+/*
+ * Construct a 4x4 matrix to rotate around the x, y, and z axes
+ */
+static inline void vmathSoaM4MakeRotationZYX( VmathSoaMatrix4 *result, const VmathSoaVector3 *radiansXYZ );
+
+/*
+ * Construct a 4x4 matrix to rotate around a unit-length 3-D vector
+ */
+static inline void vmathSoaM4MakeRotationAxis( VmathSoaMatrix4 *result, vec_float4 radians, const VmathSoaVector3 *unitVec );
+
+/*
+ * Construct a 4x4 matrix for the rotation given by a unit-length quaternion
+ */
+static inline void vmathSoaM4MakeRotationQ( VmathSoaMatrix4 *result, const VmathSoaQuat *unitQuat );
+
+/*
+ * Construct a 4x4 matrix to perform scaling
+ */
+static inline void vmathSoaM4MakeScale( VmathSoaMatrix4 *result, const VmathSoaVector3 *scaleVec );
+
+/*
+ * Construct a 4x4 matrix to perform translation
+ */
+static inline void vmathSoaM4MakeTranslation( VmathSoaMatrix4 *result, const VmathSoaVector3 *translateVec );
+
+/*
+ * Construct viewing matrix based on eye position, position looked at, and up direction
+ */
+static inline void vmathSoaM4MakeLookAt( VmathSoaMatrix4 *result, const VmathSoaPoint3 *eyePos, const VmathSoaPoint3 *lookAtPos, const VmathSoaVector3 *upVec );
+
+/*
+ * Construct a perspective projection matrix
+ */
+static inline void vmathSoaM4MakePerspective( VmathSoaMatrix4 *result, vec_float4 fovyRadians, vec_float4 aspect, vec_float4 zNear, vec_float4 zFar );
+
+/*
+ * Construct a perspective projection matrix based on frustum
+ */
+static inline void vmathSoaM4MakeFrustum( VmathSoaMatrix4 *result, vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar );
+
+/*
+ * Construct an orthographic projection matrix
+ */
+static inline void vmathSoaM4MakeOrthographic( VmathSoaMatrix4 *result, vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar );
+
+/*
+ * Append (post-multiply) a scale transformation to a 4x4 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline void vmathSoaM4AppendScale( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat, const VmathSoaVector3 *scaleVec );
+
+/*
+ * Prepend (pre-multiply) a scale transformation to a 4x4 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline void vmathSoaM4PrependScale( VmathSoaMatrix4 *result, const VmathSoaVector3 *scaleVec, const VmathSoaMatrix4 *mat );
+
+/*
+ * Multiply two 4x4 matrices per element
+ */
+static inline void vmathSoaM4MulPerElem( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1 );
+
+/*
+ * Compute the absolute value of a 4x4 matrix per element
+ */
+static inline void vmathSoaM4AbsPerElem( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat );
+
+/*
+ * Transpose of a 4x4 matrix
+ */
+static inline void vmathSoaM4Transpose( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat );
+
+/*
+ * Compute the inverse of a 4x4 matrix
+ * NOTE: 
+ * Result is unpredictable when the determinant of mat is equal to or near 0.
+ */
+static inline void vmathSoaM4Inverse( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat );
+
+/*
+ * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix
+ * NOTE: 
+ * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.  The result is unpredictable when the determinant of mat is equal to or near 0.
+ */
+static inline void vmathSoaM4AffineInverse( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat );
+
+/*
+ * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix
+ * NOTE: 
+ * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.
+ */
+static inline void vmathSoaM4OrthoInverse( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat );
+
+/*
+ * Determinant of a 4x4 matrix
+ */
+static inline vec_float4 vmathSoaM4Determinant( const VmathSoaMatrix4 *mat );
+
+/*
+ * Conditionally select between two 4x4 matrices
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline void vmathSoaM4Select( VmathSoaMatrix4 *result, const VmathSoaMatrix4 *mat0, const VmathSoaMatrix4 *mat1, vec_uint4 select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 4x4 matrix
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaM4Print( const VmathSoaMatrix4 *mat );
+
+/*
+ * Print a 4x4 matrix and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaM4Prints( const VmathSoaMatrix4 *mat, const char *name );
+
+#endif
+
+/*
+ * Copy a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3Copy( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm );
+
+/*
+ * Construct a 3x4 transformation matrix containing the specified columns
+ */
+static inline void vmathSoaT3MakeFromCols( VmathSoaTransform3 *result, const VmathSoaVector3 *col0, const VmathSoaVector3 *col1, const VmathSoaVector3 *col2, const VmathSoaVector3 *col3 );
+
+/*
+ * Construct a 3x4 transformation matrix from a 3x3 matrix and a 3-D vector
+ */
+static inline void vmathSoaT3MakeFromM3V3( VmathSoaTransform3 *result, const VmathSoaMatrix3 *tfrm, const VmathSoaVector3 *translateVec );
+
+/*
+ * Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector
+ */
+static inline void vmathSoaT3MakeFromQV3( VmathSoaTransform3 *result, const VmathSoaQuat *unitQuat, const VmathSoaVector3 *translateVec );
+
+/*
+ * Set all elements of a 3x4 transformation matrix to the same scalar value
+ */
+static inline void vmathSoaT3MakeFromScalar( VmathSoaTransform3 *result, vec_float4 scalar );
+
+/*
+ * Replicate an AoS 3x4 transformation matrix
+ */
+static inline void vmathSoaT3MakeFromAos( VmathSoaTransform3 *result, const VmathTransform3 *tfrm );
+
+/*
+ * Insert four AoS 3x4 transformation matrices
+ */
+static inline void vmathSoaT3MakeFrom4Aos( VmathSoaTransform3 *result, const VmathTransform3 *tfrm0, const VmathTransform3 *tfrm1, const VmathTransform3 *tfrm2, const VmathTransform3 *tfrm3 );
+
+/*
+ * Extract four AoS 3x4 transformation matrices
+ */
+static inline void vmathSoaT3Get4Aos( const VmathSoaTransform3 *tfrm, VmathTransform3 *result0, VmathTransform3 *result1, VmathTransform3 *result2, VmathTransform3 *result3 );
+
+/*
+ * Set the upper-left 3x3 submatrix of a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3SetUpper3x3( VmathSoaTransform3 *result, const VmathSoaMatrix3 *mat3 );
+
+/*
+ * Get the upper-left 3x3 submatrix of a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3GetUpper3x3( VmathSoaMatrix3 *result, const VmathSoaTransform3 *tfrm );
+
+/*
+ * Set the translation component of a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3SetTranslation( VmathSoaTransform3 *result, const VmathSoaVector3 *translateVec );
+
+/*
+ * Get the translation component of a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3GetTranslation( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm );
+
+/*
+ * Set column 0 of a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3SetCol0( VmathSoaTransform3 *result, const VmathSoaVector3 *col0 );
+
+/*
+ * Set column 1 of a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3SetCol1( VmathSoaTransform3 *result, const VmathSoaVector3 *col1 );
+
+/*
+ * Set column 2 of a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3SetCol2( VmathSoaTransform3 *result, const VmathSoaVector3 *col2 );
+
+/*
+ * Set column 3 of a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3SetCol3( VmathSoaTransform3 *result, const VmathSoaVector3 *col3 );
+
+/*
+ * Get column 0 of a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3GetCol0( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm );
+
+/*
+ * Get column 1 of a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3GetCol1( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm );
+
+/*
+ * Get column 2 of a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3GetCol2( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm );
+
+/*
+ * Get column 3 of a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3GetCol3( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm );
+
+/*
+ * Set the column of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline void vmathSoaT3SetCol( VmathSoaTransform3 *result, int col, const VmathSoaVector3 *vec );
+
+/*
+ * Set the row of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline void vmathSoaT3SetRow( VmathSoaTransform3 *result, int row, const VmathSoaVector4 *vec );
+
+/*
+ * Get the column of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline void vmathSoaT3GetCol( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm, int col );
+
+/*
+ * Get the row of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline void vmathSoaT3GetRow( VmathSoaVector4 *result, const VmathSoaTransform3 *tfrm, int row );
+
+/*
+ * Set the element of a 3x4 transformation matrix referred to by column and row indices
+ */
+static inline void vmathSoaT3SetElem( VmathSoaTransform3 *result, int col, int row, vec_float4 val );
+
+/*
+ * Get the element of a 3x4 transformation matrix referred to by column and row indices
+ */
+static inline vec_float4 vmathSoaT3GetElem( const VmathSoaTransform3 *tfrm, int col, int row );
+
+/*
+ * Multiply a 3x4 transformation matrix by a 3-D vector
+ */
+static inline void vmathSoaT3MulV3( VmathSoaVector3 *result, const VmathSoaTransform3 *tfrm, const VmathSoaVector3 *vec );
+
+/*
+ * Multiply a 3x4 transformation matrix by a 3-D point
+ */
+static inline void vmathSoaT3MulP3( VmathSoaPoint3 *result, const VmathSoaTransform3 *tfrm, const VmathSoaPoint3 *pnt );
+
+/*
+ * Multiply two 3x4 transformation matrices
+ */
+static inline void vmathSoaT3Mul( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm0, const VmathSoaTransform3 *tfrm1 );
+
+/*
+ * Construct an identity 3x4 transformation matrix
+ */
+static inline void vmathSoaT3MakeIdentity( VmathSoaTransform3 *result );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the x axis
+ */
+static inline void vmathSoaT3MakeRotationX( VmathSoaTransform3 *result, vec_float4 radians );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the y axis
+ */
+static inline void vmathSoaT3MakeRotationY( VmathSoaTransform3 *result, vec_float4 radians );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the z axis
+ */
+static inline void vmathSoaT3MakeRotationZ( VmathSoaTransform3 *result, vec_float4 radians );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the x, y, and z axes
+ */
+static inline void vmathSoaT3MakeRotationZYX( VmathSoaTransform3 *result, const VmathSoaVector3 *radiansXYZ );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector
+ */
+static inline void vmathSoaT3MakeRotationAxis( VmathSoaTransform3 *result, vec_float4 radians, const VmathSoaVector3 *unitVec );
+
+/*
+ * Construct a 3x4 transformation matrix for the rotation given by a unit-length quaternion
+ */
+static inline void vmathSoaT3MakeRotationQ( VmathSoaTransform3 *result, const VmathSoaQuat *unitQuat );
+
+/*
+ * Construct a 3x4 transformation matrix to perform scaling
+ */
+static inline void vmathSoaT3MakeScale( VmathSoaTransform3 *result, const VmathSoaVector3 *scaleVec );
+
+/*
+ * Construct a 3x4 transformation matrix to perform translation
+ */
+static inline void vmathSoaT3MakeTranslation( VmathSoaTransform3 *result, const VmathSoaVector3 *translateVec );
+
+/*
+ * Append (post-multiply) a scale transformation to a 3x4 transformation matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline void vmathSoaT3AppendScale( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm, const VmathSoaVector3 *scaleVec );
+
+/*
+ * Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline void vmathSoaT3PrependScale( VmathSoaTransform3 *result, const VmathSoaVector3 *scaleVec, const VmathSoaTransform3 *tfrm );
+
+/*
+ * Multiply two 3x4 transformation matrices per element
+ */
+static inline void vmathSoaT3MulPerElem( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm0, const VmathSoaTransform3 *tfrm1 );
+
+/*
+ * Compute the absolute value of a 3x4 transformation matrix per element
+ */
+static inline void vmathSoaT3AbsPerElem( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm );
+
+/*
+ * Compute the inverse of a 3x4 transformation matrix
+ * NOTE: 
+ * Result is unpredictable when the determinant of the upper-left 3x3 submatrix is equal to or near 0.
+ */
+static inline void vmathSoaT3Inverse( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm );
+
+/*
+ * Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix
+ * NOTE: 
+ * This can be used to achieve better performance than a general inverse when the specified 3x4 transformation matrix meets the given restrictions.
+ */
+static inline void vmathSoaT3OrthoInverse( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm );
+
+/*
+ * Conditionally select between two 3x4 transformation matrices
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline void vmathSoaT3Select( VmathSoaTransform3 *result, const VmathSoaTransform3 *tfrm0, const VmathSoaTransform3 *tfrm1, vec_uint4 select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3x4 transformation matrix
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaT3Print( const VmathSoaTransform3 *tfrm );
+
+/*
+ * Print a 3x4 transformation matrix and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaT3Prints( const VmathSoaTransform3 *tfrm, const char *name );
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#include "vec_soa.h"
+#include "quat_soa.h"
+#include "mat_soa.h"
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/spu/c/vectormath_soa_v.h b/Extras/vectormathlibrary/include/vectormath/spu/c/vectormath_soa_v.h
index 3cb69e040..17065dd5e 100644
--- a/Extras/vectormathlibrary/include/vectormath/spu/c/vectormath_soa_v.h
+++ b/Extras/vectormathlibrary/include/vectormath/spu/c/vectormath_soa_v.h
@@ -1,1978 +1,1978 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_SOA_C_V_H
-#define _VECTORMATH_SOA_C_V_H
-
-#include <math.h>
-#include <spu_intrinsics.h>
-#include "vectormath_aos_v.h"
-
-#ifdef _VECTORMATH_DEBUG
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-#ifndef _VECTORMATH_SOA_C_TYPES_H
-#define _VECTORMATH_SOA_C_TYPES_H
-
-/* A set of four 3-D vectors in structure-of-arrays format
- */
-typedef struct _VmathSoaVector3
-{
-    vec_float4 x;
-    vec_float4 y;
-    vec_float4 z;
-} VmathSoaVector3;
-
-/* A set of four 4-D vectors in structure-of-arrays format
- */
-typedef struct _VmathSoaVector4
-{
-    vec_float4 x;
-    vec_float4 y;
-    vec_float4 z;
-    vec_float4 w;
-} VmathSoaVector4;
-
-/* A set of four 3-D points in structure-of-arrays format
- */
-typedef struct _VmathSoaPoint3
-{
-    vec_float4 x;
-    vec_float4 y;
-    vec_float4 z;
-} VmathSoaPoint3;
-
-/* A set of four quaternions in structure-of-arrays format
- */
-typedef struct _VmathSoaQuat
-{
-    vec_float4 x;
-    vec_float4 y;
-    vec_float4 z;
-    vec_float4 w;
-} VmathSoaQuat;
-
-/* A set of four 3x3 matrices in structure-of-arrays format
- */
-typedef struct _VmathSoaMatrix3
-{
-    VmathSoaVector3 col0;
-    VmathSoaVector3 col1;
-    VmathSoaVector3 col2;
-} VmathSoaMatrix3;
-
-/* A set of four 4x4 matrices in structure-of-arrays format
- */
-typedef struct _VmathSoaMatrix4
-{
-    VmathSoaVector4 col0;
-    VmathSoaVector4 col1;
-    VmathSoaVector4 col2;
-    VmathSoaVector4 col3;
-} VmathSoaMatrix4;
-
-/* A set of four 3x4 transformation matrices in structure-of-arrays format
- */
-typedef struct _VmathSoaTransform3
-{
-    VmathSoaVector3 col0;
-    VmathSoaVector3 col1;
-    VmathSoaVector3 col2;
-    VmathSoaVector3 col3;
-} VmathSoaTransform3;
-
-#endif
-
-/*
- * Construct a 3-D vector from x, y, and z elements
- */
-static inline VmathSoaVector3 vmathSoaV3MakeFromElems_V( vec_float4 x, vec_float4 y, vec_float4 z );
-
-/*
- * Copy elements from a 3-D point into a 3-D vector
- */
-static inline VmathSoaVector3 vmathSoaV3MakeFromP3_V( VmathSoaPoint3 pnt );
-
-/*
- * Set all elements of a 3-D vector to the same scalar value
- */
-static inline VmathSoaVector3 vmathSoaV3MakeFromScalar_V( vec_float4 scalar );
-
-/*
- * Replicate an AoS 3-D vector
- */
-static inline VmathSoaVector3 vmathSoaV3MakeFromAos_V( VmathVector3 vec );
-
-/*
- * Insert four AoS 3-D vectors
- */
-static inline VmathSoaVector3 vmathSoaV3MakeFrom4Aos_V( VmathVector3 vec0, VmathVector3 vec1, VmathVector3 vec2, VmathVector3 vec3 );
-
-/*
- * Extract four AoS 3-D vectors
- */
-static inline void vmathSoaV3Get4Aos_V( VmathSoaVector3 vec, VmathVector3 *result0, VmathVector3 *result1, VmathVector3 *result2, VmathVector3 *result3 );
-
-/*
- * Set the x element of a 3-D vector
- */
-static inline void vmathSoaV3SetX_V( VmathSoaVector3 *result, vec_float4 x );
-
-/*
- * Set the y element of a 3-D vector
- */
-static inline void vmathSoaV3SetY_V( VmathSoaVector3 *result, vec_float4 y );
-
-/*
- * Set the z element of a 3-D vector
- */
-static inline void vmathSoaV3SetZ_V( VmathSoaVector3 *result, vec_float4 z );
-
-/*
- * Get the x element of a 3-D vector
- */
-static inline vec_float4 vmathSoaV3GetX_V( VmathSoaVector3 vec );
-
-/*
- * Get the y element of a 3-D vector
- */
-static inline vec_float4 vmathSoaV3GetY_V( VmathSoaVector3 vec );
-
-/*
- * Get the z element of a 3-D vector
- */
-static inline vec_float4 vmathSoaV3GetZ_V( VmathSoaVector3 vec );
-
-/*
- * Set an x, y, or z element of a 3-D vector by index
- */
-static inline void vmathSoaV3SetElem_V( VmathSoaVector3 *result, int idx, vec_float4 value );
-
-/*
- * Get an x, y, or z element of a 3-D vector by index
- */
-static inline vec_float4 vmathSoaV3GetElem_V( VmathSoaVector3 vec, int idx );
-
-/*
- * Add two 3-D vectors
- */
-static inline VmathSoaVector3 vmathSoaV3Add_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
-
-/*
- * Subtract a 3-D vector from another 3-D vector
- */
-static inline VmathSoaVector3 vmathSoaV3Sub_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
-
-/*
- * Add a 3-D vector to a 3-D point
- */
-static inline VmathSoaPoint3 vmathSoaV3AddP3_V( VmathSoaVector3 vec, VmathSoaPoint3 pnt );
-
-/*
- * Multiply a 3-D vector by a scalar
- */
-static inline VmathSoaVector3 vmathSoaV3ScalarMul_V( VmathSoaVector3 vec, vec_float4 scalar );
-
-/*
- * Divide a 3-D vector by a scalar
- */
-static inline VmathSoaVector3 vmathSoaV3ScalarDiv_V( VmathSoaVector3 vec, vec_float4 scalar );
-
-/*
- * Negate all elements of a 3-D vector
- */
-static inline VmathSoaVector3 vmathSoaV3Neg_V( VmathSoaVector3 vec );
-
-/*
- * Construct x axis
- */
-static inline VmathSoaVector3 vmathSoaV3MakeXAxis_V( );
-
-/*
- * Construct y axis
- */
-static inline VmathSoaVector3 vmathSoaV3MakeYAxis_V( );
-
-/*
- * Construct z axis
- */
-static inline VmathSoaVector3 vmathSoaV3MakeZAxis_V( );
-
-/*
- * Multiply two 3-D vectors per element
- */
-static inline VmathSoaVector3 vmathSoaV3MulPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
-
-/*
- * Divide two 3-D vectors per element
- * NOTE: 
- * Floating-point behavior matches standard library function divf4.
- */
-static inline VmathSoaVector3 vmathSoaV3DivPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
-
-/*
- * Compute the reciprocal of a 3-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function recipf4.
- */
-static inline VmathSoaVector3 vmathSoaV3RecipPerElem_V( VmathSoaVector3 vec );
-
-/*
- * Compute the square root of a 3-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function sqrtf4.
- */
-static inline VmathSoaVector3 vmathSoaV3SqrtPerElem_V( VmathSoaVector3 vec );
-
-/*
- * Compute the reciprocal square root of a 3-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function rsqrtf4.
- */
-static inline VmathSoaVector3 vmathSoaV3RsqrtPerElem_V( VmathSoaVector3 vec );
-
-/*
- * Compute the absolute value of a 3-D vector per element
- */
-static inline VmathSoaVector3 vmathSoaV3AbsPerElem_V( VmathSoaVector3 vec );
-
-/*
- * Copy sign from one 3-D vector to another, per element
- */
-static inline VmathSoaVector3 vmathSoaV3CopySignPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
-
-/*
- * Maximum of two 3-D vectors per element
- */
-static inline VmathSoaVector3 vmathSoaV3MaxPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
-
-/*
- * Minimum of two 3-D vectors per element
- */
-static inline VmathSoaVector3 vmathSoaV3MinPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
-
-/*
- * Maximum element of a 3-D vector
- */
-static inline vec_float4 vmathSoaV3MaxElem_V( VmathSoaVector3 vec );
-
-/*
- * Minimum element of a 3-D vector
- */
-static inline vec_float4 vmathSoaV3MinElem_V( VmathSoaVector3 vec );
-
-/*
- * Compute the sum of all elements of a 3-D vector
- */
-static inline vec_float4 vmathSoaV3Sum_V( VmathSoaVector3 vec );
-
-/*
- * Compute the dot product of two 3-D vectors
- */
-static inline vec_float4 vmathSoaV3Dot_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
-
-/*
- * Compute the square of the length of a 3-D vector
- */
-static inline vec_float4 vmathSoaV3LengthSqr_V( VmathSoaVector3 vec );
-
-/*
- * Compute the length of a 3-D vector
- */
-static inline vec_float4 vmathSoaV3Length_V( VmathSoaVector3 vec );
-
-/*
- * Normalize a 3-D vector
- * NOTE: 
- * The result is unpredictable when all elements of vec are at or near zero.
- */
-static inline VmathSoaVector3 vmathSoaV3Normalize_V( VmathSoaVector3 vec );
-
-/*
- * Compute cross product of two 3-D vectors
- */
-static inline VmathSoaVector3 vmathSoaV3Cross_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
-
-/*
- * Outer product of two 3-D vectors
- */
-static inline VmathSoaMatrix3 vmathSoaV3Outer_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
-
-/*
- * Pre-multiply a row vector by a 3x3 matrix
- */
-static inline VmathSoaVector3 vmathSoaV3RowMul_V( VmathSoaVector3 vec, VmathSoaMatrix3 mat );
-
-/*
- * Cross-product matrix of a 3-D vector
- */
-static inline VmathSoaMatrix3 vmathSoaV3CrossMatrix_V( VmathSoaVector3 vec );
-
-/*
- * Create cross-product matrix and multiply
- * NOTE: 
- * Faster than separately creating a cross-product matrix and multiplying.
- */
-static inline VmathSoaMatrix3 vmathSoaV3CrossMatrixMul_V( VmathSoaVector3 vec, VmathSoaMatrix3 mat );
-
-/*
- * Linear interpolation between two 3-D vectors
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline VmathSoaVector3 vmathSoaV3Lerp_V( vec_float4 t, VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
-
-/*
- * Spherical linear interpolation between two 3-D vectors
- * NOTE: 
- * The result is unpredictable if the vectors point in opposite directions.
- * Does not clamp t between 0 and 1.
- */
-static inline VmathSoaVector3 vmathSoaV3Slerp_V( vec_float4 t, VmathSoaVector3 unitVec0, VmathSoaVector3 unitVec1 );
-
-/*
- * Conditionally select between two 3-D vectors
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline VmathSoaVector3 vmathSoaV3Select_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1, vec_uint4 select1 );
-
-/*
- * Load four three-float 3-D vectors, stored in three quadwords
- */
-static inline void vmathSoaV3LoadXYZArray_V( VmathSoaVector3 *vec, const vec_float4 *threeQuads );
-
-/*
- * Store four slots of an SoA 3-D vector in three quadwords
- */
-static inline void vmathSoaV3StoreXYZArray_V( VmathSoaVector3 vec, vec_float4 *threeQuads );
-
-/*
- * Store eight slots of two SoA 3-D vectors as half-floats
- */
-static inline void vmathSoaV3StoreHalfFloats_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1, vec_ushort8 *threeQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3-D vector
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaV3Print_V( VmathSoaVector3 vec );
-
-/*
- * Print a 3-D vector and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaV3Prints_V( VmathSoaVector3 vec, const char *name );
-
-#endif
-
-/*
- * Construct a 4-D vector from x, y, z, and w elements
- */
-static inline VmathSoaVector4 vmathSoaV4MakeFromElems_V( vec_float4 x, vec_float4 y, vec_float4 z, vec_float4 w );
-
-/*
- * Construct a 4-D vector from a 3-D vector and a scalar
- */
-static inline VmathSoaVector4 vmathSoaV4MakeFromV3Scalar_V( VmathSoaVector3 xyz, vec_float4 w );
-
-/*
- * Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0
- */
-static inline VmathSoaVector4 vmathSoaV4MakeFromV3_V( VmathSoaVector3 vec );
-
-/*
- * Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1
- */
-static inline VmathSoaVector4 vmathSoaV4MakeFromP3_V( VmathSoaPoint3 pnt );
-
-/*
- * Copy elements from a quaternion into a 4-D vector
- */
-static inline VmathSoaVector4 vmathSoaV4MakeFromQ_V( VmathSoaQuat quat );
-
-/*
- * Set all elements of a 4-D vector to the same scalar value
- */
-static inline VmathSoaVector4 vmathSoaV4MakeFromScalar_V( vec_float4 scalar );
-
-/*
- * Replicate an AoS 4-D vector
- */
-static inline VmathSoaVector4 vmathSoaV4MakeFromAos_V( VmathVector4 vec );
-
-/*
- * Insert four AoS 4-D vectors
- */
-static inline VmathSoaVector4 vmathSoaV4MakeFrom4Aos_V( VmathVector4 vec0, VmathVector4 vec1, VmathVector4 vec2, VmathVector4 vec3 );
-
-/*
- * Extract four AoS 4-D vectors
- */
-static inline void vmathSoaV4Get4Aos_V( VmathSoaVector4 vec, VmathVector4 *result0, VmathVector4 *result1, VmathVector4 *result2, VmathVector4 *result3 );
-
-/*
- * Set the x, y, and z elements of a 4-D vector
- * NOTE: 
- * This function does not change the w element.
- */
-static inline void vmathSoaV4SetXYZ_V( VmathSoaVector4 *result, VmathSoaVector3 vec );
-
-/*
- * Get the x, y, and z elements of a 4-D vector
- */
-static inline VmathSoaVector3 vmathSoaV4GetXYZ_V( VmathSoaVector4 vec );
-
-/*
- * Set the x element of a 4-D vector
- */
-static inline void vmathSoaV4SetX_V( VmathSoaVector4 *result, vec_float4 x );
-
-/*
- * Set the y element of a 4-D vector
- */
-static inline void vmathSoaV4SetY_V( VmathSoaVector4 *result, vec_float4 y );
-
-/*
- * Set the z element of a 4-D vector
- */
-static inline void vmathSoaV4SetZ_V( VmathSoaVector4 *result, vec_float4 z );
-
-/*
- * Set the w element of a 4-D vector
- */
-static inline void vmathSoaV4SetW_V( VmathSoaVector4 *result, vec_float4 w );
-
-/*
- * Get the x element of a 4-D vector
- */
-static inline vec_float4 vmathSoaV4GetX_V( VmathSoaVector4 vec );
-
-/*
- * Get the y element of a 4-D vector
- */
-static inline vec_float4 vmathSoaV4GetY_V( VmathSoaVector4 vec );
-
-/*
- * Get the z element of a 4-D vector
- */
-static inline vec_float4 vmathSoaV4GetZ_V( VmathSoaVector4 vec );
-
-/*
- * Get the w element of a 4-D vector
- */
-static inline vec_float4 vmathSoaV4GetW_V( VmathSoaVector4 vec );
-
-/*
- * Set an x, y, z, or w element of a 4-D vector by index
- */
-static inline void vmathSoaV4SetElem_V( VmathSoaVector4 *result, int idx, vec_float4 value );
-
-/*
- * Get an x, y, z, or w element of a 4-D vector by index
- */
-static inline vec_float4 vmathSoaV4GetElem_V( VmathSoaVector4 vec, int idx );
-
-/*
- * Add two 4-D vectors
- */
-static inline VmathSoaVector4 vmathSoaV4Add_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
-
-/*
- * Subtract a 4-D vector from another 4-D vector
- */
-static inline VmathSoaVector4 vmathSoaV4Sub_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
-
-/*
- * Multiply a 4-D vector by a scalar
- */
-static inline VmathSoaVector4 vmathSoaV4ScalarMul_V( VmathSoaVector4 vec, vec_float4 scalar );
-
-/*
- * Divide a 4-D vector by a scalar
- */
-static inline VmathSoaVector4 vmathSoaV4ScalarDiv_V( VmathSoaVector4 vec, vec_float4 scalar );
-
-/*
- * Negate all elements of a 4-D vector
- */
-static inline VmathSoaVector4 vmathSoaV4Neg_V( VmathSoaVector4 vec );
-
-/*
- * Construct x axis
- */
-static inline VmathSoaVector4 vmathSoaV4MakeXAxis_V( );
-
-/*
- * Construct y axis
- */
-static inline VmathSoaVector4 vmathSoaV4MakeYAxis_V( );
-
-/*
- * Construct z axis
- */
-static inline VmathSoaVector4 vmathSoaV4MakeZAxis_V( );
-
-/*
- * Construct w axis
- */
-static inline VmathSoaVector4 vmathSoaV4MakeWAxis_V( );
-
-/*
- * Multiply two 4-D vectors per element
- */
-static inline VmathSoaVector4 vmathSoaV4MulPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
-
-/*
- * Divide two 4-D vectors per element
- * NOTE: 
- * Floating-point behavior matches standard library function divf4.
- */
-static inline VmathSoaVector4 vmathSoaV4DivPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
-
-/*
- * Compute the reciprocal of a 4-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function recipf4.
- */
-static inline VmathSoaVector4 vmathSoaV4RecipPerElem_V( VmathSoaVector4 vec );
-
-/*
- * Compute the square root of a 4-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function sqrtf4.
- */
-static inline VmathSoaVector4 vmathSoaV4SqrtPerElem_V( VmathSoaVector4 vec );
-
-/*
- * Compute the reciprocal square root of a 4-D vector per element
- * NOTE: 
- * Floating-point behavior matches standard library function rsqrtf4.
- */
-static inline VmathSoaVector4 vmathSoaV4RsqrtPerElem_V( VmathSoaVector4 vec );
-
-/*
- * Compute the absolute value of a 4-D vector per element
- */
-static inline VmathSoaVector4 vmathSoaV4AbsPerElem_V( VmathSoaVector4 vec );
-
-/*
- * Copy sign from one 4-D vector to another, per element
- */
-static inline VmathSoaVector4 vmathSoaV4CopySignPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
-
-/*
- * Maximum of two 4-D vectors per element
- */
-static inline VmathSoaVector4 vmathSoaV4MaxPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
-
-/*
- * Minimum of two 4-D vectors per element
- */
-static inline VmathSoaVector4 vmathSoaV4MinPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
-
-/*
- * Maximum element of a 4-D vector
- */
-static inline vec_float4 vmathSoaV4MaxElem_V( VmathSoaVector4 vec );
-
-/*
- * Minimum element of a 4-D vector
- */
-static inline vec_float4 vmathSoaV4MinElem_V( VmathSoaVector4 vec );
-
-/*
- * Compute the sum of all elements of a 4-D vector
- */
-static inline vec_float4 vmathSoaV4Sum_V( VmathSoaVector4 vec );
-
-/*
- * Compute the dot product of two 4-D vectors
- */
-static inline vec_float4 vmathSoaV4Dot_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
-
-/*
- * Compute the square of the length of a 4-D vector
- */
-static inline vec_float4 vmathSoaV4LengthSqr_V( VmathSoaVector4 vec );
-
-/*
- * Compute the length of a 4-D vector
- */
-static inline vec_float4 vmathSoaV4Length_V( VmathSoaVector4 vec );
-
-/*
- * Normalize a 4-D vector
- * NOTE: 
- * The result is unpredictable when all elements of vec are at or near zero.
- */
-static inline VmathSoaVector4 vmathSoaV4Normalize_V( VmathSoaVector4 vec );
-
-/*
- * Outer product of two 4-D vectors
- */
-static inline VmathSoaMatrix4 vmathSoaV4Outer_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
-
-/*
- * Linear interpolation between two 4-D vectors
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline VmathSoaVector4 vmathSoaV4Lerp_V( vec_float4 t, VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
-
-/*
- * Spherical linear interpolation between two 4-D vectors
- * NOTE: 
- * The result is unpredictable if the vectors point in opposite directions.
- * Does not clamp t between 0 and 1.
- */
-static inline VmathSoaVector4 vmathSoaV4Slerp_V( vec_float4 t, VmathSoaVector4 unitVec0, VmathSoaVector4 unitVec1 );
-
-/*
- * Conditionally select between two 4-D vectors
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline VmathSoaVector4 vmathSoaV4Select_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1, vec_uint4 select1 );
-
-/*
- * Store four slots of an SoA 4-D vector as half-floats
- */
-static inline void vmathSoaV4StoreHalfFloats_V( VmathSoaVector4 vec, vec_ushort8 *twoQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 4-D vector
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaV4Print_V( VmathSoaVector4 vec );
-
-/*
- * Print a 4-D vector and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaV4Prints_V( VmathSoaVector4 vec, const char *name );
-
-#endif
-
-/*
- * Construct a 3-D point from x, y, and z elements
- */
-static inline VmathSoaPoint3 vmathSoaP3MakeFromElems_V( vec_float4 x, vec_float4 y, vec_float4 z );
-
-/*
- * Copy elements from a 3-D vector into a 3-D point
- */
-static inline VmathSoaPoint3 vmathSoaP3MakeFromV3_V( VmathSoaVector3 vec );
-
-/*
- * Set all elements of a 3-D point to the same scalar value
- */
-static inline VmathSoaPoint3 vmathSoaP3MakeFromScalar_V( vec_float4 scalar );
-
-/*
- * Replicate an AoS 3-D point
- */
-static inline VmathSoaPoint3 vmathSoaP3MakeFromAos_V( VmathPoint3 pnt );
-
-/*
- * Insert four AoS 3-D points
- */
-static inline VmathSoaPoint3 vmathSoaP3MakeFrom4Aos_V( VmathPoint3 pnt0, VmathPoint3 pnt1, VmathPoint3 pnt2, VmathPoint3 pnt3 );
-
-/*
- * Extract four AoS 3-D points
- */
-static inline void vmathSoaP3Get4Aos_V( VmathSoaPoint3 pnt, VmathPoint3 *result0, VmathPoint3 *result1, VmathPoint3 *result2, VmathPoint3 *result3 );
-
-/*
- * Set the x element of a 3-D point
- */
-static inline void vmathSoaP3SetX_V( VmathSoaPoint3 *result, vec_float4 x );
-
-/*
- * Set the y element of a 3-D point
- */
-static inline void vmathSoaP3SetY_V( VmathSoaPoint3 *result, vec_float4 y );
-
-/*
- * Set the z element of a 3-D point
- */
-static inline void vmathSoaP3SetZ_V( VmathSoaPoint3 *result, vec_float4 z );
-
-/*
- * Get the x element of a 3-D point
- */
-static inline vec_float4 vmathSoaP3GetX_V( VmathSoaPoint3 pnt );
-
-/*
- * Get the y element of a 3-D point
- */
-static inline vec_float4 vmathSoaP3GetY_V( VmathSoaPoint3 pnt );
-
-/*
- * Get the z element of a 3-D point
- */
-static inline vec_float4 vmathSoaP3GetZ_V( VmathSoaPoint3 pnt );
-
-/*
- * Set an x, y, or z element of a 3-D point by index
- */
-static inline void vmathSoaP3SetElem_V( VmathSoaPoint3 *result, int idx, vec_float4 value );
-
-/*
- * Get an x, y, or z element of a 3-D point by index
- */
-static inline vec_float4 vmathSoaP3GetElem_V( VmathSoaPoint3 pnt, int idx );
-
-/*
- * Subtract a 3-D point from another 3-D point
- */
-static inline VmathSoaVector3 vmathSoaP3Sub_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 );
-
-/*
- * Add a 3-D point to a 3-D vector
- */
-static inline VmathSoaPoint3 vmathSoaP3AddV3_V( VmathSoaPoint3 pnt, VmathSoaVector3 vec );
-
-/*
- * Subtract a 3-D vector from a 3-D point
- */
-static inline VmathSoaPoint3 vmathSoaP3SubV3_V( VmathSoaPoint3 pnt, VmathSoaVector3 vec );
-
-/*
- * Multiply two 3-D points per element
- */
-static inline VmathSoaPoint3 vmathSoaP3MulPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 );
-
-/*
- * Divide two 3-D points per element
- * NOTE: 
- * Floating-point behavior matches standard library function divf4.
- */
-static inline VmathSoaPoint3 vmathSoaP3DivPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 );
-
-/*
- * Compute the reciprocal of a 3-D point per element
- * NOTE: 
- * Floating-point behavior matches standard library function recipf4.
- */
-static inline VmathSoaPoint3 vmathSoaP3RecipPerElem_V( VmathSoaPoint3 pnt );
-
-/*
- * Compute the square root of a 3-D point per element
- * NOTE: 
- * Floating-point behavior matches standard library function sqrtf4.
- */
-static inline VmathSoaPoint3 vmathSoaP3SqrtPerElem_V( VmathSoaPoint3 pnt );
-
-/*
- * Compute the reciprocal square root of a 3-D point per element
- * NOTE: 
- * Floating-point behavior matches standard library function rsqrtf4.
- */
-static inline VmathSoaPoint3 vmathSoaP3RsqrtPerElem_V( VmathSoaPoint3 pnt );
-
-/*
- * Compute the absolute value of a 3-D point per element
- */
-static inline VmathSoaPoint3 vmathSoaP3AbsPerElem_V( VmathSoaPoint3 pnt );
-
-/*
- * Copy sign from one 3-D point to another, per element
- */
-static inline VmathSoaPoint3 vmathSoaP3CopySignPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 );
-
-/*
- * Maximum of two 3-D points per element
- */
-static inline VmathSoaPoint3 vmathSoaP3MaxPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 );
-
-/*
- * Minimum of two 3-D points per element
- */
-static inline VmathSoaPoint3 vmathSoaP3MinPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 );
-
-/*
- * Maximum element of a 3-D point
- */
-static inline vec_float4 vmathSoaP3MaxElem_V( VmathSoaPoint3 pnt );
-
-/*
- * Minimum element of a 3-D point
- */
-static inline vec_float4 vmathSoaP3MinElem_V( VmathSoaPoint3 pnt );
-
-/*
- * Compute the sum of all elements of a 3-D point
- */
-static inline vec_float4 vmathSoaP3Sum_V( VmathSoaPoint3 pnt );
-
-/*
- * Apply uniform scale to a 3-D point
- */
-static inline VmathSoaPoint3 vmathSoaP3Scale_V( VmathSoaPoint3 pnt, vec_float4 scaleVal );
-
-/*
- * Apply non-uniform scale to a 3-D point
- */
-static inline VmathSoaPoint3 vmathSoaP3NonUniformScale_V( VmathSoaPoint3 pnt, VmathSoaVector3 scaleVec );
-
-/*
- * Scalar projection of a 3-D point on a unit-length 3-D vector
- */
-static inline vec_float4 vmathSoaP3Projection_V( VmathSoaPoint3 pnt, VmathSoaVector3 unitVec );
-
-/*
- * Compute the square of the distance of a 3-D point from the coordinate-system origin
- */
-static inline vec_float4 vmathSoaP3DistSqrFromOrigin_V( VmathSoaPoint3 pnt );
-
-/*
- * Compute the distance of a 3-D point from the coordinate-system origin
- */
-static inline vec_float4 vmathSoaP3DistFromOrigin_V( VmathSoaPoint3 pnt );
-
-/*
- * Compute the square of the distance between two 3-D points
- */
-static inline vec_float4 vmathSoaP3DistSqr_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 );
-
-/*
- * Compute the distance between two 3-D points
- */
-static inline vec_float4 vmathSoaP3Dist_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 );
-
-/*
- * Linear interpolation between two 3-D points
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline VmathSoaPoint3 vmathSoaP3Lerp_V( vec_float4 t, VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 );
-
-/*
- * Conditionally select between two 3-D points
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline VmathSoaPoint3 vmathSoaP3Select_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1, vec_uint4 select1 );
-
-/*
- * Load four three-float 3-D points, stored in three quadwords
- */
-static inline void vmathSoaP3LoadXYZArray_V( VmathSoaPoint3 *pnt, const vec_float4 *threeQuads );
-
-/*
- * Store four slots of an SoA 3-D point in three quadwords
- */
-static inline void vmathSoaP3StoreXYZArray_V( VmathSoaPoint3 pnt, vec_float4 *threeQuads );
-
-/*
- * Store eight slots of two SoA 3-D points as half-floats
- */
-static inline void vmathSoaP3StoreHalfFloats_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1, vec_ushort8 *threeQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3-D point
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaP3Print_V( VmathSoaPoint3 pnt );
-
-/*
- * Print a 3-D point and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaP3Prints_V( VmathSoaPoint3 pnt, const char *name );
-
-#endif
-
-/*
- * Construct a quaternion from x, y, z, and w elements
- */
-static inline VmathSoaQuat vmathSoaQMakeFromElems_V( vec_float4 x, vec_float4 y, vec_float4 z, vec_float4 w );
-
-/*
- * Construct a quaternion from a 3-D vector and a scalar
- */
-static inline VmathSoaQuat vmathSoaQMakeFromV3Scalar_V( VmathSoaVector3 xyz, vec_float4 w );
-
-/*
- * Copy elements from a 4-D vector into a quaternion
- */
-static inline VmathSoaQuat vmathSoaQMakeFromV4_V( VmathSoaVector4 vec );
-
-/*
- * Convert a rotation matrix to a unit-length quaternion
- */
-static inline VmathSoaQuat vmathSoaQMakeFromM3_V( VmathSoaMatrix3 rotMat );
-
-/*
- * Set all elements of a quaternion to the same scalar value
- */
-static inline VmathSoaQuat vmathSoaQMakeFromScalar_V( vec_float4 scalar );
-
-/*
- * Replicate an AoS quaternion
- */
-static inline VmathSoaQuat vmathSoaQMakeFromAos_V( VmathQuat quat );
-
-/*
- * Insert four AoS quaternions
- */
-static inline VmathSoaQuat vmathSoaQMakeFrom4Aos_V( VmathQuat quat0, VmathQuat quat1, VmathQuat quat2, VmathQuat quat3 );
-
-/*
- * Extract four AoS quaternions
- */
-static inline void vmathSoaQGet4Aos_V( VmathSoaQuat quat, VmathQuat *result0, VmathQuat *result1, VmathQuat *result2, VmathQuat *result3 );
-
-/*
- * Set the x, y, and z elements of a quaternion
- * NOTE: 
- * This function does not change the w element.
- */
-static inline void vmathSoaQSetXYZ_V( VmathSoaQuat *result, VmathSoaVector3 vec );
-
-/*
- * Get the x, y, and z elements of a quaternion
- */
-static inline VmathSoaVector3 vmathSoaQGetXYZ_V( VmathSoaQuat quat );
-
-/*
- * Set the x element of a quaternion
- */
-static inline void vmathSoaQSetX_V( VmathSoaQuat *result, vec_float4 x );
-
-/*
- * Set the y element of a quaternion
- */
-static inline void vmathSoaQSetY_V( VmathSoaQuat *result, vec_float4 y );
-
-/*
- * Set the z element of a quaternion
- */
-static inline void vmathSoaQSetZ_V( VmathSoaQuat *result, vec_float4 z );
-
-/*
- * Set the w element of a quaternion
- */
-static inline void vmathSoaQSetW_V( VmathSoaQuat *result, vec_float4 w );
-
-/*
- * Get the x element of a quaternion
- */
-static inline vec_float4 vmathSoaQGetX_V( VmathSoaQuat quat );
-
-/*
- * Get the y element of a quaternion
- */
-static inline vec_float4 vmathSoaQGetY_V( VmathSoaQuat quat );
-
-/*
- * Get the z element of a quaternion
- */
-static inline vec_float4 vmathSoaQGetZ_V( VmathSoaQuat quat );
-
-/*
- * Get the w element of a quaternion
- */
-static inline vec_float4 vmathSoaQGetW_V( VmathSoaQuat quat );
-
-/*
- * Set an x, y, z, or w element of a quaternion by index
- */
-static inline void vmathSoaQSetElem_V( VmathSoaQuat *result, int idx, vec_float4 value );
-
-/*
- * Get an x, y, z, or w element of a quaternion by index
- */
-static inline vec_float4 vmathSoaQGetElem_V( VmathSoaQuat quat, int idx );
-
-/*
- * Add two quaternions
- */
-static inline VmathSoaQuat vmathSoaQAdd_V( VmathSoaQuat quat0, VmathSoaQuat quat1 );
-
-/*
- * Subtract a quaternion from another quaternion
- */
-static inline VmathSoaQuat vmathSoaQSub_V( VmathSoaQuat quat0, VmathSoaQuat quat1 );
-
-/*
- * Multiply two quaternions
- */
-static inline VmathSoaQuat vmathSoaQMul_V( VmathSoaQuat quat0, VmathSoaQuat quat1 );
-
-/*
- * Multiply a quaternion by a scalar
- */
-static inline VmathSoaQuat vmathSoaQScalarMul_V( VmathSoaQuat quat, vec_float4 scalar );
-
-/*
- * Divide a quaternion by a scalar
- */
-static inline VmathSoaQuat vmathSoaQScalarDiv_V( VmathSoaQuat quat, vec_float4 scalar );
-
-/*
- * Negate all elements of a quaternion
- */
-static inline VmathSoaQuat vmathSoaQNeg_V( VmathSoaQuat quat );
-
-/*
- * Construct an identity quaternion
- */
-static inline VmathSoaQuat vmathSoaQMakeIdentity_V( );
-
-/*
- * Construct a quaternion to rotate between two unit-length 3-D vectors
- * NOTE: 
- * The result is unpredictable if unitVec0 and unitVec1 point in opposite directions.
- */
-static inline VmathSoaQuat vmathSoaQMakeRotationArc_V( VmathSoaVector3 unitVec0, VmathSoaVector3 unitVec1 );
-
-/*
- * Construct a quaternion to rotate around a unit-length 3-D vector
- */
-static inline VmathSoaQuat vmathSoaQMakeRotationAxis_V( vec_float4 radians, VmathSoaVector3 unitVec );
-
-/*
- * Construct a quaternion to rotate around the x axis
- */
-static inline VmathSoaQuat vmathSoaQMakeRotationX_V( vec_float4 radians );
-
-/*
- * Construct a quaternion to rotate around the y axis
- */
-static inline VmathSoaQuat vmathSoaQMakeRotationY_V( vec_float4 radians );
-
-/*
- * Construct a quaternion to rotate around the z axis
- */
-static inline VmathSoaQuat vmathSoaQMakeRotationZ_V( vec_float4 radians );
-
-/*
- * Compute the conjugate of a quaternion
- */
-static inline VmathSoaQuat vmathSoaQConj_V( VmathSoaQuat quat );
-
-/*
- * Use a unit-length quaternion to rotate a 3-D vector
- */
-static inline VmathSoaVector3 vmathSoaQRotate_V( VmathSoaQuat unitQuat, VmathSoaVector3 vec );
-
-/*
- * Compute the dot product of two quaternions
- */
-static inline vec_float4 vmathSoaQDot_V( VmathSoaQuat quat0, VmathSoaQuat quat1 );
-
-/*
- * Compute the norm of a quaternion
- */
-static inline vec_float4 vmathSoaQNorm_V( VmathSoaQuat quat );
-
-/*
- * Compute the length of a quaternion
- */
-static inline vec_float4 vmathSoaQLength_V( VmathSoaQuat quat );
-
-/*
- * Normalize a quaternion
- * NOTE: 
- * The result is unpredictable when all elements of quat are at or near zero.
- */
-static inline VmathSoaQuat vmathSoaQNormalize_V( VmathSoaQuat quat );
-
-/*
- * Linear interpolation between two quaternions
- * NOTE: 
- * Does not clamp t between 0 and 1.
- */
-static inline VmathSoaQuat vmathSoaQLerp_V( vec_float4 t, VmathSoaQuat quat0, VmathSoaQuat quat1 );
-
-/*
- * Spherical linear interpolation between two quaternions
- * NOTE: 
- * Interpolates along the shortest path between orientations.
- * Does not clamp t between 0 and 1.
- */
-static inline VmathSoaQuat vmathSoaQSlerp_V( vec_float4 t, VmathSoaQuat unitQuat0, VmathSoaQuat unitQuat1 );
-
-/*
- * Spherical quadrangle interpolation
- */
-static inline VmathSoaQuat vmathSoaQSquad_V( vec_float4 t, VmathSoaQuat unitQuat0, VmathSoaQuat unitQuat1, VmathSoaQuat unitQuat2, VmathSoaQuat unitQuat3 );
-
-/*
- * Conditionally select between two quaternions
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline VmathSoaQuat vmathSoaQSelect_V( VmathSoaQuat quat0, VmathSoaQuat quat1, vec_uint4 select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a quaternion
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaQPrint_V( VmathSoaQuat quat );
-
-/*
- * Print a quaternion and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaQPrints_V( VmathSoaQuat quat, const char *name );
-
-#endif
-
-/*
- * Construct a 3x3 matrix containing the specified columns
- */
-static inline VmathSoaMatrix3 vmathSoaM3MakeFromCols_V( VmathSoaVector3 col0, VmathSoaVector3 col1, VmathSoaVector3 col2 );
-
-/*
- * Construct a 3x3 rotation matrix from a unit-length quaternion
- */
-static inline VmathSoaMatrix3 vmathSoaM3MakeFromQ_V( VmathSoaQuat unitQuat );
-
-/*
- * Set all elements of a 3x3 matrix to the same scalar value
- */
-static inline VmathSoaMatrix3 vmathSoaM3MakeFromScalar_V( vec_float4 scalar );
-
-/*
- * Replicate an AoS 3x3 matrix
- */
-static inline VmathSoaMatrix3 vmathSoaM3MakeFromAos_V( VmathMatrix3 mat );
-
-/*
- * Insert four AoS 3x3 matrices
- */
-static inline VmathSoaMatrix3 vmathSoaM3MakeFrom4Aos_V( VmathMatrix3 mat0, VmathMatrix3 mat1, VmathMatrix3 mat2, VmathMatrix3 mat3 );
-
-/*
- * Extract four AoS 3x3 matrices
- */
-static inline void vmathSoaM3Get4Aos_V( VmathSoaMatrix3 mat, VmathMatrix3 *result0, VmathMatrix3 *result1, VmathMatrix3 *result2, VmathMatrix3 *result3 );
-
-/*
- * Set column 0 of a 3x3 matrix
- */
-static inline void vmathSoaM3SetCol0_V( VmathSoaMatrix3 *result, VmathSoaVector3 col0 );
-
-/*
- * Set column 1 of a 3x3 matrix
- */
-static inline void vmathSoaM3SetCol1_V( VmathSoaMatrix3 *result, VmathSoaVector3 col1 );
-
-/*
- * Set column 2 of a 3x3 matrix
- */
-static inline void vmathSoaM3SetCol2_V( VmathSoaMatrix3 *result, VmathSoaVector3 col2 );
-
-/*
- * Get column 0 of a 3x3 matrix
- */
-static inline VmathSoaVector3 vmathSoaM3GetCol0_V( VmathSoaMatrix3 mat );
-
-/*
- * Get column 1 of a 3x3 matrix
- */
-static inline VmathSoaVector3 vmathSoaM3GetCol1_V( VmathSoaMatrix3 mat );
-
-/*
- * Get column 2 of a 3x3 matrix
- */
-static inline VmathSoaVector3 vmathSoaM3GetCol2_V( VmathSoaMatrix3 mat );
-
-/*
- * Set the column of a 3x3 matrix referred to by the specified index
- */
-static inline void vmathSoaM3SetCol_V( VmathSoaMatrix3 *result, int col, VmathSoaVector3 vec );
-
-/*
- * Set the row of a 3x3 matrix referred to by the specified index
- */
-static inline void vmathSoaM3SetRow_V( VmathSoaMatrix3 *result, int row, VmathSoaVector3 vec );
-
-/*
- * Get the column of a 3x3 matrix referred to by the specified index
- */
-static inline VmathSoaVector3 vmathSoaM3GetCol_V( VmathSoaMatrix3 mat, int col );
-
-/*
- * Get the row of a 3x3 matrix referred to by the specified index
- */
-static inline VmathSoaVector3 vmathSoaM3GetRow_V( VmathSoaMatrix3 mat, int row );
-
-/*
- * Set the element of a 3x3 matrix referred to by column and row indices
- */
-static inline void vmathSoaM3SetElem_V( VmathSoaMatrix3 *result, int col, int row, vec_float4 val );
-
-/*
- * Get the element of a 3x3 matrix referred to by column and row indices
- */
-static inline vec_float4 vmathSoaM3GetElem_V( VmathSoaMatrix3 mat, int col, int row );
-
-/*
- * Add two 3x3 matrices
- */
-static inline VmathSoaMatrix3 vmathSoaM3Add_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1 );
-
-/*
- * Subtract a 3x3 matrix from another 3x3 matrix
- */
-static inline VmathSoaMatrix3 vmathSoaM3Sub_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1 );
-
-/*
- * Negate all elements of a 3x3 matrix
- */
-static inline VmathSoaMatrix3 vmathSoaM3Neg_V( VmathSoaMatrix3 mat );
-
-/*
- * Multiply a 3x3 matrix by a scalar
- */
-static inline VmathSoaMatrix3 vmathSoaM3ScalarMul_V( VmathSoaMatrix3 mat, vec_float4 scalar );
-
-/*
- * Multiply a 3x3 matrix by a 3-D vector
- */
-static inline VmathSoaVector3 vmathSoaM3MulV3_V( VmathSoaMatrix3 mat, VmathSoaVector3 vec );
-
-/*
- * Multiply two 3x3 matrices
- */
-static inline VmathSoaMatrix3 vmathSoaM3Mul_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1 );
-
-/*
- * Construct an identity 3x3 matrix
- */
-static inline VmathSoaMatrix3 vmathSoaM3MakeIdentity_V( );
-
-/*
- * Construct a 3x3 matrix to rotate around the x axis
- */
-static inline VmathSoaMatrix3 vmathSoaM3MakeRotationX_V( vec_float4 radians );
-
-/*
- * Construct a 3x3 matrix to rotate around the y axis
- */
-static inline VmathSoaMatrix3 vmathSoaM3MakeRotationY_V( vec_float4 radians );
-
-/*
- * Construct a 3x3 matrix to rotate around the z axis
- */
-static inline VmathSoaMatrix3 vmathSoaM3MakeRotationZ_V( vec_float4 radians );
-
-/*
- * Construct a 3x3 matrix to rotate around the x, y, and z axes
- */
-static inline VmathSoaMatrix3 vmathSoaM3MakeRotationZYX_V( VmathSoaVector3 radiansXYZ );
-
-/*
- * Construct a 3x3 matrix to rotate around a unit-length 3-D vector
- */
-static inline VmathSoaMatrix3 vmathSoaM3MakeRotationAxis_V( vec_float4 radians, VmathSoaVector3 unitVec );
-
-/*
- * Construct a rotation matrix from a unit-length quaternion
- */
-static inline VmathSoaMatrix3 vmathSoaM3MakeRotationQ_V( VmathSoaQuat unitQuat );
-
-/*
- * Construct a 3x3 matrix to perform scaling
- */
-static inline VmathSoaMatrix3 vmathSoaM3MakeScale_V( VmathSoaVector3 scaleVec );
-
-/*
- * Append (post-multiply) a scale transformation to a 3x3 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline VmathSoaMatrix3 vmathSoaM3AppendScale_V( VmathSoaMatrix3 mat, VmathSoaVector3 scaleVec );
-
-/*
- * Prepend (pre-multiply) a scale transformation to a 3x3 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline VmathSoaMatrix3 vmathSoaM3PrependScale_V( VmathSoaVector3 scaleVec, VmathSoaMatrix3 mat );
-
-/*
- * Multiply two 3x3 matrices per element
- */
-static inline VmathSoaMatrix3 vmathSoaM3MulPerElem_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1 );
-
-/*
- * Compute the absolute value of a 3x3 matrix per element
- */
-static inline VmathSoaMatrix3 vmathSoaM3AbsPerElem_V( VmathSoaMatrix3 mat );
-
-/*
- * Transpose of a 3x3 matrix
- */
-static inline VmathSoaMatrix3 vmathSoaM3Transpose_V( VmathSoaMatrix3 mat );
-
-/*
- * Compute the inverse of a 3x3 matrix
- * NOTE: 
- * Result is unpredictable when the determinant of mat is equal to or near 0.
- */
-static inline VmathSoaMatrix3 vmathSoaM3Inverse_V( VmathSoaMatrix3 mat );
-
-/*
- * Determinant of a 3x3 matrix
- */
-static inline vec_float4 vmathSoaM3Determinant_V( VmathSoaMatrix3 mat );
-
-/*
- * Conditionally select between two 3x3 matrices
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline VmathSoaMatrix3 vmathSoaM3Select_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1, vec_uint4 select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3x3 matrix
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaM3Print_V( VmathSoaMatrix3 mat );
-
-/*
- * Print a 3x3 matrix and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaM3Prints_V( VmathSoaMatrix3 mat, const char *name );
-
-#endif
-
-/*
- * Construct a 4x4 matrix containing the specified columns
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeFromCols_V( VmathSoaVector4 col0, VmathSoaVector4 col1, VmathSoaVector4 col2, VmathSoaVector4 col3 );
-
-/*
- * Construct a 4x4 matrix from a 3x4 transformation matrix
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeFromT3_V( VmathSoaTransform3 mat );
-
-/*
- * Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeFromM3V3_V( VmathSoaMatrix3 mat, VmathSoaVector3 translateVec );
-
-/*
- * Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeFromQV3_V( VmathSoaQuat unitQuat, VmathSoaVector3 translateVec );
-
-/*
- * Set all elements of a 4x4 matrix to the same scalar value
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeFromScalar_V( vec_float4 scalar );
-
-/*
- * Replicate an AoS 4x4 matrix
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeFromAos_V( VmathMatrix4 mat );
-
-/*
- * Insert four AoS 4x4 matrices
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeFrom4Aos_V( VmathMatrix4 mat0, VmathMatrix4 mat1, VmathMatrix4 mat2, VmathMatrix4 mat3 );
-
-/*
- * Extract four AoS 4x4 matrices
- */
-static inline void vmathSoaM4Get4Aos_V( VmathSoaMatrix4 mat, VmathMatrix4 *result0, VmathMatrix4 *result1, VmathMatrix4 *result2, VmathMatrix4 *result3 );
-
-/*
- * Set the upper-left 3x3 submatrix
- * NOTE: 
- * This function does not change the bottom row elements.
- */
-static inline void vmathSoaM4SetUpper3x3_V( VmathSoaMatrix4 *result, VmathSoaMatrix3 mat3 );
-
-/*
- * Get the upper-left 3x3 submatrix of a 4x4 matrix
- */
-static inline VmathSoaMatrix3 vmathSoaM4GetUpper3x3_V( VmathSoaMatrix4 mat );
-
-/*
- * Set translation component
- * NOTE: 
- * This function does not change the bottom row elements.
- */
-static inline void vmathSoaM4SetTranslation_V( VmathSoaMatrix4 *result, VmathSoaVector3 translateVec );
-
-/*
- * Get the translation component of a 4x4 matrix
- */
-static inline VmathSoaVector3 vmathSoaM4GetTranslation_V( VmathSoaMatrix4 mat );
-
-/*
- * Set column 0 of a 4x4 matrix
- */
-static inline void vmathSoaM4SetCol0_V( VmathSoaMatrix4 *result, VmathSoaVector4 col0 );
-
-/*
- * Set column 1 of a 4x4 matrix
- */
-static inline void vmathSoaM4SetCol1_V( VmathSoaMatrix4 *result, VmathSoaVector4 col1 );
-
-/*
- * Set column 2 of a 4x4 matrix
- */
-static inline void vmathSoaM4SetCol2_V( VmathSoaMatrix4 *result, VmathSoaVector4 col2 );
-
-/*
- * Set column 3 of a 4x4 matrix
- */
-static inline void vmathSoaM4SetCol3_V( VmathSoaMatrix4 *result, VmathSoaVector4 col3 );
-
-/*
- * Get column 0 of a 4x4 matrix
- */
-static inline VmathSoaVector4 vmathSoaM4GetCol0_V( VmathSoaMatrix4 mat );
-
-/*
- * Get column 1 of a 4x4 matrix
- */
-static inline VmathSoaVector4 vmathSoaM4GetCol1_V( VmathSoaMatrix4 mat );
-
-/*
- * Get column 2 of a 4x4 matrix
- */
-static inline VmathSoaVector4 vmathSoaM4GetCol2_V( VmathSoaMatrix4 mat );
-
-/*
- * Get column 3 of a 4x4 matrix
- */
-static inline VmathSoaVector4 vmathSoaM4GetCol3_V( VmathSoaMatrix4 mat );
-
-/*
- * Set the column of a 4x4 matrix referred to by the specified index
- */
-static inline void vmathSoaM4SetCol_V( VmathSoaMatrix4 *result, int col, VmathSoaVector4 vec );
-
-/*
- * Set the row of a 4x4 matrix referred to by the specified index
- */
-static inline void vmathSoaM4SetRow_V( VmathSoaMatrix4 *result, int row, VmathSoaVector4 vec );
-
-/*
- * Get the column of a 4x4 matrix referred to by the specified index
- */
-static inline VmathSoaVector4 vmathSoaM4GetCol_V( VmathSoaMatrix4 mat, int col );
-
-/*
- * Get the row of a 4x4 matrix referred to by the specified index
- */
-static inline VmathSoaVector4 vmathSoaM4GetRow_V( VmathSoaMatrix4 mat, int row );
-
-/*
- * Set the element of a 4x4 matrix referred to by column and row indices
- */
-static inline void vmathSoaM4SetElem_V( VmathSoaMatrix4 *result, int col, int row, vec_float4 val );
-
-/*
- * Get the element of a 4x4 matrix referred to by column and row indices
- */
-static inline vec_float4 vmathSoaM4GetElem_V( VmathSoaMatrix4 mat, int col, int row );
-
-/*
- * Add two 4x4 matrices
- */
-static inline VmathSoaMatrix4 vmathSoaM4Add_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1 );
-
-/*
- * Subtract a 4x4 matrix from another 4x4 matrix
- */
-static inline VmathSoaMatrix4 vmathSoaM4Sub_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1 );
-
-/*
- * Negate all elements of a 4x4 matrix
- */
-static inline VmathSoaMatrix4 vmathSoaM4Neg_V( VmathSoaMatrix4 mat );
-
-/*
- * Multiply a 4x4 matrix by a scalar
- */
-static inline VmathSoaMatrix4 vmathSoaM4ScalarMul_V( VmathSoaMatrix4 mat, vec_float4 scalar );
-
-/*
- * Multiply a 4x4 matrix by a 4-D vector
- */
-static inline VmathSoaVector4 vmathSoaM4MulV4_V( VmathSoaMatrix4 mat, VmathSoaVector4 vec );
-
-/*
- * Multiply a 4x4 matrix by a 3-D vector
- */
-static inline VmathSoaVector4 vmathSoaM4MulV3_V( VmathSoaMatrix4 mat, VmathSoaVector3 vec );
-
-/*
- * Multiply a 4x4 matrix by a 3-D point
- */
-static inline VmathSoaVector4 vmathSoaM4MulP3_V( VmathSoaMatrix4 mat, VmathSoaPoint3 pnt );
-
-/*
- * Multiply two 4x4 matrices
- */
-static inline VmathSoaMatrix4 vmathSoaM4Mul_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1 );
-
-/*
- * Multiply a 4x4 matrix by a 3x4 transformation matrix
- */
-static inline VmathSoaMatrix4 vmathSoaM4MulT3_V( VmathSoaMatrix4 mat, VmathSoaTransform3 tfrm );
-
-/*
- * Construct an identity 4x4 matrix
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeIdentity_V( );
-
-/*
- * Construct a 4x4 matrix to rotate around the x axis
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeRotationX_V( vec_float4 radians );
-
-/*
- * Construct a 4x4 matrix to rotate around the y axis
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeRotationY_V( vec_float4 radians );
-
-/*
- * Construct a 4x4 matrix to rotate around the z axis
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeRotationZ_V( vec_float4 radians );
-
-/*
- * Construct a 4x4 matrix to rotate around the x, y, and z axes
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeRotationZYX_V( VmathSoaVector3 radiansXYZ );
-
-/*
- * Construct a 4x4 matrix to rotate around a unit-length 3-D vector
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeRotationAxis_V( vec_float4 radians, VmathSoaVector3 unitVec );
-
-/*
- * Construct a rotation matrix from a unit-length quaternion
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeRotationQ_V( VmathSoaQuat unitQuat );
-
-/*
- * Construct a 4x4 matrix to perform scaling
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeScale_V( VmathSoaVector3 scaleVec );
-
-/*
- * Construct a 4x4 matrix to perform translation
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeTranslation_V( VmathSoaVector3 translateVec );
-
-/*
- * Construct viewing matrix based on eye position, position looked at, and up direction
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeLookAt_V( VmathSoaPoint3 eyePos, VmathSoaPoint3 lookAtPos, VmathSoaVector3 upVec );
-
-/*
- * Construct a perspective projection matrix
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakePerspective_V( vec_float4 fovyRadians, vec_float4 aspect, vec_float4 zNear, vec_float4 zFar );
-
-/*
- * Construct a perspective projection matrix based on frustum
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeFrustum_V( vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar );
-
-/*
- * Construct an orthographic projection matrix
- */
-static inline VmathSoaMatrix4 vmathSoaM4MakeOrthographic_V( vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar );
-
-/*
- * Append (post-multiply) a scale transformation to a 4x4 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline VmathSoaMatrix4 vmathSoaM4AppendScale_V( VmathSoaMatrix4 mat, VmathSoaVector3 scaleVec );
-
-/*
- * Prepend (pre-multiply) a scale transformation to a 4x4 matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline VmathSoaMatrix4 vmathSoaM4PrependScale_V( VmathSoaVector3 scaleVec, VmathSoaMatrix4 mat );
-
-/*
- * Multiply two 4x4 matrices per element
- */
-static inline VmathSoaMatrix4 vmathSoaM4MulPerElem_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1 );
-
-/*
- * Compute the absolute value of a 4x4 matrix per element
- */
-static inline VmathSoaMatrix4 vmathSoaM4AbsPerElem_V( VmathSoaMatrix4 mat );
-
-/*
- * Transpose of a 4x4 matrix
- */
-static inline VmathSoaMatrix4 vmathSoaM4Transpose_V( VmathSoaMatrix4 mat );
-
-/*
- * Compute the inverse of a 4x4 matrix
- * NOTE: 
- * Result is unpredictable when the determinant of mat is equal to or near 0.
- */
-static inline VmathSoaMatrix4 vmathSoaM4Inverse_V( VmathSoaMatrix4 mat );
-
-/*
- * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix
- * NOTE: 
- * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.  The result is unpredictable when the determinant of mat is equal to or near 0.
- */
-static inline VmathSoaMatrix4 vmathSoaM4AffineInverse_V( VmathSoaMatrix4 mat );
-
-/*
- * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix
- * NOTE: 
- * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.
- */
-static inline VmathSoaMatrix4 vmathSoaM4OrthoInverse_V( VmathSoaMatrix4 mat );
-
-/*
- * Determinant of a 4x4 matrix
- */
-static inline vec_float4 vmathSoaM4Determinant_V( VmathSoaMatrix4 mat );
-
-/*
- * Conditionally select between two 4x4 matrices
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline VmathSoaMatrix4 vmathSoaM4Select_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1, vec_uint4 select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 4x4 matrix
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaM4Print_V( VmathSoaMatrix4 mat );
-
-/*
- * Print a 4x4 matrix and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaM4Prints_V( VmathSoaMatrix4 mat, const char *name );
-
-#endif
-
-/*
- * Construct a 3x4 transformation matrix containing the specified columns
- */
-static inline VmathSoaTransform3 vmathSoaT3MakeFromCols_V( VmathSoaVector3 col0, VmathSoaVector3 col1, VmathSoaVector3 col2, VmathSoaVector3 col3 );
-
-/*
- * Construct a 3x4 transformation matrix from a 3x3 matrix and a 3-D vector
- */
-static inline VmathSoaTransform3 vmathSoaT3MakeFromM3V3_V( VmathSoaMatrix3 tfrm, VmathSoaVector3 translateVec );
-
-/*
- * Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector
- */
-static inline VmathSoaTransform3 vmathSoaT3MakeFromQV3_V( VmathSoaQuat unitQuat, VmathSoaVector3 translateVec );
-
-/*
- * Set all elements of a 3x4 transformation matrix to the same scalar value
- */
-static inline VmathSoaTransform3 vmathSoaT3MakeFromScalar_V( vec_float4 scalar );
-
-/*
- * Replicate an AoS 3x4 transformation matrix
- */
-static inline VmathSoaTransform3 vmathSoaT3MakeFromAos_V( VmathTransform3 tfrm );
-
-/*
- * Insert four AoS 3x4 transformation matrices
- */
-static inline VmathSoaTransform3 vmathSoaT3MakeFrom4Aos_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1, VmathTransform3 tfrm2, VmathTransform3 tfrm3 );
-
-/*
- * Extract four AoS 3x4 transformation matrices
- */
-static inline void vmathSoaT3Get4Aos_V( VmathSoaTransform3 tfrm, VmathTransform3 *result0, VmathTransform3 *result1, VmathTransform3 *result2, VmathTransform3 *result3 );
-
-/*
- * Set the upper-left 3x3 submatrix
- */
-static inline void vmathSoaT3SetUpper3x3_V( VmathSoaTransform3 *result, VmathSoaMatrix3 mat3 );
-
-/*
- * Get the upper-left 3x3 submatrix of a 3x4 transformation matrix
- */
-static inline VmathSoaMatrix3 vmathSoaT3GetUpper3x3_V( VmathSoaTransform3 tfrm );
-
-/*
- * Set translation component
- */
-static inline void vmathSoaT3SetTranslation_V( VmathSoaTransform3 *result, VmathSoaVector3 translateVec );
-
-/*
- * Get the translation component of a 3x4 transformation matrix
- */
-static inline VmathSoaVector3 vmathSoaT3GetTranslation_V( VmathSoaTransform3 tfrm );
-
-/*
- * Set column 0 of a 3x4 transformation matrix
- */
-static inline void vmathSoaT3SetCol0_V( VmathSoaTransform3 *result, VmathSoaVector3 col0 );
-
-/*
- * Set column 1 of a 3x4 transformation matrix
- */
-static inline void vmathSoaT3SetCol1_V( VmathSoaTransform3 *result, VmathSoaVector3 col1 );
-
-/*
- * Set column 2 of a 3x4 transformation matrix
- */
-static inline void vmathSoaT3SetCol2_V( VmathSoaTransform3 *result, VmathSoaVector3 col2 );
-
-/*
- * Set column 3 of a 3x4 transformation matrix
- */
-static inline void vmathSoaT3SetCol3_V( VmathSoaTransform3 *result, VmathSoaVector3 col3 );
-
-/*
- * Get column 0 of a 3x4 transformation matrix
- */
-static inline VmathSoaVector3 vmathSoaT3GetCol0_V( VmathSoaTransform3 tfrm );
-
-/*
- * Get column 1 of a 3x4 transformation matrix
- */
-static inline VmathSoaVector3 vmathSoaT3GetCol1_V( VmathSoaTransform3 tfrm );
-
-/*
- * Get column 2 of a 3x4 transformation matrix
- */
-static inline VmathSoaVector3 vmathSoaT3GetCol2_V( VmathSoaTransform3 tfrm );
-
-/*
- * Get column 3 of a 3x4 transformation matrix
- */
-static inline VmathSoaVector3 vmathSoaT3GetCol3_V( VmathSoaTransform3 tfrm );
-
-/*
- * Set the column of a 3x4 transformation matrix referred to by the specified index
- */
-static inline void vmathSoaT3SetCol_V( VmathSoaTransform3 *result, int col, VmathSoaVector3 vec );
-
-/*
- * Set the row of a 3x4 transformation matrix referred to by the specified index
- */
-static inline void vmathSoaT3SetRow_V( VmathSoaTransform3 *result, int row, VmathSoaVector4 vec );
-
-/*
- * Get the column of a 3x4 transformation matrix referred to by the specified index
- */
-static inline VmathSoaVector3 vmathSoaT3GetCol_V( VmathSoaTransform3 tfrm, int col );
-
-/*
- * Get the row of a 3x4 transformation matrix referred to by the specified index
- */
-static inline VmathSoaVector4 vmathSoaT3GetRow_V( VmathSoaTransform3 tfrm, int row );
-
-/*
- * Set the element of a 3x4 transformation matrix referred to by column and row indices
- */
-static inline void vmathSoaT3SetElem_V( VmathSoaTransform3 *result, int col, int row, vec_float4 val );
-
-/*
- * Get the element of a 3x4 transformation matrix referred to by column and row indices
- */
-static inline vec_float4 vmathSoaT3GetElem_V( VmathSoaTransform3 tfrm, int col, int row );
-
-/*
- * Multiply a 3x4 transformation matrix by a 3-D vector
- */
-static inline VmathSoaVector3 vmathSoaT3MulV3_V( VmathSoaTransform3 tfrm, VmathSoaVector3 vec );
-
-/*
- * Multiply a 3x4 transformation matrix by a 3-D point
- */
-static inline VmathSoaPoint3 vmathSoaT3MulP3_V( VmathSoaTransform3 tfrm, VmathSoaPoint3 pnt );
-
-/*
- * Multiply two 3x4 transformation matrices
- */
-static inline VmathSoaTransform3 vmathSoaT3Mul_V( VmathSoaTransform3 tfrm0, VmathSoaTransform3 tfrm1 );
-
-/*
- * Construct an identity 3x4 transformation matrix
- */
-static inline VmathSoaTransform3 vmathSoaT3MakeIdentity_V( );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the x axis
- */
-static inline VmathSoaTransform3 vmathSoaT3MakeRotationX_V( vec_float4 radians );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the y axis
- */
-static inline VmathSoaTransform3 vmathSoaT3MakeRotationY_V( vec_float4 radians );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the z axis
- */
-static inline VmathSoaTransform3 vmathSoaT3MakeRotationZ_V( vec_float4 radians );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around the x, y, and z axes
- */
-static inline VmathSoaTransform3 vmathSoaT3MakeRotationZYX_V( VmathSoaVector3 radiansXYZ );
-
-/*
- * Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector
- */
-static inline VmathSoaTransform3 vmathSoaT3MakeRotationAxis_V( vec_float4 radians, VmathSoaVector3 unitVec );
-
-/*
- * Construct a rotation matrix from a unit-length quaternion
- */
-static inline VmathSoaTransform3 vmathSoaT3MakeRotationQ_V( VmathSoaQuat unitQuat );
-
-/*
- * Construct a 3x4 transformation matrix to perform scaling
- */
-static inline VmathSoaTransform3 vmathSoaT3MakeScale_V( VmathSoaVector3 scaleVec );
-
-/*
- * Construct a 3x4 transformation matrix to perform translation
- */
-static inline VmathSoaTransform3 vmathSoaT3MakeTranslation_V( VmathSoaVector3 translateVec );
-
-/*
- * Append (post-multiply) a scale transformation to a 3x4 transformation matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline VmathSoaTransform3 vmathSoaT3AppendScale_V( VmathSoaTransform3 tfrm, VmathSoaVector3 scaleVec );
-
-/*
- * Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix
- * NOTE: 
- * Faster than creating and multiplying a scale transformation matrix.
- */
-static inline VmathSoaTransform3 vmathSoaT3PrependScale_V( VmathSoaVector3 scaleVec, VmathSoaTransform3 tfrm );
-
-/*
- * Multiply two 3x4 transformation matrices per element
- */
-static inline VmathSoaTransform3 vmathSoaT3MulPerElem_V( VmathSoaTransform3 tfrm0, VmathSoaTransform3 tfrm1 );
-
-/*
- * Compute the absolute value of a 3x4 transformation matrix per element
- */
-static inline VmathSoaTransform3 vmathSoaT3AbsPerElem_V( VmathSoaTransform3 tfrm );
-
-/*
- * Inverse of a 3x4 transformation matrix
- * NOTE: 
- * Result is unpredictable when the determinant of the left 3x3 submatrix is equal to or near 0.
- */
-static inline VmathSoaTransform3 vmathSoaT3Inverse_V( VmathSoaTransform3 tfrm );
-
-/*
- * Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix
- * NOTE: 
- * This can be used to achieve better performance than a general inverse when the specified 3x4 transformation matrix meets the given restrictions.
- */
-static inline VmathSoaTransform3 vmathSoaT3OrthoInverse_V( VmathSoaTransform3 tfrm );
-
-/*
- * Conditionally select between two 3x4 transformation matrices
- * NOTE: 
- * This function uses a conditional select instruction to avoid a branch.
- */
-static inline VmathSoaTransform3 vmathSoaT3Select_V( VmathSoaTransform3 tfrm0, VmathSoaTransform3 tfrm1, vec_uint4 select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-/*
- * Print a 3x4 transformation matrix
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaT3Print_V( VmathSoaTransform3 tfrm );
-
-/*
- * Print a 3x4 transformation matrix and an associated string identifier
- * NOTE: 
- * Function is only defined when _VECTORMATH_DEBUG is defined.
- */
-static inline void vmathSoaT3Prints_V( VmathSoaTransform3 tfrm, const char *name );
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#include "vectormath_soa.h"
-#include "vec_soa_v.h"
-#include "quat_soa_v.h"
-#include "mat_soa_v.h"
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_SOA_C_V_SPU_H
+#define _VECTORMATH_SOA_C_V_SPU_H
+
+#include <math.h>
+#include <spu_intrinsics.h>
+#include "vectormath_aos_v.h"
+
+#ifdef _VECTORMATH_DEBUG
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+#ifndef _VECTORMATH_SOA_C_TYPES_H
+#define _VECTORMATH_SOA_C_TYPES_H
+
+/* A set of four 3-D vectors in structure-of-arrays format:
+ * each member holds one coordinate for all four vectors (one vector per slot). */
+typedef struct _VmathSoaVector3
+{
+    vec_float4 x; /* x elements of the four vectors */
+    vec_float4 y; /* y elements of the four vectors */
+    vec_float4 z; /* z elements of the four vectors */
+} VmathSoaVector3;
+
+/* A set of four 4-D vectors in structure-of-arrays format:
+ * each member holds one coordinate for all four vectors (one vector per slot). */
+typedef struct _VmathSoaVector4
+{
+    vec_float4 x; /* x elements of the four vectors */
+    vec_float4 y; /* y elements of the four vectors */
+    vec_float4 z; /* z elements of the four vectors */
+    vec_float4 w; /* w elements of the four vectors */
+} VmathSoaVector4;
+
+/* A set of four 3-D points in structure-of-arrays format:
+ * each member holds one coordinate for all four points (one point per slot). */
+typedef struct _VmathSoaPoint3
+{
+    vec_float4 x; /* x elements of the four points */
+    vec_float4 y; /* y elements of the four points */
+    vec_float4 z; /* z elements of the four points */
+} VmathSoaPoint3;
+
+/* A set of four quaternions in structure-of-arrays format:
+ * each member holds one element for all four quaternions (one quaternion per slot). */
+typedef struct _VmathSoaQuat
+{
+    vec_float4 x; /* x elements of the four quaternions */
+    vec_float4 y; /* y elements of the four quaternions */
+    vec_float4 z; /* z elements of the four quaternions */
+    vec_float4 w; /* w elements of the four quaternions */
+} VmathSoaQuat;
+
+/* A set of four 3x3 matrices in structure-of-arrays format,
+ * stored as three SoA column vectors. */
+typedef struct _VmathSoaMatrix3
+{
+    VmathSoaVector3 col0; /* column 0 of each of the four matrices */
+    VmathSoaVector3 col1; /* column 1 of each of the four matrices */
+    VmathSoaVector3 col2; /* column 2 of each of the four matrices */
+} VmathSoaMatrix3;
+
+/* A set of four 4x4 matrices in structure-of-arrays format,
+ * stored as four SoA column vectors. */
+typedef struct _VmathSoaMatrix4
+{
+    VmathSoaVector4 col0; /* column 0 of each of the four matrices */
+    VmathSoaVector4 col1; /* column 1 of each of the four matrices */
+    VmathSoaVector4 col2; /* column 2 of each of the four matrices */
+    VmathSoaVector4 col3; /* column 3 of each of the four matrices */
+} VmathSoaMatrix4;
+
+/* A set of four 3x4 transformation matrices in structure-of-arrays format,
+ * stored as four SoA 3-D column vectors. */
+typedef struct _VmathSoaTransform3
+{
+    VmathSoaVector3 col0; /* column 0 of each of the four transforms */
+    VmathSoaVector3 col1; /* column 1 of each of the four transforms */
+    VmathSoaVector3 col2; /* column 2 of each of the four transforms */
+    VmathSoaVector3 col3; /* column 3 of each transform; presumably the translation -- TODO confirm */
+} VmathSoaTransform3;
+
+#endif
+
+/*
+ * Construct a 3-D vector from x, y, and z elements
+ */
+static inline VmathSoaVector3 vmathSoaV3MakeFromElems_V( vec_float4 x, vec_float4 y, vec_float4 z );
+
+/*
+ * Copy elements from a 3-D point into a 3-D vector
+ */
+static inline VmathSoaVector3 vmathSoaV3MakeFromP3_V( VmathSoaPoint3 pnt );
+
+/*
+ * Set all elements of a 3-D vector to the same scalar value
+ */
+static inline VmathSoaVector3 vmathSoaV3MakeFromScalar_V( vec_float4 scalar );
+
+/*
+ * Replicate an AoS 3-D vector
+ */
+static inline VmathSoaVector3 vmathSoaV3MakeFromAos_V( VmathVector3 vec );
+
+/*
+ * Insert four AoS 3-D vectors
+ */
+static inline VmathSoaVector3 vmathSoaV3MakeFrom4Aos_V( VmathVector3 vec0, VmathVector3 vec1, VmathVector3 vec2, VmathVector3 vec3 );
+
+/*
+ * Extract four AoS 3-D vectors
+ */
+static inline void vmathSoaV3Get4Aos_V( VmathSoaVector3 vec, VmathVector3 *result0, VmathVector3 *result1, VmathVector3 *result2, VmathVector3 *result3 );
+
+/*
+ * Set the x element of a 3-D vector
+ */
+static inline void vmathSoaV3SetX_V( VmathSoaVector3 *result, vec_float4 x );
+
+/*
+ * Set the y element of a 3-D vector
+ */
+static inline void vmathSoaV3SetY_V( VmathSoaVector3 *result, vec_float4 y );
+
+/*
+ * Set the z element of a 3-D vector
+ */
+static inline void vmathSoaV3SetZ_V( VmathSoaVector3 *result, vec_float4 z );
+
+/*
+ * Get the x element of a 3-D vector
+ */
+static inline vec_float4 vmathSoaV3GetX_V( VmathSoaVector3 vec );
+
+/*
+ * Get the y element of a 3-D vector
+ */
+static inline vec_float4 vmathSoaV3GetY_V( VmathSoaVector3 vec );
+
+/*
+ * Get the z element of a 3-D vector
+ */
+static inline vec_float4 vmathSoaV3GetZ_V( VmathSoaVector3 vec );
+
+/*
+ * Set an x, y, or z element of a 3-D vector by index
+ */
+static inline void vmathSoaV3SetElem_V( VmathSoaVector3 *result, int idx, vec_float4 value );
+
+/*
+ * Get an x, y, or z element of a 3-D vector by index
+ */
+static inline vec_float4 vmathSoaV3GetElem_V( VmathSoaVector3 vec, int idx );
+
+/*
+ * Add two 3-D vectors
+ */
+static inline VmathSoaVector3 vmathSoaV3Add_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
+
+/*
+ * Subtract a 3-D vector from another 3-D vector
+ */
+static inline VmathSoaVector3 vmathSoaV3Sub_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
+
+/*
+ * Add a 3-D vector to a 3-D point
+ */
+static inline VmathSoaPoint3 vmathSoaV3AddP3_V( VmathSoaVector3 vec, VmathSoaPoint3 pnt );
+
+/*
+ * Multiply a 3-D vector by a scalar
+ */
+static inline VmathSoaVector3 vmathSoaV3ScalarMul_V( VmathSoaVector3 vec, vec_float4 scalar );
+
+/*
+ * Divide a 3-D vector by a scalar
+ */
+static inline VmathSoaVector3 vmathSoaV3ScalarDiv_V( VmathSoaVector3 vec, vec_float4 scalar );
+
+/*
+ * Negate all elements of a 3-D vector
+ */
+static inline VmathSoaVector3 vmathSoaV3Neg_V( VmathSoaVector3 vec );
+
+/*
+ * Construct x axis; takes no arguments ("( void )" makes this a true C prototype)
+ */
+static inline VmathSoaVector3 vmathSoaV3MakeXAxis_V( void );
+
+/*
+ * Construct y axis; takes no arguments ("( void )" makes this a true C prototype)
+ */
+static inline VmathSoaVector3 vmathSoaV3MakeYAxis_V( void );
+
+/*
+ * Construct z axis; takes no arguments ("( void )" makes this a true C prototype)
+ */
+static inline VmathSoaVector3 vmathSoaV3MakeZAxis_V( void );
+
+/*
+ * Multiply two 3-D vectors per element
+ */
+static inline VmathSoaVector3 vmathSoaV3MulPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
+
+/*
+ * Divide two 3-D vectors per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function divf4.
+ */
+static inline VmathSoaVector3 vmathSoaV3DivPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
+
+/*
+ * Compute the reciprocal of a 3-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function recipf4.
+ */
+static inline VmathSoaVector3 vmathSoaV3RecipPerElem_V( VmathSoaVector3 vec );
+
+/*
+ * Compute the square root of a 3-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function sqrtf4.
+ */
+static inline VmathSoaVector3 vmathSoaV3SqrtPerElem_V( VmathSoaVector3 vec );
+
+/*
+ * Compute the reciprocal square root of a 3-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function rsqrtf4.
+ */
+static inline VmathSoaVector3 vmathSoaV3RsqrtPerElem_V( VmathSoaVector3 vec );
+
+/*
+ * Compute the absolute value of a 3-D vector per element
+ */
+static inline VmathSoaVector3 vmathSoaV3AbsPerElem_V( VmathSoaVector3 vec );
+
+/*
+ * Copy sign from one 3-D vector to another, per element
+ */
+static inline VmathSoaVector3 vmathSoaV3CopySignPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
+
+/*
+ * Maximum of two 3-D vectors per element
+ */
+static inline VmathSoaVector3 vmathSoaV3MaxPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
+
+/*
+ * Minimum of two 3-D vectors per element
+ */
+static inline VmathSoaVector3 vmathSoaV3MinPerElem_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
+
+/*
+ * Maximum element of a 3-D vector
+ */
+static inline vec_float4 vmathSoaV3MaxElem_V( VmathSoaVector3 vec );
+
+/*
+ * Minimum element of a 3-D vector
+ */
+static inline vec_float4 vmathSoaV3MinElem_V( VmathSoaVector3 vec );
+
+/*
+ * Compute the sum of all elements of a 3-D vector
+ */
+static inline vec_float4 vmathSoaV3Sum_V( VmathSoaVector3 vec );
+
+/*
+ * Compute the dot product of two 3-D vectors
+ */
+static inline vec_float4 vmathSoaV3Dot_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
+
+/*
+ * Compute the square of the length of a 3-D vector
+ */
+static inline vec_float4 vmathSoaV3LengthSqr_V( VmathSoaVector3 vec );
+
+/*
+ * Compute the length of a 3-D vector
+ */
+static inline vec_float4 vmathSoaV3Length_V( VmathSoaVector3 vec );
+
+/*
+ * Normalize a 3-D vector
+ * NOTE: 
+ * The result is unpredictable when all elements of vec are at or near zero.
+ */
+static inline VmathSoaVector3 vmathSoaV3Normalize_V( VmathSoaVector3 vec );
+
+/*
+ * Compute cross product of two 3-D vectors
+ */
+static inline VmathSoaVector3 vmathSoaV3Cross_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
+
+/*
+ * Outer product of two 3-D vectors
+ */
+static inline VmathSoaMatrix3 vmathSoaV3Outer_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
+
+/*
+ * Pre-multiply a row vector by a 3x3 matrix
+ */
+static inline VmathSoaVector3 vmathSoaV3RowMul_V( VmathSoaVector3 vec, VmathSoaMatrix3 mat );
+
+/*
+ * Cross-product matrix of a 3-D vector
+ */
+static inline VmathSoaMatrix3 vmathSoaV3CrossMatrix_V( VmathSoaVector3 vec );
+
+/*
+ * Create cross-product matrix and multiply
+ * NOTE: 
+ * Faster than separately creating a cross-product matrix and multiplying.
+ */
+static inline VmathSoaMatrix3 vmathSoaV3CrossMatrixMul_V( VmathSoaVector3 vec, VmathSoaMatrix3 mat );
+
+/*
+ * Linear interpolation between two 3-D vectors
+ * NOTE: 
+ * Does not clamp t between 0 and 1.
+ */
+static inline VmathSoaVector3 vmathSoaV3Lerp_V( vec_float4 t, VmathSoaVector3 vec0, VmathSoaVector3 vec1 );
+
+/*
+ * Spherical linear interpolation between two 3-D vectors
+ * NOTE: 
+ * The result is unpredictable if the vectors point in opposite directions.
+ * Does not clamp t between 0 and 1.
+ */
+static inline VmathSoaVector3 vmathSoaV3Slerp_V( vec_float4 t, VmathSoaVector3 unitVec0, VmathSoaVector3 unitVec1 );
+
+/*
+ * Conditionally select between two 3-D vectors
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline VmathSoaVector3 vmathSoaV3Select_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1, vec_uint4 select1 );
+
+/*
+ * Load four three-float 3-D vectors, stored in three quadwords
+ */
+static inline void vmathSoaV3LoadXYZArray_V( VmathSoaVector3 *vec, const vec_float4 *threeQuads );
+
+/*
+ * Store four slots of an SoA 3-D vector in three quadwords
+ */
+static inline void vmathSoaV3StoreXYZArray_V( VmathSoaVector3 vec, vec_float4 *threeQuads );
+
+/*
+ * Store eight slots of two SoA 3-D vectors as half-floats
+ */
+static inline void vmathSoaV3StoreHalfFloats_V( VmathSoaVector3 vec0, VmathSoaVector3 vec1, vec_ushort8 *threeQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3-D vector
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaV3Print_V( VmathSoaVector3 vec );
+
+/*
+ * Print a 3-D vector and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaV3Prints_V( VmathSoaVector3 vec, const char *name );
+
+#endif
+
+/*
+ * Construct a 4-D vector from x, y, z, and w elements
+ */
+static inline VmathSoaVector4 vmathSoaV4MakeFromElems_V( vec_float4 x, vec_float4 y, vec_float4 z, vec_float4 w );
+
+/*
+ * Construct a 4-D vector from a 3-D vector and a scalar
+ */
+static inline VmathSoaVector4 vmathSoaV4MakeFromV3Scalar_V( VmathSoaVector3 xyz, vec_float4 w );
+
+/*
+ * Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0
+ */
+static inline VmathSoaVector4 vmathSoaV4MakeFromV3_V( VmathSoaVector3 vec );
+
+/*
+ * Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1
+ */
+static inline VmathSoaVector4 vmathSoaV4MakeFromP3_V( VmathSoaPoint3 pnt );
+
+/*
+ * Copy elements from a quaternion into a 4-D vector
+ */
+static inline VmathSoaVector4 vmathSoaV4MakeFromQ_V( VmathSoaQuat quat );
+
+/*
+ * Set all elements of a 4-D vector to the same scalar value
+ */
+static inline VmathSoaVector4 vmathSoaV4MakeFromScalar_V( vec_float4 scalar );
+
+/*
+ * Replicate an AoS 4-D vector
+ */
+static inline VmathSoaVector4 vmathSoaV4MakeFromAos_V( VmathVector4 vec );
+
+/*
+ * Insert four AoS 4-D vectors
+ */
+static inline VmathSoaVector4 vmathSoaV4MakeFrom4Aos_V( VmathVector4 vec0, VmathVector4 vec1, VmathVector4 vec2, VmathVector4 vec3 );
+
+/*
+ * Extract four AoS 4-D vectors
+ */
+static inline void vmathSoaV4Get4Aos_V( VmathSoaVector4 vec, VmathVector4 *result0, VmathVector4 *result1, VmathVector4 *result2, VmathVector4 *result3 );
+
+/*
+ * Set the x, y, and z elements of a 4-D vector
+ * NOTE: 
+ * This function does not change the w element.
+ */
+static inline void vmathSoaV4SetXYZ_V( VmathSoaVector4 *result, VmathSoaVector3 vec );
+
+/*
+ * Get the x, y, and z elements of a 4-D vector
+ */
+static inline VmathSoaVector3 vmathSoaV4GetXYZ_V( VmathSoaVector4 vec );
+
+/*
+ * Set the x element of a 4-D vector
+ */
+static inline void vmathSoaV4SetX_V( VmathSoaVector4 *result, vec_float4 x );
+
+/*
+ * Set the y element of a 4-D vector
+ */
+static inline void vmathSoaV4SetY_V( VmathSoaVector4 *result, vec_float4 y );
+
+/*
+ * Set the z element of a 4-D vector
+ */
+static inline void vmathSoaV4SetZ_V( VmathSoaVector4 *result, vec_float4 z );
+
+/*
+ * Set the w element of a 4-D vector
+ */
+static inline void vmathSoaV4SetW_V( VmathSoaVector4 *result, vec_float4 w );
+
+/*
+ * Get the x element of a 4-D vector
+ */
+static inline vec_float4 vmathSoaV4GetX_V( VmathSoaVector4 vec );
+
+/*
+ * Get the y element of a 4-D vector
+ */
+static inline vec_float4 vmathSoaV4GetY_V( VmathSoaVector4 vec );
+
+/*
+ * Get the z element of a 4-D vector
+ */
+static inline vec_float4 vmathSoaV4GetZ_V( VmathSoaVector4 vec );
+
+/*
+ * Get the w element of a 4-D vector
+ */
+static inline vec_float4 vmathSoaV4GetW_V( VmathSoaVector4 vec );
+
+/*
+ * Set an x, y, z, or w element of a 4-D vector by index
+ */
+static inline void vmathSoaV4SetElem_V( VmathSoaVector4 *result, int idx, vec_float4 value );
+
+/*
+ * Get an x, y, z, or w element of a 4-D vector by index
+ */
+static inline vec_float4 vmathSoaV4GetElem_V( VmathSoaVector4 vec, int idx );
+
+/*
+ * Add two 4-D vectors
+ */
+static inline VmathSoaVector4 vmathSoaV4Add_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
+
+/*
+ * Subtract a 4-D vector from another 4-D vector
+ */
+static inline VmathSoaVector4 vmathSoaV4Sub_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
+
+/*
+ * Multiply a 4-D vector by a scalar
+ */
+static inline VmathSoaVector4 vmathSoaV4ScalarMul_V( VmathSoaVector4 vec, vec_float4 scalar );
+
+/*
+ * Divide a 4-D vector by a scalar
+ */
+static inline VmathSoaVector4 vmathSoaV4ScalarDiv_V( VmathSoaVector4 vec, vec_float4 scalar );
+
+/*
+ * Negate all elements of a 4-D vector
+ */
+static inline VmathSoaVector4 vmathSoaV4Neg_V( VmathSoaVector4 vec );
+
+/*
+ * Construct the x-axis 4-D vector (no parameters; declared (void) so this is a full C prototype)
+ */
+static inline VmathSoaVector4 vmathSoaV4MakeXAxis_V( void );
+
+/*
+ * Construct the y-axis 4-D vector (no parameters; declared (void) so this is a full C prototype)
+ */
+static inline VmathSoaVector4 vmathSoaV4MakeYAxis_V( void );
+
+/*
+ * Construct the z-axis 4-D vector (no parameters; declared (void) so this is a full C prototype)
+ */
+static inline VmathSoaVector4 vmathSoaV4MakeZAxis_V( void );
+
+/*
+ * Construct the w-axis 4-D vector (no parameters; declared (void) so this is a full C prototype)
+ */
+static inline VmathSoaVector4 vmathSoaV4MakeWAxis_V( void );
+
+/*
+ * Multiply two 4-D vectors per element
+ */
+static inline VmathSoaVector4 vmathSoaV4MulPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
+
+/*
+ * Divide two 4-D vectors per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function divf4.
+ */
+static inline VmathSoaVector4 vmathSoaV4DivPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
+
+/*
+ * Compute the reciprocal of a 4-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function recipf4.
+ */
+static inline VmathSoaVector4 vmathSoaV4RecipPerElem_V( VmathSoaVector4 vec );
+
+/*
+ * Compute the square root of a 4-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function sqrtf4.
+ */
+static inline VmathSoaVector4 vmathSoaV4SqrtPerElem_V( VmathSoaVector4 vec );
+
+/*
+ * Compute the reciprocal square root of a 4-D vector per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function rsqrtf4.
+ */
+static inline VmathSoaVector4 vmathSoaV4RsqrtPerElem_V( VmathSoaVector4 vec );
+
+/*
+ * Compute the absolute value of a 4-D vector per element
+ */
+static inline VmathSoaVector4 vmathSoaV4AbsPerElem_V( VmathSoaVector4 vec );
+
+/*
+ * Copy sign from one 4-D vector to another, per element
+ */
+static inline VmathSoaVector4 vmathSoaV4CopySignPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
+
+/*
+ * Maximum of two 4-D vectors per element
+ */
+static inline VmathSoaVector4 vmathSoaV4MaxPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
+
+/*
+ * Minimum of two 4-D vectors per element
+ */
+static inline VmathSoaVector4 vmathSoaV4MinPerElem_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
+
+/*
+ * Maximum element of a 4-D vector
+ */
+static inline vec_float4 vmathSoaV4MaxElem_V( VmathSoaVector4 vec );
+
+/*
+ * Minimum element of a 4-D vector
+ */
+static inline vec_float4 vmathSoaV4MinElem_V( VmathSoaVector4 vec );
+
+/*
+ * Compute the sum of all elements of a 4-D vector
+ */
+static inline vec_float4 vmathSoaV4Sum_V( VmathSoaVector4 vec );
+
+/*
+ * Compute the dot product of two 4-D vectors
+ */
+static inline vec_float4 vmathSoaV4Dot_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
+
+/*
+ * Compute the square of the length of a 4-D vector
+ */
+static inline vec_float4 vmathSoaV4LengthSqr_V( VmathSoaVector4 vec );
+
+/*
+ * Compute the length of a 4-D vector
+ */
+static inline vec_float4 vmathSoaV4Length_V( VmathSoaVector4 vec );
+
+/*
+ * Normalize a 4-D vector
+ * NOTE: 
+ * The result is unpredictable when all elements of vec are at or near zero.
+ */
+static inline VmathSoaVector4 vmathSoaV4Normalize_V( VmathSoaVector4 vec );
+
+/*
+ * Outer product of two 4-D vectors
+ */
+static inline VmathSoaMatrix4 vmathSoaV4Outer_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
+
+/*
+ * Linear interpolation between two 4-D vectors
+ * NOTE: 
+ * Does not clamp t between 0 and 1.
+ */
+static inline VmathSoaVector4 vmathSoaV4Lerp_V( vec_float4 t, VmathSoaVector4 vec0, VmathSoaVector4 vec1 );
+
+/*
+ * Spherical linear interpolation between two 4-D vectors
+ * NOTE: 
+ * The result is unpredictable if the vectors point in opposite directions.
+ * Does not clamp t between 0 and 1.
+ */
+static inline VmathSoaVector4 vmathSoaV4Slerp_V( vec_float4 t, VmathSoaVector4 unitVec0, VmathSoaVector4 unitVec1 );
+
+/*
+ * Conditionally select between two 4-D vectors
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline VmathSoaVector4 vmathSoaV4Select_V( VmathSoaVector4 vec0, VmathSoaVector4 vec1, vec_uint4 select1 );
+
+/*
+ * Store four slots of an SoA 4-D vector as half-floats
+ */
+static inline void vmathSoaV4StoreHalfFloats_V( VmathSoaVector4 vec, vec_ushort8 *twoQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 4-D vector
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaV4Print_V( VmathSoaVector4 vec );
+
+/*
+ * Print a 4-D vector and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaV4Prints_V( VmathSoaVector4 vec, const char *name );
+
+#endif
+
+/*
+ * Construct a 3-D point from x, y, and z elements
+ */
+static inline VmathSoaPoint3 vmathSoaP3MakeFromElems_V( vec_float4 x, vec_float4 y, vec_float4 z );
+
+/*
+ * Copy elements from a 3-D vector into a 3-D point
+ */
+static inline VmathSoaPoint3 vmathSoaP3MakeFromV3_V( VmathSoaVector3 vec );
+
+/*
+ * Set all elements of a 3-D point to the same scalar value
+ */
+static inline VmathSoaPoint3 vmathSoaP3MakeFromScalar_V( vec_float4 scalar );
+
+/*
+ * Replicate an AoS 3-D point
+ */
+static inline VmathSoaPoint3 vmathSoaP3MakeFromAos_V( VmathPoint3 pnt );
+
+/*
+ * Construct an SoA 3-D point from four AoS 3-D points
+ */
+static inline VmathSoaPoint3 vmathSoaP3MakeFrom4Aos_V( VmathPoint3 pnt0, VmathPoint3 pnt1, VmathPoint3 pnt2, VmathPoint3 pnt3 );
+
+/*
+ * Extract four AoS 3-D points from an SoA 3-D point (outputs: result0..result3)
+ */
+static inline void vmathSoaP3Get4Aos_V( VmathSoaPoint3 pnt, VmathPoint3 *result0, VmathPoint3 *result1, VmathPoint3 *result2, VmathPoint3 *result3 );
+
+/*
+ * Set the x element of a 3-D point
+ */
+static inline void vmathSoaP3SetX_V( VmathSoaPoint3 *result, vec_float4 x );
+
+/*
+ * Set the y element of a 3-D point
+ */
+static inline void vmathSoaP3SetY_V( VmathSoaPoint3 *result, vec_float4 y );
+
+/*
+ * Set the z element of a 3-D point
+ */
+static inline void vmathSoaP3SetZ_V( VmathSoaPoint3 *result, vec_float4 z );
+
+/*
+ * Get the x element of a 3-D point
+ */
+static inline vec_float4 vmathSoaP3GetX_V( VmathSoaPoint3 pnt );
+
+/*
+ * Get the y element of a 3-D point
+ */
+static inline vec_float4 vmathSoaP3GetY_V( VmathSoaPoint3 pnt );
+
+/*
+ * Get the z element of a 3-D point
+ */
+static inline vec_float4 vmathSoaP3GetZ_V( VmathSoaPoint3 pnt );
+
+/*
+ * Set an x, y, or z element of a 3-D point by index
+ */
+static inline void vmathSoaP3SetElem_V( VmathSoaPoint3 *result, int idx, vec_float4 value );
+
+/*
+ * Get an x, y, or z element of a 3-D point by index
+ */
+static inline vec_float4 vmathSoaP3GetElem_V( VmathSoaPoint3 pnt, int idx );
+
+/*
+ * Subtract a 3-D point from another 3-D point
+ */
+static inline VmathSoaVector3 vmathSoaP3Sub_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 );
+
+/*
+ * Add a 3-D point to a 3-D vector
+ */
+static inline VmathSoaPoint3 vmathSoaP3AddV3_V( VmathSoaPoint3 pnt, VmathSoaVector3 vec );
+
+/*
+ * Subtract a 3-D vector from a 3-D point
+ */
+static inline VmathSoaPoint3 vmathSoaP3SubV3_V( VmathSoaPoint3 pnt, VmathSoaVector3 vec );
+
+/*
+ * Multiply two 3-D points per element
+ */
+static inline VmathSoaPoint3 vmathSoaP3MulPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 );
+
+/*
+ * Divide two 3-D points per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function divf4.
+ */
+static inline VmathSoaPoint3 vmathSoaP3DivPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 );
+
+/*
+ * Compute the reciprocal of a 3-D point per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function recipf4.
+ */
+static inline VmathSoaPoint3 vmathSoaP3RecipPerElem_V( VmathSoaPoint3 pnt );
+
+/*
+ * Compute the square root of a 3-D point per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function sqrtf4.
+ */
+static inline VmathSoaPoint3 vmathSoaP3SqrtPerElem_V( VmathSoaPoint3 pnt );
+
+/*
+ * Compute the reciprocal square root of a 3-D point per element
+ * NOTE: 
+ * Floating-point behavior matches standard library function rsqrtf4.
+ */
+static inline VmathSoaPoint3 vmathSoaP3RsqrtPerElem_V( VmathSoaPoint3 pnt );
+
+/*
+ * Compute the absolute value of a 3-D point per element
+ */
+static inline VmathSoaPoint3 vmathSoaP3AbsPerElem_V( VmathSoaPoint3 pnt );
+
+/*
+ * Copy sign from one 3-D point to another, per element
+ */
+static inline VmathSoaPoint3 vmathSoaP3CopySignPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 );
+
+/*
+ * Maximum of two 3-D points per element
+ */
+static inline VmathSoaPoint3 vmathSoaP3MaxPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 );
+
+/*
+ * Minimum of two 3-D points per element
+ */
+static inline VmathSoaPoint3 vmathSoaP3MinPerElem_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 );
+
+/*
+ * Maximum element of a 3-D point
+ */
+static inline vec_float4 vmathSoaP3MaxElem_V( VmathSoaPoint3 pnt );
+
+/*
+ * Minimum element of a 3-D point
+ */
+static inline vec_float4 vmathSoaP3MinElem_V( VmathSoaPoint3 pnt );
+
+/*
+ * Compute the sum of all elements of a 3-D point
+ */
+static inline vec_float4 vmathSoaP3Sum_V( VmathSoaPoint3 pnt );
+
+/*
+ * Apply uniform scale to a 3-D point
+ */
+static inline VmathSoaPoint3 vmathSoaP3Scale_V( VmathSoaPoint3 pnt, vec_float4 scaleVal );
+
+/*
+ * Apply non-uniform scale to a 3-D point
+ */
+static inline VmathSoaPoint3 vmathSoaP3NonUniformScale_V( VmathSoaPoint3 pnt, VmathSoaVector3 scaleVec );
+
+/*
+ * Scalar projection of a 3-D point on a unit-length 3-D vector
+ */
+static inline vec_float4 vmathSoaP3Projection_V( VmathSoaPoint3 pnt, VmathSoaVector3 unitVec );
+
+/*
+ * Compute the square of the distance of a 3-D point from the coordinate-system origin
+ */
+static inline vec_float4 vmathSoaP3DistSqrFromOrigin_V( VmathSoaPoint3 pnt );
+
+/*
+ * Compute the distance of a 3-D point from the coordinate-system origin
+ */
+static inline vec_float4 vmathSoaP3DistFromOrigin_V( VmathSoaPoint3 pnt );
+
+/*
+ * Compute the square of the distance between two 3-D points
+ */
+static inline vec_float4 vmathSoaP3DistSqr_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 );
+
+/*
+ * Compute the distance between two 3-D points
+ */
+static inline vec_float4 vmathSoaP3Dist_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 );
+
+/*
+ * Linear interpolation between two 3-D points
+ * NOTE: 
+ * Does not clamp t between 0 and 1.
+ */
+static inline VmathSoaPoint3 vmathSoaP3Lerp_V( vec_float4 t, VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1 );
+
+/*
+ * Conditionally select between two 3-D points
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline VmathSoaPoint3 vmathSoaP3Select_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1, vec_uint4 select1 );
+
+/*
+ * Load four three-float 3-D points, stored in three quadwords
+ */
+static inline void vmathSoaP3LoadXYZArray_V( VmathSoaPoint3 *pnt, const vec_float4 *threeQuads );
+
+/*
+ * Store four slots of an SoA 3-D point in three quadwords
+ */
+static inline void vmathSoaP3StoreXYZArray_V( VmathSoaPoint3 pnt, vec_float4 *threeQuads );
+
+/*
+ * Store eight slots of two SoA 3-D points as half-floats
+ */
+static inline void vmathSoaP3StoreHalfFloats_V( VmathSoaPoint3 pnt0, VmathSoaPoint3 pnt1, vec_ushort8 *threeQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3-D point
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaP3Print_V( VmathSoaPoint3 pnt );
+
+/*
+ * Print a 3-D point and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaP3Prints_V( VmathSoaPoint3 pnt, const char *name );
+
+#endif
+
+/*
+ * Construct a quaternion from x, y, z, and w elements
+ */
+static inline VmathSoaQuat vmathSoaQMakeFromElems_V( vec_float4 x, vec_float4 y, vec_float4 z, vec_float4 w );
+
+/*
+ * Construct a quaternion from a 3-D vector and a scalar
+ */
+static inline VmathSoaQuat vmathSoaQMakeFromV3Scalar_V( VmathSoaVector3 xyz, vec_float4 w );
+
+/*
+ * Copy elements from a 4-D vector into a quaternion
+ */
+static inline VmathSoaQuat vmathSoaQMakeFromV4_V( VmathSoaVector4 vec );
+
+/*
+ * Convert a rotation matrix to a unit-length quaternion
+ */
+static inline VmathSoaQuat vmathSoaQMakeFromM3_V( VmathSoaMatrix3 rotMat );
+
+/*
+ * Set all elements of a quaternion to the same scalar value
+ */
+static inline VmathSoaQuat vmathSoaQMakeFromScalar_V( vec_float4 scalar );
+
+/*
+ * Replicate an AoS quaternion
+ */
+static inline VmathSoaQuat vmathSoaQMakeFromAos_V( VmathQuat quat );
+
+/*
+ * Construct an SoA quaternion from four AoS quaternions
+ */
+static inline VmathSoaQuat vmathSoaQMakeFrom4Aos_V( VmathQuat quat0, VmathQuat quat1, VmathQuat quat2, VmathQuat quat3 );
+
+/*
+ * Extract four AoS quaternions from an SoA quaternion (outputs: result0..result3)
+ */
+static inline void vmathSoaQGet4Aos_V( VmathSoaQuat quat, VmathQuat *result0, VmathQuat *result1, VmathQuat *result2, VmathQuat *result3 );
+
+/*
+ * Set the x, y, and z elements of a quaternion
+ * NOTE: 
+ * This function does not change the w element.
+ */
+static inline void vmathSoaQSetXYZ_V( VmathSoaQuat *result, VmathSoaVector3 vec );
+
+/*
+ * Get the x, y, and z elements of a quaternion
+ */
+static inline VmathSoaVector3 vmathSoaQGetXYZ_V( VmathSoaQuat quat );
+
+/*
+ * Set the x element of a quaternion
+ */
+static inline void vmathSoaQSetX_V( VmathSoaQuat *result, vec_float4 x );
+
+/*
+ * Set the y element of a quaternion
+ */
+static inline void vmathSoaQSetY_V( VmathSoaQuat *result, vec_float4 y );
+
+/*
+ * Set the z element of a quaternion
+ */
+static inline void vmathSoaQSetZ_V( VmathSoaQuat *result, vec_float4 z );
+
+/*
+ * Set the w element of a quaternion
+ */
+static inline void vmathSoaQSetW_V( VmathSoaQuat *result, vec_float4 w );
+
+/*
+ * Get the x element of a quaternion
+ */
+static inline vec_float4 vmathSoaQGetX_V( VmathSoaQuat quat );
+
+/*
+ * Get the y element of a quaternion
+ */
+static inline vec_float4 vmathSoaQGetY_V( VmathSoaQuat quat );
+
+/*
+ * Get the z element of a quaternion
+ */
+static inline vec_float4 vmathSoaQGetZ_V( VmathSoaQuat quat );
+
+/*
+ * Get the w element of a quaternion
+ */
+static inline vec_float4 vmathSoaQGetW_V( VmathSoaQuat quat );
+
+/*
+ * Set an x, y, z, or w element of a quaternion by index
+ */
+static inline void vmathSoaQSetElem_V( VmathSoaQuat *result, int idx, vec_float4 value );
+
+/*
+ * Get an x, y, z, or w element of a quaternion by index
+ */
+static inline vec_float4 vmathSoaQGetElem_V( VmathSoaQuat quat, int idx );
+
+/*
+ * Add two quaternions
+ */
+static inline VmathSoaQuat vmathSoaQAdd_V( VmathSoaQuat quat0, VmathSoaQuat quat1 );
+
+/*
+ * Subtract a quaternion from another quaternion
+ */
+static inline VmathSoaQuat vmathSoaQSub_V( VmathSoaQuat quat0, VmathSoaQuat quat1 );
+
+/*
+ * Multiply two quaternions
+ */
+static inline VmathSoaQuat vmathSoaQMul_V( VmathSoaQuat quat0, VmathSoaQuat quat1 );
+
+/*
+ * Multiply a quaternion by a scalar
+ */
+static inline VmathSoaQuat vmathSoaQScalarMul_V( VmathSoaQuat quat, vec_float4 scalar );
+
+/*
+ * Divide a quaternion by a scalar
+ */
+static inline VmathSoaQuat vmathSoaQScalarDiv_V( VmathSoaQuat quat, vec_float4 scalar );
+
+/*
+ * Negate all elements of a quaternion
+ */
+static inline VmathSoaQuat vmathSoaQNeg_V( VmathSoaQuat quat );
+
+/*
+ * Construct an identity quaternion (no parameters; declared (void) so this is a full C prototype)
+ */
+static inline VmathSoaQuat vmathSoaQMakeIdentity_V( void );
+
+/*
+ * Construct a quaternion to rotate between two unit-length 3-D vectors
+ * NOTE: 
+ * The result is unpredictable if unitVec0 and unitVec1 point in opposite directions.
+ */
+static inline VmathSoaQuat vmathSoaQMakeRotationArc_V( VmathSoaVector3 unitVec0, VmathSoaVector3 unitVec1 );
+
+/*
+ * Construct a quaternion to rotate around a unit-length 3-D vector
+ */
+static inline VmathSoaQuat vmathSoaQMakeRotationAxis_V( vec_float4 radians, VmathSoaVector3 unitVec );
+
+/*
+ * Construct a quaternion to rotate around the x axis
+ */
+static inline VmathSoaQuat vmathSoaQMakeRotationX_V( vec_float4 radians );
+
+/*
+ * Construct a quaternion to rotate around the y axis
+ */
+static inline VmathSoaQuat vmathSoaQMakeRotationY_V( vec_float4 radians );
+
+/*
+ * Construct a quaternion to rotate around the z axis
+ */
+static inline VmathSoaQuat vmathSoaQMakeRotationZ_V( vec_float4 radians );
+
+/*
+ * Compute the conjugate of a quaternion
+ */
+static inline VmathSoaQuat vmathSoaQConj_V( VmathSoaQuat quat );
+
+/*
+ * Use a unit-length quaternion to rotate a 3-D vector
+ */
+static inline VmathSoaVector3 vmathSoaQRotate_V( VmathSoaQuat unitQuat, VmathSoaVector3 vec );
+
+/*
+ * Compute the dot product of two quaternions
+ */
+static inline vec_float4 vmathSoaQDot_V( VmathSoaQuat quat0, VmathSoaQuat quat1 );
+
+/*
+ * Compute the norm of a quaternion (NOTE(review): presumably the squared length, unlike vmathSoaQLength_V - confirm)
+ */
+static inline vec_float4 vmathSoaQNorm_V( VmathSoaQuat quat );
+
+/*
+ * Compute the length of a quaternion
+ */
+static inline vec_float4 vmathSoaQLength_V( VmathSoaQuat quat );
+
+/*
+ * Normalize a quaternion
+ * NOTE: 
+ * The result is unpredictable when all elements of quat are at or near zero.
+ */
+static inline VmathSoaQuat vmathSoaQNormalize_V( VmathSoaQuat quat );
+
+/*
+ * Linear interpolation between two quaternions
+ * NOTE: 
+ * Does not clamp t between 0 and 1.
+ */
+static inline VmathSoaQuat vmathSoaQLerp_V( vec_float4 t, VmathSoaQuat quat0, VmathSoaQuat quat1 );
+
+/*
+ * Spherical linear interpolation between two quaternions
+ * NOTE: 
+ * Interpolates along the shortest path between orientations.
+ * Does not clamp t between 0 and 1.
+ */
+static inline VmathSoaQuat vmathSoaQSlerp_V( vec_float4 t, VmathSoaQuat unitQuat0, VmathSoaQuat unitQuat1 );
+
+/*
+ * Spherical quadrangle interpolation
+ */
+static inline VmathSoaQuat vmathSoaQSquad_V( vec_float4 t, VmathSoaQuat unitQuat0, VmathSoaQuat unitQuat1, VmathSoaQuat unitQuat2, VmathSoaQuat unitQuat3 );
+
+/*
+ * Conditionally select between two quaternions
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline VmathSoaQuat vmathSoaQSelect_V( VmathSoaQuat quat0, VmathSoaQuat quat1, vec_uint4 select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a quaternion
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaQPrint_V( VmathSoaQuat quat );
+
+/*
+ * Print a quaternion and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaQPrints_V( VmathSoaQuat quat, const char *name );
+
+#endif
+
+/*
+ * Construct a 3x3 matrix containing the specified columns
+ */
+static inline VmathSoaMatrix3 vmathSoaM3MakeFromCols_V( VmathSoaVector3 col0, VmathSoaVector3 col1, VmathSoaVector3 col2 );
+
+/*
+ * Construct a 3x3 rotation matrix from a unit-length quaternion
+ */
+static inline VmathSoaMatrix3 vmathSoaM3MakeFromQ_V( VmathSoaQuat unitQuat );
+
+/*
+ * Set all elements of a 3x3 matrix to the same scalar value
+ */
+static inline VmathSoaMatrix3 vmathSoaM3MakeFromScalar_V( vec_float4 scalar );
+
+/*
+ * Replicate an AoS 3x3 matrix
+ */
+static inline VmathSoaMatrix3 vmathSoaM3MakeFromAos_V( VmathMatrix3 mat );
+
+/*
+ * Construct an SoA 3x3 matrix from four AoS 3x3 matrices
+ */
+static inline VmathSoaMatrix3 vmathSoaM3MakeFrom4Aos_V( VmathMatrix3 mat0, VmathMatrix3 mat1, VmathMatrix3 mat2, VmathMatrix3 mat3 );
+
+/*
+ * Extract four AoS 3x3 matrices from an SoA 3x3 matrix (outputs: result0..result3)
+ */
+static inline void vmathSoaM3Get4Aos_V( VmathSoaMatrix3 mat, VmathMatrix3 *result0, VmathMatrix3 *result1, VmathMatrix3 *result2, VmathMatrix3 *result3 );
+
+/*
+ * Set column 0 of a 3x3 matrix
+ */
+static inline void vmathSoaM3SetCol0_V( VmathSoaMatrix3 *result, VmathSoaVector3 col0 );
+
+/*
+ * Set column 1 of a 3x3 matrix
+ */
+static inline void vmathSoaM3SetCol1_V( VmathSoaMatrix3 *result, VmathSoaVector3 col1 );
+
+/*
+ * Set column 2 of a 3x3 matrix
+ */
+static inline void vmathSoaM3SetCol2_V( VmathSoaMatrix3 *result, VmathSoaVector3 col2 );
+
+/*
+ * Get column 0 of a 3x3 matrix
+ */
+static inline VmathSoaVector3 vmathSoaM3GetCol0_V( VmathSoaMatrix3 mat );
+
+/*
+ * Get column 1 of a 3x3 matrix
+ */
+static inline VmathSoaVector3 vmathSoaM3GetCol1_V( VmathSoaMatrix3 mat );
+
+/*
+ * Get column 2 of a 3x3 matrix
+ */
+static inline VmathSoaVector3 vmathSoaM3GetCol2_V( VmathSoaMatrix3 mat );
+
+/*
+ * Set the column of a 3x3 matrix referred to by the specified index
+ */
+static inline void vmathSoaM3SetCol_V( VmathSoaMatrix3 *result, int col, VmathSoaVector3 vec );
+
+/*
+ * Set the row of a 3x3 matrix referred to by the specified index
+ */
+static inline void vmathSoaM3SetRow_V( VmathSoaMatrix3 *result, int row, VmathSoaVector3 vec );
+
+/*
+ * Get the column of a 3x3 matrix referred to by the specified index
+ */
+static inline VmathSoaVector3 vmathSoaM3GetCol_V( VmathSoaMatrix3 mat, int col );
+
+/*
+ * Get the row of a 3x3 matrix referred to by the specified index
+ */
+static inline VmathSoaVector3 vmathSoaM3GetRow_V( VmathSoaMatrix3 mat, int row );
+
+/*
+ * Set the element of a 3x3 matrix referred to by column and row indices
+ */
+static inline void vmathSoaM3SetElem_V( VmathSoaMatrix3 *result, int col, int row, vec_float4 val );
+
+/*
+ * Get the element of a 3x3 matrix referred to by column and row indices
+ */
+static inline vec_float4 vmathSoaM3GetElem_V( VmathSoaMatrix3 mat, int col, int row );
+
+/*
+ * Add two 3x3 matrices
+ */
+static inline VmathSoaMatrix3 vmathSoaM3Add_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1 );
+
+/*
+ * Subtract a 3x3 matrix from another 3x3 matrix
+ */
+static inline VmathSoaMatrix3 vmathSoaM3Sub_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1 );
+
+/*
+ * Negate all elements of a 3x3 matrix
+ */
+static inline VmathSoaMatrix3 vmathSoaM3Neg_V( VmathSoaMatrix3 mat );
+
+/*
+ * Multiply a 3x3 matrix by a scalar
+ */
+static inline VmathSoaMatrix3 vmathSoaM3ScalarMul_V( VmathSoaMatrix3 mat, vec_float4 scalar );
+
+/*
+ * Multiply a 3x3 matrix by a 3-D vector
+ */
+static inline VmathSoaVector3 vmathSoaM3MulV3_V( VmathSoaMatrix3 mat, VmathSoaVector3 vec );
+
+/*
+ * Multiply two 3x3 matrices
+ */
+static inline VmathSoaMatrix3 vmathSoaM3Mul_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1 );
+
+/*
+ * Construct an identity 3x3 matrix (no parameters; declared (void) so this is a full C prototype)
+ */
+static inline VmathSoaMatrix3 vmathSoaM3MakeIdentity_V( void );
+
+/*
+ * Construct a 3x3 matrix to rotate around the x axis
+ */
+static inline VmathSoaMatrix3 vmathSoaM3MakeRotationX_V( vec_float4 radians );
+
+/*
+ * Construct a 3x3 matrix to rotate around the y axis
+ */
+static inline VmathSoaMatrix3 vmathSoaM3MakeRotationY_V( vec_float4 radians );
+
+/*
+ * Construct a 3x3 matrix to rotate around the z axis
+ */
+static inline VmathSoaMatrix3 vmathSoaM3MakeRotationZ_V( vec_float4 radians );
+
+/*
+ * Construct a 3x3 matrix to rotate around the x, y, and z axes
+ */
+static inline VmathSoaMatrix3 vmathSoaM3MakeRotationZYX_V( VmathSoaVector3 radiansXYZ );
+
+/*
+ * Construct a 3x3 matrix to rotate around a unit-length 3-D vector
+ */
+static inline VmathSoaMatrix3 vmathSoaM3MakeRotationAxis_V( vec_float4 radians, VmathSoaVector3 unitVec );
+
+/*
+ * Construct a rotation matrix from a unit-length quaternion
+ */
+static inline VmathSoaMatrix3 vmathSoaM3MakeRotationQ_V( VmathSoaQuat unitQuat );
+
+/*
+ * Construct a 3x3 matrix to perform scaling
+ */
+static inline VmathSoaMatrix3 vmathSoaM3MakeScale_V( VmathSoaVector3 scaleVec );
+
+/*
+ * Append (post-multiply) a scale transformation to a 3x3 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline VmathSoaMatrix3 vmathSoaM3AppendScale_V( VmathSoaMatrix3 mat, VmathSoaVector3 scaleVec );
+
+/*
+ * Prepend (pre-multiply) a scale transformation to a 3x3 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline VmathSoaMatrix3 vmathSoaM3PrependScale_V( VmathSoaVector3 scaleVec, VmathSoaMatrix3 mat );
+
+/*
+ * Multiply two 3x3 matrices per element
+ */
+static inline VmathSoaMatrix3 vmathSoaM3MulPerElem_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1 );
+
+/*
+ * Compute the absolute value of a 3x3 matrix per element
+ */
+static inline VmathSoaMatrix3 vmathSoaM3AbsPerElem_V( VmathSoaMatrix3 mat );
+
+/*
+ * Transpose of a 3x3 matrix
+ */
+static inline VmathSoaMatrix3 vmathSoaM3Transpose_V( VmathSoaMatrix3 mat );
+
+/*
+ * Compute the inverse of a 3x3 matrix
+ * NOTE: 
+ * Result is unpredictable when the determinant of mat is equal to or near 0.
+ */
+static inline VmathSoaMatrix3 vmathSoaM3Inverse_V( VmathSoaMatrix3 mat );
+
+/*
+ * Determinant of a 3x3 matrix
+ */
+static inline vec_float4 vmathSoaM3Determinant_V( VmathSoaMatrix3 mat );
+
+/*
+ * Conditionally select between two 3x3 matrices
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline VmathSoaMatrix3 vmathSoaM3Select_V( VmathSoaMatrix3 mat0, VmathSoaMatrix3 mat1, vec_uint4 select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3x3 matrix
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaM3Print_V( VmathSoaMatrix3 mat );
+
+/*
+ * Print a 3x3 matrix and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaM3Prints_V( VmathSoaMatrix3 mat, const char *name );
+
+#endif
+
+/*
+ * Construct a 4x4 matrix containing the specified columns
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeFromCols_V( VmathSoaVector4 col0, VmathSoaVector4 col1, VmathSoaVector4 col2, VmathSoaVector4 col3 );
+
+/*
+ * Construct a 4x4 matrix from a 3x4 transformation matrix
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeFromT3_V( VmathSoaTransform3 mat );
+
+/*
+ * Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeFromM3V3_V( VmathSoaMatrix3 mat, VmathSoaVector3 translateVec );
+
+/*
+ * Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeFromQV3_V( VmathSoaQuat unitQuat, VmathSoaVector3 translateVec );
+
+/*
+ * Set all elements of a 4x4 matrix to the same scalar value
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeFromScalar_V( vec_float4 scalar );
+
+/*
+ * Replicate an AoS 4x4 matrix
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeFromAos_V( VmathMatrix4 mat );
+
+/*
+ * Construct an SoA 4x4 matrix from four AoS 4x4 matrices
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeFrom4Aos_V( VmathMatrix4 mat0, VmathMatrix4 mat1, VmathMatrix4 mat2, VmathMatrix4 mat3 );
+
+/*
+ * Extract four AoS 4x4 matrices from an SoA 4x4 matrix (outputs: result0..result3)
+ */
+static inline void vmathSoaM4Get4Aos_V( VmathSoaMatrix4 mat, VmathMatrix4 *result0, VmathMatrix4 *result1, VmathMatrix4 *result2, VmathMatrix4 *result3 );
+
+/*
+ * Set the upper-left 3x3 submatrix
+ * NOTE: 
+ * This function does not change the bottom row elements.
+ */
+static inline void vmathSoaM4SetUpper3x3_V( VmathSoaMatrix4 *result, VmathSoaMatrix3 mat3 );
+
+/*
+ * Get the upper-left 3x3 submatrix of a 4x4 matrix
+ */
+static inline VmathSoaMatrix3 vmathSoaM4GetUpper3x3_V( VmathSoaMatrix4 mat );
+
+/*
+ * Set translation component
+ * NOTE: 
+ * This function does not change the bottom row elements.
+ */
+static inline void vmathSoaM4SetTranslation_V( VmathSoaMatrix4 *result, VmathSoaVector3 translateVec );
+
+/*
+ * Get the translation component of a 4x4 matrix
+ */
+static inline VmathSoaVector3 vmathSoaM4GetTranslation_V( VmathSoaMatrix4 mat );
+
+/*
+ * Set column 0 of a 4x4 matrix
+ */
+static inline void vmathSoaM4SetCol0_V( VmathSoaMatrix4 *result, VmathSoaVector4 col0 );
+
+/*
+ * Set column 1 of a 4x4 matrix
+ */
+static inline void vmathSoaM4SetCol1_V( VmathSoaMatrix4 *result, VmathSoaVector4 col1 );
+
+/*
+ * Set column 2 of a 4x4 matrix
+ */
+static inline void vmathSoaM4SetCol2_V( VmathSoaMatrix4 *result, VmathSoaVector4 col2 );
+
+/*
+ * Set column 3 of a 4x4 matrix
+ */
+static inline void vmathSoaM4SetCol3_V( VmathSoaMatrix4 *result, VmathSoaVector4 col3 );
+
+/*
+ * Get column 0 of a 4x4 matrix
+ */
+static inline VmathSoaVector4 vmathSoaM4GetCol0_V( VmathSoaMatrix4 mat );
+
+/*
+ * Get column 1 of a 4x4 matrix
+ */
+static inline VmathSoaVector4 vmathSoaM4GetCol1_V( VmathSoaMatrix4 mat );
+
+/*
+ * Get column 2 of a 4x4 matrix
+ */
+static inline VmathSoaVector4 vmathSoaM4GetCol2_V( VmathSoaMatrix4 mat );
+
+/*
+ * Get column 3 of a 4x4 matrix
+ */
+static inline VmathSoaVector4 vmathSoaM4GetCol3_V( VmathSoaMatrix4 mat );
+
+/*
+ * Set the column of a 4x4 matrix referred to by the specified index
+ */
+static inline void vmathSoaM4SetCol_V( VmathSoaMatrix4 *result, int col, VmathSoaVector4 vec );
+
+/*
+ * Set the row of a 4x4 matrix referred to by the specified index
+ */
+static inline void vmathSoaM4SetRow_V( VmathSoaMatrix4 *result, int row, VmathSoaVector4 vec );
+
+/*
+ * Get the column of a 4x4 matrix referred to by the specified index
+ */
+static inline VmathSoaVector4 vmathSoaM4GetCol_V( VmathSoaMatrix4 mat, int col );
+
+/*
+ * Get the row of a 4x4 matrix referred to by the specified index
+ */
+static inline VmathSoaVector4 vmathSoaM4GetRow_V( VmathSoaMatrix4 mat, int row );
+
+/*
+ * Set the element of a 4x4 matrix referred to by column and row indices
+ */
+static inline void vmathSoaM4SetElem_V( VmathSoaMatrix4 *result, int col, int row, vec_float4 val );
+
+/*
+ * Get the element of a 4x4 matrix referred to by column and row indices
+ */
+static inline vec_float4 vmathSoaM4GetElem_V( VmathSoaMatrix4 mat, int col, int row );
+
+/*
+ * Add two 4x4 matrices
+ */
+static inline VmathSoaMatrix4 vmathSoaM4Add_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1 );
+
+/*
+ * Subtract a 4x4 matrix from another 4x4 matrix
+ */
+static inline VmathSoaMatrix4 vmathSoaM4Sub_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1 );
+
+/*
+ * Negate all elements of a 4x4 matrix
+ */
+static inline VmathSoaMatrix4 vmathSoaM4Neg_V( VmathSoaMatrix4 mat );
+
+/*
+ * Multiply a 4x4 matrix by a scalar
+ */
+static inline VmathSoaMatrix4 vmathSoaM4ScalarMul_V( VmathSoaMatrix4 mat, vec_float4 scalar );
+
+/*
+ * Multiply a 4x4 matrix by a 4-D vector
+ */
+static inline VmathSoaVector4 vmathSoaM4MulV4_V( VmathSoaMatrix4 mat, VmathSoaVector4 vec );
+
+/*
+ * Multiply a 4x4 matrix by a 3-D vector
+ */
+static inline VmathSoaVector4 vmathSoaM4MulV3_V( VmathSoaMatrix4 mat, VmathSoaVector3 vec );
+
+/*
+ * Multiply a 4x4 matrix by a 3-D point
+ */
+static inline VmathSoaVector4 vmathSoaM4MulP3_V( VmathSoaMatrix4 mat, VmathSoaPoint3 pnt );
+
+/*
+ * Multiply two 4x4 matrices
+ */
+static inline VmathSoaMatrix4 vmathSoaM4Mul_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1 );
+
+/*
+ * Multiply a 4x4 matrix by a 3x4 transformation matrix
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MulT3_V( VmathSoaMatrix4 mat, VmathSoaTransform3 tfrm );
+
+/*
+ * Construct an identity 4x4 matrix
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeIdentity_V( void );
+
+/*
+ * Construct a 4x4 matrix to rotate around the x axis
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeRotationX_V( vec_float4 radians );
+
+/*
+ * Construct a 4x4 matrix to rotate around the y axis
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeRotationY_V( vec_float4 radians );
+
+/*
+ * Construct a 4x4 matrix to rotate around the z axis
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeRotationZ_V( vec_float4 radians );
+
+/*
+ * Construct a 4x4 matrix to rotate around the x, y, and z axes
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeRotationZYX_V( VmathSoaVector3 radiansXYZ );
+
+/*
+ * Construct a 4x4 matrix to rotate around a unit-length 3-D vector
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeRotationAxis_V( vec_float4 radians, VmathSoaVector3 unitVec );
+
+/*
+ * Construct a 4x4 rotation matrix from a unit-length quaternion
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeRotationQ_V( VmathSoaQuat unitQuat );
+
+/*
+ * Construct a 4x4 matrix to perform scaling
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeScale_V( VmathSoaVector3 scaleVec );
+
+/*
+ * Construct a 4x4 matrix to perform translation
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeTranslation_V( VmathSoaVector3 translateVec );
+
+/*
+ * Construct viewing matrix based on eye position, position looked at, and up direction
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeLookAt_V( VmathSoaPoint3 eyePos, VmathSoaPoint3 lookAtPos, VmathSoaVector3 upVec );
+
+/*
+ * Construct a perspective projection matrix
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakePerspective_V( vec_float4 fovyRadians, vec_float4 aspect, vec_float4 zNear, vec_float4 zFar );
+
+/*
+ * Construct a perspective projection matrix based on frustum
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeFrustum_V( vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar );
+
+/*
+ * Construct an orthographic projection matrix
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MakeOrthographic_V( vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar );
+
+/*
+ * Append (post-multiply) a scale transformation to a 4x4 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline VmathSoaMatrix4 vmathSoaM4AppendScale_V( VmathSoaMatrix4 mat, VmathSoaVector3 scaleVec );
+
+/*
+ * Prepend (pre-multiply) a scale transformation to a 4x4 matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline VmathSoaMatrix4 vmathSoaM4PrependScale_V( VmathSoaVector3 scaleVec, VmathSoaMatrix4 mat );
+
+/*
+ * Multiply two 4x4 matrices per element
+ */
+static inline VmathSoaMatrix4 vmathSoaM4MulPerElem_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1 );
+
+/*
+ * Compute the absolute value of a 4x4 matrix per element
+ */
+static inline VmathSoaMatrix4 vmathSoaM4AbsPerElem_V( VmathSoaMatrix4 mat );
+
+/*
+ * Transpose of a 4x4 matrix
+ */
+static inline VmathSoaMatrix4 vmathSoaM4Transpose_V( VmathSoaMatrix4 mat );
+
+/*
+ * Compute the inverse of a 4x4 matrix
+ * NOTE: 
+ * Result is unpredictable when the determinant of mat is equal to or near 0.
+ */
+static inline VmathSoaMatrix4 vmathSoaM4Inverse_V( VmathSoaMatrix4 mat );
+
+/*
+ * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix
+ * NOTE: 
+ * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.  The result is unpredictable when the determinant of mat is equal to or near 0.
+ */
+static inline VmathSoaMatrix4 vmathSoaM4AffineInverse_V( VmathSoaMatrix4 mat );
+
+/*
+ * Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix
+ * NOTE: 
+ * This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.
+ */
+static inline VmathSoaMatrix4 vmathSoaM4OrthoInverse_V( VmathSoaMatrix4 mat );
+
+/*
+ * Determinant of a 4x4 matrix
+ */
+static inline vec_float4 vmathSoaM4Determinant_V( VmathSoaMatrix4 mat );
+
+/*
+ * Conditionally select between two 4x4 matrices
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline VmathSoaMatrix4 vmathSoaM4Select_V( VmathSoaMatrix4 mat0, VmathSoaMatrix4 mat1, vec_uint4 select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 4x4 matrix
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaM4Print_V( VmathSoaMatrix4 mat );
+
+/*
+ * Print a 4x4 matrix and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaM4Prints_V( VmathSoaMatrix4 mat, const char *name );
+
+#endif
+
+/*
+ * Construct a 3x4 transformation matrix containing the specified columns
+ */
+static inline VmathSoaTransform3 vmathSoaT3MakeFromCols_V( VmathSoaVector3 col0, VmathSoaVector3 col1, VmathSoaVector3 col2, VmathSoaVector3 col3 );
+
+/*
+ * Construct a 3x4 transformation matrix from a 3x3 matrix and a 3-D vector
+ */
+static inline VmathSoaTransform3 vmathSoaT3MakeFromM3V3_V( VmathSoaMatrix3 tfrm, VmathSoaVector3 translateVec );
+
+/*
+ * Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector
+ */
+static inline VmathSoaTransform3 vmathSoaT3MakeFromQV3_V( VmathSoaQuat unitQuat, VmathSoaVector3 translateVec );
+
+/*
+ * Set all elements of a 3x4 transformation matrix to the same scalar value
+ */
+static inline VmathSoaTransform3 vmathSoaT3MakeFromScalar_V( vec_float4 scalar );
+
+/*
+ * Replicate an AoS 3x4 transformation matrix
+ */
+static inline VmathSoaTransform3 vmathSoaT3MakeFromAos_V( VmathTransform3 tfrm );
+
+/*
+ * Insert four AoS 3x4 transformation matrices
+ */
+static inline VmathSoaTransform3 vmathSoaT3MakeFrom4Aos_V( VmathTransform3 tfrm0, VmathTransform3 tfrm1, VmathTransform3 tfrm2, VmathTransform3 tfrm3 );
+
+/*
+ * Extract four AoS 3x4 transformation matrices
+ */
+static inline void vmathSoaT3Get4Aos_V( VmathSoaTransform3 tfrm, VmathTransform3 *result0, VmathTransform3 *result1, VmathTransform3 *result2, VmathTransform3 *result3 );
+
+/*
+ * Set the upper-left 3x3 submatrix of a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3SetUpper3x3_V( VmathSoaTransform3 *result, VmathSoaMatrix3 mat3 );
+
+/*
+ * Get the upper-left 3x3 submatrix of a 3x4 transformation matrix
+ */
+static inline VmathSoaMatrix3 vmathSoaT3GetUpper3x3_V( VmathSoaTransform3 tfrm );
+
+/*
+ * Set the translation component of a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3SetTranslation_V( VmathSoaTransform3 *result, VmathSoaVector3 translateVec );
+
+/*
+ * Get the translation component of a 3x4 transformation matrix
+ */
+static inline VmathSoaVector3 vmathSoaT3GetTranslation_V( VmathSoaTransform3 tfrm );
+
+/*
+ * Set column 0 of a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3SetCol0_V( VmathSoaTransform3 *result, VmathSoaVector3 col0 );
+
+/*
+ * Set column 1 of a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3SetCol1_V( VmathSoaTransform3 *result, VmathSoaVector3 col1 );
+
+/*
+ * Set column 2 of a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3SetCol2_V( VmathSoaTransform3 *result, VmathSoaVector3 col2 );
+
+/*
+ * Set column 3 of a 3x4 transformation matrix
+ */
+static inline void vmathSoaT3SetCol3_V( VmathSoaTransform3 *result, VmathSoaVector3 col3 );
+
+/*
+ * Get column 0 of a 3x4 transformation matrix
+ */
+static inline VmathSoaVector3 vmathSoaT3GetCol0_V( VmathSoaTransform3 tfrm );
+
+/*
+ * Get column 1 of a 3x4 transformation matrix
+ */
+static inline VmathSoaVector3 vmathSoaT3GetCol1_V( VmathSoaTransform3 tfrm );
+
+/*
+ * Get column 2 of a 3x4 transformation matrix
+ */
+static inline VmathSoaVector3 vmathSoaT3GetCol2_V( VmathSoaTransform3 tfrm );
+
+/*
+ * Get column 3 of a 3x4 transformation matrix
+ */
+static inline VmathSoaVector3 vmathSoaT3GetCol3_V( VmathSoaTransform3 tfrm );
+
+/*
+ * Set the column of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline void vmathSoaT3SetCol_V( VmathSoaTransform3 *result, int col, VmathSoaVector3 vec );
+
+/*
+ * Set the row of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline void vmathSoaT3SetRow_V( VmathSoaTransform3 *result, int row, VmathSoaVector4 vec );
+
+/*
+ * Get the column of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline VmathSoaVector3 vmathSoaT3GetCol_V( VmathSoaTransform3 tfrm, int col );
+
+/*
+ * Get the row of a 3x4 transformation matrix referred to by the specified index
+ */
+static inline VmathSoaVector4 vmathSoaT3GetRow_V( VmathSoaTransform3 tfrm, int row );
+
+/*
+ * Set the element of a 3x4 transformation matrix referred to by column and row indices
+ */
+static inline void vmathSoaT3SetElem_V( VmathSoaTransform3 *result, int col, int row, vec_float4 val );
+
+/*
+ * Get the element of a 3x4 transformation matrix referred to by column and row indices
+ */
+static inline vec_float4 vmathSoaT3GetElem_V( VmathSoaTransform3 tfrm, int col, int row );
+
+/*
+ * Multiply a 3x4 transformation matrix by a 3-D vector
+ */
+static inline VmathSoaVector3 vmathSoaT3MulV3_V( VmathSoaTransform3 tfrm, VmathSoaVector3 vec );
+
+/*
+ * Multiply a 3x4 transformation matrix by a 3-D point
+ */
+static inline VmathSoaPoint3 vmathSoaT3MulP3_V( VmathSoaTransform3 tfrm, VmathSoaPoint3 pnt );
+
+/*
+ * Multiply two 3x4 transformation matrices
+ */
+static inline VmathSoaTransform3 vmathSoaT3Mul_V( VmathSoaTransform3 tfrm0, VmathSoaTransform3 tfrm1 );
+
+/*
+ * Construct an identity 3x4 transformation matrix
+ */
+static inline VmathSoaTransform3 vmathSoaT3MakeIdentity_V( void );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the x axis
+ */
+static inline VmathSoaTransform3 vmathSoaT3MakeRotationX_V( vec_float4 radians );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the y axis
+ */
+static inline VmathSoaTransform3 vmathSoaT3MakeRotationY_V( vec_float4 radians );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the z axis
+ */
+static inline VmathSoaTransform3 vmathSoaT3MakeRotationZ_V( vec_float4 radians );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around the x, y, and z axes
+ */
+static inline VmathSoaTransform3 vmathSoaT3MakeRotationZYX_V( VmathSoaVector3 radiansXYZ );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector
+ */
+static inline VmathSoaTransform3 vmathSoaT3MakeRotationAxis_V( vec_float4 radians, VmathSoaVector3 unitVec );
+
+/*
+ * Construct a 3x4 transformation matrix to rotate by a unit-length quaternion
+ */
+static inline VmathSoaTransform3 vmathSoaT3MakeRotationQ_V( VmathSoaQuat unitQuat );
+
+/*
+ * Construct a 3x4 transformation matrix to perform scaling
+ */
+static inline VmathSoaTransform3 vmathSoaT3MakeScale_V( VmathSoaVector3 scaleVec );
+
+/*
+ * Construct a 3x4 transformation matrix to perform translation
+ */
+static inline VmathSoaTransform3 vmathSoaT3MakeTranslation_V( VmathSoaVector3 translateVec );
+
+/*
+ * Append (post-multiply) a scale transformation to a 3x4 transformation matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline VmathSoaTransform3 vmathSoaT3AppendScale_V( VmathSoaTransform3 tfrm, VmathSoaVector3 scaleVec );
+
+/*
+ * Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix
+ * NOTE: 
+ * Faster than creating and multiplying a scale transformation matrix.
+ */
+static inline VmathSoaTransform3 vmathSoaT3PrependScale_V( VmathSoaVector3 scaleVec, VmathSoaTransform3 tfrm );
+
+/*
+ * Multiply two 3x4 transformation matrices per element
+ */
+static inline VmathSoaTransform3 vmathSoaT3MulPerElem_V( VmathSoaTransform3 tfrm0, VmathSoaTransform3 tfrm1 );
+
+/*
+ * Compute the absolute value of a 3x4 transformation matrix per element
+ */
+static inline VmathSoaTransform3 vmathSoaT3AbsPerElem_V( VmathSoaTransform3 tfrm );
+
+/*
+ * Inverse of a 3x4 transformation matrix
+ * NOTE: 
+ * Result is unpredictable when the determinant of the left 3x3 submatrix is equal to or near 0.
+ */
+static inline VmathSoaTransform3 vmathSoaT3Inverse_V( VmathSoaTransform3 tfrm );
+
+/*
+ * Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix
+ * NOTE: 
+ * This can be used to achieve better performance than a general inverse when the specified 3x4 transformation matrix meets the given restrictions.
+ */
+static inline VmathSoaTransform3 vmathSoaT3OrthoInverse_V( VmathSoaTransform3 tfrm );
+
+/*
+ * Conditionally select between two 3x4 transformation matrices
+ * NOTE: 
+ * This function uses a conditional select instruction to avoid a branch.
+ */
+static inline VmathSoaTransform3 vmathSoaT3Select_V( VmathSoaTransform3 tfrm0, VmathSoaTransform3 tfrm1, vec_uint4 select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+/*
+ * Print a 3x4 transformation matrix
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaT3Print_V( VmathSoaTransform3 tfrm );
+
+/*
+ * Print a 3x4 transformation matrix and an associated string identifier
+ * NOTE: 
+ * Function is only defined when _VECTORMATH_DEBUG is defined.
+ */
+static inline void vmathSoaT3Prints_V( VmathSoaTransform3 tfrm, const char *name );
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#include "vectormath_soa.h"
+#include "vec_soa_v.h"
+#include "quat_soa_v.h"
+#include "mat_soa_v.h"
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/spu/cpp/boolInVec.h b/Extras/vectormathlibrary/include/vectormath/spu/cpp/boolInVec.h
index 93a3ad29d..ac535843b 100644
--- a/Extras/vectormathlibrary/include/vectormath/spu/cpp/boolInVec.h
+++ b/Extras/vectormathlibrary/include/vectormath/spu/cpp/boolInVec.h
@@ -1,246 +1,246 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _BOOLINVEC_H
-#define _BOOLINVEC_H
-
-#include <spu_intrinsics.h>
-
-namespace Vectormath {
-
-class floatInVec;
-
-//--------------------------------------------------------------------------------------------------
-// boolInVec class
-//
-
-class boolInVec
-{
-    private:
-        vec_uint4 mData;
-
-        inline boolInVec(vec_uint4 vec);
-    public:
-        inline boolInVec() {}
-
-        // matches standard type conversions
-        //
-        inline boolInVec(floatInVec vec);
-
-        // explicit cast from bool
-        //
-        explicit inline boolInVec(bool scalar);
-
-#ifdef _VECTORMATH_NO_SCALAR_CAST
-        // explicit cast to bool
-        // 
-        inline bool getAsBool() const;
-#else
-        // implicit cast to bool
-        // 
-        inline operator bool() const;
-#endif
-    
-        // get vector data
-        // bool value is in the 0 word slot of vector as 0 (false) or -1 (true)
-        //
-        inline vec_uint4 get128() const;
-
-        // operators
-        //
-        inline const boolInVec operator ! () const;
-        inline boolInVec& operator = (boolInVec vec);
-        inline boolInVec& operator &= (boolInVec vec);
-        inline boolInVec& operator ^= (boolInVec vec);
-        inline boolInVec& operator |= (boolInVec vec);
-
-        // friend functions
-        //
-        friend inline const boolInVec operator == (boolInVec vec0, boolInVec vec1);
-        friend inline const boolInVec operator != (boolInVec vec0, boolInVec vec1);
-        friend inline const boolInVec operator < (floatInVec vec0, floatInVec vec1);
-        friend inline const boolInVec operator <= (floatInVec vec0, floatInVec vec1);
-        friend inline const boolInVec operator > (floatInVec vec0, floatInVec vec1);
-        friend inline const boolInVec operator >= (floatInVec vec0, floatInVec vec1);
-        friend inline const boolInVec operator == (floatInVec vec0, floatInVec vec1);
-        friend inline const boolInVec operator != (floatInVec vec0, floatInVec vec1);
-        friend inline const boolInVec operator & (boolInVec vec0, boolInVec vec1);
-        friend inline const boolInVec operator ^ (boolInVec vec0, boolInVec vec1);
-        friend inline const boolInVec operator | (boolInVec vec0, boolInVec vec1);
-        friend inline const boolInVec select(boolInVec vec0, boolInVec vec1, boolInVec select_vec1);
-};
-
-//--------------------------------------------------------------------------------------------------
-// boolInVec functions
-//
-
-// operators
-//
-inline const boolInVec operator == (boolInVec vec0, boolInVec vec1);
-inline const boolInVec operator != (boolInVec vec0, boolInVec vec1);
-inline const boolInVec operator & (boolInVec vec0, boolInVec vec1);
-inline const boolInVec operator ^ (boolInVec vec0, boolInVec vec1);
-inline const boolInVec operator | (boolInVec vec0, boolInVec vec1);
-
-// select between vec0 and vec1 using boolInVec.
-// false selects vec0, true selects vec1
-//
-inline const boolInVec select(boolInVec vec0, boolInVec vec1, boolInVec select_vec1);
-
-} // namespace Vectormath
-
-//--------------------------------------------------------------------------------------------------
-// boolInVec implementation
-//
-
-#include "floatInVec.h"
-
-namespace Vectormath {
-
-inline
-boolInVec::boolInVec(vec_uint4 vec)
-{
-    mData = vec;
-}
-
-inline
-boolInVec::boolInVec(floatInVec vec)
-{
-    *this = (vec != floatInVec(0.0f));
-}
-
-inline
-boolInVec::boolInVec(bool scalar)
-{
-    mData = spu_promote((unsigned int)-scalar, 0);
-}
-
-#ifdef _VECTORMATH_NO_SCALAR_CAST
-inline
-bool
-boolInVec::getAsBool() const
-#else
-inline
-boolInVec::operator bool() const
-#endif
-{
-    return (bool)spu_extract(mData, 0);
-}
-
-inline
-vec_uint4
-boolInVec::get128() const
-{
-    return mData;
-}
-
-inline
-const boolInVec
-boolInVec::operator ! () const
-{
-    return boolInVec(spu_nor(mData, mData));
-}
-
-inline
-boolInVec&
-boolInVec::operator = (boolInVec vec)
-{
-    mData = vec.mData;
-    return *this;
-}
-
-inline
-boolInVec&
-boolInVec::operator &= (boolInVec vec)
-{
-    *this = *this & vec;
-    return *this;
-}
-
-inline
-boolInVec&
-boolInVec::operator ^= (boolInVec vec)
-{
-    *this = *this ^ vec;
-    return *this;
-}
-
-inline
-boolInVec&
-boolInVec::operator |= (boolInVec vec)
-{
-    *this = *this | vec;
-    return *this;
-}
-
-inline
-const boolInVec
-operator == (boolInVec vec0, boolInVec vec1)
-{
-    return boolInVec(spu_cmpeq(vec0.get128(), vec1.get128()));
-}
-
-inline
-const boolInVec
-operator != (boolInVec vec0, boolInVec vec1)
-{
-    return !(vec0 == vec1);
-}
-    
-inline
-const boolInVec
-operator & (boolInVec vec0, boolInVec vec1)
-{
-    return boolInVec(spu_and(vec0.get128(), vec1.get128()));
-}
-
-inline
-const boolInVec
-operator | (boolInVec vec0, boolInVec vec1)
-{
-    return boolInVec(spu_or(vec0.get128(), vec1.get128()));
-}
-
-inline
-const boolInVec
-operator ^ (boolInVec vec0, boolInVec vec1)
-{
-    return boolInVec(spu_xor(vec0.get128(), vec1.get128()));
-}
-
-inline
-const boolInVec
-select(boolInVec vec0, boolInVec vec1, boolInVec select_vec1)
-{
-    return boolInVec(spu_sel(vec0.get128(), vec1.get128(), select_vec1.get128()));
-}
- 
-} // namespace Vectormath
-
-#endif // boolInVec_h
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _BOOLINVEC_H
+#define _BOOLINVEC_H
+
+#include <spu_intrinsics.h>
+
+namespace Vectormath {
+
+class floatInVec;
+
+//--------------------------------------------------------------------------------------------------
+// boolInVec class
+//
+
+class boolInVec
+{
+    private:
+        vec_uint4 mData;
+
+        inline boolInVec(vec_uint4 vec);
+    public:
+        inline boolInVec() {}
+
+        // matches standard type conversions
+        //
+        inline boolInVec(floatInVec vec);
+
+        // explicit cast from bool
+        //
+        explicit inline boolInVec(bool scalar);
+
+#ifdef _VECTORMATH_NO_SCALAR_CAST
+        // explicit cast to bool
+        // 
+        inline bool getAsBool() const;
+#else
+        // implicit cast to bool
+        // 
+        inline operator bool() const;
+#endif
+    
+        // get vector data
+        // bool value is in the 0 word slot of vector as 0 (false) or -1 (true)
+        //
+        inline vec_uint4 get128() const;
+
+        // operators
+        //
+        inline const boolInVec operator ! () const;
+        inline boolInVec& operator = (boolInVec vec);
+        inline boolInVec& operator &= (boolInVec vec);
+        inline boolInVec& operator ^= (boolInVec vec);
+        inline boolInVec& operator |= (boolInVec vec);
+
+        // friend functions
+        //
+        friend inline const boolInVec operator == (boolInVec vec0, boolInVec vec1);
+        friend inline const boolInVec operator != (boolInVec vec0, boolInVec vec1);
+        friend inline const boolInVec operator < (floatInVec vec0, floatInVec vec1);
+        friend inline const boolInVec operator <= (floatInVec vec0, floatInVec vec1);
+        friend inline const boolInVec operator > (floatInVec vec0, floatInVec vec1);
+        friend inline const boolInVec operator >= (floatInVec vec0, floatInVec vec1);
+        friend inline const boolInVec operator == (floatInVec vec0, floatInVec vec1);
+        friend inline const boolInVec operator != (floatInVec vec0, floatInVec vec1);
+        friend inline const boolInVec operator & (boolInVec vec0, boolInVec vec1);
+        friend inline const boolInVec operator ^ (boolInVec vec0, boolInVec vec1);
+        friend inline const boolInVec operator | (boolInVec vec0, boolInVec vec1);
+        friend inline const boolInVec select(boolInVec vec0, boolInVec vec1, boolInVec select_vec1);
+};
+
+//--------------------------------------------------------------------------------------------------
+// boolInVec functions
+//
+
+// operators
+//
+inline const boolInVec operator == (boolInVec vec0, boolInVec vec1);
+inline const boolInVec operator != (boolInVec vec0, boolInVec vec1);
+inline const boolInVec operator & (boolInVec vec0, boolInVec vec1);
+inline const boolInVec operator ^ (boolInVec vec0, boolInVec vec1);
+inline const boolInVec operator | (boolInVec vec0, boolInVec vec1);
+
+// select between vec0 and vec1 using boolInVec.
+// false selects vec0, true selects vec1
+//
+inline const boolInVec select(boolInVec vec0, boolInVec vec1, boolInVec select_vec1);
+
+} // namespace Vectormath
+
+//--------------------------------------------------------------------------------------------------
+// boolInVec implementation
+//
+
+#include "floatInVec.h"
+
+namespace Vectormath {
+
+inline
+boolInVec::boolInVec(vec_uint4 vec)
+{
+    mData = vec;
+}
+
+inline
+boolInVec::boolInVec(floatInVec vec)
+{
+    *this = (vec != floatInVec(0.0f));
+}
+
+inline
+boolInVec::boolInVec(bool scalar)
+{
+    mData = spu_promote((unsigned int)-scalar, 0);
+}
+
+#ifdef _VECTORMATH_NO_SCALAR_CAST
+inline
+bool
+boolInVec::getAsBool() const
+#else
+inline
+boolInVec::operator bool() const
+#endif
+{
+    return (bool)spu_extract(mData, 0);
+}
+
+inline
+vec_uint4
+boolInVec::get128() const
+{
+    return mData;
+}
+
+inline
+const boolInVec
+boolInVec::operator ! () const
+{
+    return boolInVec(spu_nor(mData, mData));
+}
+
+inline
+boolInVec&
+boolInVec::operator = (boolInVec vec)
+{
+    mData = vec.mData;
+    return *this;
+}
+
+inline
+boolInVec&
+boolInVec::operator &= (boolInVec vec)
+{
+    *this = *this & vec;
+    return *this;
+}
+
+inline
+boolInVec&
+boolInVec::operator ^= (boolInVec vec)
+{
+    *this = *this ^ vec;
+    return *this;
+}
+
+inline
+boolInVec&
+boolInVec::operator |= (boolInVec vec)
+{
+    *this = *this | vec;
+    return *this;
+}
+
+inline
+const boolInVec
+operator == (boolInVec vec0, boolInVec vec1)
+{
+    return boolInVec(spu_cmpeq(vec0.get128(), vec1.get128()));
+}
+
+inline
+const boolInVec
+operator != (boolInVec vec0, boolInVec vec1)
+{
+    return !(vec0 == vec1);
+}
+    
+inline
+const boolInVec
+operator & (boolInVec vec0, boolInVec vec1)
+{
+    return boolInVec(spu_and(vec0.get128(), vec1.get128()));
+}
+
+inline
+const boolInVec
+operator | (boolInVec vec0, boolInVec vec1)
+{
+    return boolInVec(spu_or(vec0.get128(), vec1.get128()));
+}
+
+inline
+const boolInVec
+operator ^ (boolInVec vec0, boolInVec vec1)
+{
+    return boolInVec(spu_xor(vec0.get128(), vec1.get128()));
+}
+
+inline
+const boolInVec
+select(boolInVec vec0, boolInVec vec1, boolInVec select_vec1)
+{
+    return boolInVec(spu_sel(vec0.get128(), vec1.get128(), select_vec1.get128()));
+}
+ 
+} // namespace Vectormath
+
+#endif // _BOOLINVEC_H
diff --git a/Extras/vectormathlibrary/include/vectormath/spu/cpp/floatInVec.h b/Extras/vectormathlibrary/include/vectormath/spu/cpp/floatInVec.h
index 06d7892ef..638f22f91 100644
--- a/Extras/vectormathlibrary/include/vectormath/spu/cpp/floatInVec.h
+++ b/Extras/vectormathlibrary/include/vectormath/spu/cpp/floatInVec.h
@@ -1,340 +1,339 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _FLOATINVEC_H
-#define _FLOATINVEC_H
-
-#include <math.h>
-#include <spu_intrinsics.h>
-#include "spu2vmx.h"
-#include "simdmath.h"
-#undef bool
-
-namespace Vectormath {
-
-class boolInVec;
-
-//--------------------------------------------------------------------------------------------------
-// floatInVec class
-//
-
-class floatInVec
-{
-    private:
-        vec_float4 mData;
-
-        inline floatInVec(vec_float4 vec);
-    public:
-        inline floatInVec() {}
-
-        // matches standard type conversions
-        //
-        inline floatInVec(boolInVec vec);
-
-        // construct from a slot of vec_float4
-        //
-        inline floatInVec(vec_float4 vec, int slot);
-        
-        // explicit cast from float
-        //
-        explicit inline floatInVec(float scalar);
-
-#ifdef _VECTORMATH_NO_SCALAR_CAST
-        // explicit cast to float
-        // 
-        inline float getAsFloat() const;
-#else
-        // implicit cast to float
-        //
-        inline operator float() const;
-#endif
-
-        // get vector data
-        // float value is in 0 word slot of vector
-        //
-        inline vec_float4 get128() const;
-
-        // operators
-        // 
-        inline const floatInVec operator ++ (int);
-        inline const floatInVec operator -- (int);
-        inline floatInVec& operator ++ ();
-        inline floatInVec& operator -- ();
-        inline const floatInVec operator - () const;
-        inline floatInVec& operator = (floatInVec vec);
-        inline floatInVec& operator *= (floatInVec vec);
-        inline floatInVec& operator /= (floatInVec vec);
-        inline floatInVec& operator += (floatInVec vec);
-        inline floatInVec& operator -= (floatInVec vec);
-
-        // friend functions
-        //
-        friend inline const floatInVec operator * (floatInVec vec0, floatInVec vec1);
-        friend inline const floatInVec operator / (floatInVec vec0, floatInVec vec1);
-        friend inline const floatInVec operator + (floatInVec vec0, floatInVec vec1);
-        friend inline const floatInVec operator - (floatInVec vec0, floatInVec vec1);
-        friend inline const floatInVec select(floatInVec vec0, floatInVec vec1, boolInVec select_vec1);
-};
-
-//--------------------------------------------------------------------------------------------------
-// floatInVec functions
-//
-
-// operators
-// 
-inline const floatInVec operator * (floatInVec vec0, floatInVec vec1);
-inline const floatInVec operator / (floatInVec vec0, floatInVec vec1);
-inline const floatInVec operator + (floatInVec vec0, floatInVec vec1);
-inline const floatInVec operator - (floatInVec vec0, floatInVec vec1);
-inline const boolInVec operator < (floatInVec vec0, floatInVec vec1);
-inline const boolInVec operator <= (floatInVec vec0, floatInVec vec1);
-inline const boolInVec operator > (floatInVec vec0, floatInVec vec1);
-inline const boolInVec operator >= (floatInVec vec0, floatInVec vec1);
-inline const boolInVec operator == (floatInVec vec0, floatInVec vec1);
-inline const boolInVec operator != (floatInVec vec0, floatInVec vec1);
-
-// select between vec0 and vec1 using boolInVec.
-// false selects vec0, true selects vec1
-//
-inline const floatInVec select(floatInVec vec0, floatInVec vec1, boolInVec select_vec1);
-
-} // namespace Vectormath
-
-//--------------------------------------------------------------------------------------------------
-// floatInVec implementation
-//
-
-#include "boolInVec.h"
-
-namespace Vectormath {
-
-inline
-floatInVec::floatInVec(vec_float4 vec)
-{
-    mData = vec;
-}
-
-inline
-floatInVec::floatInVec(boolInVec vec)
-{
-    mData = spu_sel(spu_splats(0.0f), spu_splats(1.0f), vec.get128());
-}
-
-inline
-floatInVec::floatInVec(vec_float4 vec, int slot)
-{
-    mData = spu_promote(spu_extract(vec, slot), 0);
-}
-
-inline
-floatInVec::floatInVec(float scalar)
-{
-    mData = spu_promote(scalar, 0);
-}
-
-#ifdef _VECTORMATH_NO_SCALAR_CAST
-inline
-float
-floatInVec::getAsFloat() const
-#else
-inline
-floatInVec::operator float() const
-#endif
-{
-    return spu_extract(mData,0);
-}
-
-inline
-vec_float4
-floatInVec::get128() const
-{
-    return mData;
-}
-
-inline
-const floatInVec
-floatInVec::operator ++ (int)
-{
-    vec_float4 olddata = mData;
-    operator ++();
-    return floatInVec(olddata);
-}
-
-inline
-const floatInVec
-floatInVec::operator -- (int)
-{
-    vec_float4 olddata = mData;
-    operator --();
-    return floatInVec(olddata);
-}
-
-inline
-floatInVec&
-floatInVec::operator ++ ()
-{
-    *this += floatInVec(1.0f);
-    return *this;
-}
-
-inline
-floatInVec&
-floatInVec::operator -- ()
-{
-    *this -= floatInVec(1.0f);
-    return *this;
-}
-
-inline
-const floatInVec
-floatInVec::operator - () const
-{
-    return floatInVec((vec_float4)spu_xor((vec_uint4)mData, spu_splats(0x80000000)));
-}
-
-inline
-floatInVec&
-floatInVec::operator = (floatInVec vec)
-{
-    mData = vec.mData;
-    return *this;
-}
-
-inline
-floatInVec&
-floatInVec::operator *= (floatInVec vec)
-{
-    *this = *this * vec;
-    return *this;
-}
-
-inline
-floatInVec&
-floatInVec::operator /= (floatInVec vec)
-{
-    *this = *this / vec;
-    return *this;
-}
-
-inline
-floatInVec&
-floatInVec::operator += (floatInVec vec)
-{
-    *this = *this + vec;
-    return *this;
-}
-
-inline
-floatInVec&
-floatInVec::operator -= (floatInVec vec)
-{
-    *this = *this - vec;
-    return *this;
-}
-
-inline
-const floatInVec
-operator * (floatInVec vec0, floatInVec vec1)
-{
-    return floatInVec(spu_mul(vec0.get128(), vec1.get128()));
-}
-
-inline
-const floatInVec
-operator / (floatInVec num, floatInVec den)
-{
-    return floatInVec(divf4(num.get128(), den.get128()));
-}
-
-inline
-const floatInVec
-operator + (floatInVec vec0, floatInVec vec1)
-{
-    return floatInVec(spu_add(vec0.get128(), vec1.get128()));
-}
-
-inline
-const floatInVec
-operator - (floatInVec vec0, floatInVec vec1)
-{
-    return floatInVec(spu_sub(vec0.get128(), vec1.get128()));
-}
-
-inline
-const boolInVec
-operator < (floatInVec vec0, floatInVec vec1)
-{
-    return boolInVec(spu_cmpgt(vec1.get128(), vec0.get128()));
-}
-
-inline
-const boolInVec
-operator <= (floatInVec vec0, floatInVec vec1)
-{
-    return !(vec0 > vec1);
-}
-
-inline
-const boolInVec
-operator > (floatInVec vec0, floatInVec vec1)
-{
-    return boolInVec(spu_cmpgt(vec0.get128(), vec1.get128()));
-}
-
-inline
-const boolInVec
-operator >= (floatInVec vec0, floatInVec vec1)
-{
-    return !(vec0 < vec1);
-}
-
-inline
-const boolInVec
-operator == (floatInVec vec0, floatInVec vec1)
-{
-    return boolInVec(spu_cmpeq(vec0.get128(), vec1.get128()));
-}
-
-inline
-const boolInVec
-operator != (floatInVec vec0, floatInVec vec1)
-{
-    return !(vec0 == vec1);
-}
-    
-inline
-const floatInVec
-select(floatInVec vec0, floatInVec vec1, boolInVec select_vec1)
-{
-    return floatInVec(spu_sel(vec0.get128(), vec1.get128(), select_vec1.get128()));
-}
-
-} // namespace Vectormath
-
-#endif // floatInVec_h
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _FLOATINVEC_H
+#define _FLOATINVEC_H
+
+#include <math.h>
+#include <spu_intrinsics.h>
+#include <simdmath.h>
+#undef bool
+
+namespace Vectormath {
+
+class boolInVec;
+
+//--------------------------------------------------------------------------------------------------
+// floatInVec class
+//
+
+class floatInVec
+{
+    private:
+        vec_float4 mData;
+
+        inline floatInVec(vec_float4 vec);
+    public:
+        inline floatInVec() {}
+
+        // matches standard type conversions
+        //
+        inline floatInVec(boolInVec vec);
+
+        // construct from a slot of vec_float4
+        //
+        inline floatInVec(vec_float4 vec, int slot);
+        
+        // explicit cast from float
+        //
+        explicit inline floatInVec(float scalar);
+
+#ifdef _VECTORMATH_NO_SCALAR_CAST
+        // explicit cast to float
+        // 
+        inline float getAsFloat() const;
+#else
+        // implicit cast to float
+        //
+        inline operator float() const;
+#endif
+
+        // get vector data
+        // float value is in 0 word slot of vector
+        //
+        inline vec_float4 get128() const;
+
+        // operators
+        // 
+        inline const floatInVec operator ++ (int);
+        inline const floatInVec operator -- (int);
+        inline floatInVec& operator ++ ();
+        inline floatInVec& operator -- ();
+        inline const floatInVec operator - () const;
+        inline floatInVec& operator = (floatInVec vec);
+        inline floatInVec& operator *= (floatInVec vec);
+        inline floatInVec& operator /= (floatInVec vec);
+        inline floatInVec& operator += (floatInVec vec);
+        inline floatInVec& operator -= (floatInVec vec);
+
+        // friend functions
+        //
+        friend inline const floatInVec operator * (floatInVec vec0, floatInVec vec1);
+        friend inline const floatInVec operator / (floatInVec vec0, floatInVec vec1);
+        friend inline const floatInVec operator + (floatInVec vec0, floatInVec vec1);
+        friend inline const floatInVec operator - (floatInVec vec0, floatInVec vec1);
+        friend inline const floatInVec select(floatInVec vec0, floatInVec vec1, boolInVec select_vec1);
+};
+
+//--------------------------------------------------------------------------------------------------
+// floatInVec functions
+//
+
+// operators
+// 
+inline const floatInVec operator * (floatInVec vec0, floatInVec vec1);
+inline const floatInVec operator / (floatInVec vec0, floatInVec vec1);
+inline const floatInVec operator + (floatInVec vec0, floatInVec vec1);
+inline const floatInVec operator - (floatInVec vec0, floatInVec vec1);
+inline const boolInVec operator < (floatInVec vec0, floatInVec vec1);
+inline const boolInVec operator <= (floatInVec vec0, floatInVec vec1);
+inline const boolInVec operator > (floatInVec vec0, floatInVec vec1);
+inline const boolInVec operator >= (floatInVec vec0, floatInVec vec1);
+inline const boolInVec operator == (floatInVec vec0, floatInVec vec1);
+inline const boolInVec operator != (floatInVec vec0, floatInVec vec1);
+
+// select between vec0 and vec1 using boolInVec.
+// false selects vec0, true selects vec1
+//
+inline const floatInVec select(floatInVec vec0, floatInVec vec1, boolInVec select_vec1);
+
+} // namespace Vectormath
+
+//--------------------------------------------------------------------------------------------------
+// floatInVec implementation
+//
+
+#include "boolInVec.h"
+
+namespace Vectormath {
+
+inline
+floatInVec::floatInVec(vec_float4 vec)
+{
+    mData = vec;
+}
+
+inline
+floatInVec::floatInVec(boolInVec vec)
+{
+    mData = spu_sel(spu_splats(0.0f), spu_splats(1.0f), vec.get128());
+}
+
+inline
+floatInVec::floatInVec(vec_float4 vec, int slot)
+{
+    mData = spu_promote(spu_extract(vec, slot), 0);
+}
+
+inline
+floatInVec::floatInVec(float scalar)
+{
+    mData = spu_promote(scalar, 0);
+}
+
+#ifdef _VECTORMATH_NO_SCALAR_CAST
+inline
+float
+floatInVec::getAsFloat() const
+#else
+inline
+floatInVec::operator float() const
+#endif
+{
+    return spu_extract(mData,0);
+}
+
+inline
+vec_float4
+floatInVec::get128() const
+{
+    return mData;
+}
+
+inline
+const floatInVec
+floatInVec::operator ++ (int)
+{
+    vec_float4 olddata = mData;
+    operator ++();
+    return floatInVec(olddata);
+}
+
+inline
+const floatInVec
+floatInVec::operator -- (int)
+{
+    vec_float4 olddata = mData;
+    operator --();
+    return floatInVec(olddata);
+}
+
+inline
+floatInVec&
+floatInVec::operator ++ ()
+{
+    *this += floatInVec(1.0f);
+    return *this;
+}
+
+inline
+floatInVec&
+floatInVec::operator -- ()
+{
+    *this -= floatInVec(1.0f);
+    return *this;
+}
+
+inline
+const floatInVec
+floatInVec::operator - () const
+{
+    return floatInVec((vec_float4)spu_xor((vec_uint4)mData, spu_splats(0x80000000)));
+}
+
+inline
+floatInVec&
+floatInVec::operator = (floatInVec vec)
+{
+    mData = vec.mData;
+    return *this;
+}
+
+inline
+floatInVec&
+floatInVec::operator *= (floatInVec vec)
+{
+    *this = *this * vec;
+    return *this;
+}
+
+inline
+floatInVec&
+floatInVec::operator /= (floatInVec vec)
+{
+    *this = *this / vec;
+    return *this;
+}
+
+inline
+floatInVec&
+floatInVec::operator += (floatInVec vec)
+{
+    *this = *this + vec;
+    return *this;
+}
+
+inline
+floatInVec&
+floatInVec::operator -= (floatInVec vec)
+{
+    *this = *this - vec;
+    return *this;
+}
+
+inline
+const floatInVec
+operator * (floatInVec vec0, floatInVec vec1)
+{
+    return floatInVec(spu_mul(vec0.get128(), vec1.get128()));
+}
+
+inline
+const floatInVec
+operator / (floatInVec num, floatInVec den)
+{
+    return floatInVec(divf4(num.get128(), den.get128()));
+}
+
+inline
+const floatInVec
+operator + (floatInVec vec0, floatInVec vec1)
+{
+    return floatInVec(spu_add(vec0.get128(), vec1.get128()));
+}
+
+inline
+const floatInVec
+operator - (floatInVec vec0, floatInVec vec1)
+{
+    return floatInVec(spu_sub(vec0.get128(), vec1.get128()));
+}
+
+inline
+const boolInVec
+operator < (floatInVec vec0, floatInVec vec1)
+{
+    return boolInVec(spu_cmpgt(vec1.get128(), vec0.get128()));
+}
+
+inline
+const boolInVec
+operator <= (floatInVec vec0, floatInVec vec1)
+{
+    return !(vec0 > vec1);
+}
+
+inline
+const boolInVec
+operator > (floatInVec vec0, floatInVec vec1)
+{
+    return boolInVec(spu_cmpgt(vec0.get128(), vec1.get128()));
+}
+
+inline
+const boolInVec
+operator >= (floatInVec vec0, floatInVec vec1)
+{
+    return !(vec0 < vec1);
+}
+
+inline
+const boolInVec
+operator == (floatInVec vec0, floatInVec vec1)
+{
+    return boolInVec(spu_cmpeq(vec0.get128(), vec1.get128()));
+}
+
+inline
+const boolInVec
+operator != (floatInVec vec0, floatInVec vec1)
+{
+    return !(vec0 == vec1);
+}
+    
+inline
+const floatInVec
+select(floatInVec vec0, floatInVec vec1, boolInVec select_vec1)
+{
+    return floatInVec(spu_sel(vec0.get128(), vec1.get128(), select_vec1.get128()));
+}
+
+} // namespace Vectormath
+
+#endif // floatInVec_h
diff --git a/Extras/vectormathlibrary/include/vectormath/spu/cpp/mat_aos.h b/Extras/vectormathlibrary/include/vectormath/spu/cpp/mat_aos.h
index a2fd611ec..d4f955c23 100644
--- a/Extras/vectormathlibrary/include/vectormath/spu/cpp/mat_aos.h
+++ b/Extras/vectormathlibrary/include/vectormath/spu/cpp/mat_aos.h
@@ -1,2027 +1,2027 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_MAT_AOS_CPP_H
-#define _VECTORMATH_MAT_AOS_CPP_H
-
-namespace Vectormath {
-namespace Aos {
-
-//-----------------------------------------------------------------------------
-// Constants
-// for shuffles, words are labeled [x,y,z,w] [a,b,c,d]
-
-#define _VECTORMATH_SHUF_XAYB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_B })
-#define _VECTORMATH_SHUF_ZCWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_D })
-#define _VECTORMATH_SHUF_ZBW0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_0 })
-#define _VECTORMATH_SHUF_XCY0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_0 })
-#define _VECTORMATH_SHUF_XYAB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B })
-#define _VECTORMATH_SHUF_ZWCD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_D })
-#define _VECTORMATH_SHUF_0ZB0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_0 })     
-#define _VECTORMATH_SHUF_C0X0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_0 })
-#define _VECTORMATH_SHUF_YA00 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_0 })
-#define _VECTORMATH_SHUF_XAZC ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_C })
-#define _VECTORMATH_SHUF_YXWZ ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_Z })
-#define _VECTORMATH_SHUF_YBWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_D })
-#define _VECTORMATH_SHUF_XYCX ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_X })
-#define _VECTORMATH_SHUF_YCXY ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y })
-#define _VECTORMATH_SHUF_CXYC ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_C })
-#define _VECTORMATH_SHUF_ZAY0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_0 })
-#define _VECTORMATH_SHUF_BZX0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_0 })
-#define _VECTORMATH_SHUF_0ZYA ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_A })
-#define _VECTORMATH_SHUF_Z0XB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_B })
-#define _VECTORMATH_SHUF_YX0C ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_C })
-#define _VECTORMATH_SHUF_CZD0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_D, _VECTORMATH_SHUF_0 })
-#define _VECTORMATH_SHUF_BBY0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_0 })
-#define _VECTORMATH_PI_OVER_2 1.570796327f
-
-//-----------------------------------------------------------------------------
-// Definitions
-
-inline Matrix3::Matrix3( const Matrix3 & mat )
-{
-    mCol0 = mat.mCol0;
-    mCol1 = mat.mCol1;
-    mCol2 = mat.mCol2;
-}
-
-inline Matrix3::Matrix3( float scalar )
-{
-    mCol0 = Vector3( scalar );
-    mCol1 = Vector3( scalar );
-    mCol2 = Vector3( scalar );
-}
-
-inline Matrix3::Matrix3( Quat unitQuat )
-{
-    vec_float4 xyzw_2, wwww, yzxw, zxyw, yzxw_2, zxyw_2;
-    vec_float4 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
-    vec_uchar16 shuffle_wwww = (vec_uchar16)spu_splats((int)0x0c0d0e0f);
-    vec_uint4 select_x = (vec_uint4)spu_maskb(0xf000);
-    vec_uint4 select_z = (vec_uint4)spu_maskb(0x00f0);
-    xyzw_2 = spu_add( unitQuat.get128(), unitQuat.get128() );
-    wwww = spu_shuffle( unitQuat.get128(), unitQuat.get128(), shuffle_wwww );
-    yzxw = spu_shuffle( unitQuat.get128(), unitQuat.get128(), _VECTORMATH_SHUF_YZXW );
-    zxyw = spu_shuffle( unitQuat.get128(), unitQuat.get128(), _VECTORMATH_SHUF_ZXYW );
-    yzxw_2 = spu_shuffle( xyzw_2, xyzw_2, _VECTORMATH_SHUF_YZXW );
-    zxyw_2 = spu_shuffle( xyzw_2, xyzw_2, _VECTORMATH_SHUF_ZXYW );
-    tmp0 = spu_mul( yzxw_2, wwww );
-    tmp1 = spu_nmsub( yzxw, yzxw_2, spu_splats(1.0f) );
-    tmp2 = spu_mul( yzxw, xyzw_2 );
-    tmp0 = spu_madd( zxyw, xyzw_2, tmp0 );
-    tmp1 = spu_nmsub( zxyw, zxyw_2, tmp1 );
-    tmp2 = spu_nmsub( zxyw_2, wwww, tmp2 );
-    tmp3 = spu_sel( tmp0, tmp1, select_x );
-    tmp4 = spu_sel( tmp1, tmp2, select_x );
-    tmp5 = spu_sel( tmp2, tmp0, select_x );
-    mCol0 = Vector3( spu_sel( tmp3, tmp2, select_z ) );
-    mCol1 = Vector3( spu_sel( tmp4, tmp0, select_z ) );
-    mCol2 = Vector3( spu_sel( tmp5, tmp1, select_z ) );
-}
-
-inline Matrix3::Matrix3( Vector3 _col0, Vector3 _col1, Vector3 _col2 )
-{
-    mCol0 = _col0;
-    mCol1 = _col1;
-    mCol2 = _col2;
-}
-
-inline Matrix3 & Matrix3::setCol0( Vector3 _col0 )
-{
-    mCol0 = _col0;
-    return *this;
-}
-
-inline Matrix3 & Matrix3::setCol1( Vector3 _col1 )
-{
-    mCol1 = _col1;
-    return *this;
-}
-
-inline Matrix3 & Matrix3::setCol2( Vector3 _col2 )
-{
-    mCol2 = _col2;
-    return *this;
-}
-
-inline Matrix3 & Matrix3::setCol( int col, Vector3 vec )
-{
-    *(&mCol0 + col) = vec;
-    return *this;
-}
-
-inline Matrix3 & Matrix3::setRow( int row, Vector3 vec )
-{
-    mCol0.setElem( row, vec.getElem( 0 ) );
-    mCol1.setElem( row, vec.getElem( 1 ) );
-    mCol2.setElem( row, vec.getElem( 2 ) );
-    return *this;
-}
-
-inline Matrix3 & Matrix3::setElem( int col, int row, float val )
-{
-    (*this)[col].setElem(row, val);
-    return *this;
-}
-
-inline float Matrix3::getElem( int col, int row ) const
-{
-    return this->getCol( col ).getElem( row );
-}
-
-inline const Vector3 Matrix3::getCol0( ) const
-{
-    return mCol0;
-}
-
-inline const Vector3 Matrix3::getCol1( ) const
-{
-    return mCol1;
-}
-
-inline const Vector3 Matrix3::getCol2( ) const
-{
-    return mCol2;
-}
-
-inline const Vector3 Matrix3::getCol( int col ) const
-{
-    return *(&mCol0 + col);
-}
-
-inline const Vector3 Matrix3::getRow( int row ) const
-{
-    return Vector3( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ) );
-}
-
-inline Vector3 & Matrix3::operator []( int col )
-{
-    return *(&mCol0 + col);
-}
-
-inline const Vector3 Matrix3::operator []( int col ) const
-{
-    return *(&mCol0 + col);
-}
-
-inline Matrix3 & Matrix3::operator =( const Matrix3 & mat )
-{
-    mCol0 = mat.mCol0;
-    mCol1 = mat.mCol1;
-    mCol2 = mat.mCol2;
-    return *this;
-}
-
-inline const Matrix3 transpose( const Matrix3 & mat )
-{
-    vec_float4 tmp0, tmp1, res0, res1, res2;
-    tmp0 = spu_shuffle( mat.getCol0().get128(), mat.getCol2().get128(), _VECTORMATH_SHUF_XAYB );
-    tmp1 = spu_shuffle( mat.getCol0().get128(), mat.getCol2().get128(), _VECTORMATH_SHUF_ZCWD );
-    res0 = spu_shuffle( tmp0, mat.getCol1().get128(), _VECTORMATH_SHUF_XAYB );
-    res1 = spu_shuffle( tmp0, mat.getCol1().get128(), _VECTORMATH_SHUF_ZBW0 );
-    res2 = spu_shuffle( tmp1, mat.getCol1().get128(), _VECTORMATH_SHUF_XCY0 );
-    return Matrix3(
-        Vector3( res0 ),
-        Vector3( res1 ),
-        Vector3( res2 )
-    );
-}
-
-inline const Matrix3 inverse( const Matrix3 & mat )
-{
-    vec_float4 tmp0, tmp1, tmp2, tmp3, tmp4, dot, invdet, inv0, inv1, inv2;
-    tmp2 = _vmathVfCross( mat.getCol0().get128(), mat.getCol1().get128() );
-    tmp0 = _vmathVfCross( mat.getCol1().get128(), mat.getCol2().get128() );
-    tmp1 = _vmathVfCross( mat.getCol2().get128(), mat.getCol0().get128() );
-    dot = _vmathVfDot3( tmp2, mat.getCol2().get128() );
-    dot = spu_shuffle( dot, dot, (vec_uchar16)spu_splats(0x00010203) );
-    invdet = recipf4( dot );
-    tmp3 = spu_shuffle( tmp0, tmp2, _VECTORMATH_SHUF_XAYB );
-    tmp4 = spu_shuffle( tmp0, tmp2, _VECTORMATH_SHUF_ZCWD );
-    inv0 = spu_shuffle( tmp3, tmp1, _VECTORMATH_SHUF_XAYB );
-    inv1 = spu_shuffle( tmp3, tmp1, _VECTORMATH_SHUF_ZBW0 );
-    inv2 = spu_shuffle( tmp4, tmp1, _VECTORMATH_SHUF_XCY0 );
-    inv0 = spu_mul( inv0, invdet );
-    inv1 = spu_mul( inv1, invdet );
-    inv2 = spu_mul( inv2, invdet );
-    return Matrix3(
-        Vector3( inv0 ),
-        Vector3( inv1 ),
-        Vector3( inv2 )
-    );
-}
-
-inline float determinant( const Matrix3 & mat )
-{
-    return dot( mat.getCol2(), cross( mat.getCol0(), mat.getCol1() ) );
-}
-
-inline const Matrix3 Matrix3::operator +( const Matrix3 & mat ) const
-{
-    return Matrix3(
-        ( mCol0 + mat.mCol0 ),
-        ( mCol1 + mat.mCol1 ),
-        ( mCol2 + mat.mCol2 )
-    );
-}
-
-inline const Matrix3 Matrix3::operator -( const Matrix3 & mat ) const
-{
-    return Matrix3(
-        ( mCol0 - mat.mCol0 ),
-        ( mCol1 - mat.mCol1 ),
-        ( mCol2 - mat.mCol2 )
-    );
-}
-
-inline Matrix3 & Matrix3::operator +=( const Matrix3 & mat )
-{
-    *this = *this + mat;
-    return *this;
-}
-
-inline Matrix3 & Matrix3::operator -=( const Matrix3 & mat )
-{
-    *this = *this - mat;
-    return *this;
-}
-
-inline const Matrix3 Matrix3::operator -( ) const
-{
-    return Matrix3(
-        ( -mCol0 ),
-        ( -mCol1 ),
-        ( -mCol2 )
-    );
-}
-
-inline const Matrix3 absPerElem( const Matrix3 & mat )
-{
-    return Matrix3(
-        absPerElem( mat.getCol0() ),
-        absPerElem( mat.getCol1() ),
-        absPerElem( mat.getCol2() )
-    );
-}
-
-inline const Matrix3 Matrix3::operator *( float scalar ) const
-{
-    return Matrix3(
-        ( mCol0 * scalar ),
-        ( mCol1 * scalar ),
-        ( mCol2 * scalar )
-    );
-}
-
-inline Matrix3 & Matrix3::operator *=( float scalar )
-{
-    *this = *this * scalar;
-    return *this;
-}
-
-inline const Matrix3 operator *( float scalar, const Matrix3 & mat )
-{
-    return mat * scalar;
-}
-
-inline const Vector3 Matrix3::operator *( Vector3 vec ) const
-{
-    vec_float4 res;
-    vec_float4 xxxx, yyyy, zzzz;
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
-    xxxx = spu_shuffle( vec.get128(), vec.get128(), shuffle_xxxx );
-    yyyy = spu_shuffle( vec.get128(), vec.get128(), shuffle_yyyy );
-    zzzz = spu_shuffle( vec.get128(), vec.get128(), shuffle_zzzz );
-    res = spu_mul( mCol0.get128(), xxxx );
-    res = spu_madd( mCol1.get128(), yyyy, res );
-    res = spu_madd( mCol2.get128(), zzzz, res );
-    return Vector3( res );
-}
-
-inline const Matrix3 Matrix3::operator *( const Matrix3 & mat ) const
-{
-    return Matrix3(
-        ( *this * mat.mCol0 ),
-        ( *this * mat.mCol1 ),
-        ( *this * mat.mCol2 )
-    );
-}
-
-inline Matrix3 & Matrix3::operator *=( const Matrix3 & mat )
-{
-    *this = *this * mat;
-    return *this;
-}
-
-inline const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 )
-{
-    return Matrix3(
-        mulPerElem( mat0.getCol0(), mat1.getCol0() ),
-        mulPerElem( mat0.getCol1(), mat1.getCol1() ),
-        mulPerElem( mat0.getCol2(), mat1.getCol2() )
-    );
-}
-
-inline const Matrix3 Matrix3::identity( )
-{
-    return Matrix3(
-        Vector3::xAxis( ),
-        Vector3::yAxis( ),
-        Vector3::zAxis( )
-    );
-}
-
-inline const Matrix3 Matrix3::rotationX( float radians )
-{
-    vec_float4 s, c, res1, res2;
-    vec_uint4 select_y, select_z;
-    vec_float4 zero;
-    select_y = (vec_uint4)spu_maskb(0x0f00);
-    select_z = (vec_uint4)spu_maskb(0x00f0);
-    zero = spu_splats(0.0f);
-    sincosf4( spu_splats(radians), &s, &c );
-    res1 = spu_sel( zero, c, select_y );
-    res1 = spu_sel( res1, s, select_z );
-    res2 = spu_sel( zero, negatef4(s), select_y );
-    res2 = spu_sel( res2, c, select_z );
-    return Matrix3(
-        Vector3::xAxis( ),
-        Vector3( res1 ),
-        Vector3( res2 )
-    );
-}
-
-inline const Matrix3 Matrix3::rotationY( float radians )
-{
-    vec_float4 s, c, res0, res2;
-    vec_uint4 select_x, select_z;
-    vec_float4 zero;
-    select_x = (vec_uint4)spu_maskb(0xf000);
-    select_z = (vec_uint4)spu_maskb(0x00f0);
-    zero = spu_splats(0.0f);
-    sincosf4( spu_splats(radians), &s, &c );
-    res0 = spu_sel( zero, c, select_x );
-    res0 = spu_sel( res0, negatef4(s), select_z );
-    res2 = spu_sel( zero, s, select_x );
-    res2 = spu_sel( res2, c, select_z );
-    return Matrix3(
-        Vector3( res0 ),
-        Vector3::yAxis( ),
-        Vector3( res2 )
-    );
-}
-
-inline const Matrix3 Matrix3::rotationZ( float radians )
-{
-    vec_float4 s, c, res0, res1;
-    vec_uint4 select_x, select_y;
-    vec_float4 zero;
-    select_x = (vec_uint4)spu_maskb(0xf000);
-    select_y = (vec_uint4)spu_maskb(0x0f00);
-    zero = spu_splats(0.0f);
-    sincosf4( spu_splats(radians), &s, &c );
-    res0 = spu_sel( zero, c, select_x );
-    res0 = spu_sel( res0, s, select_y );
-    res1 = spu_sel( zero, negatef4(s), select_x );
-    res1 = spu_sel( res1, c, select_y );
-    return Matrix3(
-        Vector3( res0 ),
-        Vector3( res1 ),
-        Vector3::zAxis( )
-    );
-}
-
-inline const Matrix3 Matrix3::rotationZYX( Vector3 radiansXYZ )
-{
-    vec_float4 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    angles = radiansXYZ.get128();
-    angles = spu_insert( 0.0f, angles, 3 );
-    sincosf4( angles, &s, &c );
-    negS = negatef4( s );
-    Z0 = spu_shuffle( s, c, _VECTORMATH_SHUF_CZD0 );
-    Z1 = spu_shuffle( c, negS, _VECTORMATH_SHUF_CZD0 );
-    Y0 = spu_shuffle( negS, c, _VECTORMATH_SHUF_BBY0 );
-    Y1 = spu_shuffle( c, s, _VECTORMATH_SHUF_BBY0 );
-    X0 = spu_shuffle( s, s, shuffle_xxxx );
-    X1 = spu_shuffle( c, c, shuffle_xxxx );
-    tmp = spu_mul( Z0, Y1 );
-    return Matrix3(
-        Vector3( spu_mul( Z0, Y0 ) ),
-        Vector3( spu_madd( Z1, X1, spu_mul( tmp, X0 ) ) ),
-        Vector3( spu_nmsub( Z1, X0, spu_mul( tmp, X1 ) ) )
-    );
-}
-
-inline const Matrix3 Matrix3::rotation( float radians, Vector3 unitVec )
-{
-    vec_float4 axis, s, c, oneMinusC, axisS, negAxisS, xxxx, yyyy, zzzz, tmp0, tmp1, tmp2;
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
-    axis = unitVec.get128();
-    sincosf4( spu_splats( radians ), &s, &c );
-    xxxx = spu_shuffle( axis, axis, shuffle_xxxx );
-    yyyy = spu_shuffle( axis, axis, shuffle_yyyy );
-    zzzz = spu_shuffle( axis, axis, shuffle_zzzz );
-    oneMinusC = spu_sub( spu_splats(1.0f), c );
-    axisS = spu_mul( axis, s );
-    negAxisS = negatef4( axisS );
-    tmp0 = spu_shuffle( axisS, negAxisS, _VECTORMATH_SHUF_0ZB0 );
-    tmp1 = spu_shuffle( axisS, negAxisS, _VECTORMATH_SHUF_C0X0 );
-    tmp2 = spu_shuffle( axisS, negAxisS, _VECTORMATH_SHUF_YA00 );
-    tmp0 = spu_sel( tmp0, c, (vec_uint4)spu_maskb(0xf000) );
-    tmp1 = spu_sel( tmp1, c, (vec_uint4)spu_maskb(0x0f00) );
-    tmp2 = spu_sel( tmp2, c, (vec_uint4)spu_maskb(0x00f0) );
-    return Matrix3(
-        Vector3( spu_madd( spu_mul( axis, xxxx ), oneMinusC, tmp0 ) ),
-        Vector3( spu_madd( spu_mul( axis, yyyy ), oneMinusC, tmp1 ) ),
-        Vector3( spu_madd( spu_mul( axis, zzzz ), oneMinusC, tmp2 ) )
-    );
-}
-
-inline const Matrix3 Matrix3::rotation( Quat unitQuat )
-{
-    return Matrix3( unitQuat );
-}
-
-inline const Matrix3 Matrix3::scale( Vector3 scaleVec )
-{
-    vec_float4 zero = spu_splats(0.0f);
-    return Matrix3(
-        Vector3( spu_sel( zero, scaleVec.get128(), (vec_uint4)spu_maskb(0xf000) ) ),
-        Vector3( spu_sel( zero, scaleVec.get128(), (vec_uint4)spu_maskb(0x0f00) ) ),
-        Vector3( spu_sel( zero, scaleVec.get128(), (vec_uint4)spu_maskb(0x00f0) ) )
-    );
-}
-
-inline const Matrix3 appendScale( const Matrix3 & mat, Vector3 scaleVec )
-{
-    return Matrix3(
-        ( mat.getCol0() * scaleVec.getX( ) ),
-        ( mat.getCol1() * scaleVec.getY( ) ),
-        ( mat.getCol2() * scaleVec.getZ( ) )
-    );
-}
-
-inline const Matrix3 prependScale( Vector3 scaleVec, const Matrix3 & mat )
-{
-    return Matrix3(
-        mulPerElem( mat.getCol0(), scaleVec ),
-        mulPerElem( mat.getCol1(), scaleVec ),
-        mulPerElem( mat.getCol2(), scaleVec )
-    );
-}
-
-inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, bool select1 )
-{
-    return Matrix3(
-        select( mat0.getCol0(), mat1.getCol0(), select1 ),
-        select( mat0.getCol1(), mat1.getCol1(), select1 ),
-        select( mat0.getCol2(), mat1.getCol2(), select1 )
-    );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( const Matrix3 & mat )
-{
-    print( mat.getRow( 0 ) );
-    print( mat.getRow( 1 ) );
-    print( mat.getRow( 2 ) );
-}
-
-inline void print( const Matrix3 & mat, const char * name )
-{
-    printf("%s:\n", name);
-    print( mat );
-}
-
-#endif
-
-inline Matrix4::Matrix4( const Matrix4 & mat )
-{
-    mCol0 = mat.mCol0;
-    mCol1 = mat.mCol1;
-    mCol2 = mat.mCol2;
-    mCol3 = mat.mCol3;
-}
-
-inline Matrix4::Matrix4( float scalar )
-{
-    mCol0 = Vector4( scalar );
-    mCol1 = Vector4( scalar );
-    mCol2 = Vector4( scalar );
-    mCol3 = Vector4( scalar );
-}
-
-inline Matrix4::Matrix4( const Transform3 & mat )
-{
-    mCol0 = Vector4( mat.getCol0(), 0.0f );
-    mCol1 = Vector4( mat.getCol1(), 0.0f );
-    mCol2 = Vector4( mat.getCol2(), 0.0f );
-    mCol3 = Vector4( mat.getCol3(), 1.0f );
-}
-
-inline Matrix4::Matrix4( Vector4 _col0, Vector4 _col1, Vector4 _col2, Vector4 _col3 )
-{
-    mCol0 = _col0;
-    mCol1 = _col1;
-    mCol2 = _col2;
-    mCol3 = _col3;
-}
-
-inline Matrix4::Matrix4( const Matrix3 & mat, Vector3 translateVec )
-{
-    mCol0 = Vector4( mat.getCol0(), 0.0f );
-    mCol1 = Vector4( mat.getCol1(), 0.0f );
-    mCol2 = Vector4( mat.getCol2(), 0.0f );
-    mCol3 = Vector4( translateVec, 1.0f );
-}
-
-inline Matrix4::Matrix4( Quat unitQuat, Vector3 translateVec )
-{
-    Matrix3 mat;
-    mat = Matrix3( unitQuat );
-    mCol0 = Vector4( mat.getCol0(), 0.0f );
-    mCol1 = Vector4( mat.getCol1(), 0.0f );
-    mCol2 = Vector4( mat.getCol2(), 0.0f );
-    mCol3 = Vector4( translateVec, 1.0f );
-}
-
-inline Matrix4 & Matrix4::setCol0( Vector4 _col0 )
-{
-    mCol0 = _col0;
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setCol1( Vector4 _col1 )
-{
-    mCol1 = _col1;
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setCol2( Vector4 _col2 )
-{
-    mCol2 = _col2;
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setCol3( Vector4 _col3 )
-{
-    mCol3 = _col3;
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setCol( int col, Vector4 vec )
-{
-    *(&mCol0 + col) = vec;
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setRow( int row, Vector4 vec )
-{
-    mCol0.setElem( row, vec.getElem( 0 ) );
-    mCol1.setElem( row, vec.getElem( 1 ) );
-    mCol2.setElem( row, vec.getElem( 2 ) );
-    mCol3.setElem( row, vec.getElem( 3 ) );
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setElem( int col, int row, float val )
-{
-    (*this)[col].setElem(row, val);
-    return *this;
-}
-
-inline float Matrix4::getElem( int col, int row ) const
-{
-    return this->getCol( col ).getElem( row );
-}
-
-inline const Vector4 Matrix4::getCol0( ) const
-{
-    return mCol0;
-}
-
-inline const Vector4 Matrix4::getCol1( ) const
-{
-    return mCol1;
-}
-
-inline const Vector4 Matrix4::getCol2( ) const
-{
-    return mCol2;
-}
-
-inline const Vector4 Matrix4::getCol3( ) const
-{
-    return mCol3;
-}
-
-inline const Vector4 Matrix4::getCol( int col ) const
-{
-    return *(&mCol0 + col);
-}
-
-inline const Vector4 Matrix4::getRow( int row ) const
-{
-    return Vector4( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ), mCol3.getElem( row ) );
-}
-
-inline Vector4 & Matrix4::operator []( int col )
-{
-    return *(&mCol0 + col);
-}
-
-inline const Vector4 Matrix4::operator []( int col ) const
-{
-    return *(&mCol0 + col);
-}
-
-inline Matrix4 & Matrix4::operator =( const Matrix4 & mat )
-{
-    mCol0 = mat.mCol0;
-    mCol1 = mat.mCol1;
-    mCol2 = mat.mCol2;
-    mCol3 = mat.mCol3;
-    return *this;
-}
-
-inline const Matrix4 transpose( const Matrix4 & mat )
-{
-    vec_float4 tmp0, tmp1, tmp2, tmp3, res0, res1, res2, res3;
-    tmp0 = spu_shuffle( mat.getCol0().get128(), mat.getCol2().get128(), _VECTORMATH_SHUF_XAYB );
-    tmp1 = spu_shuffle( mat.getCol1().get128(), mat.getCol3().get128(), _VECTORMATH_SHUF_XAYB );
-    tmp2 = spu_shuffle( mat.getCol0().get128(), mat.getCol2().get128(), _VECTORMATH_SHUF_ZCWD );
-    tmp3 = spu_shuffle( mat.getCol1().get128(), mat.getCol3().get128(), _VECTORMATH_SHUF_ZCWD );
-    res0 = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_XAYB );
-    res1 = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_ZCWD );
-    res2 = spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_XAYB );
-    res3 = spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_ZCWD );
-    return Matrix4(
-        Vector4( res0 ),
-        Vector4( res1 ),
-        Vector4( res2 ),
-        Vector4( res3 )
-    );
-}
-
-inline const Matrix4 inverse( const Matrix4 & mat )
-{
-    /* function implementation based on code from STIDC SDK:           */
-    /* --------------------------------------------------------------  */
-    /* PLEASE DO NOT MODIFY THIS SECTION                               */
-    /* This prolog section is automatically generated.                 */
-    /*                                                                 */
-    /* (C)Copyright                                                    */
-    /* Sony Computer Entertainment, Inc.,                              */
-    /* Toshiba Corporation,                                            */
-    /* International Business Machines Corporation,                    */
-    /* 2001,2002.                                                      */
-    /* S/T/I Confidential Information                                  */
-    /* --------------------------------------------------------------  */
-    vec_float4 in0, in1, in2, in3;
-    vec_float4 tmp0, tmp1, tmp2, tmp3;
-    vec_float4 cof0, cof1, cof2, cof3;
-    vec_float4 t0, t1, t2, t3;
-    vec_float4 t01, t02, t03, t12, t23;
-    vec_float4 t1r, t2r;
-    vec_float4 t01r, t02r, t03r, t12r, t23r;
-    vec_float4 t1r3, t1r3r;
-    vec_float4 det, det1, det2, det3, invdet;
-    in0 = mat.getCol0().get128();
-    in1 = mat.getCol1().get128();
-    in2 = mat.getCol2().get128();
-    in3 = mat.getCol3().get128();
-    /* Perform transform of the input matrix of the form:
-     *    A B C D
-     *    E F G H
-     *    I J K L
-     *    M N O P
-     *
-     * The pseudo transpose of the input matrix is trans:
-     *    A E I M
-     *    J N B F
-     *    C G K O
-     *    L P D H
-     */
-    tmp0 = spu_shuffle(in0, in1, _VECTORMATH_SHUF_XAZC);    /* A E C G */
-    tmp1 = spu_shuffle(in2, in3, _VECTORMATH_SHUF_XAZC);    /* I M K O */
-    tmp2 = spu_shuffle(in0, in1, _VECTORMATH_SHUF_YBWD);    /* B F D H */
-    tmp3 = spu_shuffle(in2, in3, _VECTORMATH_SHUF_YBWD);    /* J N L P */
-    t0 = spu_shuffle(tmp0, tmp1, _VECTORMATH_SHUF_XYAB);    /* A E I M */
-    t1 = spu_shuffle(tmp3, tmp2, _VECTORMATH_SHUF_XYAB);    /* J N B F */
-    t2 = spu_shuffle(tmp0, tmp1, _VECTORMATH_SHUF_ZWCD);    /* C G K O */
-    t3 = spu_shuffle(tmp3, tmp2, _VECTORMATH_SHUF_ZWCD);    /* L P D H */
-    /* Generate a cofactor matrix. The computed cofactors reside in
-     * cof0, cof1, cof2, cof3.
-     */
-    t23 = spu_mul(t2, t3);                        /* CL GP KD OH */
-    t23 = spu_shuffle(t23, t23, _VECTORMATH_SHUF_YXWZ);     /* GP CL OH KD */
-    cof0 = spu_mul(t1, t23);                      /* JGP NCL BOH FKD */
-    cof1 = spu_mul(t0, t23);                      /* AGP ECL IOH MKD */
-    t23r = spu_rlqwbyte(t23, 8);                  /* OH KD GP CL */
-    cof0 = spu_msub(t1, t23r, cof0);              /* JOH NKD BGP FCL  - cof0 */
-    cof1 = spu_msub(t0, t23r, cof1);              /* AOH EKD IGP MCL  - cof1 */
-    cof1 = spu_rlqwbyte(cof1, 8);                 /* IGP MCL AOH EKD - IOH MKD AGP ECL */
-
-    t12 = spu_mul(t1, t2);                        /* JC NG BK FO */
-    t12 = spu_shuffle(t12, t12, _VECTORMATH_SHUF_YXWZ);     /* NG JC FO BK */
-    cof0 = spu_madd(t3, t12, cof0);               /* LNG PJC DFO HBK + cof0 */
-    cof3 = spu_mul(t0, t12);                      /* ANG EJC IFO MBK */
-    t12r = spu_rlqwbyte(t12, 8);                  /* FO BK NG JC */
-    cof0 = spu_nmsub(t3, t12r, cof0);             /* cof0 - LFO PBK DNG HJC */
-    cof3 = spu_msub(t0, t12r, cof3);              /* AFO EBK ING MJC - cof3 */
-    cof3 = spu_rlqwbyte(cof3, 8);                 /* ING MJC AFO EBK - IFO MBK ANG EJC */
-    t1r = spu_rlqwbyte(t1, 8);                    /* B F J N */
-    t2r = spu_rlqwbyte(t2, 8);                    /* K O C G */
-    t1r3 = spu_mul(t1r, t3);                      /* BL FP JD NH */
-    t1r3 = spu_shuffle(t1r3, t1r3, _VECTORMATH_SHUF_YXWZ);  /* FP BL NH JD */
-    cof0 = spu_madd(t2r, t1r3, cof0);             /* KFP OBL CNH GJD + cof0 */
-    cof2 = spu_mul(t0, t1r3);                     /* AFP EBL INH MJD */
-    t1r3r = spu_rlqwbyte(t1r3, 8);                /* NH JD FP BL */
-    cof0 = spu_nmsub(t2r, t1r3r, cof0);           /* cof0 - KNH OJD CFP GBL */
-    cof2 = spu_msub(t0, t1r3r, cof2);             /* ANH EJD IFP MBL - cof2 */
-    cof2 = spu_rlqwbyte(cof2, 8);                 /* IFP MBL ANH EJD - INH MJD AFP EBL */
-    t01 = spu_mul(t0, t1);                                /* AJ EN IB MF */
-    t01 = spu_shuffle(t01, t01, _VECTORMATH_SHUF_YXWZ);     /* EN AJ MF IB */
-    cof2 = spu_madd(t3, t01, cof2);               /* LEN PAJ DMF HIB + cof2 */
-    cof3 = spu_msub(t2r, t01, cof3);              /* KEN OAJ CMF GIB - cof3 */
-    t01r = spu_rlqwbyte(t01, 8);                  /* MF IB EN AJ */
-    cof2 = spu_msub(t3, t01r, cof2);              /* LMF PIB DEN HAJ - cof2 */
-    cof3 = spu_nmsub(t2r, t01r, cof3);            /* cof3 - KMF OIB CEN GAJ */
-    t03 = spu_mul(t0, t3);                                /* AL EP ID MH */
-    t03 = spu_shuffle(t03, t03, _VECTORMATH_SHUF_YXWZ);     /* EP AL MH ID */
-    cof1 = spu_nmsub(t2r, t03, cof1);             /* cof1 - KEP OAL CMH GID */
-    cof2 = spu_madd(t1, t03, cof2);               /* JEP NAL BMH FID + cof2 */
-    t03r = spu_rlqwbyte(t03, 8);                  /* MH ID EP AL */
-    cof1 = spu_madd(t2r, t03r, cof1);             /* KMH OID CEP GAL + cof1 */
-    cof2 = spu_nmsub(t1, t03r, cof2);             /* cof2 - JMH NID BEP FAL */
-    t02 = spu_mul(t0, t2r);                       /* AK EO IC MG */
-    t02 = spu_shuffle(t02, t02, _VECTORMATH_SHUF_YXWZ);     /* E0 AK MG IC */
-    cof1 = spu_madd(t3, t02, cof1);               /* LEO PAK DMG HIC + cof1 */
-    cof3 = spu_nmsub(t1, t02, cof3);              /* cof3 - JEO NAK BMG FIC */
-    t02r = spu_rlqwbyte(t02, 8);                  /* MG IC EO AK */
-    cof1 = spu_nmsub(t3, t02r, cof1);             /* cof1 - LMG PIC DEO HAK */
-    cof3 = spu_madd(t1, t02r, cof3);              /* JMG NIC BEO FAK + cof3 */
-    /* Compute the determinant of the matrix
-     *
-     * det = sum_across(t0 * cof0);
-     *
-     * We perform a sum across the entire vector so that
-     * we don't have to splat the result when multiplying the
-     * cofactors by the inverse of the determinant.
-     */
-    det  = spu_mul(t0, cof0);
-    det1 = spu_rlqwbyte(det, 4);
-    det2 = spu_rlqwbyte(det, 8);
-    det3 = spu_rlqwbyte(det, 12);
-    det  = spu_add(det, det1);
-    det2 = spu_add(det2, det3);
-    det  = spu_add(det, det2);
-    /* Compute the reciprocal of the determinant.
-     */
-    invdet = recipf4(det);
-    /* Multiply the cofactors by the reciprocal of the determinant.
-     */
-    return Matrix4(
-        Vector4( spu_mul(cof0, invdet) ),
-        Vector4( spu_mul(cof1, invdet) ),
-        Vector4( spu_mul(cof2, invdet) ),
-        Vector4( spu_mul(cof3, invdet) )
-    );
-}
-
-inline const Matrix4 affineInverse( const Matrix4 & mat )
-{
-    Transform3 affineMat;
-    affineMat.setCol0( mat.getCol0().getXYZ( ) );
-    affineMat.setCol1( mat.getCol1().getXYZ( ) );
-    affineMat.setCol2( mat.getCol2().getXYZ( ) );
-    affineMat.setCol3( mat.getCol3().getXYZ( ) );
-    return Matrix4( inverse( affineMat ) );
-}
-
-inline const Matrix4 orthoInverse( const Matrix4 & mat )
-{
-    Transform3 affineMat;
-    affineMat.setCol0( mat.getCol0().getXYZ( ) );
-    affineMat.setCol1( mat.getCol1().getXYZ( ) );
-    affineMat.setCol2( mat.getCol2().getXYZ( ) );
-    affineMat.setCol3( mat.getCol3().getXYZ( ) );
-    return Matrix4( orthoInverse( affineMat ) );
-}
-
-inline float determinant( const Matrix4 & mat )
-{
-    /* function implementation based on code from STIDC SDK:           */
-    /* --------------------------------------------------------------  */
-    /* PLEASE DO NOT MODIFY THIS SECTION                               */
-    /* This prolog section is automatically generated.                 */
-    /*                                                                 */
-    /* (C)Copyright                                                    */
-    /* Sony Computer Entertainment, Inc.,                              */
-    /* Toshiba Corporation,                                            */
-    /* International Business Machines Corporation,                    */
-    /* 2001,2002.                                                      */
-    /* S/T/I Confidential Information                                  */
-    /* --------------------------------------------------------------  */
-    vec_float4 in0, in1, in2, in3;
-    vec_float4 tmp0, tmp1, tmp2, tmp3;
-    vec_float4 cof0;
-    vec_float4 t0, t1, t2, t3;
-    vec_float4 t12, t23;
-    vec_float4 t1r, t2r;
-    vec_float4 t12r, t23r;
-    vec_float4 t1r3, t1r3r;
-    in0 = mat.getCol0().get128();
-    in1 = mat.getCol1().get128();
-    in2 = mat.getCol2().get128();
-    in3 = mat.getCol3().get128();
-    /* Perform transform of the input matrix of the form:
-     *    A B C D
-     *    E F G H
-     *    I J K L
-     *    M N O P
-     *
-     * The pseudo transpose of the input matrix is trans:
-     *    A E I M
-     *    J N B F
-     *    C G K O
-     *    L P D H
-     */
-    tmp0 = spu_shuffle(in0, in1, _VECTORMATH_SHUF_XAZC);    /* A E C G */
-    tmp1 = spu_shuffle(in2, in3, _VECTORMATH_SHUF_XAZC);    /* I M K O */
-    tmp2 = spu_shuffle(in0, in1, _VECTORMATH_SHUF_YBWD);    /* B F D H */
-    tmp3 = spu_shuffle(in2, in3, _VECTORMATH_SHUF_YBWD);    /* J N L P */
-    t0 = spu_shuffle(tmp0, tmp1, _VECTORMATH_SHUF_XYAB);    /* A E I M */
-    t1 = spu_shuffle(tmp3, tmp2, _VECTORMATH_SHUF_XYAB);    /* J N B F */
-    t2 = spu_shuffle(tmp0, tmp1, _VECTORMATH_SHUF_ZWCD);    /* C G K O */
-    t3 = spu_shuffle(tmp3, tmp2, _VECTORMATH_SHUF_ZWCD);    /* L P D H */
-    /* Generate a cofactor matrix. The computed cofactors reside in
-     * cof0, cof1, cof2, cof3.
-     */
-    t23 = spu_mul(t2, t3);                        /* CL GP KD OH */
-    t23 = spu_shuffle(t23, t23, _VECTORMATH_SHUF_YXWZ);     /* GP CL OH KD */
-    cof0 = spu_mul(t1, t23);                      /* JGP NCL BOH FKD */
-    t23r = spu_rlqwbyte(t23, 8);                  /* OH KD GP CL */
-    cof0 = spu_msub(t1, t23r, cof0);              /* JOH NKD BGP FCL  - cof0 */
-
-    t12 = spu_mul(t1, t2);                        /* JC NG BK FO */
-    t12 = spu_shuffle(t12, t12, _VECTORMATH_SHUF_YXWZ);     /* NG JC FO BK */
-    cof0 = spu_madd(t3, t12, cof0);               /* LNG PJC DFO HBK + cof0 */
-    t12r = spu_rlqwbyte(t12, 8);                  /* FO BK NG JC */
-    cof0 = spu_nmsub(t3, t12r, cof0);             /* cof0 - LFO PBK DNG HJC */
-    t1r = spu_rlqwbyte(t1, 8);                    /* B F J N */
-    t2r = spu_rlqwbyte(t2, 8);                    /* K O C G */
-    t1r3 = spu_mul(t1r, t3);                      /* BL FP JD NH */
-    t1r3 = spu_shuffle(t1r3, t1r3, _VECTORMATH_SHUF_YXWZ);  /* FP BL NH JD */
-    cof0 = spu_madd(t2r, t1r3, cof0);             /* KFP OBL CNH GJD + cof0 */
-    t1r3r = spu_rlqwbyte(t1r3, 8);                /* NH JD FP BL */
-    cof0 = spu_nmsub(t2r, t1r3r, cof0);           /* cof0 - KNH OJD CFP GBL */
-    return spu_extract( _vmathVfDot4(t0,cof0), 0 );
-}
-
-inline const Matrix4 Matrix4::operator +( const Matrix4 & mat ) const
-{
-    return Matrix4(
-        ( mCol0 + mat.mCol0 ),
-        ( mCol1 + mat.mCol1 ),
-        ( mCol2 + mat.mCol2 ),
-        ( mCol3 + mat.mCol3 )
-    );
-}
-
-inline const Matrix4 Matrix4::operator -( const Matrix4 & mat ) const
-{
-    return Matrix4(
-        ( mCol0 - mat.mCol0 ),
-        ( mCol1 - mat.mCol1 ),
-        ( mCol2 - mat.mCol2 ),
-        ( mCol3 - mat.mCol3 )
-    );
-}
-
-inline Matrix4 & Matrix4::operator +=( const Matrix4 & mat )
-{
-    *this = *this + mat;
-    return *this;
-}
-
-inline Matrix4 & Matrix4::operator -=( const Matrix4 & mat )
-{
-    *this = *this - mat;
-    return *this;
-}
-
-inline const Matrix4 Matrix4::operator -( ) const
-{
-    return Matrix4(
-        ( -mCol0 ),
-        ( -mCol1 ),
-        ( -mCol2 ),
-        ( -mCol3 )
-    );
-}
-
-inline const Matrix4 absPerElem( const Matrix4 & mat )
-{
-    return Matrix4(
-        absPerElem( mat.getCol0() ),
-        absPerElem( mat.getCol1() ),
-        absPerElem( mat.getCol2() ),
-        absPerElem( mat.getCol3() )
-    );
-}
-
-inline const Matrix4 Matrix4::operator *( float scalar ) const
-{
-    return Matrix4(
-        ( mCol0 * scalar ),
-        ( mCol1 * scalar ),
-        ( mCol2 * scalar ),
-        ( mCol3 * scalar )
-    );
-}
-
-inline Matrix4 & Matrix4::operator *=( float scalar )
-{
-    *this = *this * scalar;
-    return *this;
-}
-
-inline const Matrix4 operator *( float scalar, const Matrix4 & mat )
-{
-    return mat * scalar;
-}
-
-inline const Vector4 Matrix4::operator *( Vector4 vec ) const
-{
-    vec_float4 tmp0, tmp1, res;
-    vec_float4 xxxx, yyyy, zzzz, wwww;
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
-    vec_uchar16 shuffle_wwww = (vec_uchar16)spu_splats((int)0x0c0d0e0f);
-    xxxx = spu_shuffle( vec.get128(), vec.get128(), shuffle_xxxx );
-    yyyy = spu_shuffle( vec.get128(), vec.get128(), shuffle_yyyy );
-    zzzz = spu_shuffle( vec.get128(), vec.get128(), shuffle_zzzz );
-    wwww = spu_shuffle( vec.get128(), vec.get128(), shuffle_wwww );
-    tmp0 = spu_mul( mCol0.get128(), xxxx );
-    tmp1 = spu_mul( mCol1.get128(), yyyy );
-    tmp0 = spu_madd( mCol2.get128(), zzzz, tmp0 );
-    tmp1 = spu_madd( mCol3.get128(), wwww, tmp1 );
-    res = spu_add( tmp0, tmp1 );
-    return Vector4( res );
-}
-
-inline const Vector4 Matrix4::operator *( Vector3 vec ) const
-{
-    vec_float4 res;
-    vec_float4 xxxx, yyyy, zzzz;
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
-    xxxx = spu_shuffle( vec.get128(), vec.get128(), shuffle_xxxx );
-    yyyy = spu_shuffle( vec.get128(), vec.get128(), shuffle_yyyy );
-    zzzz = spu_shuffle( vec.get128(), vec.get128(), shuffle_zzzz );
-    res = spu_mul( mCol0.get128(), xxxx );
-    res = spu_madd( mCol1.get128(), yyyy, res );
-    res = spu_madd( mCol2.get128(), zzzz, res );
-    return Vector4( res );
-}
-
-inline const Vector4 Matrix4::operator *( Point3 pnt ) const
-{
-    vec_float4 tmp0, tmp1, res;
-    vec_float4 xxxx, yyyy, zzzz;
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
-    xxxx = spu_shuffle( pnt.get128(), pnt.get128(), shuffle_xxxx );
-    yyyy = spu_shuffle( pnt.get128(), pnt.get128(), shuffle_yyyy );
-    zzzz = spu_shuffle( pnt.get128(), pnt.get128(), shuffle_zzzz );
-    tmp0 = spu_mul( mCol0.get128(), xxxx );
-    tmp1 = spu_mul( mCol1.get128(), yyyy );
-    tmp0 = spu_madd( mCol2.get128(), zzzz, tmp0 );
-    tmp1 = spu_add( mCol3.get128(), tmp1 );
-    res = spu_add( tmp0, tmp1 );
-    return Vector4( res );
-}
-
-inline const Matrix4 Matrix4::operator *( const Matrix4 & mat ) const
-{
-    return Matrix4(
-        ( *this * mat.mCol0 ),
-        ( *this * mat.mCol1 ),
-        ( *this * mat.mCol2 ),
-        ( *this * mat.mCol3 )
-    );
-}
-
-inline Matrix4 & Matrix4::operator *=( const Matrix4 & mat )
-{
-    *this = *this * mat;
-    return *this;
-}
-
-inline const Matrix4 Matrix4::operator *( const Transform3 & tfrm ) const
-{
-    return Matrix4(
-        ( *this * tfrm.getCol0() ),
-        ( *this * tfrm.getCol1() ),
-        ( *this * tfrm.getCol2() ),
-        ( *this * Point3( tfrm.getCol3() ) )
-    );
-}
-
-inline Matrix4 & Matrix4::operator *=( const Transform3 & tfrm )
-{
-    *this = *this * tfrm;
-    return *this;
-}
-
-inline const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 )
-{
-    return Matrix4(
-        mulPerElem( mat0.getCol0(), mat1.getCol0() ),
-        mulPerElem( mat0.getCol1(), mat1.getCol1() ),
-        mulPerElem( mat0.getCol2(), mat1.getCol2() ),
-        mulPerElem( mat0.getCol3(), mat1.getCol3() )
-    );
-}
-
-inline const Matrix4 Matrix4::identity( )
-{
-    return Matrix4(
-        Vector4::xAxis( ),
-        Vector4::yAxis( ),
-        Vector4::zAxis( ),
-        Vector4::wAxis( )
-    );
-}
-
-inline Matrix4 & Matrix4::setUpper3x3( const Matrix3 & mat3 )
-{
-    mCol0.setXYZ( mat3.getCol0() );
-    mCol1.setXYZ( mat3.getCol1() );
-    mCol2.setXYZ( mat3.getCol2() );
-    return *this;
-}
-
-inline const Matrix3 Matrix4::getUpper3x3( ) const
-{
-    return Matrix3(
-        mCol0.getXYZ( ),
-        mCol1.getXYZ( ),
-        mCol2.getXYZ( )
-    );
-}
-
-inline Matrix4 & Matrix4::setTranslation( Vector3 translateVec )
-{
-    mCol3.setXYZ( translateVec );
-    return *this;
-}
-
-inline const Vector3 Matrix4::getTranslation( ) const
-{
-    return mCol3.getXYZ( );
-}
-
-inline const Matrix4 Matrix4::rotationX( float radians )
-{
-    vec_float4 s, c, res1, res2;
-    vec_uint4 select_y, select_z;
-    vec_float4 zero;
-    select_y = (vec_uint4)spu_maskb(0x0f00);
-    select_z = (vec_uint4)spu_maskb(0x00f0);
-    zero = spu_splats(0.0f);
-    sincosf4( spu_splats(radians), &s, &c );
-    res1 = spu_sel( zero, c, select_y );
-    res1 = spu_sel( res1, s, select_z );
-    res2 = spu_sel( zero, negatef4(s), select_y );
-    res2 = spu_sel( res2, c, select_z );
-    return Matrix4(
-        Vector4::xAxis( ),
-        Vector4( res1 ),
-        Vector4( res2 ),
-        Vector4::wAxis( )
-    );
-}
-
-inline const Matrix4 Matrix4::rotationY( float radians )
-{
-    vec_float4 s, c, res0, res2;
-    vec_uint4 select_x, select_z;
-    vec_float4 zero;
-    select_x = (vec_uint4)spu_maskb(0xf000);
-    select_z = (vec_uint4)spu_maskb(0x00f0);
-    zero = spu_splats(0.0f);
-    sincosf4( spu_splats(radians), &s, &c );
-    res0 = spu_sel( zero, c, select_x );
-    res0 = spu_sel( res0, negatef4(s), select_z );
-    res2 = spu_sel( zero, s, select_x );
-    res2 = spu_sel( res2, c, select_z );
-    return Matrix4(
-        Vector4( res0 ),
-        Vector4::yAxis( ),
-        Vector4( res2 ),
-        Vector4::wAxis( )
-    );
-}
-
-inline const Matrix4 Matrix4::rotationZ( float radians )
-{
-    vec_float4 s, c, res0, res1;
-    vec_uint4 select_x, select_y;
-    vec_float4 zero;
-    select_x = (vec_uint4)spu_maskb(0xf000);
-    select_y = (vec_uint4)spu_maskb(0x0f00);
-    zero = spu_splats(0.0f);
-    sincosf4( spu_splats(radians), &s, &c );
-    res0 = spu_sel( zero, c, select_x );
-    res0 = spu_sel( res0, s, select_y );
-    res1 = spu_sel( zero, negatef4(s), select_x );
-    res1 = spu_sel( res1, c, select_y );
-    return Matrix4(
-        Vector4( res0 ),
-        Vector4( res1 ),
-        Vector4::zAxis( ),
-        Vector4::wAxis( )
-    );
-}
-
-inline const Matrix4 Matrix4::rotationZYX( Vector3 radiansXYZ )
-{
-    vec_float4 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    angles = radiansXYZ.get128();
-    angles = spu_insert( 0.0f, angles, 3 );
-    sincosf4( angles, &s, &c );
-    negS = negatef4( s );
-    Z0 = spu_shuffle( s, c, _VECTORMATH_SHUF_CZD0 );
-    Z1 = spu_shuffle( c, negS, _VECTORMATH_SHUF_CZD0 );
-    Y0 = spu_shuffle( negS, c, _VECTORMATH_SHUF_BBY0 );
-    Y1 = spu_shuffle( c, s, _VECTORMATH_SHUF_BBY0 );
-    X0 = spu_shuffle( s, s, shuffle_xxxx );
-    X1 = spu_shuffle( c, c, shuffle_xxxx );
-    tmp = spu_mul( Z0, Y1 );
-    return Matrix4(
-        Vector4( spu_mul( Z0, Y0 ) ),
-        Vector4( spu_madd( Z1, X1, spu_mul( tmp, X0 ) ) ),
-        Vector4( spu_nmsub( Z1, X0, spu_mul( tmp, X1 ) ) ),
-        Vector4::wAxis( )
-    );
-}
-
-inline const Matrix4 Matrix4::rotation( float radians, Vector3 unitVec )
-{
-    vec_float4 axis, s, c, oneMinusC, axisS, negAxisS, xxxx, yyyy, zzzz, tmp0, tmp1, tmp2, zeroW;
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
-    axis = unitVec.get128();
-    sincosf4( spu_splats( radians ), &s, &c );
-    xxxx = spu_shuffle( axis, axis, shuffle_xxxx );
-    yyyy = spu_shuffle( axis, axis, shuffle_yyyy );
-    zzzz = spu_shuffle( axis, axis, shuffle_zzzz );
-    oneMinusC = spu_sub( spu_splats(1.0f), c );
-    axisS = spu_mul( axis, s );
-    negAxisS = negatef4( axisS );
-    tmp0 = spu_shuffle( axisS, negAxisS, _VECTORMATH_SHUF_0ZB0 );
-    tmp1 = spu_shuffle( axisS, negAxisS, _VECTORMATH_SHUF_C0X0 );
-    tmp2 = spu_shuffle( axisS, negAxisS, _VECTORMATH_SHUF_YA00 );
-    tmp0 = spu_sel( tmp0, c, (vec_uint4)spu_maskb(0xf000) );
-    tmp1 = spu_sel( tmp1, c, (vec_uint4)spu_maskb(0x0f00) );
-    tmp2 = spu_sel( tmp2, c, (vec_uint4)spu_maskb(0x00f0) );
-    zeroW = (vec_float4)spu_maskb(0x000f);
-    axis = spu_andc( axis, zeroW );
-    return Matrix4(
-        Vector4( spu_madd( spu_mul( axis, xxxx ), oneMinusC, tmp0 ) ),
-        Vector4( spu_madd( spu_mul( axis, yyyy ), oneMinusC, tmp1 ) ),
-        Vector4( spu_madd( spu_mul( axis, zzzz ), oneMinusC, tmp2 ) ),
-        Vector4::wAxis( )
-    );
-}
-
-inline const Matrix4 Matrix4::rotation( Quat unitQuat )
-{
-    return Matrix4( Transform3::rotation( unitQuat ) );
-}
-
-inline const Matrix4 Matrix4::scale( Vector3 scaleVec )
-{
-    vec_float4 zero = spu_splats(0.0f);
-    return Matrix4(
-        Vector4( spu_sel( zero, scaleVec.get128(), (vec_uint4)spu_maskb(0xf000) ) ),
-        Vector4( spu_sel( zero, scaleVec.get128(), (vec_uint4)spu_maskb(0x0f00) ) ),
-        Vector4( spu_sel( zero, scaleVec.get128(), (vec_uint4)spu_maskb(0x00f0) ) ),
-        Vector4::wAxis( )
-    );
-}
-
-inline const Matrix4 appendScale( const Matrix4 & mat, Vector3 scaleVec )
-{
-    return Matrix4(
-        ( mat.getCol0() * scaleVec.getX( ) ),
-        ( mat.getCol1() * scaleVec.getY( ) ),
-        ( mat.getCol2() * scaleVec.getZ( ) ),
-        mat.getCol3()
-    );
-}
-
-inline const Matrix4 prependScale( Vector3 scaleVec, const Matrix4 & mat )
-{
-    Vector4 scale4;
-    scale4 = Vector4( scaleVec, 1.0f );
-    return Matrix4(
-        mulPerElem( mat.getCol0(), scale4 ),
-        mulPerElem( mat.getCol1(), scale4 ),
-        mulPerElem( mat.getCol2(), scale4 ),
-        mulPerElem( mat.getCol3(), scale4 )
-    );
-}
-
-inline const Matrix4 Matrix4::translation( Vector3 translateVec )
-{
-    return Matrix4(
-        Vector4::xAxis( ),
-        Vector4::yAxis( ),
-        Vector4::zAxis( ),
-        Vector4( translateVec, 1.0f )
-    );
-}
-
-inline const Matrix4 Matrix4::lookAt( Point3 eyePos, Point3 lookAtPos, Vector3 upVec )
-{
-    Matrix4 m4EyeFrame;
-    Vector3 v3X, v3Y, v3Z;
-    v3Y = normalize( upVec );
-    v3Z = normalize( ( eyePos - lookAtPos ) );
-    v3X = normalize( cross( v3Y, v3Z ) );
-    v3Y = cross( v3Z, v3X );
-    m4EyeFrame = Matrix4( Vector4( v3X ), Vector4( v3Y ), Vector4( v3Z ), Vector4( eyePos ) );
-    return orthoInverse( m4EyeFrame );
-}
-
-inline const Matrix4 Matrix4::perspective( float fovyRadians, float aspect, float zNear, float zFar )
-{
-    float f, rangeInv;
-    vec_float4 zero, col0, col1, col2, col3;
-    f = tanf( _VECTORMATH_PI_OVER_2 - fovyRadians * 0.5f );
-    rangeInv = 1.0f / ( zNear - zFar );
-    zero = spu_splats(0.0f);
-    col0 = zero;
-    col1 = zero;
-    col2 = zero;
-    col3 = zero;
-    col0 = spu_insert( f / aspect, col0, 0 );
-    col1 = spu_insert( f, col1, 1 );
-    col2 = spu_insert( ( zNear + zFar ) * rangeInv, col2, 2 );
-    col2 = spu_insert( -1.0f, col2, 3 );
-    col3 = spu_insert( zNear * zFar * rangeInv * 2.0f, col3, 2 );
-    return Matrix4(
-        Vector4( col0 ),
-        Vector4( col1 ),
-        Vector4( col2 ),
-        Vector4( col3 )
-    );
-}
-
-inline const Matrix4 Matrix4::frustum( float left, float right, float bottom, float top, float zNear, float zFar )
-{
-    /* function implementation based on code from STIDC SDK:           */
-    /* --------------------------------------------------------------  */
-    /* PLEASE DO NOT MODIFY THIS SECTION                               */
-    /* This prolog section is automatically generated.                 */
-    /*                                                                 */
-    /* (C)Copyright                                                    */
-    /* Sony Computer Entertainment, Inc.,                              */
-    /* Toshiba Corporation,                                            */
-    /* International Business Machines Corporation,                    */
-    /* 2001,2002.                                                      */
-    /* S/T/I Confidential Information                                  */
-    /* --------------------------------------------------------------  */
-    vec_float4 lbf, rtn;
-    vec_float4 diff, sum, inv_diff;
-    vec_float4 diagonal, column, near2;
-    vec_float4 zero = spu_splats(0.0f);
-    lbf = spu_shuffle( spu_promote(left,0), spu_promote(zFar,0), _VECTORMATH_SHUF_XAYB );
-    rtn = spu_shuffle( spu_promote(right,0), spu_promote(zNear,0), _VECTORMATH_SHUF_XAYB );
-    lbf = spu_shuffle( lbf, spu_promote(bottom,0), _VECTORMATH_SHUF_XAYB );
-    rtn = spu_shuffle( rtn, spu_promote(top,0), _VECTORMATH_SHUF_XAYB );
-    diff = spu_sub( rtn, lbf );
-    sum  = spu_add( rtn, lbf );
-    inv_diff = recipf4( diff );
-    near2 = spu_splats( zNear );
-    near2 = spu_add( near2, near2 );
-    diagonal = spu_mul( near2, inv_diff );
-    column = spu_mul( sum, inv_diff );
-    return Matrix4(
-        Vector4( spu_sel( zero, diagonal, (vec_uint4)spu_maskb(0xf000) ) ),
-        Vector4( spu_sel( zero, diagonal, (vec_uint4)spu_maskb(0x0f00) ) ),
-        Vector4( spu_sel( column, spu_splats(-1.0f), (vec_uint4)spu_maskb(0x000f) ) ),
-        Vector4( spu_sel( zero, spu_mul( diagonal, spu_splats(zFar) ), (vec_uint4)spu_maskb(0x00f0) ) )
-    );
-}
-
-inline const Matrix4 Matrix4::orthographic( float left, float right, float bottom, float top, float zNear, float zFar )
-{
-    /* function implementation based on code from STIDC SDK:           */
-    /* --------------------------------------------------------------  */
-    /* PLEASE DO NOT MODIFY THIS SECTION                               */
-    /* This prolog section is automatically generated.                 */
-    /*                                                                 */
-    /* (C)Copyright                                                    */
-    /* Sony Computer Entertainment, Inc.,                              */
-    /* Toshiba Corporation,                                            */
-    /* International Business Machines Corporation,                    */
-    /* 2001,2002.                                                      */
-    /* S/T/I Confidential Information                                  */
-    /* --------------------------------------------------------------  */
-    vec_float4 lbf, rtn;
-    vec_float4 diff, sum, inv_diff, neg_inv_diff;
-    vec_float4 diagonal, column;
-    vec_float4 zero = spu_splats(0.0f);
-    lbf = spu_shuffle( spu_promote(left,0), spu_promote(zFar,0), _VECTORMATH_SHUF_XAYB );
-    rtn = spu_shuffle( spu_promote(right,0), spu_promote(zNear,0), _VECTORMATH_SHUF_XAYB );
-    lbf = spu_shuffle( lbf, spu_promote(bottom,0), _VECTORMATH_SHUF_XAYB );
-    rtn = spu_shuffle( rtn, spu_promote(top,0), _VECTORMATH_SHUF_XAYB );
-    diff = spu_sub( rtn, lbf );
-    sum  = spu_add( rtn, lbf );
-    inv_diff = recipf4( diff );
-    neg_inv_diff = negatef4( inv_diff );
-    diagonal = spu_add( inv_diff, inv_diff );
-    column = spu_mul( sum, spu_sel( neg_inv_diff, inv_diff, (vec_uint4)spu_maskb(0x00f0) ) );
-    return Matrix4(
-        Vector4( spu_sel( zero, diagonal, (vec_uint4)spu_maskb(0xf000) ) ),
-        Vector4( spu_sel( zero, diagonal, (vec_uint4)spu_maskb(0x0f00) ) ),
-        Vector4( spu_sel( zero, diagonal, (vec_uint4)spu_maskb(0x00f0) ) ),
-        Vector4( spu_sel( column, spu_splats(1.0f), (vec_uint4)spu_maskb(0x000f) ) )
-    );
-}
-
-inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, bool select1 )
-{
-    return Matrix4(
-        select( mat0.getCol0(), mat1.getCol0(), select1 ),
-        select( mat0.getCol1(), mat1.getCol1(), select1 ),
-        select( mat0.getCol2(), mat1.getCol2(), select1 ),
-        select( mat0.getCol3(), mat1.getCol3(), select1 )
-    );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( const Matrix4 & mat )
-{
-    print( mat.getRow( 0 ) );
-    print( mat.getRow( 1 ) );
-    print( mat.getRow( 2 ) );
-    print( mat.getRow( 3 ) );
-}
-
-inline void print( const Matrix4 & mat, const char * name )
-{
-    printf("%s:\n", name);
-    print( mat );
-}
-
-#endif
-
-inline Transform3::Transform3( const Transform3 & tfrm )
-{
-    mCol0 = tfrm.mCol0;
-    mCol1 = tfrm.mCol1;
-    mCol2 = tfrm.mCol2;
-    mCol3 = tfrm.mCol3;
-}
-
-inline Transform3::Transform3( float scalar )
-{
-    mCol0 = Vector3( scalar );
-    mCol1 = Vector3( scalar );
-    mCol2 = Vector3( scalar );
-    mCol3 = Vector3( scalar );
-}
-
-inline Transform3::Transform3( Vector3 _col0, Vector3 _col1, Vector3 _col2, Vector3 _col3 )
-{
-    mCol0 = _col0;
-    mCol1 = _col1;
-    mCol2 = _col2;
-    mCol3 = _col3;
-}
-
-inline Transform3::Transform3( const Matrix3 & tfrm, Vector3 translateVec )
-{
-    this->setUpper3x3( tfrm );
-    this->setTranslation( translateVec );
-}
-
-inline Transform3::Transform3( Quat unitQuat, Vector3 translateVec )
-{
-    this->setUpper3x3( Matrix3( unitQuat ) );
-    this->setTranslation( translateVec );
-}
-
-inline Transform3 & Transform3::setCol0( Vector3 _col0 )
-{
-    mCol0 = _col0;
-    return *this;
-}
-
-inline Transform3 & Transform3::setCol1( Vector3 _col1 )
-{
-    mCol1 = _col1;
-    return *this;
-}
-
-inline Transform3 & Transform3::setCol2( Vector3 _col2 )
-{
-    mCol2 = _col2;
-    return *this;
-}
-
-inline Transform3 & Transform3::setCol3( Vector3 _col3 )
-{
-    mCol3 = _col3;
-    return *this;
-}
-
-inline Transform3 & Transform3::setCol( int col, Vector3 vec )
-{
-    *(&mCol0 + col) = vec;
-    return *this;
-}
-
-inline Transform3 & Transform3::setRow( int row, Vector4 vec )
-{
-    mCol0.setElem( row, vec.getElem( 0 ) );
-    mCol1.setElem( row, vec.getElem( 1 ) );
-    mCol2.setElem( row, vec.getElem( 2 ) );
-    mCol3.setElem( row, vec.getElem( 3 ) );
-    return *this;
-}
-
-inline Transform3 & Transform3::setElem( int col, int row, float val )
-{
-    (*this)[col].setElem(row, val);
-    return *this;
-}
-
-inline float Transform3::getElem( int col, int row ) const
-{
-    return this->getCol( col ).getElem( row );
-}
-
-inline const Vector3 Transform3::getCol0( ) const
-{
-    return mCol0;
-}
-
-inline const Vector3 Transform3::getCol1( ) const
-{
-    return mCol1;
-}
-
-inline const Vector3 Transform3::getCol2( ) const
-{
-    return mCol2;
-}
-
-inline const Vector3 Transform3::getCol3( ) const
-{
-    return mCol3;
-}
-
-inline const Vector3 Transform3::getCol( int col ) const
-{
-    return *(&mCol0 + col);
-}
-
-inline const Vector4 Transform3::getRow( int row ) const
-{
-    return Vector4( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ), mCol3.getElem( row ) );
-}
-
-inline Vector3 & Transform3::operator []( int col )
-{
-    return *(&mCol0 + col);
-}
-
-inline const Vector3 Transform3::operator []( int col ) const
-{
-    return *(&mCol0 + col);
-}
-
-inline Transform3 & Transform3::operator =( const Transform3 & tfrm )
-{
-    mCol0 = tfrm.mCol0;
-    mCol1 = tfrm.mCol1;
-    mCol2 = tfrm.mCol2;
-    mCol3 = tfrm.mCol3;
-    return *this;
-}
-
-inline const Transform3 inverse( const Transform3 & tfrm )
-{
-    vec_float4 inv0, inv1, inv2, inv3;
-    vec_float4 tmp0, tmp1, tmp2, tmp3, tmp4, dot, invdet;
-    vec_float4 xxxx, yyyy, zzzz;
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
-    tmp2 = _vmathVfCross( tfrm.getCol0().get128(), tfrm.getCol1().get128() );
-    tmp0 = _vmathVfCross( tfrm.getCol1().get128(), tfrm.getCol2().get128() );
-    tmp1 = _vmathVfCross( tfrm.getCol2().get128(), tfrm.getCol0().get128() );
-    inv3 = negatef4( tfrm.getCol3().get128() );
-    dot = _vmathVfDot3( tmp2, tfrm.getCol2().get128() );
-    dot = spu_shuffle( dot, dot, shuffle_xxxx );
-    invdet = recipf4( dot );
-    tmp3 = spu_shuffle( tmp0, tmp2, _VECTORMATH_SHUF_XAYB );
-    tmp4 = spu_shuffle( tmp0, tmp2, _VECTORMATH_SHUF_ZCWD );
-    inv0 = spu_shuffle( tmp3, tmp1, _VECTORMATH_SHUF_XAYB );
-    xxxx = spu_shuffle( inv3, inv3, shuffle_xxxx );
-    inv1 = spu_shuffle( tmp3, tmp1, _VECTORMATH_SHUF_ZBW0 );
-    inv2 = spu_shuffle( tmp4, tmp1, _VECTORMATH_SHUF_XCY0 );
-    yyyy = spu_shuffle( inv3, inv3, shuffle_yyyy );
-    zzzz = spu_shuffle( inv3, inv3, shuffle_zzzz );
-    inv3 = spu_mul( inv0, xxxx );
-    inv3 = spu_madd( inv1, yyyy, inv3 );
-    inv3 = spu_madd( inv2, zzzz, inv3 );
-    inv0 = spu_mul( inv0, invdet );
-    inv1 = spu_mul( inv1, invdet );
-    inv2 = spu_mul( inv2, invdet );
-    inv3 = spu_mul( inv3, invdet );
-    return Transform3(
-        Vector3( inv0 ),
-        Vector3( inv1 ),
-        Vector3( inv2 ),
-        Vector3( inv3 )
-    );
-}
-
-inline const Transform3 orthoInverse( const Transform3 & tfrm )
-{
-    vec_float4 inv0, inv1, inv2, inv3;
-    vec_float4 tmp0, tmp1;
-    vec_float4 xxxx, yyyy, zzzz;
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
-    tmp0 = spu_shuffle( tfrm.getCol0().get128(), tfrm.getCol2().get128(), _VECTORMATH_SHUF_XAYB );
-    tmp1 = spu_shuffle( tfrm.getCol0().get128(), tfrm.getCol2().get128(), _VECTORMATH_SHUF_ZCWD );
-    inv3 = negatef4( tfrm.getCol3().get128() );
-    inv0 = spu_shuffle( tmp0, tfrm.getCol1().get128(), _VECTORMATH_SHUF_XAYB );
-    xxxx = spu_shuffle( inv3, inv3, shuffle_xxxx );
-    inv1 = spu_shuffle( tmp0, tfrm.getCol1().get128(), _VECTORMATH_SHUF_ZBW0 );
-    inv2 = spu_shuffle( tmp1, tfrm.getCol1().get128(), _VECTORMATH_SHUF_XCY0 );
-    yyyy = spu_shuffle( inv3, inv3, shuffle_yyyy );
-    zzzz = spu_shuffle( inv3, inv3, shuffle_zzzz );
-    inv3 = spu_mul( inv0, xxxx );
-    inv3 = spu_madd( inv1, yyyy, inv3 );
-    inv3 = spu_madd( inv2, zzzz, inv3 );
-    return Transform3(
-        Vector3( inv0 ),
-        Vector3( inv1 ),
-        Vector3( inv2 ),
-        Vector3( inv3 )
-    );
-}
-
-inline const Transform3 absPerElem( const Transform3 & tfrm )
-{
-    return Transform3(
-        absPerElem( tfrm.getCol0() ),
-        absPerElem( tfrm.getCol1() ),
-        absPerElem( tfrm.getCol2() ),
-        absPerElem( tfrm.getCol3() )
-    );
-}
-
-inline const Vector3 Transform3::operator *( Vector3 vec ) const
-{
-    vec_float4 res;
-    vec_float4 xxxx, yyyy, zzzz;
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
-    xxxx = spu_shuffle( vec.get128(), vec.get128(), shuffle_xxxx );
-    yyyy = spu_shuffle( vec.get128(), vec.get128(), shuffle_yyyy );
-    zzzz = spu_shuffle( vec.get128(), vec.get128(), shuffle_zzzz );
-    res = spu_mul( mCol0.get128(), xxxx );
-    res = spu_madd( mCol1.get128(), yyyy, res );
-    res = spu_madd( mCol2.get128(), zzzz, res );
-    return Vector3( res );
-}
-
-inline const Point3 Transform3::operator *( Point3 pnt ) const
-{
-    vec_float4 tmp0, tmp1, res;
-    vec_float4 xxxx, yyyy, zzzz;
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
-    xxxx = spu_shuffle( pnt.get128(), pnt.get128(), shuffle_xxxx );
-    yyyy = spu_shuffle( pnt.get128(), pnt.get128(), shuffle_yyyy );
-    zzzz = spu_shuffle( pnt.get128(), pnt.get128(), shuffle_zzzz );
-    tmp0 = spu_mul( mCol0.get128(), xxxx );
-    tmp1 = spu_mul( mCol1.get128(), yyyy );
-    tmp0 = spu_madd( mCol2.get128(), zzzz, tmp0 );
-    tmp1 = spu_add( mCol3.get128(), tmp1 );
-    res = spu_add( tmp0, tmp1 );
-    return Point3( res );
-}
-
-inline const Transform3 Transform3::operator *( const Transform3 & tfrm ) const
-{
-    return Transform3(
-        ( *this * tfrm.mCol0 ),
-        ( *this * tfrm.mCol1 ),
-        ( *this * tfrm.mCol2 ),
-        Vector3( ( *this * Point3( tfrm.mCol3 ) ) )
-    );
-}
-
-inline Transform3 & Transform3::operator *=( const Transform3 & tfrm )
-{
-    *this = *this * tfrm;
-    return *this;
-}
-
-inline const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 )
-{
-    return Transform3(
-        mulPerElem( tfrm0.getCol0(), tfrm1.getCol0() ),
-        mulPerElem( tfrm0.getCol1(), tfrm1.getCol1() ),
-        mulPerElem( tfrm0.getCol2(), tfrm1.getCol2() ),
-        mulPerElem( tfrm0.getCol3(), tfrm1.getCol3() )
-    );
-}
-
-inline const Transform3 Transform3::identity( )
-{
-    return Transform3(
-        Vector3::xAxis( ),
-        Vector3::yAxis( ),
-        Vector3::zAxis( ),
-        Vector3( 0.0f )
-    );
-}
-
-inline Transform3 & Transform3::setUpper3x3( const Matrix3 & tfrm )
-{
-    mCol0 = tfrm.getCol0();
-    mCol1 = tfrm.getCol1();
-    mCol2 = tfrm.getCol2();
-    return *this;
-}
-
-inline const Matrix3 Transform3::getUpper3x3( ) const
-{
-    return Matrix3( mCol0, mCol1, mCol2 );
-}
-
-inline Transform3 & Transform3::setTranslation( Vector3 translateVec )
-{
-    mCol3 = translateVec;
-    return *this;
-}
-
-inline const Vector3 Transform3::getTranslation( ) const
-{
-    return mCol3;
-}
-
-inline const Transform3 Transform3::rotationX( float radians )
-{
-    vec_float4 s, c, res1, res2;
-    vec_uint4 select_y, select_z;
-    vec_float4 zero;
-    select_y = (vec_uint4)spu_maskb(0x0f00);
-    select_z = (vec_uint4)spu_maskb(0x00f0);
-    zero = spu_splats(0.0f);
-    sincosf4( spu_splats(radians), &s, &c );
-    res1 = spu_sel( zero, c, select_y );
-    res1 = spu_sel( res1, s, select_z );
-    res2 = spu_sel( zero, negatef4(s), select_y );
-    res2 = spu_sel( res2, c, select_z );
-    return Transform3(
-        Vector3::xAxis( ),
-        Vector3( res1 ),
-        Vector3( res2 ),
-        Vector3( 0.0f )
-    );
-}
-
-inline const Transform3 Transform3::rotationY( float radians )
-{
-    vec_float4 s, c, res0, res2;
-    vec_uint4 select_x, select_z;
-    vec_float4 zero;
-    select_x = (vec_uint4)spu_maskb(0xf000);
-    select_z = (vec_uint4)spu_maskb(0x00f0);
-    zero = spu_splats(0.0f);
-    sincosf4( spu_splats(radians), &s, &c );
-    res0 = spu_sel( zero, c, select_x );
-    res0 = spu_sel( res0, negatef4(s), select_z );
-    res2 = spu_sel( zero, s, select_x );
-    res2 = spu_sel( res2, c, select_z );
-    return Transform3(
-        Vector3( res0 ),
-        Vector3::yAxis( ),
-        Vector3( res2 ),
-        Vector3( 0.0f )
-    );
-}
-
-inline const Transform3 Transform3::rotationZ( float radians )
-{
-    vec_float4 s, c, res0, res1;
-    vec_uint4 select_x, select_y;
-    vec_float4 zero;
-    select_x = (vec_uint4)spu_maskb(0xf000);
-    select_y = (vec_uint4)spu_maskb(0x0f00);
-    zero = spu_splats(0.0f);
-    sincosf4( spu_splats(radians), &s, &c );
-    res0 = spu_sel( zero, c, select_x );
-    res0 = spu_sel( res0, s, select_y );
-    res1 = spu_sel( zero, negatef4(s), select_x );
-    res1 = spu_sel( res1, c, select_y );
-    return Transform3(
-        Vector3( res0 ),
-        Vector3( res1 ),
-        Vector3::zAxis( ),
-        Vector3( 0.0f )
-    );
-}
-
-inline const Transform3 Transform3::rotationZYX( Vector3 radiansXYZ )
-{
-    vec_float4 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    angles = radiansXYZ.get128();
-    angles = spu_insert( 0.0f, angles, 3 );
-    sincosf4( angles, &s, &c );
-    negS = negatef4( s );
-    Z0 = spu_shuffle( s, c, _VECTORMATH_SHUF_CZD0 );
-    Z1 = spu_shuffle( c, negS, _VECTORMATH_SHUF_CZD0 );
-    Y0 = spu_shuffle( negS, c, _VECTORMATH_SHUF_BBY0 );
-    Y1 = spu_shuffle( c, s, _VECTORMATH_SHUF_BBY0 );
-    X0 = spu_shuffle( s, s, shuffle_xxxx );
-    X1 = spu_shuffle( c, c, shuffle_xxxx );
-    tmp = spu_mul( Z0, Y1 );
-    return Transform3(
-        Vector3( spu_mul( Z0, Y0 ) ),
-        Vector3( spu_madd( Z1, X1, spu_mul( tmp, X0 ) ) ),
-        Vector3( spu_nmsub( Z1, X0, spu_mul( tmp, X1 ) ) ),
-        Vector3( 0.0f )
-    );
-}
-
-inline const Transform3 Transform3::rotation( float radians, Vector3 unitVec )
-{
-    return Transform3( Matrix3::rotation( radians, unitVec ), Vector3( 0.0f ) );
-}
-
-inline const Transform3 Transform3::rotation( Quat unitQuat )
-{
-    return Transform3( Matrix3( unitQuat ), Vector3( 0.0f ) );
-}
-
-inline const Transform3 Transform3::scale( Vector3 scaleVec )
-{
-    vec_float4 zero = spu_splats(0.0f);
-    return Transform3(
-        Vector3( spu_sel( zero, scaleVec.get128(), (vec_uint4)spu_maskb(0xf000) ) ),
-        Vector3( spu_sel( zero, scaleVec.get128(), (vec_uint4)spu_maskb(0x0f00) ) ),
-        Vector3( spu_sel( zero, scaleVec.get128(), (vec_uint4)spu_maskb(0x00f0) ) ),
-        Vector3( 0.0f )
-    );
-}
-
-inline const Transform3 appendScale( const Transform3 & tfrm, Vector3 scaleVec )
-{
-    return Transform3(
-        ( tfrm.getCol0() * scaleVec.getX( ) ),
-        ( tfrm.getCol1() * scaleVec.getY( ) ),
-        ( tfrm.getCol2() * scaleVec.getZ( ) ),
-        tfrm.getCol3()
-    );
-}
-
-inline const Transform3 prependScale( Vector3 scaleVec, const Transform3 & tfrm )
-{
-    return Transform3(
-        mulPerElem( tfrm.getCol0(), scaleVec ),
-        mulPerElem( tfrm.getCol1(), scaleVec ),
-        mulPerElem( tfrm.getCol2(), scaleVec ),
-        mulPerElem( tfrm.getCol3(), scaleVec )
-    );
-}
-
-inline const Transform3 Transform3::translation( Vector3 translateVec )
-{
-    return Transform3(
-        Vector3::xAxis( ),
-        Vector3::yAxis( ),
-        Vector3::zAxis( ),
-        translateVec
-    );
-}
-
-inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, bool select1 )
-{
-    return Transform3(
-        select( tfrm0.getCol0(), tfrm1.getCol0(), select1 ),
-        select( tfrm0.getCol1(), tfrm1.getCol1(), select1 ),
-        select( tfrm0.getCol2(), tfrm1.getCol2(), select1 ),
-        select( tfrm0.getCol3(), tfrm1.getCol3(), select1 )
-    );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( const Transform3 & tfrm )
-{
-    print( tfrm.getRow( 0 ) );
-    print( tfrm.getRow( 1 ) );
-    print( tfrm.getRow( 2 ) );
-}
-
-inline void print( const Transform3 & tfrm, const char * name )
-{
-    printf("%s:\n", name);
-    print( tfrm );
-}
-
-#endif
-
-inline Quat::Quat( const Matrix3 & tfrm )
-{
-    vec_float4 res;
-    vec_float4 col0, col1, col2;
-    vec_float4 xx_yy, xx_yy_zz_xx, yy_zz_xx_yy, zz_xx_yy_zz, diagSum, diagDiff;
-    vec_float4 zy_xz_yx, yz_zx_xy, sum, diff;
-    vec_float4 radicand, invSqrt, scale;
-    vec_float4 res0, res1, res2, res3;
-    vec_float4 xx, yy, zz;
-    vec_uint4 select_x = (vec_uint4)spu_maskb( 0xf000 );
-    vec_uint4 select_y = (vec_uint4)spu_maskb( 0x0f00 );
-    vec_uint4 select_z = (vec_uint4)spu_maskb( 0x00f0 );
-    vec_uint4 select_w = (vec_uint4)spu_maskb( 0x000f );
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((unsigned int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((unsigned int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((unsigned int)0x08090a0b);
-    vec_uchar16 shuffle_wwww = (vec_uchar16)spu_splats((unsigned int)0x0c0d0e0f);
-
-    col0 = tfrm.getCol0().get128();
-    col1 = tfrm.getCol1().get128();
-    col2 = tfrm.getCol2().get128();
-
-    /* four cases: */
-    /* trace > 0 */
-    /* else */
-    /*    xx largest diagonal element */
-    /*    yy largest diagonal element */
-    /*    zz largest diagonal element */
-
-    /* compute quaternion for each case */
-
-    xx_yy = spu_sel( col0, col1, select_y );
-    xx_yy_zz_xx = spu_shuffle( xx_yy, col2, _VECTORMATH_SHUF_XYCX );
-    yy_zz_xx_yy = spu_shuffle( xx_yy, col2, _VECTORMATH_SHUF_YCXY );
-    zz_xx_yy_zz = spu_shuffle( xx_yy, col2, _VECTORMATH_SHUF_CXYC );
-
-    diagSum = spu_add( spu_add( xx_yy_zz_xx, yy_zz_xx_yy ), zz_xx_yy_zz );
-    diagDiff = spu_sub( spu_sub( xx_yy_zz_xx, yy_zz_xx_yy ), zz_xx_yy_zz );
-    radicand = spu_add( spu_sel( diagDiff, diagSum, select_w ), spu_splats(1.0f) );
-    invSqrt = rsqrtf4( radicand );
-
-    zy_xz_yx = spu_sel( col0, col1, select_z );
-    zy_xz_yx = spu_shuffle( zy_xz_yx, col2, _VECTORMATH_SHUF_ZAY0 );
-    yz_zx_xy = spu_sel( col0, col1, select_x );
-    yz_zx_xy = spu_shuffle( yz_zx_xy, col2, _VECTORMATH_SHUF_BZX0 );
-
-    sum = spu_add( zy_xz_yx, yz_zx_xy );
-    diff = spu_sub( zy_xz_yx, yz_zx_xy );
-
-    scale = spu_mul( invSqrt, spu_splats(0.5f) );
-    res0 = spu_shuffle( sum, diff, _VECTORMATH_SHUF_0ZYA );
-    res1 = spu_shuffle( sum, diff, _VECTORMATH_SHUF_Z0XB );
-    res2 = spu_shuffle( sum, diff, _VECTORMATH_SHUF_YX0C );
-    res3 = diff;
-    res0 = spu_sel( res0, radicand, select_x );
-    res1 = spu_sel( res1, radicand, select_y );
-    res2 = spu_sel( res2, radicand, select_z );
-    res3 = spu_sel( res3, radicand, select_w );
-    res0 = spu_mul( res0, spu_shuffle( scale, scale, shuffle_xxxx ) );
-    res1 = spu_mul( res1, spu_shuffle( scale, scale, shuffle_yyyy ) );
-    res2 = spu_mul( res2, spu_shuffle( scale, scale, shuffle_zzzz ) );
-    res3 = spu_mul( res3, spu_shuffle( scale, scale, shuffle_wwww ) );
-
-    /* determine case and select answer */
-
-    xx = spu_shuffle( col0, col0, shuffle_xxxx );
-    yy = spu_shuffle( col1, col1, shuffle_yyyy );
-    zz = spu_shuffle( col2, col2, shuffle_zzzz );
-    res = spu_sel( res0, res1, spu_cmpgt( yy, xx ) );
-    res = spu_sel( res, res2, spu_and( spu_cmpgt( zz, xx ), spu_cmpgt( zz, yy ) ) );
-    res = spu_sel( res, res3, spu_cmpgt( spu_shuffle( diagSum, diagSum, shuffle_xxxx ), spu_splats(0.0f) ) );
-    mVec128 = res;
-}
-
-inline const Matrix3 outer( Vector3 tfrm0, Vector3 tfrm1 )
-{
-    return Matrix3(
-        ( tfrm0 * tfrm1.getX( ) ),
-        ( tfrm0 * tfrm1.getY( ) ),
-        ( tfrm0 * tfrm1.getZ( ) )
-    );
-}
-
-inline const Matrix4 outer( Vector4 tfrm0, Vector4 tfrm1 )
-{
-    return Matrix4(
-        ( tfrm0 * tfrm1.getX( ) ),
-        ( tfrm0 * tfrm1.getY( ) ),
-        ( tfrm0 * tfrm1.getZ( ) ),
-        ( tfrm0 * tfrm1.getW( ) )
-    );
-}
-
-inline const Vector3 rowMul( Vector3 vec, const Matrix3 & mat )
-{
-    vec_float4 tmp0, tmp1, mcol0, mcol1, mcol2, res;
-    vec_float4 xxxx, yyyy, zzzz;
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
-    tmp0 = spu_shuffle( mat.getCol0().get128(), mat.getCol2().get128(), _VECTORMATH_SHUF_XAYB );
-    tmp1 = spu_shuffle( mat.getCol0().get128(), mat.getCol2().get128(), _VECTORMATH_SHUF_ZCWD );
-    xxxx = spu_shuffle( vec.get128(), vec.get128(), shuffle_xxxx );
-    mcol0 = spu_shuffle( tmp0, mat.getCol1().get128(), _VECTORMATH_SHUF_XAYB );
-    mcol1 = spu_shuffle( tmp0, mat.getCol1().get128(), _VECTORMATH_SHUF_ZBW0 );
-    mcol2 = spu_shuffle( tmp1, mat.getCol1().get128(), _VECTORMATH_SHUF_XCY0 );
-    yyyy = spu_shuffle( vec.get128(), vec.get128(), shuffle_yyyy );
-    res = spu_mul( mcol0, xxxx );
-    zzzz = spu_shuffle( vec.get128(), vec.get128(), shuffle_zzzz );
-    res = spu_madd( mcol1, yyyy, res );
-    res = spu_madd( mcol2, zzzz, res );
-    return Vector3( res );
-}
-
-inline const Matrix3 crossMatrix( Vector3 vec )
-{
-    vec_float4 neg, res0, res1, res2;
-    neg = negatef4( vec.get128() );
-    res0 = spu_shuffle( vec.get128(), neg, _VECTORMATH_SHUF_0ZB0 );
-    res1 = spu_shuffle( vec.get128(), neg, _VECTORMATH_SHUF_C0X0 );
-    res2 = spu_shuffle( vec.get128(), neg, _VECTORMATH_SHUF_YA00 );
-    return Matrix3(
-        Vector3( res0 ),
-        Vector3( res1 ),
-        Vector3( res2 )
-    );
-}
-
-inline const Matrix3 crossMatrixMul( Vector3 vec, const Matrix3 & mat )
-{
-    return Matrix3( cross( vec, mat.getCol0() ), cross( vec, mat.getCol1() ), cross( vec, mat.getCol2() ) );
-}
-
-} // namespace Aos
-} // namespace Vectormath
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_MAT_AOS_CPP_H
+#define _VECTORMATH_MAT_AOS_CPP_H
+
+namespace Vectormath {
+namespace Aos {
+
+//-----------------------------------------------------------------------------
+// Constants
+// Shuffle pattern names: letters x,y,z,w select words of the first operand, a,b,c,d select words of the second operand.
+
+#define _VECTORMATH_SHUF_XAYB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_B })
+#define _VECTORMATH_SHUF_ZCWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_D })
+#define _VECTORMATH_SHUF_ZBW0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_0 })
+#define _VECTORMATH_SHUF_XCY0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_0 })
+#define _VECTORMATH_SHUF_XYAB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B })
+#define _VECTORMATH_SHUF_ZWCD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_D })
+#define _VECTORMATH_SHUF_0ZB0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_0 })     
+#define _VECTORMATH_SHUF_C0X0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_0 })
+#define _VECTORMATH_SHUF_YA00 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_0 })
+#define _VECTORMATH_SHUF_XAZC ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_C })
+#define _VECTORMATH_SHUF_YXWZ ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_Z })
+#define _VECTORMATH_SHUF_YBWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_D })
+#define _VECTORMATH_SHUF_XYCX ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_X })
+#define _VECTORMATH_SHUF_YCXY ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y })
+#define _VECTORMATH_SHUF_CXYC ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_C })
+#define _VECTORMATH_SHUF_ZAY0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_0 })
+#define _VECTORMATH_SHUF_BZX0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_0 })
+#define _VECTORMATH_SHUF_0ZYA ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_A })
+#define _VECTORMATH_SHUF_Z0XB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_B })
+#define _VECTORMATH_SHUF_YX0C ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_0, _VECTORMATH_SHUF_C })
+#define _VECTORMATH_SHUF_CZD0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_D, _VECTORMATH_SHUF_0 })
+#define _VECTORMATH_SHUF_BBY0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_0 })
+#define _VECTORMATH_PI_OVER_2 1.570796327f // pi/2 as a single-precision literal
+
+//-----------------------------------------------------------------------------
+// Definitions
+
+inline Matrix3::Matrix3( const Matrix3 & mat ) // copy constructor: duplicates the three columns of mat
+{
+    mCol0 = mat.mCol0;
+    mCol1 = mat.mCol1;
+    mCol2 = mat.mCol2;
+}
+
+inline Matrix3::Matrix3( float scalar ) // builds every column from the same scalar via Vector3( scalar )
+{
+    mCol0 = Vector3( scalar );
+    mCol1 = Vector3( scalar );
+    mCol2 = Vector3( scalar );
+}
+
+inline Matrix3::Matrix3( Quat unitQuat ) // builds the matrix columns from a quaternion; the input is used as-is (no normalization happens here)
+{
+    vec_float4 xyzw_2, wwww, yzxw, zxyw, yzxw_2, zxyw_2;
+    vec_float4 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
+    vec_uchar16 shuffle_wwww = (vec_uchar16)spu_splats((int)0x0c0d0e0f); // byte indices 12..15 in every word: replicates word 3 (w)
+    vec_uint4 select_x = (vec_uint4)spu_maskb(0xf000); // all-ones in word 0 only
+    vec_uint4 select_z = (vec_uint4)spu_maskb(0x00f0); // all-ones in word 2 only
+    xyzw_2 = spu_add( unitQuat.get128(), unitQuat.get128() ); // 2*q, per element
+    wwww = spu_shuffle( unitQuat.get128(), unitQuat.get128(), shuffle_wwww ); // (w,w,w,w)
+    yzxw = spu_shuffle( unitQuat.get128(), unitQuat.get128(), _VECTORMATH_SHUF_YZXW ); // presumably (y,z,x,w); pattern is defined outside this file - TODO confirm
+    zxyw = spu_shuffle( unitQuat.get128(), unitQuat.get128(), _VECTORMATH_SHUF_ZXYW ); // presumably (z,x,y,w); same caveat
+    yzxw_2 = spu_shuffle( xyzw_2, xyzw_2, _VECTORMATH_SHUF_YZXW ); // same permutation applied to 2*q
+    zxyw_2 = spu_shuffle( xyzw_2, xyzw_2, _VECTORMATH_SHUF_ZXYW ); // same permutation applied to 2*q
+    tmp0 = spu_mul( yzxw_2, wwww ); // tmp0 = yzxw_2 * w
+    tmp1 = spu_nmsub( yzxw, yzxw_2, spu_splats(1.0f) ); // tmp1 = 1 - yzxw*yzxw_2
+    tmp2 = spu_mul( yzxw, xyzw_2 ); // tmp2 = yzxw * 2q
+    tmp0 = spu_madd( zxyw, xyzw_2, tmp0 ); // tmp0 += zxyw * 2q
+    tmp1 = spu_nmsub( zxyw, zxyw_2, tmp1 ); // tmp1 -= zxyw*zxyw_2; these lanes end up on the matrix diagonal below
+    tmp2 = spu_nmsub( zxyw_2, wwww, tmp2 ); // tmp2 -= zxyw_2 * w
+    tmp3 = spu_sel( tmp0, tmp1, select_x ); // word 0 from tmp1, remaining words from tmp0
+    tmp4 = spu_sel( tmp1, tmp2, select_x ); // word 0 from tmp2, remaining words from tmp1
+    tmp5 = spu_sel( tmp2, tmp0, select_x ); // word 0 from tmp0, remaining words from tmp2
+    mCol0 = Vector3( spu_sel( tmp3, tmp2, select_z ) ); // words 0,1,2 = tmp1, tmp0, tmp2
+    mCol1 = Vector3( spu_sel( tmp4, tmp0, select_z ) ); // words 0,1,2 = tmp2, tmp1, tmp0
+    mCol2 = Vector3( spu_sel( tmp5, tmp1, select_z ) ); // words 0,1,2 = tmp0, tmp2, tmp1
+}
+
+// Builds a matrix from its three columns.
+inline Matrix3::Matrix3( Vector3 _col0, Vector3 _col1, Vector3 _col2 )
+    : mCol0( _col0 ),
+      mCol1( _col1 ),
+      mCol2( _col2 )
+{
+}
+
+// Replaces column 0; returns *this so calls can be chained.
+inline Matrix3 & Matrix3::setCol0( Vector3 _col0 )
+{
+    this->mCol0 = _col0;
+    return *this;
+}
+
+// Replaces column 1; returns *this so calls can be chained.
+inline Matrix3 & Matrix3::setCol1( Vector3 _col1 )
+{
+    this->mCol1 = _col1;
+    return *this;
+}
+
+// Replaces column 2; returns *this so calls can be chained.
+inline Matrix3 & Matrix3::setCol2( Vector3 _col2 )
+{
+    this->mCol2 = _col2;
+    return *this;
+}
+
+// Replaces the column at index col, addressing the columns as an array that starts at mCol0.
+// The index is not range-checked.
+inline Matrix3 & Matrix3::setCol( int col, Vector3 vec )
+{
+    (&mCol0)[col] = vec;
+    return *this;
+}
+
+// Writes vec across row `row`: element i of vec goes into column i at that row.
+inline Matrix3 & Matrix3::setRow( int row, Vector3 vec )
+{
+    const float forCol0 = vec.getElem( 0 );
+    const float forCol1 = vec.getElem( 1 );
+    const float forCol2 = vec.getElem( 2 );
+    mCol0.setElem( row, forCol0 );
+    mCol1.setElem( row, forCol1 );
+    mCol2.setElem( row, forCol2 );
+    return *this;
+}
+
+// Stores val at ( col, row ) by going through the non-const column accessor.
+inline Matrix3 & Matrix3::setElem( int col, int row, float val )
+{
+    Vector3 & column = (*this)[col];
+    column.setElem( row, val );
+    return *this;
+}
+
+// Reads the element at ( col, row ).
+inline float Matrix3::getElem( int col, int row ) const
+{
+    const Vector3 column = this->getCol( col );
+    return column.getElem( row );
+}
+
+// Returns a copy of column 0.
+inline const Vector3 Matrix3::getCol0( ) const
+{
+    return this->mCol0;
+}
+
+// Returns a copy of column 1.
+inline const Vector3 Matrix3::getCol1( ) const
+{
+    return this->mCol1;
+}
+
+// Returns a copy of column 2.
+inline const Vector3 Matrix3::getCol2( ) const
+{
+    return this->mCol2;
+}
+
+// Returns a copy of the column at index col (not range-checked).
+inline const Vector3 Matrix3::getCol( int col ) const
+{
+    return (&mCol0)[col];
+}
+
+// Gathers row `row` by reading that element out of each column.
+inline const Vector3 Matrix3::getRow( int row ) const
+{
+    const float fromCol0 = mCol0.getElem( row );
+    const float fromCol1 = mCol1.getElem( row );
+    const float fromCol2 = mCol2.getElem( row );
+    return Vector3( fromCol0, fromCol1, fromCol2 );
+}
+
+// Mutable access to the column at index col (not range-checked).
+inline Vector3 & Matrix3::operator []( int col )
+{
+    return (&mCol0)[col];
+}
+
+// Read-only access: returns a copy of the column at index col (not range-checked).
+inline const Vector3 Matrix3::operator []( int col ) const
+{
+    return (&mCol0)[col];
+}
+
+// Column-by-column assignment from mat.
+inline Matrix3 & Matrix3::operator =( const Matrix3 & mat )
+{
+    this->mCol0 = mat.mCol0;
+    this->mCol1 = mat.mCol1;
+    this->mCol2 = mat.mCol2;
+    return *this;
+}
+
+// Transpose of a 3x3 matrix, done with two rounds of word interleaving.
+inline const Matrix3 transpose( const Matrix3 & mat )
+{
+    const vec_float4 c0 = mat.getCol0().get128();
+    const vec_float4 c1 = mat.getCol1().get128();
+    const vec_float4 c2 = mat.getCol2().get128();
+    // Round 1: interleave column 0 with column 2.
+    const vec_float4 lo02 = spu_shuffle( c0, c2, _VECTORMATH_SHUF_XAYB );   // ( c0.x, c2.x, c0.y, c2.y )
+    const vec_float4 hi02 = spu_shuffle( c0, c2, _VECTORMATH_SHUF_ZCWD );   // ( c0.z, c2.z, c0.w, c2.w )
+    // Round 2: slot the matching element of column 1 into the middle of each row.
+    const vec_float4 row0 = spu_shuffle( lo02, c1, _VECTORMATH_SHUF_XAYB );
+    const vec_float4 row1 = spu_shuffle( lo02, c1, _VECTORMATH_SHUF_ZBW0 );
+    const vec_float4 row2 = spu_shuffle( hi02, c1, _VECTORMATH_SHUF_XCY0 );
+    return Matrix3( Vector3( row0 ), Vector3( row1 ), Vector3( row2 ) );
+}
+
+// Inverse of a 3x3 matrix: the cross products of the column pairs are transposed into the
+// columns of the result and scaled by recipf4() of the determinant.
+// The determinant is not tested against zero.
+inline const Matrix3 inverse( const Matrix3 & mat )
+{
+    const vec_float4 c0 = mat.getCol0().get128();
+    const vec_float4 c1 = mat.getCol1().get128();
+    const vec_float4 c2 = mat.getCol2().get128();
+    const vec_float4 c0xc1 = _vmathVfCross( c0, c1 );
+    const vec_float4 c1xc2 = _vmathVfCross( c1, c2 );
+    const vec_float4 c2xc0 = _vmathVfCross( c2, c0 );
+    // det = ( c0 x c1 ) . c2, with word 0 then copied into every word.
+    vec_float4 det = _vmathVfDot3( c0xc1, c2 );
+    det = spu_shuffle( det, det, (vec_uchar16)spu_splats(0x00010203) );
+    const vec_float4 rcpDet = recipf4( det );
+    // Transpose ( c1xc2, c2xc0, c0xc1 ) with the same interleave scheme as transpose().
+    const vec_float4 lo = spu_shuffle( c1xc2, c0xc1, _VECTORMATH_SHUF_XAYB );
+    const vec_float4 hi = spu_shuffle( c1xc2, c0xc1, _VECTORMATH_SHUF_ZCWD );
+    const vec_float4 adj0 = spu_shuffle( lo, c2xc0, _VECTORMATH_SHUF_XAYB );
+    const vec_float4 adj1 = spu_shuffle( lo, c2xc0, _VECTORMATH_SHUF_ZBW0 );
+    const vec_float4 adj2 = spu_shuffle( hi, c2xc0, _VECTORMATH_SHUF_XCY0 );
+    return Matrix3(
+        Vector3( spu_mul( adj0, rcpDet ) ),
+        Vector3( spu_mul( adj1, rcpDet ) ),
+        Vector3( spu_mul( adj2, rcpDet ) )
+    );
+}
+
+// Determinant as the scalar triple product col2 . ( col0 x col1 ).
+inline float determinant( const Matrix3 & mat )
+{
+    const Vector3 c0xc1 = cross( mat.getCol0(), mat.getCol1() );
+    return dot( mat.getCol2(), c0xc1 );
+}
+
+// Matrix sum, computed column by column.
+inline const Matrix3 Matrix3::operator +( const Matrix3 & mat ) const
+{
+    const Vector3 sum0 = mCol0 + mat.mCol0;
+    const Vector3 sum1 = mCol1 + mat.mCol1;
+    const Vector3 sum2 = mCol2 + mat.mCol2;
+    return Matrix3( sum0, sum1, sum2 );
+}
+
+// Matrix difference, computed column by column.
+inline const Matrix3 Matrix3::operator -( const Matrix3 & mat ) const
+{
+    const Vector3 diff0 = mCol0 - mat.mCol0;
+    const Vector3 diff1 = mCol1 - mat.mCol1;
+    const Vector3 diff2 = mCol2 - mat.mCol2;
+    return Matrix3( diff0, diff1, diff2 );
+}
+
+// In-place addition, expressed through operator + and operator =.
+inline Matrix3 & Matrix3::operator +=( const Matrix3 & mat )
+{
+    return ( *this = *this + mat );
+}
+
+// In-place subtraction, expressed through operator - and operator =.
+inline Matrix3 & Matrix3::operator -=( const Matrix3 & mat )
+{
+    return ( *this = *this - mat );
+}
+
+// Negation: every column is negated.
+inline const Matrix3 Matrix3::operator -( ) const
+{
+    const Vector3 neg0 = -mCol0;
+    const Vector3 neg1 = -mCol1;
+    const Vector3 neg2 = -mCol2;
+    return Matrix3( neg0, neg1, neg2 );
+}
+
+// Applies the Vector3 absPerElem() to each column.
+inline const Matrix3 absPerElem( const Matrix3 & mat )
+{
+    const Vector3 abs0 = absPerElem( mat.getCol0() );
+    const Vector3 abs1 = absPerElem( mat.getCol1() );
+    const Vector3 abs2 = absPerElem( mat.getCol2() );
+    return Matrix3( abs0, abs1, abs2 );
+}
+
+// Scales every column by scalar.
+inline const Matrix3 Matrix3::operator *( float scalar ) const
+{
+    const Vector3 scaled0 = mCol0 * scalar;
+    const Vector3 scaled1 = mCol1 * scalar;
+    const Vector3 scaled2 = mCol2 * scalar;
+    return Matrix3( scaled0, scaled1, scaled2 );
+}
+
+// In-place scalar multiply, expressed through operator * and operator =.
+inline Matrix3 & Matrix3::operator *=( float scalar )
+{
+    return ( *this = *this * scalar );
+}
+
+// scalar * matrix; forwards to Matrix3::operator *( float ).
+inline const Matrix3 operator *( float scalar, const Matrix3 & mat )
+{
+    const Matrix3 scaled = mat * scalar;
+    return scaled;
+}
+
+// Matrix * vector: col0 * vec.x + col1 * vec.y + col2 * vec.z, accumulated in that order.
+inline const Vector3 Matrix3::operator *( Vector3 vec ) const
+{
+    const vec_float4 v = vec.get128();
+    // Broadcast each component of vec across a whole register.
+    const vec_float4 vx = spu_shuffle( v, v, (vec_uchar16)spu_splats((int)0x00010203) );
+    const vec_float4 vy = spu_shuffle( v, v, (vec_uchar16)spu_splats((int)0x04050607) );
+    const vec_float4 vz = spu_shuffle( v, v, (vec_uchar16)spu_splats((int)0x08090a0b) );
+    vec_float4 acc = spu_mul( mCol0.get128(), vx );
+    acc = spu_madd( mCol1.get128(), vy, acc );
+    acc = spu_madd( mCol2.get128(), vz, acc );
+    return Vector3( acc );
+}
+
+// Matrix product: column i of the result is *this applied to column i of mat.
+inline const Matrix3 Matrix3::operator *( const Matrix3 & mat ) const
+{
+    const Matrix3 & lhs = *this;
+    const Vector3 prod0 = lhs * mat.mCol0;
+    const Vector3 prod1 = lhs * mat.mCol1;
+    const Vector3 prod2 = lhs * mat.mCol2;
+    return Matrix3( prod0, prod1, prod2 );
+}
+
+// In-place right-multiplication by mat: *this becomes ( *this ) * mat.
+inline Matrix3 & Matrix3::operator *=( const Matrix3 & mat )
+{
+    return ( *this = *this * mat );
+}
+
+// Applies the Vector3 mulPerElem() to each pair of matching columns.
+inline const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 )
+{
+    const Vector3 prod0 = mulPerElem( mat0.getCol0(), mat1.getCol0() );
+    const Vector3 prod1 = mulPerElem( mat0.getCol1(), mat1.getCol1() );
+    const Vector3 prod2 = mulPerElem( mat0.getCol2(), mat1.getCol2() );
+    return Matrix3( prod0, prod1, prod2 );
+}
+
+// Matrix whose columns are Vector3::xAxis(), yAxis() and zAxis().
+inline const Matrix3 Matrix3::identity( )
+{
+    const Vector3 axisX = Vector3::xAxis( );
+    const Vector3 axisY = Vector3::yAxis( );
+    const Vector3 axisZ = Vector3::zAxis( );
+    return Matrix3( axisX, axisY, axisZ );
+}
+
+// Rotation about the x axis by `radians`.
+// Columns: xAxis, ( 0, cos, sin ), ( 0, -sin, cos ).
+inline const Matrix3 Matrix3::rotationX( float radians )
+{
+    const vec_uint4 maskY = (vec_uint4)spu_maskb(0x0f00);
+    const vec_uint4 maskZ = (vec_uint4)spu_maskb(0x00f0);
+    const vec_float4 zeros = spu_splats(0.0f);
+    vec_float4 sinA, cosA;
+    sincosf4( spu_splats(radians), &sinA, &cosA );
+    vec_float4 col1 = spu_sel( zeros, cosA, maskY );
+    col1 = spu_sel( col1, sinA, maskZ );
+    vec_float4 col2 = spu_sel( zeros, negatef4(sinA), maskY );
+    col2 = spu_sel( col2, cosA, maskZ );
+    return Matrix3( Vector3::xAxis( ), Vector3( col1 ), Vector3( col2 ) );
+}
+
+// Rotation about the y axis by `radians`.
+// Columns: ( cos, 0, -sin ), yAxis, ( sin, 0, cos ).
+inline const Matrix3 Matrix3::rotationY( float radians )
+{
+    const vec_uint4 maskX = (vec_uint4)spu_maskb(0xf000);
+    const vec_uint4 maskZ = (vec_uint4)spu_maskb(0x00f0);
+    const vec_float4 zeros = spu_splats(0.0f);
+    vec_float4 sinA, cosA;
+    sincosf4( spu_splats(radians), &sinA, &cosA );
+    vec_float4 col0 = spu_sel( zeros, cosA, maskX );
+    col0 = spu_sel( col0, negatef4(sinA), maskZ );
+    vec_float4 col2 = spu_sel( zeros, sinA, maskX );
+    col2 = spu_sel( col2, cosA, maskZ );
+    return Matrix3( Vector3( col0 ), Vector3::yAxis( ), Vector3( col2 ) );
+}
+
+// Rotation about the z axis by `radians`.
+// Columns: ( cos, sin, 0 ), ( -sin, cos, 0 ), zAxis.
+inline const Matrix3 Matrix3::rotationZ( float radians )
+{
+    const vec_uint4 maskX = (vec_uint4)spu_maskb(0xf000);
+    const vec_uint4 maskY = (vec_uint4)spu_maskb(0x0f00);
+    const vec_float4 zeros = spu_splats(0.0f);
+    vec_float4 sinA, cosA;
+    sincosf4( spu_splats(radians), &sinA, &cosA );
+    vec_float4 col0 = spu_sel( zeros, cosA, maskX );
+    col0 = spu_sel( col0, sinA, maskY );
+    vec_float4 col1 = spu_sel( zeros, negatef4(sinA), maskX );
+    col1 = spu_sel( col1, cosA, maskY );
+    return Matrix3( Vector3( col0 ), Vector3( col1 ), Vector3::zAxis( ) );
+}
+
+// Builds the combined rotation from the three angles in radiansXYZ = ( rx, ry, rz ).
+// With s* / c* the sine / cosine of each angle, the columns produced below are
+//   col0 = ( cZ*cY,               sZ*cY,               -sY   )
+//   col1 = ( cZ*sY*sX - sZ*cX,    sZ*sY*sX + cZ*cX,    cY*sX )
+//   col2 = ( cZ*sY*cX + sZ*sX,    sZ*sY*cX - cZ*sX,    cY*cX )
+// i.e. the product rotationZ( rz ) * rotationY( ry ) * rotationX( rx ).
+inline const Matrix3 Matrix3::rotationZYX( Vector3 radiansXYZ )
+{
+    vec_float4 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
+    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
+    angles = radiansXYZ.get128();
+    angles = spu_insert( 0.0f, angles, 3 );  // zero word 3 so sin = 0 and cos = 1 in that lane
+    sincosf4( angles, &s, &c );
+    negS = negatef4( s );
+    Z0 = spu_shuffle( s, c, _VECTORMATH_SHUF_CZD0 );       // (  cZ, sZ, 1 )
+    Z1 = spu_shuffle( c, negS, _VECTORMATH_SHUF_CZD0 );    // ( -sZ, cZ, 0 )
+    Y0 = spu_shuffle( negS, c, _VECTORMATH_SHUF_BBY0 );    // (  cY, cY, -sY )
+    Y1 = spu_shuffle( c, s, _VECTORMATH_SHUF_BBY0 );       // (  sY, sY,  cY )
+    X0 = spu_shuffle( s, s, shuffle_xxxx );                // sX in every word
+    X1 = spu_shuffle( c, c, shuffle_xxxx );                // cX in every word
+    tmp = spu_mul( Z0, Y1 );                               // ( cZ*sY, sZ*sY, cY )
+    return Matrix3(
+        Vector3( spu_mul( Z0, Y0 ) ),
+        Vector3( spu_madd( Z1, X1, spu_mul( tmp, X0 ) ) ),
+        Vector3( spu_nmsub( Z1, X0, spu_mul( tmp, X1 ) ) )
+    );
+}
+
+// Rotation by `radians` about the axis unitVec = ( x, y, z ).
+// Each column is axis * axis[i] * ( 1 - cos ) plus a vector holding cos on the diagonal and
+// +/- axis * sin terms off the diagonal.
+// NOTE(review): the parameter name implies a normalized axis; nothing here checks or enforces it.
+// NOTE(review): the lane layouts quoted for the three shuffles follow the macro names
+// (_0ZB0, _C0X0, _YA00); their definitions are outside this excerpt -- confirm.
+inline const Matrix3 Matrix3::rotation( float radians, Vector3 unitVec )
+{
+    vec_float4 axis, s, c, oneMinusC, axisS, negAxisS, xxxx, yyyy, zzzz, tmp0, tmp1, tmp2;
+    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
+    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
+    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
+    axis = unitVec.get128();
+    sincosf4( spu_splats( radians ), &s, &c );
+    xxxx = spu_shuffle( axis, axis, shuffle_xxxx );
+    yyyy = spu_shuffle( axis, axis, shuffle_yyyy );
+    zzzz = spu_shuffle( axis, axis, shuffle_zzzz );
+    oneMinusC = spu_sub( spu_splats(1.0f), c );
+    axisS = spu_mul( axis, s );
+    negAxisS = negatef4( axisS );
+    tmp0 = spu_shuffle( axisS, negAxisS, _VECTORMATH_SHUF_0ZB0 );  // (  0,    z*s, -y*s )
+    tmp1 = spu_shuffle( axisS, negAxisS, _VECTORMATH_SHUF_C0X0 );  // ( -z*s,  0,    x*s )
+    tmp2 = spu_shuffle( axisS, negAxisS, _VECTORMATH_SHUF_YA00 );  // (  y*s, -x*s,  0   )
+    // Put cos on the diagonal element of each column.
+    tmp0 = spu_sel( tmp0, c, (vec_uint4)spu_maskb(0xf000) );
+    tmp1 = spu_sel( tmp1, c, (vec_uint4)spu_maskb(0x0f00) );
+    tmp2 = spu_sel( tmp2, c, (vec_uint4)spu_maskb(0x00f0) );
+    return Matrix3(
+        Vector3( spu_madd( spu_mul( axis, xxxx ), oneMinusC, tmp0 ) ),
+        Vector3( spu_madd( spu_mul( axis, yyyy ), oneMinusC, tmp1 ) ),
+        Vector3( spu_madd( spu_mul( axis, zzzz ), oneMinusC, tmp2 ) )
+    );
+}
+
+// Named-constructor form of Matrix3( Quat ).
+inline const Matrix3 Matrix3::rotation( Quat unitQuat )
+{
+    const Matrix3 rot( unitQuat );
+    return rot;
+}
+
+// Diagonal scale matrix: column i keeps only component i of scaleVec, the rest is zero.
+inline const Matrix3 Matrix3::scale( Vector3 scaleVec )
+{
+    const vec_float4 zeros = spu_splats(0.0f);
+    const vec_float4 factors = scaleVec.get128();
+    const vec_float4 col0 = spu_sel( zeros, factors, (vec_uint4)spu_maskb(0xf000) );
+    const vec_float4 col1 = spu_sel( zeros, factors, (vec_uint4)spu_maskb(0x0f00) );
+    const vec_float4 col2 = spu_sel( zeros, factors, (vec_uint4)spu_maskb(0x00f0) );
+    return Matrix3( Vector3( col0 ), Vector3( col1 ), Vector3( col2 ) );
+}
+
+// Scales column 0 by scaleVec.x, column 1 by scaleVec.y and column 2 by scaleVec.z.
+inline const Matrix3 appendScale( const Matrix3 & mat, Vector3 scaleVec )
+{
+    const Vector3 scaled0 = mat.getCol0() * scaleVec.getX( );
+    const Vector3 scaled1 = mat.getCol1() * scaleVec.getY( );
+    const Vector3 scaled2 = mat.getCol2() * scaleVec.getZ( );
+    return Matrix3( scaled0, scaled1, scaled2 );
+}
+
+// Multiplies every column element-wise by scaleVec, i.e. scales the rows of mat.
+inline const Matrix3 prependScale( Vector3 scaleVec, const Matrix3 & mat )
+{
+    const Vector3 scaled0 = mulPerElem( mat.getCol0(), scaleVec );
+    const Vector3 scaled1 = mulPerElem( mat.getCol1(), scaleVec );
+    const Vector3 scaled2 = mulPerElem( mat.getCol2(), scaleVec );
+    return Matrix3( scaled0, scaled1, scaled2 );
+}
+
+// Applies the Vector3 select() to each pair of matching columns with the same select1 flag.
+inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, bool select1 )
+{
+    const Vector3 picked0 = select( mat0.getCol0(), mat1.getCol0(), select1 );
+    const Vector3 picked1 = select( mat0.getCol1(), mat1.getCol1(), select1 );
+    const Vector3 picked2 = select( mat0.getCol2(), mat1.getCol2(), select1 );
+    return Matrix3( picked0, picked1, picked2 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+// Debug helper: prints the matrix one row at a time, rows 0 through 2.
+inline void print( const Matrix3 & mat )
+{
+    for ( int row = 0; row < 3; ++row )
+    {
+        print( mat.getRow( row ) );
+    }
+}
+
+// Debug helper: prints "<name>:" on its own line, then the matrix rows.
+inline void print( const Matrix3 & mat, const char * name )
+{
+    printf( "%s:\n", name );
+    print( mat );
+}
+
+#endif
+
+// Copy constructor: duplicates the four columns of mat.
+inline Matrix4::Matrix4( const Matrix4 & mat )
+    : mCol0( mat.mCol0 ),
+      mCol1( mat.mCol1 ),
+      mCol2( mat.mCol2 ),
+      mCol3( mat.mCol3 )
+{
+}
+
+// Sets every column to Vector4( scalar ).
+inline Matrix4::Matrix4( float scalar )
+{
+    const Vector4 splat( scalar );
+    mCol0 = splat;
+    mCol1 = splat;
+    mCol2 = splat;
+    mCol3 = splat;
+}
+
+// Promotes a Transform3 to 4x4: columns 0..2 get w = 0 and column 3 gets w = 1.
+inline Matrix4::Matrix4( const Transform3 & mat )
+    : mCol0( mat.getCol0(), 0.0f ),
+      mCol1( mat.getCol1(), 0.0f ),
+      mCol2( mat.getCol2(), 0.0f ),
+      mCol3( mat.getCol3(), 1.0f )
+{
+}
+
+// Builds a matrix from its four columns.
+inline Matrix4::Matrix4( Vector4 _col0, Vector4 _col1, Vector4 _col2, Vector4 _col3 )
+    : mCol0( _col0 ),
+      mCol1( _col1 ),
+      mCol2( _col2 ),
+      mCol3( _col3 )
+{
+}
+
+// Combines a 3x3 block and a translation: the 3x3 columns get w = 0, the translation gets w = 1.
+inline Matrix4::Matrix4( const Matrix3 & mat, Vector3 translateVec )
+    : mCol0( mat.getCol0(), 0.0f ),
+      mCol1( mat.getCol1(), 0.0f ),
+      mCol2( mat.getCol2(), 0.0f ),
+      mCol3( translateVec, 1.0f )
+{
+}
+
+// Combines a quaternion rotation and a translation: the quaternion is converted through
+// Matrix3( Quat ), its columns get w = 0, and the translation column gets w = 1.
+inline Matrix4::Matrix4( Quat unitQuat, Vector3 translateVec )
+{
+    const Matrix3 rot( unitQuat );
+    mCol0 = Vector4( rot.getCol0(), 0.0f );
+    mCol1 = Vector4( rot.getCol1(), 0.0f );
+    mCol2 = Vector4( rot.getCol2(), 0.0f );
+    mCol3 = Vector4( translateVec, 1.0f );
+}
+
+// Replaces column 0; returns *this so calls can be chained.
+inline Matrix4 & Matrix4::setCol0( Vector4 _col0 )
+{
+    this->mCol0 = _col0;
+    return *this;
+}
+
+// Replaces column 1; returns *this so calls can be chained.
+inline Matrix4 & Matrix4::setCol1( Vector4 _col1 )
+{
+    this->mCol1 = _col1;
+    return *this;
+}
+
+// Replaces column 2; returns *this so calls can be chained.
+inline Matrix4 & Matrix4::setCol2( Vector4 _col2 )
+{
+    this->mCol2 = _col2;
+    return *this;
+}
+
+// Replaces column 3; returns *this so calls can be chained.
+inline Matrix4 & Matrix4::setCol3( Vector4 _col3 )
+{
+    this->mCol3 = _col3;
+    return *this;
+}
+
+// Replaces the column at index col, addressing the columns as an array that starts at mCol0.
+// The index is not range-checked.
+inline Matrix4 & Matrix4::setCol( int col, Vector4 vec )
+{
+    (&mCol0)[col] = vec;
+    return *this;
+}
+
+// Writes vec across row `row`: element i of vec goes into column i at that row.
+inline Matrix4 & Matrix4::setRow( int row, Vector4 vec )
+{
+    const float forCol0 = vec.getElem( 0 );
+    const float forCol1 = vec.getElem( 1 );
+    const float forCol2 = vec.getElem( 2 );
+    const float forCol3 = vec.getElem( 3 );
+    mCol0.setElem( row, forCol0 );
+    mCol1.setElem( row, forCol1 );
+    mCol2.setElem( row, forCol2 );
+    mCol3.setElem( row, forCol3 );
+    return *this;
+}
+
+// Stores val at ( col, row ) by going through the non-const column accessor.
+inline Matrix4 & Matrix4::setElem( int col, int row, float val )
+{
+    Vector4 & column = (*this)[col];
+    column.setElem( row, val );
+    return *this;
+}
+
+// Reads the element at ( col, row ).
+inline float Matrix4::getElem( int col, int row ) const
+{
+    const Vector4 column = this->getCol( col );
+    return column.getElem( row );
+}
+
+// Returns a copy of column 0.
+inline const Vector4 Matrix4::getCol0( ) const
+{
+    return this->mCol0;
+}
+
+// Returns a copy of column 1.
+inline const Vector4 Matrix4::getCol1( ) const
+{
+    return this->mCol1;
+}
+
+// Returns a copy of column 2.
+inline const Vector4 Matrix4::getCol2( ) const
+{
+    return this->mCol2;
+}
+
+// Returns a copy of column 3.
+inline const Vector4 Matrix4::getCol3( ) const
+{
+    return this->mCol3;
+}
+
+// Returns a copy of the column at index col (not range-checked).
+inline const Vector4 Matrix4::getCol( int col ) const
+{
+    return (&mCol0)[col];
+}
+
+// Gathers row `row` by reading that element out of each column.
+inline const Vector4 Matrix4::getRow( int row ) const
+{
+    const float fromCol0 = mCol0.getElem( row );
+    const float fromCol1 = mCol1.getElem( row );
+    const float fromCol2 = mCol2.getElem( row );
+    const float fromCol3 = mCol3.getElem( row );
+    return Vector4( fromCol0, fromCol1, fromCol2, fromCol3 );
+}
+
+// Mutable access to the column at index col (not range-checked).
+inline Vector4 & Matrix4::operator []( int col )
+{
+    return (&mCol0)[col];
+}
+
+// Read-only access: returns a copy of the column at index col (not range-checked).
+inline const Vector4 Matrix4::operator []( int col ) const
+{
+    return (&mCol0)[col];
+}
+
+// Column-by-column assignment from mat.
+inline Matrix4 & Matrix4::operator =( const Matrix4 & mat )
+{
+    this->mCol0 = mat.mCol0;
+    this->mCol1 = mat.mCol1;
+    this->mCol2 = mat.mCol2;
+    this->mCol3 = mat.mCol3;
+    return *this;
+}
+
+// Transpose of a 4x4 matrix, done with two rounds of word interleaving.
+inline const Matrix4 transpose( const Matrix4 & mat )
+{
+    const vec_float4 c0 = mat.getCol0().get128();
+    const vec_float4 c1 = mat.getCol1().get128();
+    const vec_float4 c2 = mat.getCol2().get128();
+    const vec_float4 c3 = mat.getCol3().get128();
+    // Round 1: interleave columns 0/2 and columns 1/3.
+    const vec_float4 lo02 = spu_shuffle( c0, c2, _VECTORMATH_SHUF_XAYB );   // ( c0.x, c2.x, c0.y, c2.y )
+    const vec_float4 lo13 = spu_shuffle( c1, c3, _VECTORMATH_SHUF_XAYB );   // ( c1.x, c3.x, c1.y, c3.y )
+    const vec_float4 hi02 = spu_shuffle( c0, c2, _VECTORMATH_SHUF_ZCWD );   // ( c0.z, c2.z, c0.w, c2.w )
+    const vec_float4 hi13 = spu_shuffle( c1, c3, _VECTORMATH_SHUF_ZCWD );   // ( c1.z, c3.z, c1.w, c3.w )
+    // Round 2: interleave the partial results to complete each row.
+    const vec_float4 row0 = spu_shuffle( lo02, lo13, _VECTORMATH_SHUF_XAYB );
+    const vec_float4 row1 = spu_shuffle( lo02, lo13, _VECTORMATH_SHUF_ZCWD );
+    const vec_float4 row2 = spu_shuffle( hi02, hi13, _VECTORMATH_SHUF_XAYB );
+    const vec_float4 row3 = spu_shuffle( hi02, hi13, _VECTORMATH_SHUF_ZCWD );
+    return Matrix4( Vector4( row0 ), Vector4( row1 ), Vector4( row2 ), Vector4( row3 ) );
+}
+
+// General 4x4 inverse: builds the four cofactor vectors cof0..cof3, sums t0 * cof0 across the
+// register to obtain the determinant in every word, and scales the cofactors by recipf4( det ).
+// The determinant is not tested against zero.
+// The letter comments on each line name the matrix entries (A..P, as laid out in the block
+// comment below) held in words 0..3 of the value just computed.
+inline const Matrix4 inverse( const Matrix4 & mat )
+{
+    /* function implementation based on code from STIDC SDK:           */
+    /* --------------------------------------------------------------  */
+    /* PLEASE DO NOT MODIFY THIS SECTION                               */
+    /* This prolog section is automatically generated.                 */
+    /*                                                                 */
+    /* (C)Copyright                                                    */
+    /* Sony Computer Entertainment, Inc.,                              */
+    /* Toshiba Corporation,                                            */
+    /* International Business Machines Corporation,                    */
+    /* 2001,2002.                                                      */
+    /* S/T/I Confidential Information                                  */
+    /* --------------------------------------------------------------  */
+    vec_float4 in0, in1, in2, in3;
+    vec_float4 tmp0, tmp1, tmp2, tmp3;
+    vec_float4 cof0, cof1, cof2, cof3;
+    vec_float4 t0, t1, t2, t3;
+    vec_float4 t01, t02, t03, t12, t23;
+    vec_float4 t1r, t2r;
+    vec_float4 t01r, t02r, t03r, t12r, t23r;
+    vec_float4 t1r3, t1r3r;
+    vec_float4 det, det1, det2, det3, invdet;
+    in0 = mat.getCol0().get128();
+    in1 = mat.getCol1().get128();
+    in2 = mat.getCol2().get128();
+    in3 = mat.getCol3().get128();
+    /* Perform transform of the input matrix of the form:
+     *    A B C D
+     *    E F G H
+     *    I J K L
+     *    M N O P
+     *
+     * The pseudo transpose of the input matrix is trans:
+     *    A E I M
+     *    J N B F
+     *    C G K O
+     *    L P D H
+     */
+    tmp0 = spu_shuffle(in0, in1, _VECTORMATH_SHUF_XAZC);    /* A E C G */
+    tmp1 = spu_shuffle(in2, in3, _VECTORMATH_SHUF_XAZC);    /* I M K O */
+    tmp2 = spu_shuffle(in0, in1, _VECTORMATH_SHUF_YBWD);    /* B F D H */
+    tmp3 = spu_shuffle(in2, in3, _VECTORMATH_SHUF_YBWD);    /* J N L P */
+    t0 = spu_shuffle(tmp0, tmp1, _VECTORMATH_SHUF_XYAB);    /* A E I M */
+    t1 = spu_shuffle(tmp3, tmp2, _VECTORMATH_SHUF_XYAB);    /* J N B F */
+    t2 = spu_shuffle(tmp0, tmp1, _VECTORMATH_SHUF_ZWCD);    /* C G K O */
+    t3 = spu_shuffle(tmp3, tmp2, _VECTORMATH_SHUF_ZWCD);    /* L P D H */
+    /* Generate a cofactor matrix. The computed cofactors reside in
+     * cof0, cof1, cof2, cof3.
+     */
+    t23 = spu_mul(t2, t3);                        /* CL GP KD OH */
+    t23 = spu_shuffle(t23, t23, _VECTORMATH_SHUF_YXWZ);     /* GP CL OH KD */
+    cof0 = spu_mul(t1, t23);                      /* JGP NCL BOH FKD */
+    cof1 = spu_mul(t0, t23);                      /* AGP ECL IOH MKD */
+    t23r = spu_rlqwbyte(t23, 8);                  /* OH KD GP CL */
+    cof0 = spu_msub(t1, t23r, cof0);              /* JOH NKD BGP FCL  - cof0 */
+    cof1 = spu_msub(t0, t23r, cof1);              /* AOH EKD IGP MCL  - cof1 */
+    cof1 = spu_rlqwbyte(cof1, 8);                 /* IGP MCL AOH EKD - IOH MKD AGP ECL */
+
+    t12 = spu_mul(t1, t2);                        /* JC NG BK FO */
+    t12 = spu_shuffle(t12, t12, _VECTORMATH_SHUF_YXWZ);     /* NG JC FO BK */
+    cof0 = spu_madd(t3, t12, cof0);               /* LNG PJC DFO HBK + cof0 */
+    cof3 = spu_mul(t0, t12);                      /* ANG EJC IFO MBK */
+    t12r = spu_rlqwbyte(t12, 8);                  /* FO BK NG JC */
+    cof0 = spu_nmsub(t3, t12r, cof0);             /* cof0 - LFO PBK DNG HJC */
+    cof3 = spu_msub(t0, t12r, cof3);              /* AFO EBK ING MJC - cof3 */
+    cof3 = spu_rlqwbyte(cof3, 8);                 /* ING MJC AFO EBK - IFO MBK ANG EJC */
+    t1r = spu_rlqwbyte(t1, 8);                    /* B F J N */
+    t2r = spu_rlqwbyte(t2, 8);                    /* K O C G */
+    t1r3 = spu_mul(t1r, t3);                      /* BL FP JD NH */
+    t1r3 = spu_shuffle(t1r3, t1r3, _VECTORMATH_SHUF_YXWZ);  /* FP BL NH JD */
+    cof0 = spu_madd(t2r, t1r3, cof0);             /* KFP OBL CNH GJD + cof0 */
+    cof2 = spu_mul(t0, t1r3);                     /* AFP EBL INH MJD */
+    t1r3r = spu_rlqwbyte(t1r3, 8);                /* NH JD FP BL */
+    cof0 = spu_nmsub(t2r, t1r3r, cof0);           /* cof0 - KNH OJD CFP GBL */
+    cof2 = spu_msub(t0, t1r3r, cof2);             /* ANH EJD IFP MBL - cof2 */
+    cof2 = spu_rlqwbyte(cof2, 8);                 /* IFP MBL ANH EJD - INH MJD AFP EBL */
+    t01 = spu_mul(t0, t1);                                /* AJ EN IB MF */
+    t01 = spu_shuffle(t01, t01, _VECTORMATH_SHUF_YXWZ);     /* EN AJ MF IB */
+    cof2 = spu_madd(t3, t01, cof2);               /* LEN PAJ DMF HIB + cof2 */
+    cof3 = spu_msub(t2r, t01, cof3);              /* KEN OAJ CMF GIB - cof3 */
+    t01r = spu_rlqwbyte(t01, 8);                  /* MF IB EN AJ */
+    cof2 = spu_msub(t3, t01r, cof2);              /* LMF PIB DEN HAJ - cof2 */
+    cof3 = spu_nmsub(t2r, t01r, cof3);            /* cof3 - KMF OIB CEN GAJ */
+    t03 = spu_mul(t0, t3);                                /* AL EP ID MH */
+    t03 = spu_shuffle(t03, t03, _VECTORMATH_SHUF_YXWZ);     /* EP AL MH ID */
+    cof1 = spu_nmsub(t2r, t03, cof1);             /* cof1 - KEP OAL CMH GID */
+    cof2 = spu_madd(t1, t03, cof2);               /* JEP NAL BMH FID + cof2 */
+    t03r = spu_rlqwbyte(t03, 8);                  /* MH ID EP AL */
+    cof1 = spu_madd(t2r, t03r, cof1);             /* KMH OID CEP GAL + cof1 */
+    cof2 = spu_nmsub(t1, t03r, cof2);             /* cof2 - JMH NID BEP FAL */
+    t02 = spu_mul(t0, t2r);                       /* AK EO IC MG */
+    t02 = spu_shuffle(t02, t02, _VECTORMATH_SHUF_YXWZ);     /* EO AK MG IC */
+    cof1 = spu_madd(t3, t02, cof1);               /* LEO PAK DMG HIC + cof1 */
+    cof3 = spu_nmsub(t1, t02, cof3);              /* cof3 - JEO NAK BMG FIC */
+    t02r = spu_rlqwbyte(t02, 8);                  /* MG IC EO AK */
+    cof1 = spu_nmsub(t3, t02r, cof1);             /* cof1 - LMG PIC DEO HAK */
+    cof3 = spu_madd(t1, t02r, cof3);              /* JMG NIC BEO FAK + cof3 */
+    /* Compute the determinant of the matrix
+     *
+     * det = sum_across(t0 * cof0);
+     *
+     * We perform a sum across the entire vector so that
+     * we don't have to splat the result when multiplying the
+     * cofactors by the inverse of the determinant.
+     */
+    det  = spu_mul(t0, cof0);
+    det1 = spu_rlqwbyte(det, 4);
+    det2 = spu_rlqwbyte(det, 8);
+    det3 = spu_rlqwbyte(det, 12);
+    det  = spu_add(det, det1);
+    det2 = spu_add(det2, det3);
+    det  = spu_add(det, det2);
+    /* Compute the reciprocal of the determinant.
+     */
+    invdet = recipf4(det);
+    /* Multiply the cofactors by the reciprocal of the determinant.
+     */
+    return Matrix4(
+        Vector4( spu_mul(cof0, invdet) ),
+        Vector4( spu_mul(cof1, invdet) ),
+        Vector4( spu_mul(cof2, invdet) ),
+        Vector4( spu_mul(cof3, invdet) )
+    );
+}
+
+// Inverse for a matrix treated as affine: getXYZ() of each column is copied into a Transform3,
+// that transform is inverted, and the result is promoted back through Matrix4( Transform3 ).
+// The w component of every input column is ignored.
+inline const Matrix4 affineInverse( const Matrix4 & mat )
+{
+    const Vector3 axis0 = mat.getCol0().getXYZ( );
+    const Vector3 axis1 = mat.getCol1().getXYZ( );
+    const Vector3 axis2 = mat.getCol2().getXYZ( );
+    const Vector3 origin = mat.getCol3().getXYZ( );
+    Transform3 affine;
+    affine.setCol0( axis0 );
+    affine.setCol1( axis1 );
+    affine.setCol2( axis2 );
+    affine.setCol3( origin );
+    return Matrix4( inverse( affine ) );
+}
+
+// Same flow as affineInverse(), but delegates to the Transform3 orthoInverse(): getXYZ() of each
+// column is copied into a Transform3 and the result is promoted through Matrix4( Transform3 ).
+// The w component of every input column is ignored.
+inline const Matrix4 orthoInverse( const Matrix4 & mat )
+{
+    const Vector3 axis0 = mat.getCol0().getXYZ( );
+    const Vector3 axis1 = mat.getCol1().getXYZ( );
+    const Vector3 axis2 = mat.getCol2().getXYZ( );
+    const Vector3 origin = mat.getCol3().getXYZ( );
+    Transform3 affine;
+    affine.setCol0( axis0 );
+    affine.setCol1( axis1 );
+    affine.setCol2( axis2 );
+    affine.setCol3( origin );
+    return Matrix4( orthoInverse( affine ) );
+}
+
+// Determinant of a 4x4 matrix: computes the single cofactor vector cof0 with the same scheme as
+// inverse( const Matrix4 & ) and returns word 0 of the 4-component dot product of t0 and cof0.
+inline float determinant( const Matrix4 & mat )
+{
+    /* function implementation based on code from STIDC SDK:           */
+    /* --------------------------------------------------------------  */
+    /* PLEASE DO NOT MODIFY THIS SECTION                               */
+    /* This prolog section is automatically generated.                 */
+    /*                                                                 */
+    /* (C)Copyright                                                    */
+    /* Sony Computer Entertainment, Inc.,                              */
+    /* Toshiba Corporation,                                            */
+    /* International Business Machines Corporation,                    */
+    /* 2001,2002.                                                      */
+    /* S/T/I Confidential Information                                  */
+    /* --------------------------------------------------------------  */
+    vec_float4 in0, in1, in2, in3;
+    vec_float4 tmp0, tmp1, tmp2, tmp3;
+    vec_float4 cof0;
+    vec_float4 t0, t1, t2, t3;
+    vec_float4 t12, t23;
+    vec_float4 t1r, t2r;
+    vec_float4 t12r, t23r;
+    vec_float4 t1r3, t1r3r;
+    in0 = mat.getCol0().get128();
+    in1 = mat.getCol1().get128();
+    in2 = mat.getCol2().get128();
+    in3 = mat.getCol3().get128();
+    /* Perform transform of the input matrix of the form:
+     *    A B C D
+     *    E F G H
+     *    I J K L
+     *    M N O P
+     *
+     * The pseudo transpose of the input matrix is trans:
+     *    A E I M
+     *    J N B F
+     *    C G K O
+     *    L P D H
+     */
+    tmp0 = spu_shuffle(in0, in1, _VECTORMATH_SHUF_XAZC);    /* A E C G */
+    tmp1 = spu_shuffle(in2, in3, _VECTORMATH_SHUF_XAZC);    /* I M K O */
+    tmp2 = spu_shuffle(in0, in1, _VECTORMATH_SHUF_YBWD);    /* B F D H */
+    tmp3 = spu_shuffle(in2, in3, _VECTORMATH_SHUF_YBWD);    /* J N L P */
+    t0 = spu_shuffle(tmp0, tmp1, _VECTORMATH_SHUF_XYAB);    /* A E I M */
+    t1 = spu_shuffle(tmp3, tmp2, _VECTORMATH_SHUF_XYAB);    /* J N B F */
+    t2 = spu_shuffle(tmp0, tmp1, _VECTORMATH_SHUF_ZWCD);    /* C G K O */
+    t3 = spu_shuffle(tmp3, tmp2, _VECTORMATH_SHUF_ZWCD);    /* L P D H */
+    /* Generate the cofactors needed for the determinant. Only cof0 is
+     * computed in this function.
+     */
+    t23 = spu_mul(t2, t3);                        /* CL GP KD OH */
+    t23 = spu_shuffle(t23, t23, _VECTORMATH_SHUF_YXWZ);     /* GP CL OH KD */
+    cof0 = spu_mul(t1, t23);                      /* JGP NCL BOH FKD */
+    t23r = spu_rlqwbyte(t23, 8);                  /* OH KD GP CL */
+    cof0 = spu_msub(t1, t23r, cof0);              /* JOH NKD BGP FCL  - cof0 */
+
+    t12 = spu_mul(t1, t2);                        /* JC NG BK FO */
+    t12 = spu_shuffle(t12, t12, _VECTORMATH_SHUF_YXWZ);     /* NG JC FO BK */
+    cof0 = spu_madd(t3, t12, cof0);               /* LNG PJC DFO HBK + cof0 */
+    t12r = spu_rlqwbyte(t12, 8);                  /* FO BK NG JC */
+    cof0 = spu_nmsub(t3, t12r, cof0);             /* cof0 - LFO PBK DNG HJC */
+    t1r = spu_rlqwbyte(t1, 8);                    /* B F J N */
+    t2r = spu_rlqwbyte(t2, 8);                    /* K O C G */
+    t1r3 = spu_mul(t1r, t3);                      /* BL FP JD NH */
+    t1r3 = spu_shuffle(t1r3, t1r3, _VECTORMATH_SHUF_YXWZ);  /* FP BL NH JD */
+    cof0 = spu_madd(t2r, t1r3, cof0);             /* KFP OBL CNH GJD + cof0 */
+    t1r3r = spu_rlqwbyte(t1r3, 8);                /* NH JD FP BL */
+    cof0 = spu_nmsub(t2r, t1r3r, cof0);           /* cof0 - KNH OJD CFP GBL */
+    /* det = dot4( t0, cof0 ); word 0 of the result is returned as a scalar. */
+    return spu_extract( _vmathVfDot4(t0,cof0), 0 );
+}
+
+// Matrix sum, computed column by column.
+inline const Matrix4 Matrix4::operator +( const Matrix4 & mat ) const
+{
+    const Vector4 sum0 = mCol0 + mat.mCol0;
+    const Vector4 sum1 = mCol1 + mat.mCol1;
+    const Vector4 sum2 = mCol2 + mat.mCol2;
+    const Vector4 sum3 = mCol3 + mat.mCol3;
+    return Matrix4( sum0, sum1, sum2, sum3 );
+}
+
+// Matrix difference, computed column by column.
+inline const Matrix4 Matrix4::operator -( const Matrix4 & mat ) const
+{
+    const Vector4 diff0 = mCol0 - mat.mCol0;
+    const Vector4 diff1 = mCol1 - mat.mCol1;
+    const Vector4 diff2 = mCol2 - mat.mCol2;
+    const Vector4 diff3 = mCol3 - mat.mCol3;
+    return Matrix4( diff0, diff1, diff2, diff3 );
+}
+
+// In-place addition, expressed through operator + and operator =.
+inline Matrix4 & Matrix4::operator +=( const Matrix4 & mat )
+{
+    return ( *this = *this + mat );
+}
+
+// In-place subtraction, expressed through operator - and operator =.
+inline Matrix4 & Matrix4::operator -=( const Matrix4 & mat )
+{
+    return ( *this = *this - mat );
+}
+
+// Negation: every column is negated.
+inline const Matrix4 Matrix4::operator -( ) const
+{
+    const Vector4 neg0 = -mCol0;
+    const Vector4 neg1 = -mCol1;
+    const Vector4 neg2 = -mCol2;
+    const Vector4 neg3 = -mCol3;
+    return Matrix4( neg0, neg1, neg2, neg3 );
+}
+
+// Applies the Vector4 absPerElem() to each column.
+inline const Matrix4 absPerElem( const Matrix4 & mat )
+{
+    const Vector4 abs0 = absPerElem( mat.getCol0() );
+    const Vector4 abs1 = absPerElem( mat.getCol1() );
+    const Vector4 abs2 = absPerElem( mat.getCol2() );
+    const Vector4 abs3 = absPerElem( mat.getCol3() );
+    return Matrix4( abs0, abs1, abs2, abs3 );
+}
+
+// Scales every column by scalar.
+inline const Matrix4 Matrix4::operator *( float scalar ) const
+{
+    const Vector4 scaled0 = mCol0 * scalar;
+    const Vector4 scaled1 = mCol1 * scalar;
+    const Vector4 scaled2 = mCol2 * scalar;
+    const Vector4 scaled3 = mCol3 * scalar;
+    return Matrix4( scaled0, scaled1, scaled2, scaled3 );
+}
+
+// In-place scalar multiply, expressed through operator * and operator =.
+inline Matrix4 & Matrix4::operator *=( float scalar )
+{
+    return ( *this = *this * scalar );
+}
+
+// scalar * matrix; forwards to Matrix4::operator *( float ).
+inline const Matrix4 operator *( float scalar, const Matrix4 & mat )
+{
+    const Matrix4 scaled = mat * scalar;
+    return scaled;
+}
+
+// Matrix * 4-vector. Two partial sums are kept, ( col0*x + col2*z ) and ( col1*y + col3*w ),
+// and added at the end.
+inline const Vector4 Matrix4::operator *( Vector4 vec ) const
+{
+    const vec_float4 v = vec.get128();
+    // Broadcast each component of vec across a whole register.
+    const vec_float4 vx = spu_shuffle( v, v, (vec_uchar16)spu_splats((int)0x00010203) );
+    const vec_float4 vy = spu_shuffle( v, v, (vec_uchar16)spu_splats((int)0x04050607) );
+    const vec_float4 vz = spu_shuffle( v, v, (vec_uchar16)spu_splats((int)0x08090a0b) );
+    const vec_float4 vw = spu_shuffle( v, v, (vec_uchar16)spu_splats((int)0x0c0d0e0f) );
+    vec_float4 sumXZ = spu_mul( mCol0.get128(), vx );
+    vec_float4 sumYW = spu_mul( mCol1.get128(), vy );
+    sumXZ = spu_madd( mCol2.get128(), vz, sumXZ );
+    sumYW = spu_madd( mCol3.get128(), vw, sumYW );
+    return Vector4( spu_add( sumXZ, sumYW ) );
+}
+
+/* Matrix * 3-D vector: returns mCol0*x + mCol1*y + mCol2*z as a Vector4. */
+/* mCol3 does not contribute, i.e. vec is treated as having w = 0.        */
+inline const Vector4 Matrix4::operator *( Vector3 vec ) const
+{
+    vec_float4 res;
+    vec_float4 xxxx, yyyy, zzzz;
+    /* shuffle patterns that replicate one 32-bit word into all four slots */
+    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
+    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
+    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
+    /* broadcast x, y and z of vec */
+    xxxx = spu_shuffle( vec.get128(), vec.get128(), shuffle_xxxx );
+    yyyy = spu_shuffle( vec.get128(), vec.get128(), shuffle_yyyy );
+    zzzz = spu_shuffle( vec.get128(), vec.get128(), shuffle_zzzz );
+    /* accumulate the weighted columns with multiply-adds */
+    res = spu_mul( mCol0.get128(), xxxx );
+    res = spu_madd( mCol1.get128(), yyyy, res );
+    res = spu_madd( mCol2.get128(), zzzz, res );
+    return Vector4( res );
+}
+
+/* Matrix * 3-D point: returns mCol0*x + mCol1*y + mCol2*z + mCol3 as a Vector4. */
+/* mCol3 is added unscaled, i.e. pnt is treated as having w = 1.                 */
+inline const Vector4 Matrix4::operator *( Point3 pnt ) const
+{
+    vec_float4 tmp0, tmp1, res;
+    vec_float4 xxxx, yyyy, zzzz;
+    /* shuffle patterns that replicate one 32-bit word into all four slots */
+    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
+    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
+    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
+    /* broadcast x, y and z of pnt */
+    xxxx = spu_shuffle( pnt.get128(), pnt.get128(), shuffle_xxxx );
+    yyyy = spu_shuffle( pnt.get128(), pnt.get128(), shuffle_yyyy );
+    zzzz = spu_shuffle( pnt.get128(), pnt.get128(), shuffle_zzzz );
+    /* two independent partial sums: tmp0 = col0*x + col2*z, tmp1 = col1*y + col3 */
+    tmp0 = spu_mul( mCol0.get128(), xxxx );
+    tmp1 = spu_mul( mCol1.get128(), yyyy );
+    tmp0 = spu_madd( mCol2.get128(), zzzz, tmp0 );
+    tmp1 = spu_add( mCol3.get128(), tmp1 );
+    res = spu_add( tmp0, tmp1 );
+    return Vector4( res );
+}
+
+/* Matrix * matrix: each column of the product is this matrix applied */
+/* to the corresponding column of mat.                                */
+inline const Matrix4 Matrix4::operator *( const Matrix4 & mat ) const
+{
+    const Matrix4 & lhs = *this;
+    const Vector4 prodCol0 = lhs * mat.mCol0;
+    const Vector4 prodCol1 = lhs * mat.mCol1;
+    const Vector4 prodCol2 = lhs * mat.mCol2;
+    const Vector4 prodCol3 = lhs * mat.mCol3;
+    return Matrix4( prodCol0, prodCol1, prodCol2, prodCol3 );
+}
+
+/* Replaces this matrix with (this * mat) and returns a reference to this matrix. */
+inline Matrix4 & Matrix4::operator *=( const Matrix4 & mat )
+{
+    const Matrix4 product = *this * mat;
+    *this = product;
+    return *this;
+}
+
+/* Matrix * 3x4 transform: the first three transform columns are multiplied */
+/* as Vector3 values and the fourth column is multiplied as a Point3.       */
+inline const Matrix4 Matrix4::operator *( const Transform3 & tfrm ) const
+{
+    const Matrix4 & lhs = *this;
+    const Vector4 prodCol0 = lhs * tfrm.getCol0();
+    const Vector4 prodCol1 = lhs * tfrm.getCol1();
+    const Vector4 prodCol2 = lhs * tfrm.getCol2();
+    /* wrap the translation column in Point3 so the Point3 overload is selected */
+    const Vector4 prodCol3 = lhs * Point3( tfrm.getCol3() );
+    return Matrix4( prodCol0, prodCol1, prodCol2, prodCol3 );
+}
+
+/* Replaces this matrix with (this * tfrm) and returns a reference to this matrix. */
+inline Matrix4 & Matrix4::operator *=( const Transform3 & tfrm )
+{
+    const Matrix4 product = *this * tfrm;
+    *this = product;
+    return *this;
+}
+
+/* Applies the Vector4 mulPerElem to each pair of matching columns of */
+/* mat0 and mat1 and returns the resulting matrix.                    */
+inline const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 )
+{
+    const Vector4 prodCol0 = mulPerElem( mat0.getCol0(), mat1.getCol0() );
+    const Vector4 prodCol1 = mulPerElem( mat0.getCol1(), mat1.getCol1() );
+    const Vector4 prodCol2 = mulPerElem( mat0.getCol2(), mat1.getCol2() );
+    const Vector4 prodCol3 = mulPerElem( mat0.getCol3(), mat1.getCol3() );
+    return Matrix4( prodCol0, prodCol1, prodCol2, prodCol3 );
+}
+
+/* Returns the matrix whose columns are Vector4::xAxis, yAxis, zAxis and wAxis, in that order. */
+inline const Matrix4 Matrix4::identity( )
+{
+    const Vector4 unitX = Vector4::xAxis( );
+    const Vector4 unitY = Vector4::yAxis( );
+    const Vector4 unitZ = Vector4::zAxis( );
+    const Vector4 unitW = Vector4::wAxis( );
+    return Matrix4( unitX, unitY, unitZ, unitW );
+}
+
+/* Overwrites the x, y, z elements of columns 0-2 with the columns of mat3. */
+/* Column 3 is not touched. Returns a reference to this matrix.             */
+inline Matrix4 & Matrix4::setUpper3x3( const Matrix3 & mat3 )
+{
+    const Vector3 upperCol0 = mat3.getCol0();
+    const Vector3 upperCol1 = mat3.getCol1();
+    const Vector3 upperCol2 = mat3.getCol2();
+    mCol0.setXYZ( upperCol0 );
+    mCol1.setXYZ( upperCol1 );
+    mCol2.setXYZ( upperCol2 );
+    return *this;
+}
+
+/* Returns a Matrix3 built from the x, y, z parts of columns 0, 1 and 2. */
+inline const Matrix3 Matrix4::getUpper3x3( ) const
+{
+    const Vector3 upperCol0 = mCol0.getXYZ( );
+    const Vector3 upperCol1 = mCol1.getXYZ( );
+    const Vector3 upperCol2 = mCol2.getXYZ( );
+    return Matrix3( upperCol0, upperCol1, upperCol2 );
+}
+
+/* Stores translateVec in the x, y, z elements of column 3 via setXYZ. */
+/* Returns a reference to this matrix.                                 */
+inline Matrix4 & Matrix4::setTranslation( Vector3 translateVec )
+{
+    mCol3.setXYZ( translateVec );
+    return *this;
+}
+
+/* Returns the x, y, z part of column 3. */
+inline const Vector3 Matrix4::getTranslation( ) const
+{
+    return mCol3.getXYZ( );
+}
+
+/* Builds a rotation about the x axis by the given angle in radians. */
+/* With s = sin(radians) and c = cos(radians) the columns are:       */
+/*    Vector4::xAxis, (0, c, s, 0), (0, -s, c, 0), Vector4::wAxis    */
+inline const Matrix4 Matrix4::rotationX( float radians )
+{
+    vec_float4 s, c, res1, res2;
+    vec_uint4 select_y, select_z;
+    vec_float4 zero;
+    /* masks that pick the second (y) and third (z) 32-bit slot */
+    select_y = (vec_uint4)spu_maskb(0x0f00);
+    select_z = (vec_uint4)spu_maskb(0x00f0);
+    zero = spu_splats(0.0f);
+    /* sine and cosine of the angle, replicated in every slot */
+    sincosf4( spu_splats(radians), &s, &c );
+    /* column 1 = (0, c, s, 0) */
+    res1 = spu_sel( zero, c, select_y );
+    res1 = spu_sel( res1, s, select_z );
+    /* column 2 = (0, -s, c, 0) */
+    res2 = spu_sel( zero, negatef4(s), select_y );
+    res2 = spu_sel( res2, c, select_z );
+    return Matrix4(
+        Vector4::xAxis( ),
+        Vector4( res1 ),
+        Vector4( res2 ),
+        Vector4::wAxis( )
+    );
+}
+
+/* Builds a rotation about the y axis by the given angle in radians. */
+/* With s = sin(radians) and c = cos(radians) the columns are:       */
+/*    (c, 0, -s, 0), Vector4::yAxis, (s, 0, c, 0), Vector4::wAxis    */
+inline const Matrix4 Matrix4::rotationY( float radians )
+{
+    vec_float4 s, c, res0, res2;
+    vec_uint4 select_x, select_z;
+    vec_float4 zero;
+    /* masks that pick the first (x) and third (z) 32-bit slot */
+    select_x = (vec_uint4)spu_maskb(0xf000);
+    select_z = (vec_uint4)spu_maskb(0x00f0);
+    zero = spu_splats(0.0f);
+    /* sine and cosine of the angle, replicated in every slot */
+    sincosf4( spu_splats(radians), &s, &c );
+    /* column 0 = (c, 0, -s, 0) */
+    res0 = spu_sel( zero, c, select_x );
+    res0 = spu_sel( res0, negatef4(s), select_z );
+    /* column 2 = (s, 0, c, 0) */
+    res2 = spu_sel( zero, s, select_x );
+    res2 = spu_sel( res2, c, select_z );
+    return Matrix4(
+        Vector4( res0 ),
+        Vector4::yAxis( ),
+        Vector4( res2 ),
+        Vector4::wAxis( )
+    );
+}
+
+/* Builds a rotation about the z axis by the given angle in radians. */
+/* With s = sin(radians) and c = cos(radians) the columns are:       */
+/*    (c, s, 0, 0), (-s, c, 0, 0), Vector4::zAxis, Vector4::wAxis    */
+inline const Matrix4 Matrix4::rotationZ( float radians )
+{
+    vec_float4 s, c, res0, res1;
+    vec_uint4 select_x, select_y;
+    vec_float4 zero;
+    /* masks that pick the first (x) and second (y) 32-bit slot */
+    select_x = (vec_uint4)spu_maskb(0xf000);
+    select_y = (vec_uint4)spu_maskb(0x0f00);
+    zero = spu_splats(0.0f);
+    /* sine and cosine of the angle, replicated in every slot */
+    sincosf4( spu_splats(radians), &s, &c );
+    /* column 0 = (c, s, 0, 0) */
+    res0 = spu_sel( zero, c, select_x );
+    res0 = spu_sel( res0, s, select_y );
+    /* column 1 = (-s, c, 0, 0) */
+    res1 = spu_sel( zero, negatef4(s), select_x );
+    res1 = spu_sel( res1, c, select_y );
+    return Matrix4(
+        Vector4( res0 ),
+        Vector4( res1 ),
+        Vector4::zAxis( ),
+        Vector4::wAxis( )
+    );
+}
+
+/* Builds a rotation matrix from three angles (radians) packed in radiansXYZ. */
+/* Going by the function name, this is the composition of rotations about     */
+/* z, y and x; the exact lane layout depends on the _VECTORMATH_SHUF_* macros */
+/* defined elsewhere -- NOTE(review): confirm against their definitions.      */
+inline const Matrix4 Matrix4::rotationZYX( Vector3 radiansXYZ )
+{
+    vec_float4 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
+    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
+    angles = radiansXYZ.get128();
+    /* force the unused fourth slot to 0 before taking sine/cosine */
+    angles = spu_insert( 0.0f, angles, 3 );
+    sincosf4( angles, &s, &c );
+    negS = negatef4( s );
+    /* per-axis sine/cosine terms arranged by the shuffle macros */
+    Z0 = spu_shuffle( s, c, _VECTORMATH_SHUF_CZD0 );
+    Z1 = spu_shuffle( c, negS, _VECTORMATH_SHUF_CZD0 );
+    Y0 = spu_shuffle( negS, c, _VECTORMATH_SHUF_BBY0 );
+    Y1 = spu_shuffle( c, s, _VECTORMATH_SHUF_BBY0 );
+    /* X0 = sin of the first angle in every slot, X1 = cos of the first angle in every slot */
+    X0 = spu_shuffle( s, s, shuffle_xxxx );
+    X1 = spu_shuffle( c, c, shuffle_xxxx );
+    tmp = spu_mul( Z0, Y1 );
+    /* col0 = Z0*Y0, col1 = Z1*X1 + tmp*X0, col2 = tmp*X1 - Z1*X0 */
+    return Matrix4(
+        Vector4( spu_mul( Z0, Y0 ) ),
+        Vector4( spu_madd( Z1, X1, spu_mul( tmp, X0 ) ) ),
+        Vector4( spu_nmsub( Z1, X0, spu_mul( tmp, X1 ) ) ),
+        Vector4::wAxis( )
+    );
+}
+
+/* Builds a rotation by the given angle (radians) about the axis unitVec.      */
+/* The parameter name indicates the axis is expected to be unit length; it is  */
+/* not normalized here. Each of the first three columns is                     */
+/*    axis * axis[k] * (1 - cos) + tmpk                                        */
+/* where tmpk holds cos on the diagonal and +/- axis*sin terms elsewhere.      */
+inline const Matrix4 Matrix4::rotation( float radians, Vector3 unitVec )
+{
+    vec_float4 axis, s, c, oneMinusC, axisS, negAxisS, xxxx, yyyy, zzzz, tmp0, tmp1, tmp2, zeroW;
+    /* shuffle patterns that replicate one 32-bit word into all four slots */
+    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
+    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
+    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
+    axis = unitVec.get128();
+    sincosf4( spu_splats( radians ), &s, &c );
+    /* broadcast each axis component */
+    xxxx = spu_shuffle( axis, axis, shuffle_xxxx );
+    yyyy = spu_shuffle( axis, axis, shuffle_yyyy );
+    zzzz = spu_shuffle( axis, axis, shuffle_zzzz );
+    oneMinusC = spu_sub( spu_splats(1.0f), c );
+    axisS = spu_mul( axis, s );
+    negAxisS = negatef4( axisS );
+    /* off-diagonal +/- axis*sin terms; lane layout comes from the shuffle macros */
+    /* defined elsewhere -- NOTE(review): confirm against their definitions       */
+    tmp0 = spu_shuffle( axisS, negAxisS, _VECTORMATH_SHUF_0ZB0 );
+    tmp1 = spu_shuffle( axisS, negAxisS, _VECTORMATH_SHUF_C0X0 );
+    tmp2 = spu_shuffle( axisS, negAxisS, _VECTORMATH_SHUF_YA00 );
+    /* put cos on the diagonal slot of each column */
+    tmp0 = spu_sel( tmp0, c, (vec_uint4)spu_maskb(0xf000) );
+    tmp1 = spu_sel( tmp1, c, (vec_uint4)spu_maskb(0x0f00) );
+    tmp2 = spu_sel( tmp2, c, (vec_uint4)spu_maskb(0x00f0) );
+    /* clear the fourth slot of axis so the products below contribute 0 to w */
+    zeroW = (vec_float4)spu_maskb(0x000f);
+    axis = spu_andc( axis, zeroW );
+    return Matrix4(
+        Vector4( spu_madd( spu_mul( axis, xxxx ), oneMinusC, tmp0 ) ),
+        Vector4( spu_madd( spu_mul( axis, yyyy ), oneMinusC, tmp1 ) ),
+        Vector4( spu_madd( spu_mul( axis, zzzz ), oneMinusC, tmp2 ) ),
+        Vector4::wAxis( )
+    );
+}
+
+/* Builds a rotation matrix from unitQuat by converting the result of */
+/* Transform3::rotation( unitQuat ) to a Matrix4.                     */
+inline const Matrix4 Matrix4::rotation( Quat unitQuat )
+{
+    const Transform3 rotTfrm = Transform3::rotation( unitQuat );
+    return Matrix4( rotTfrm );
+}
+
+/* Builds a scale matrix: column k (k = 0..2) holds component k of scaleVec   */
+/* in slot k and zero elsewhere; the fourth column is Vector4::wAxis.         */
+inline const Matrix4 Matrix4::scale( Vector3 scaleVec )
+{
+    const vec_float4 zeros = spu_splats(0.0f);
+    const vec_float4 factors = scaleVec.get128();
+    /* each mask keeps exactly one 32-bit slot of factors; the rest comes from zeros */
+    const vec_float4 diagX = spu_sel( zeros, factors, (vec_uint4)spu_maskb(0xf000) );
+    const vec_float4 diagY = spu_sel( zeros, factors, (vec_uint4)spu_maskb(0x0f00) );
+    const vec_float4 diagZ = spu_sel( zeros, factors, (vec_uint4)spu_maskb(0x00f0) );
+    return Matrix4(
+        Vector4( diagX ),
+        Vector4( diagY ),
+        Vector4( diagZ ),
+        Vector4::wAxis( )
+    );
+}
+
+/* Returns mat with column 0, 1, 2 multiplied by the x, y, z component */
+/* of scaleVec respectively; column 3 is copied unchanged.             */
+inline const Matrix4 appendScale( const Matrix4 & mat, Vector3 scaleVec )
+{
+    const Vector4 scaledCol0 = mat.getCol0() * scaleVec.getX( );
+    const Vector4 scaledCol1 = mat.getCol1() * scaleVec.getY( );
+    const Vector4 scaledCol2 = mat.getCol2() * scaleVec.getZ( );
+    const Vector4 keptCol3 = mat.getCol3();
+    return Matrix4( scaledCol0, scaledCol1, scaledCol2, keptCol3 );
+}
+
+/* Returns mat with every column multiplied per element by (scaleVec, 1.0f), */
+/* so rows 0-2 are scaled and the fourth row is left as is.                  */
+inline const Matrix4 prependScale( Vector3 scaleVec, const Matrix4 & mat )
+{
+    const Vector4 rowScale = Vector4( scaleVec, 1.0f );
+    const Vector4 scaledCol0 = mulPerElem( mat.getCol0(), rowScale );
+    const Vector4 scaledCol1 = mulPerElem( mat.getCol1(), rowScale );
+    const Vector4 scaledCol2 = mulPerElem( mat.getCol2(), rowScale );
+    const Vector4 scaledCol3 = mulPerElem( mat.getCol3(), rowScale );
+    return Matrix4( scaledCol0, scaledCol1, scaledCol2, scaledCol3 );
+}
+
+/* Builds a translation matrix: columns 0-2 are Vector4::xAxis, yAxis, zAxis */
+/* and column 3 is (translateVec, 1.0f).                                     */
+inline const Matrix4 Matrix4::translation( Vector3 translateVec )
+{
+    const Vector4 unitX = Vector4::xAxis( );
+    const Vector4 unitY = Vector4::yAxis( );
+    const Vector4 unitZ = Vector4::zAxis( );
+    const Vector4 offsetCol = Vector4( translateVec, 1.0f );
+    return Matrix4( unitX, unitY, unitZ, offsetCol );
+}
+
+/* Builds a viewing matrix. An eye frame is formed whose z axis is the       */
+/* normalized direction from lookAtPos to eyePos, whose x axis is            */
+/* normalize( up x z ), whose y axis is z x x, and whose origin is eyePos.   */
+/* The result is orthoInverse of that frame.                                 */
+inline const Matrix4 Matrix4::lookAt( Point3 eyePos, Point3 lookAtPos, Vector3 upVec )
+{
+    const Vector3 upDir = normalize( upVec );
+    const Vector3 axisZ = normalize( ( eyePos - lookAtPos ) );
+    const Vector3 axisX = normalize( cross( upDir, axisZ ) );
+    /* recompute the up axis from z and x for the frame's y column */
+    const Vector3 axisY = cross( axisZ, axisX );
+    const Matrix4 eyeFrame = Matrix4( Vector4( axisX ), Vector4( axisY ), Vector4( axisZ ), Vector4( eyePos ) );
+    return orthoInverse( eyeFrame );
+}
+
+/* Builds a perspective projection matrix from a vertical field of view     */
+/* (radians), an aspect ratio and the near/far distances.                   */
+/* With f = tanf( _VECTORMATH_PI_OVER_2 - fovyRadians/2 ) and               */
+/* rangeInv = 1/(zNear - zFar), the non-zero elements are:                  */
+/*    col0.x = f/aspect, col1.y = f,                                        */
+/*    col2.z = (zNear + zFar)*rangeInv, col2.w = -1,                        */
+/*    col3.z = zNear*zFar*rangeInv*2                                        */
+/* No check is made for zNear == zFar or aspect == 0.                       */
+inline const Matrix4 Matrix4::perspective( float fovyRadians, float aspect, float zNear, float zFar )
+{
+    const float f = tanf( _VECTORMATH_PI_OVER_2 - fovyRadians * 0.5f );
+    const float rangeInv = 1.0f / ( zNear - zFar );
+    const vec_float4 zero = spu_splats(0.0f);
+    /* every column starts as all zeros and gets its non-zero slots inserted */
+    const vec_float4 col0 = spu_insert( f / aspect, zero, 0 );
+    const vec_float4 col1 = spu_insert( f, zero, 1 );
+    vec_float4 col2 = spu_insert( ( zNear + zFar ) * rangeInv, zero, 2 );
+    col2 = spu_insert( -1.0f, col2, 3 );
+    const vec_float4 col3 = spu_insert( zNear * zFar * rangeInv * 2.0f, zero, 2 );
+    return Matrix4(
+        Vector4( col0 ),
+        Vector4( col1 ),
+        Vector4( col2 ),
+        Vector4( col3 )
+    );
+}
+
+/* Builds a perspective projection matrix from the six frustum planes.      */
+/* Reciprocals of (right-left), (top-bottom) and (zNear-zFar) are taken      */
+/* without any check for zero.                                              */
+inline const Matrix4 Matrix4::frustum( float left, float right, float bottom, float top, float zNear, float zFar )
+{
+    /* function implementation based on code from STIDC SDK:           */
+    /* --------------------------------------------------------------  */
+    /* PLEASE DO NOT MODIFY THIS SECTION                               */
+    /* This prolog section is automatically generated.                 */
+    /*                                                                 */
+    /* (C)Copyright                                                    */
+    /* Sony Computer Entertainment, Inc.,                              */
+    /* Toshiba Corporation,                                            */
+    /* International Business Machines Corporation,                    */
+    /* 2001,2002.                                                      */
+    /* S/T/I Confidential Information                                  */
+    /* --------------------------------------------------------------  */
+    vec_float4 lbf, rtn;
+    vec_float4 diff, sum, inv_diff;
+    vec_float4 diagonal, column, near2;
+    vec_float4 zero = spu_splats(0.0f);
+    /* pack the scalars into two vectors; presumably lbf = (left, bottom, zFar, -) */
+    /* and rtn = (right, top, zNear, -), assuming _VECTORMATH_SHUF_XAYB interleaves */
+    /* the first two slots of its operands -- NOTE(review): confirm the macro       */
+    lbf = spu_shuffle( spu_promote(left,0), spu_promote(zFar,0), _VECTORMATH_SHUF_XAYB );
+    rtn = spu_shuffle( spu_promote(right,0), spu_promote(zNear,0), _VECTORMATH_SHUF_XAYB );
+    lbf = spu_shuffle( lbf, spu_promote(bottom,0), _VECTORMATH_SHUF_XAYB );
+    rtn = spu_shuffle( rtn, spu_promote(top,0), _VECTORMATH_SHUF_XAYB );
+    diff = spu_sub( rtn, lbf );
+    sum  = spu_add( rtn, lbf );
+    inv_diff = recipf4( diff );
+    /* near2 = 2 * zNear in every slot */
+    near2 = spu_splats( zNear );
+    near2 = spu_add( near2, near2 );
+    /* diagonal = 2*zNear / diff, column = sum / diff (per slot) */
+    diagonal = spu_mul( near2, inv_diff );
+    column = spu_mul( sum, inv_diff );
+    /* col0/col1 keep only their diagonal slot; col2 is column with w forced to -1; */
+    /* col3 keeps only its z slot, holding diagonal.z * zFar                        */
+    return Matrix4(
+        Vector4( spu_sel( zero, diagonal, (vec_uint4)spu_maskb(0xf000) ) ),
+        Vector4( spu_sel( zero, diagonal, (vec_uint4)spu_maskb(0x0f00) ) ),
+        Vector4( spu_sel( column, spu_splats(-1.0f), (vec_uint4)spu_maskb(0x000f) ) ),
+        Vector4( spu_sel( zero, spu_mul( diagonal, spu_splats(zFar) ), (vec_uint4)spu_maskb(0x00f0) ) )
+    );
+}
+
+/* Builds an orthographic projection matrix from the six clipping planes.   */
+/* Reciprocals of (right-left), (top-bottom) and (zNear-zFar) are taken      */
+/* without any check for zero.                                              */
+inline const Matrix4 Matrix4::orthographic( float left, float right, float bottom, float top, float zNear, float zFar )
+{
+    /* function implementation based on code from STIDC SDK:           */
+    /* --------------------------------------------------------------  */
+    /* PLEASE DO NOT MODIFY THIS SECTION                               */
+    /* This prolog section is automatically generated.                 */
+    /*                                                                 */
+    /* (C)Copyright                                                    */
+    /* Sony Computer Entertainment, Inc.,                              */
+    /* Toshiba Corporation,                                            */
+    /* International Business Machines Corporation,                    */
+    /* 2001,2002.                                                      */
+    /* S/T/I Confidential Information                                  */
+    /* --------------------------------------------------------------  */
+    vec_float4 lbf, rtn;
+    vec_float4 diff, sum, inv_diff, neg_inv_diff;
+    vec_float4 diagonal, column;
+    vec_float4 zero = spu_splats(0.0f);
+    /* pack the scalars into two vectors; presumably lbf = (left, bottom, zFar, -) */
+    /* and rtn = (right, top, zNear, -), assuming _VECTORMATH_SHUF_XAYB interleaves */
+    /* the first two slots of its operands -- NOTE(review): confirm the macro       */
+    lbf = spu_shuffle( spu_promote(left,0), spu_promote(zFar,0), _VECTORMATH_SHUF_XAYB );
+    rtn = spu_shuffle( spu_promote(right,0), spu_promote(zNear,0), _VECTORMATH_SHUF_XAYB );
+    lbf = spu_shuffle( lbf, spu_promote(bottom,0), _VECTORMATH_SHUF_XAYB );
+    rtn = spu_shuffle( rtn, spu_promote(top,0), _VECTORMATH_SHUF_XAYB );
+    diff = spu_sub( rtn, lbf );
+    sum  = spu_add( rtn, lbf );
+    inv_diff = recipf4( diff );
+    neg_inv_diff = negatef4( inv_diff );
+    /* diagonal = 2 / diff (per slot) */
+    diagonal = spu_add( inv_diff, inv_diff );
+    /* column = sum * (-1/diff) in every slot except z, which uses +1/diff */
+    column = spu_mul( sum, spu_sel( neg_inv_diff, inv_diff, (vec_uint4)spu_maskb(0x00f0) ) );
+    /* col0..col2 keep only their diagonal slot; col3 is column with w forced to 1 */
+    return Matrix4(
+        Vector4( spu_sel( zero, diagonal, (vec_uint4)spu_maskb(0xf000) ) ),
+        Vector4( spu_sel( zero, diagonal, (vec_uint4)spu_maskb(0x0f00) ) ),
+        Vector4( spu_sel( zero, diagonal, (vec_uint4)spu_maskb(0x00f0) ) ),
+        Vector4( spu_sel( column, spu_splats(1.0f), (vec_uint4)spu_maskb(0x000f) ) )
+    );
+}
+
+/* Column-wise select: each result column is the Vector4 select of the */
+/* matching columns of mat0 and mat1, controlled by select1.           */
+inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, bool select1 )
+{
+    const Vector4 chosenCol0 = select( mat0.getCol0(), mat1.getCol0(), select1 );
+    const Vector4 chosenCol1 = select( mat0.getCol1(), mat1.getCol1(), select1 );
+    const Vector4 chosenCol2 = select( mat0.getCol2(), mat1.getCol2(), select1 );
+    const Vector4 chosenCol3 = select( mat0.getCol3(), mat1.getCol3(), select1 );
+    return Matrix4( chosenCol0, chosenCol1, chosenCol2, chosenCol3 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+/* Debug helper: prints the matrix one row at a time, rows 0 through 3, */
+/* using the Vector4 print.                                             */
+inline void print( const Matrix4 & mat )
+{
+    for ( int row = 0; row < 4; ++row )
+    {
+        print( mat.getRow( row ) );
+    }
+}
+
+/* Debug helper: prints name followed by a colon and newline, then the matrix. */
+inline void print( const Matrix4 & mat, const char * name )
+{
+    printf("%s:\n", name);
+    print( mat );
+}
+
+#endif
+
+/* Copy constructor: copies all four columns of tfrm. */
+inline Transform3::Transform3( const Transform3 & tfrm )
+    : mCol0( tfrm.mCol0 ),
+      mCol1( tfrm.mCol1 ),
+      mCol2( tfrm.mCol2 ),
+      mCol3( tfrm.mCol3 )
+{
+}
+
+/* Sets every column to Vector3( scalar ). */
+inline Transform3::Transform3( float scalar )
+{
+    const Vector3 fill = Vector3( scalar );
+    mCol0 = fill;
+    mCol1 = fill;
+    mCol2 = fill;
+    mCol3 = fill;
+}
+
+/* Constructs the transform from its four columns. */
+inline Transform3::Transform3( Vector3 _col0, Vector3 _col1, Vector3 _col2, Vector3 _col3 )
+    : mCol0( _col0 ),
+      mCol1( _col1 ),
+      mCol2( _col2 ),
+      mCol3( _col3 )
+{
+}
+
+/* Constructs the transform from a 3x3 matrix (columns 0-2) and a */
+/* translation vector (column 3).                                 */
+inline Transform3::Transform3( const Matrix3 & tfrm, Vector3 translateVec )
+{
+    /* both setters return *this, so they can be chained */
+    setUpper3x3( tfrm ).setTranslation( translateVec );
+}
+
+/* Constructs the transform from a quaternion, converted through Matrix3 */
+/* for columns 0-2, and a translation vector for column 3.               */
+inline Transform3::Transform3( Quat unitQuat, Vector3 translateVec )
+{
+    const Matrix3 rotMat = Matrix3( unitQuat );
+    /* both setters return *this, so they can be chained */
+    setUpper3x3( rotMat ).setTranslation( translateVec );
+}
+
+/* Replaces column 0 and returns a reference to this transform. */
+inline Transform3 & Transform3::setCol0( Vector3 _col0 )
+{
+    mCol0 = _col0;
+    return *this;
+}
+
+/* Replaces column 1 and returns a reference to this transform. */
+inline Transform3 & Transform3::setCol1( Vector3 _col1 )
+{
+    mCol1 = _col1;
+    return *this;
+}
+
+/* Replaces column 2 and returns a reference to this transform. */
+inline Transform3 & Transform3::setCol2( Vector3 _col2 )
+{
+    mCol2 = _col2;
+    return *this;
+}
+
+/* Replaces column 3 and returns a reference to this transform. */
+inline Transform3 & Transform3::setCol3( Vector3 _col3 )
+{
+    mCol3 = _col3;
+    return *this;
+}
+
+/* Replaces the column at index col by addressing the columns as an array */
+/* starting at mCol0. col is not range-checked.                           */
+inline Transform3 & Transform3::setCol( int col, Vector3 vec )
+{
+    Vector3 * const columns = &mCol0;
+    columns[col] = vec;
+    return *this;
+}
+
+/* Replaces the row at index row: element k of vec (k = 0..3) is written */
+/* into element row of column k. Returns a reference to this transform.  */
+inline Transform3 & Transform3::setRow( int row, Vector4 vec )
+{
+    mCol0.setElem( row, vec.getElem( 0 ) );
+    mCol1.setElem( row, vec.getElem( 1 ) );
+    mCol2.setElem( row, vec.getElem( 2 ) );
+    mCol3.setElem( row, vec.getElem( 3 ) );
+    return *this;
+}
+
+/* Writes val into the element at (col, row) and returns a reference to this transform. */
+inline Transform3 & Transform3::setElem( int col, int row, float val )
+{
+    Vector3 & column = (*this)[col];
+    column.setElem(row, val);
+    return *this;
+}
+
+/* Returns the element at (col, row). */
+inline float Transform3::getElem( int col, int row ) const
+{
+    const Vector3 column = this->getCol( col );
+    return column.getElem( row );
+}
+
+/* Returns a copy of column 0. */
+inline const Vector3 Transform3::getCol0( ) const
+{
+    return mCol0;
+}
+
+/* Returns a copy of column 1. */
+inline const Vector3 Transform3::getCol1( ) const
+{
+    return mCol1;
+}
+
+/* Returns a copy of column 2. */
+inline const Vector3 Transform3::getCol2( ) const
+{
+    return mCol2;
+}
+
+/* Returns a copy of column 3. */
+inline const Vector3 Transform3::getCol3( ) const
+{
+    return mCol3;
+}
+
+/* Returns a copy of the column at index col by addressing the columns */
+/* as an array starting at mCol0. col is not range-checked.            */
+inline const Vector3 Transform3::getCol( int col ) const
+{
+    const Vector3 * const columns = &mCol0;
+    return columns[col];
+}
+
+/* Returns the row at index row as a Vector4 gathered from element row of columns 0-3. */
+inline const Vector4 Transform3::getRow( int row ) const
+{
+    return Vector4( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ), mCol3.getElem( row ) );
+}
+
+/* Returns a mutable reference to the column at index col by addressing */
+/* the columns as an array starting at mCol0. col is not range-checked. */
+inline Vector3 & Transform3::operator []( int col )
+{
+    Vector3 * const columns = &mCol0;
+    return columns[col];
+}
+
+/* Returns a copy of the column at index col by addressing the columns */
+/* as an array starting at mCol0. col is not range-checked.            */
+inline const Vector3 Transform3::operator []( int col ) const
+{
+    const Vector3 * const columns = &mCol0;
+    return columns[col];
+}
+
+/* Assignment: copies all four columns of tfrm and returns a reference to this transform. */
+inline Transform3 & Transform3::operator =( const Transform3 & tfrm )
+{
+    mCol0 = tfrm.mCol0;
+    mCol1 = tfrm.mCol1;
+    mCol2 = tfrm.mCol2;
+    mCol3 = tfrm.mCol3;
+    return *this;
+}
+
+/* Computes the inverse of a 3x4 transform.                                  */
+/* The upper 3x3 is inverted from the pairwise cross products of its columns */
+/* scaled by the reciprocal of dot3( col0 x col1, col2 ); the new translation*/
+/* is the inverted 3x3 applied to the negated original translation.          */
+/* The reciprocal is taken without checking for a zero determinant.          */
+inline const Transform3 inverse( const Transform3 & tfrm )
+{
+    vec_float4 inv0, inv1, inv2, inv3;
+    vec_float4 tmp0, tmp1, tmp2, tmp3, tmp4, dot, invdet;
+    vec_float4 xxxx, yyyy, zzzz;
+    /* shuffle patterns that replicate one 32-bit word into all four slots */
+    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
+    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
+    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
+    /* tmp2 = col0 x col1, tmp0 = col1 x col2, tmp1 = col2 x col0 */
+    tmp2 = _vmathVfCross( tfrm.getCol0().get128(), tfrm.getCol1().get128() );
+    tmp0 = _vmathVfCross( tfrm.getCol1().get128(), tfrm.getCol2().get128() );
+    tmp1 = _vmathVfCross( tfrm.getCol2().get128(), tfrm.getCol0().get128() );
+    /* start from the negated translation */
+    inv3 = negatef4( tfrm.getCol3().get128() );
+    /* dot = (col0 x col1) . col2, then replicated from the first slot into all slots */
+    dot = _vmathVfDot3( tmp2, tfrm.getCol2().get128() );
+    dot = spu_shuffle( dot, dot, shuffle_xxxx );
+    invdet = recipf4( dot );
+    /* rearrange the three cross products into the (unscaled) inverse columns;  */
+    /* presumably a transpose -- lane layout comes from the _VECTORMATH_SHUF_*  */
+    /* macros defined elsewhere; NOTE(review): confirm against their definitions */
+    tmp3 = spu_shuffle( tmp0, tmp2, _VECTORMATH_SHUF_XAYB );
+    tmp4 = spu_shuffle( tmp0, tmp2, _VECTORMATH_SHUF_ZCWD );
+    inv0 = spu_shuffle( tmp3, tmp1, _VECTORMATH_SHUF_XAYB );
+    xxxx = spu_shuffle( inv3, inv3, shuffle_xxxx );
+    inv1 = spu_shuffle( tmp3, tmp1, _VECTORMATH_SHUF_ZBW0 );
+    inv2 = spu_shuffle( tmp4, tmp1, _VECTORMATH_SHUF_XCY0 );
+    yyyy = spu_shuffle( inv3, inv3, shuffle_yyyy );
+    zzzz = spu_shuffle( inv3, inv3, shuffle_zzzz );
+    /* inv3 = inv0*(-t.x) + inv1*(-t.y) + inv2*(-t.z), t being the original translation */
+    inv3 = spu_mul( inv0, xxxx );
+    inv3 = spu_madd( inv1, yyyy, inv3 );
+    inv3 = spu_madd( inv2, zzzz, inv3 );
+    /* scale every column by 1/determinant */
+    inv0 = spu_mul( inv0, invdet );
+    inv1 = spu_mul( inv1, invdet );
+    inv2 = spu_mul( inv2, invdet );
+    inv3 = spu_mul( inv3, invdet );
+    return Transform3(
+        Vector3( inv0 ),
+        Vector3( inv1 ),
+        Vector3( inv2 ),
+        Vector3( inv3 )
+    );
+}
+
+/* Computes the inverse of a 3x4 transform without any determinant or       */
+/* division: columns 0-2 are rearranged with shuffles (presumably a          */
+/* transpose) and the new translation is that rearranged 3x3 applied to the  */
+/* negated original translation. As the name suggests this is only a true    */
+/* inverse when the upper 3x3 is orthonormal; that is not checked here.      */
+inline const Transform3 orthoInverse( const Transform3 & tfrm )
+{
+    vec_float4 inv0, inv1, inv2, inv3;
+    vec_float4 tmp0, tmp1;
+    vec_float4 xxxx, yyyy, zzzz;
+    /* shuffle patterns that replicate one 32-bit word into all four slots */
+    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
+    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
+    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
+    /* lane layout of the shuffles below comes from the _VECTORMATH_SHUF_* macros */
+    /* defined elsewhere -- NOTE(review): confirm against their definitions       */
+    tmp0 = spu_shuffle( tfrm.getCol0().get128(), tfrm.getCol2().get128(), _VECTORMATH_SHUF_XAYB );
+    tmp1 = spu_shuffle( tfrm.getCol0().get128(), tfrm.getCol2().get128(), _VECTORMATH_SHUF_ZCWD );
+    /* start from the negated translation */
+    inv3 = negatef4( tfrm.getCol3().get128() );
+    inv0 = spu_shuffle( tmp0, tfrm.getCol1().get128(), _VECTORMATH_SHUF_XAYB );
+    xxxx = spu_shuffle( inv3, inv3, shuffle_xxxx );
+    inv1 = spu_shuffle( tmp0, tfrm.getCol1().get128(), _VECTORMATH_SHUF_ZBW0 );
+    inv2 = spu_shuffle( tmp1, tfrm.getCol1().get128(), _VECTORMATH_SHUF_XCY0 );
+    yyyy = spu_shuffle( inv3, inv3, shuffle_yyyy );
+    zzzz = spu_shuffle( inv3, inv3, shuffle_zzzz );
+    /* inv3 = inv0*(-t.x) + inv1*(-t.y) + inv2*(-t.z), t being the original translation */
+    inv3 = spu_mul( inv0, xxxx );
+    inv3 = spu_madd( inv1, yyyy, inv3 );
+    inv3 = spu_madd( inv2, zzzz, inv3 );
+    return Transform3(
+        Vector3( inv0 ),
+        Vector3( inv1 ),
+        Vector3( inv2 ),
+        Vector3( inv3 )
+    );
+}
+
+/* Applies the Vector3 absPerElem to each of the four columns of tfrm */
+/* and returns the resulting transform.                               */
+inline const Transform3 absPerElem( const Transform3 & tfrm )
+{
+    const Vector3 absCol0 = absPerElem( tfrm.getCol0() );
+    const Vector3 absCol1 = absPerElem( tfrm.getCol1() );
+    const Vector3 absCol2 = absPerElem( tfrm.getCol2() );
+    const Vector3 absCol3 = absPerElem( tfrm.getCol3() );
+    return Transform3( absCol0, absCol1, absCol2, absCol3 );
+}
+
+/* Transform * 3-D vector: returns mCol0*x + mCol1*y + mCol2*z.   */
+/* The translation column mCol3 is not applied to a Vector3.      */
+inline const Vector3 Transform3::operator *( Vector3 vec ) const
+{
+    vec_float4 res;
+    vec_float4 xxxx, yyyy, zzzz;
+    /* shuffle patterns that replicate one 32-bit word into all four slots */
+    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
+    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
+    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
+    /* broadcast x, y and z of vec */
+    xxxx = spu_shuffle( vec.get128(), vec.get128(), shuffle_xxxx );
+    yyyy = spu_shuffle( vec.get128(), vec.get128(), shuffle_yyyy );
+    zzzz = spu_shuffle( vec.get128(), vec.get128(), shuffle_zzzz );
+    /* accumulate the weighted columns with multiply-adds */
+    res = spu_mul( mCol0.get128(), xxxx );
+    res = spu_madd( mCol1.get128(), yyyy, res );
+    res = spu_madd( mCol2.get128(), zzzz, res );
+    return Vector3( res );
+}
+
+/* Transform * 3-D point: returns mCol0*x + mCol1*y + mCol2*z + mCol3, */
+/* i.e. the point is rotated/scaled and then translated.               */
+inline const Point3 Transform3::operator *( Point3 pnt ) const
+{
+    vec_float4 tmp0, tmp1, res;
+    vec_float4 xxxx, yyyy, zzzz;
+    /* shuffle patterns that replicate one 32-bit word into all four slots */
+    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
+    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
+    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
+    /* broadcast x, y and z of pnt */
+    xxxx = spu_shuffle( pnt.get128(), pnt.get128(), shuffle_xxxx );
+    yyyy = spu_shuffle( pnt.get128(), pnt.get128(), shuffle_yyyy );
+    zzzz = spu_shuffle( pnt.get128(), pnt.get128(), shuffle_zzzz );
+    /* two independent partial sums: tmp0 = col0*x + col2*z, tmp1 = col1*y + col3 */
+    tmp0 = spu_mul( mCol0.get128(), xxxx );
+    tmp1 = spu_mul( mCol1.get128(), yyyy );
+    tmp0 = spu_madd( mCol2.get128(), zzzz, tmp0 );
+    tmp1 = spu_add( mCol3.get128(), tmp1 );
+    res = spu_add( tmp0, tmp1 );
+    return Point3( res );
+}
+
+/* Transform * transform: columns 0-2 of tfrm are multiplied as Vector3     */
+/* values; column 3 is multiplied as a Point3 so this transform's           */
+/* translation is added to it.                                              */
+inline const Transform3 Transform3::operator *( const Transform3 & tfrm ) const
+{
+    const Transform3 & lhs = *this;
+    const Vector3 prodCol0 = lhs * tfrm.mCol0;
+    const Vector3 prodCol1 = lhs * tfrm.mCol1;
+    const Vector3 prodCol2 = lhs * tfrm.mCol2;
+    /* wrap the translation column in Point3 so the Point3 overload is selected */
+    const Vector3 prodCol3 = Vector3( ( lhs * Point3( tfrm.mCol3 ) ) );
+    return Transform3( prodCol0, prodCol1, prodCol2, prodCol3 );
+}
+
+/* Replaces this transform with (this * tfrm) and returns a reference to this transform. */
+inline Transform3 & Transform3::operator *=( const Transform3 & tfrm )
+{
+    const Transform3 product = *this * tfrm;
+    *this = product;
+    return *this;
+}
+
+/* Applies the Vector3 mulPerElem to each pair of matching columns of */
+/* tfrm0 and tfrm1 and returns the resulting transform.               */
+inline const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 )
+{
+    const Vector3 prodCol0 = mulPerElem( tfrm0.getCol0(), tfrm1.getCol0() );
+    const Vector3 prodCol1 = mulPerElem( tfrm0.getCol1(), tfrm1.getCol1() );
+    const Vector3 prodCol2 = mulPerElem( tfrm0.getCol2(), tfrm1.getCol2() );
+    const Vector3 prodCol3 = mulPerElem( tfrm0.getCol3(), tfrm1.getCol3() );
+    return Transform3( prodCol0, prodCol1, prodCol2, prodCol3 );
+}
+
+/* Returns the transform whose columns 0-2 are Vector3::xAxis, yAxis, zAxis */
+/* and whose translation column is Vector3( 0.0f ).                         */
+inline const Transform3 Transform3::identity( )
+{
+    const Vector3 unitX = Vector3::xAxis( );
+    const Vector3 unitY = Vector3::yAxis( );
+    const Vector3 unitZ = Vector3::zAxis( );
+    const Vector3 noOffset = Vector3( 0.0f );
+    return Transform3( unitX, unitY, unitZ, noOffset );
+}
+
+/* Replaces columns 0-2 with the columns of tfrm; column 3 is not touched. */
+/* Returns a reference to this transform.                                  */
+inline Transform3 & Transform3::setUpper3x3( const Matrix3 & tfrm )
+{
+    const Vector3 upperCol0 = tfrm.getCol0();
+    const Vector3 upperCol1 = tfrm.getCol1();
+    const Vector3 upperCol2 = tfrm.getCol2();
+    mCol0 = upperCol0;
+    mCol1 = upperCol1;
+    mCol2 = upperCol2;
+    return *this;
+}
+
+/* Returns a Matrix3 built from columns 0, 1 and 2. */
+inline const Matrix3 Transform3::getUpper3x3( ) const
+{
+    return Matrix3( mCol0, mCol1, mCol2 );
+}
+
+/* Replaces column 3 (the translation) and returns a reference to this transform. */
+inline Transform3 & Transform3::setTranslation( Vector3 translateVec )
+{
+    mCol3 = translateVec;
+    return *this;
+}
+
+/* Returns a copy of column 3 (the translation). */
+inline const Vector3 Transform3::getTranslation( ) const
+{
+    return mCol3;
+}
+
+/* Builds a rotation about the x axis by the given angle in radians. */
+/* With s = sin(radians) and c = cos(radians) the columns are:       */
+/*    Vector3::xAxis, (0, c, s), (0, -s, c), Vector3( 0.0f )         */
+inline const Transform3 Transform3::rotationX( float radians )
+{
+    vec_float4 s, c, res1, res2;
+    vec_uint4 select_y, select_z;
+    vec_float4 zero;
+    /* masks that pick the second (y) and third (z) 32-bit slot */
+    select_y = (vec_uint4)spu_maskb(0x0f00);
+    select_z = (vec_uint4)spu_maskb(0x00f0);
+    zero = spu_splats(0.0f);
+    /* sine and cosine of the angle, replicated in every slot */
+    sincosf4( spu_splats(radians), &s, &c );
+    /* column 1 = (0, c, s) */
+    res1 = spu_sel( zero, c, select_y );
+    res1 = spu_sel( res1, s, select_z );
+    /* column 2 = (0, -s, c) */
+    res2 = spu_sel( zero, negatef4(s), select_y );
+    res2 = spu_sel( res2, c, select_z );
+    return Transform3(
+        Vector3::xAxis( ),
+        Vector3( res1 ),
+        Vector3( res2 ),
+        Vector3( 0.0f )
+    );
+}
+
+/* Builds a rotation about the y axis by the given angle in radians. */
+/* With s = sin(radians) and c = cos(radians) the columns are:       */
+/*    (c, 0, -s), Vector3::yAxis, (s, 0, c), Vector3( 0.0f )         */
+inline const Transform3 Transform3::rotationY( float radians )
+{
+    vec_float4 s, c, res0, res2;
+    vec_uint4 select_x, select_z;
+    vec_float4 zero;
+    /* masks that pick the first (x) and third (z) 32-bit slot */
+    select_x = (vec_uint4)spu_maskb(0xf000);
+    select_z = (vec_uint4)spu_maskb(0x00f0);
+    zero = spu_splats(0.0f);
+    /* sine and cosine of the angle, replicated in every slot */
+    sincosf4( spu_splats(radians), &s, &c );
+    /* column 0 = (c, 0, -s) */
+    res0 = spu_sel( zero, c, select_x );
+    res0 = spu_sel( res0, negatef4(s), select_z );
+    /* column 2 = (s, 0, c) */
+    res2 = spu_sel( zero, s, select_x );
+    res2 = spu_sel( res2, c, select_z );
+    return Transform3(
+        Vector3( res0 ),
+        Vector3::yAxis( ),
+        Vector3( res2 ),
+        Vector3( 0.0f )
+    );
+}
+
+/* Builds a rotation about the z axis by the given angle in radians. */
+/* With s = sin(radians) and c = cos(radians) the columns are:       */
+/*    (c, s, 0), (-s, c, 0), Vector3::zAxis, Vector3( 0.0f )         */
+inline const Transform3 Transform3::rotationZ( float radians )
+{
+    vec_float4 s, c, res0, res1;
+    vec_uint4 select_x, select_y;
+    vec_float4 zero;
+    /* masks that pick the first (x) and second (y) 32-bit slot */
+    select_x = (vec_uint4)spu_maskb(0xf000);
+    select_y = (vec_uint4)spu_maskb(0x0f00);
+    zero = spu_splats(0.0f);
+    /* sine and cosine of the angle, replicated in every slot */
+    sincosf4( spu_splats(radians), &s, &c );
+    /* column 0 = (c, s, 0) */
+    res0 = spu_sel( zero, c, select_x );
+    res0 = spu_sel( res0, s, select_y );
+    /* column 1 = (-s, c, 0) */
+    res1 = spu_sel( zero, negatef4(s), select_x );
+    res1 = spu_sel( res1, c, select_y );
+    return Transform3(
+        Vector3( res0 ),
+        Vector3( res1 ),
+        Vector3::zAxis( ),
+        Vector3( 0.0f )
+    );
+}
+
+/* Builds a rotation transform (zero translation) from three angles in       */
+/* radians packed in radiansXYZ. Going by the function name, this is the      */
+/* composition of rotations about z, y and x; the exact lane layout depends   */
+/* on the _VECTORMATH_SHUF_* macros defined elsewhere --                      */
+/* NOTE(review): confirm against their definitions.                           */
+inline const Transform3 Transform3::rotationZYX( Vector3 radiansXYZ )
+{
+    vec_float4 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
+    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
+    angles = radiansXYZ.get128();
+    /* force the unused fourth slot to 0 before taking sine/cosine */
+    angles = spu_insert( 0.0f, angles, 3 );
+    sincosf4( angles, &s, &c );
+    negS = negatef4( s );
+    /* per-axis sine/cosine terms arranged by the shuffle macros */
+    Z0 = spu_shuffle( s, c, _VECTORMATH_SHUF_CZD0 );
+    Z1 = spu_shuffle( c, negS, _VECTORMATH_SHUF_CZD0 );
+    Y0 = spu_shuffle( negS, c, _VECTORMATH_SHUF_BBY0 );
+    Y1 = spu_shuffle( c, s, _VECTORMATH_SHUF_BBY0 );
+    /* X0 = sin of the first angle in every slot, X1 = cos of the first angle in every slot */
+    X0 = spu_shuffle( s, s, shuffle_xxxx );
+    X1 = spu_shuffle( c, c, shuffle_xxxx );
+    tmp = spu_mul( Z0, Y1 );
+    /* col0 = Z0*Y0, col1 = Z1*X1 + tmp*X0, col2 = tmp*X1 - Z1*X0 */
+    return Transform3(
+        Vector3( spu_mul( Z0, Y0 ) ),
+        Vector3( spu_madd( Z1, X1, spu_mul( tmp, X0 ) ) ),
+        Vector3( spu_nmsub( Z1, X0, spu_mul( tmp, X1 ) ) ),
+        Vector3( 0.0f )
+    );
+}
+
+/* Builds a rotation transform from Matrix3::rotation( radians, unitVec ) */
+/* with a translation of Vector3( 0.0f ).                                 */
+inline const Transform3 Transform3::rotation( float radians, Vector3 unitVec )
+{
+    const Matrix3 rotMat = Matrix3::rotation( radians, unitVec );
+    const Vector3 noOffset = Vector3( 0.0f );
+    return Transform3( rotMat, noOffset );
+}
+
+/* Builds a rotation transform from Matrix3( unitQuat ) with a */
+/* translation of Vector3( 0.0f ).                             */
+inline const Transform3 Transform3::rotation( Quat unitQuat )
+{
+    const Matrix3 rotMat = Matrix3( unitQuat );
+    const Vector3 noOffset = Vector3( 0.0f );
+    return Transform3( rotMat, noOffset );
+}
+
+/* Builds a scale transform: column k (k = 0..2) holds component k of       */
+/* scaleVec in slot k and zero elsewhere; the translation is Vector3( 0.0f ). */
+inline const Transform3 Transform3::scale( Vector3 scaleVec )
+{
+    const vec_float4 zeros = spu_splats(0.0f);
+    const vec_float4 factors = scaleVec.get128();
+    /* each mask keeps exactly one 32-bit slot of factors; the rest comes from zeros */
+    const vec_float4 diagX = spu_sel( zeros, factors, (vec_uint4)spu_maskb(0xf000) );
+    const vec_float4 diagY = spu_sel( zeros, factors, (vec_uint4)spu_maskb(0x0f00) );
+    const vec_float4 diagZ = spu_sel( zeros, factors, (vec_uint4)spu_maskb(0x00f0) );
+    return Transform3(
+        Vector3( diagX ),
+        Vector3( diagY ),
+        Vector3( diagZ ),
+        Vector3( 0.0f )
+    );
+}
+
+/* Returns tfrm with column 0, 1, 2 multiplied by the x, y, z component */
+/* of scaleVec respectively; the translation column is copied unchanged. */
+inline const Transform3 appendScale( const Transform3 & tfrm, Vector3 scaleVec )
+{
+    const Vector3 scaledCol0 = tfrm.getCol0() * scaleVec.getX( );
+    const Vector3 scaledCol1 = tfrm.getCol1() * scaleVec.getY( );
+    const Vector3 scaledCol2 = tfrm.getCol2() * scaleVec.getZ( );
+    const Vector3 keptCol3 = tfrm.getCol3();
+    return Transform3( scaledCol0, scaledCol1, scaledCol2, keptCol3 );
+}
+
+/* Returns tfrm with every column, including the translation, multiplied */
+/* per element by scaleVec.                                              */
+inline const Transform3 prependScale( Vector3 scaleVec, const Transform3 & tfrm )
+{
+    const Vector3 scaledCol0 = mulPerElem( tfrm.getCol0(), scaleVec );
+    const Vector3 scaledCol1 = mulPerElem( tfrm.getCol1(), scaleVec );
+    const Vector3 scaledCol2 = mulPerElem( tfrm.getCol2(), scaleVec );
+    const Vector3 scaledCol3 = mulPerElem( tfrm.getCol3(), scaleVec );
+    return Transform3( scaledCol0, scaledCol1, scaledCol2, scaledCol3 );
+}
+
+/* Builds a pure translation: columns 0-2 are Vector3::xAxis, yAxis, zAxis */
+/* and column 3 is translateVec.                                           */
+inline const Transform3 Transform3::translation( Vector3 translateVec )
+{
+    const Vector3 unitX = Vector3::xAxis( );
+    const Vector3 unitY = Vector3::yAxis( );
+    const Vector3 unitZ = Vector3::zAxis( );
+    return Transform3( unitX, unitY, unitZ, translateVec );
+}
+
+/* Column-wise select: each result column is the Vector3 select of the */
+/* matching columns of tfrm0 and tfrm1, controlled by select1.         */
+inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, bool select1 )
+{
+    const Vector3 chosenCol0 = select( tfrm0.getCol0(), tfrm1.getCol0(), select1 );
+    const Vector3 chosenCol1 = select( tfrm0.getCol1(), tfrm1.getCol1(), select1 );
+    const Vector3 chosenCol2 = select( tfrm0.getCol2(), tfrm1.getCol2(), select1 );
+    const Vector3 chosenCol3 = select( tfrm0.getCol3(), tfrm1.getCol3(), select1 );
+    return Transform3( chosenCol0, chosenCol1, chosenCol2, chosenCol3 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+/* Debug helper: prints the transform one row at a time, rows 0 through 2, */
+/* using the Vector4 print.                                                */
+inline void print( const Transform3 & tfrm )
+{
+    for ( int row = 0; row < 3; ++row )
+    {
+        print( tfrm.getRow( row ) );
+    }
+}
+
+/* Debug helper: prints name followed by a colon and newline, then the transform. */
+inline void print( const Transform3 & tfrm, const char * name )
+{
+    printf("%s:\n", name);
+    print( tfrm );
+}
+
+#endif
+
+/* Constructs a quaternion from a 3x3 matrix (presumably a rotation matrix;  */
+/* that is not checked). Four candidate quaternions are computed branch-free,*/
+/* one per case listed below, and the final value is picked with vector      */
+/* compares and selects. The lane layouts of the _VECTORMATH_SHUF_* macros   */
+/* are defined elsewhere -- NOTE(review): confirm against their definitions. */
+inline Quat::Quat( const Matrix3 & tfrm )
+{
+    vec_float4 res;
+    vec_float4 col0, col1, col2;
+    vec_float4 xx_yy, xx_yy_zz_xx, yy_zz_xx_yy, zz_xx_yy_zz, diagSum, diagDiff;
+    vec_float4 zy_xz_yx, yz_zx_xy, sum, diff;
+    vec_float4 radicand, invSqrt, scale;
+    vec_float4 res0, res1, res2, res3;
+    vec_float4 xx, yy, zz;
+    /* masks selecting a single 32-bit slot */
+    vec_uint4 select_x = (vec_uint4)spu_maskb( 0xf000 );
+    vec_uint4 select_y = (vec_uint4)spu_maskb( 0x0f00 );
+    vec_uint4 select_z = (vec_uint4)spu_maskb( 0x00f0 );
+    vec_uint4 select_w = (vec_uint4)spu_maskb( 0x000f );
+    /* shuffle patterns that replicate one 32-bit word into all four slots */
+    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((unsigned int)0x00010203);
+    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((unsigned int)0x04050607);
+    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((unsigned int)0x08090a0b);
+    vec_uchar16 shuffle_wwww = (vec_uchar16)spu_splats((unsigned int)0x0c0d0e0f);
+
+    col0 = tfrm.getCol0().get128();
+    col1 = tfrm.getCol1().get128();
+    col2 = tfrm.getCol2().get128();
+
+    /* four cases: */
+    /* trace > 0 */
+    /* else */
+    /*    xx largest diagonal element */
+    /*    yy largest diagonal element */
+    /*    zz largest diagonal element */
+
+    /* compute quaternion for each case */
+
+    /* gather rotated copies of the diagonal elements (per the variable names) */
+    xx_yy = spu_sel( col0, col1, select_y );
+    xx_yy_zz_xx = spu_shuffle( xx_yy, col2, _VECTORMATH_SHUF_XYCX );
+    yy_zz_xx_yy = spu_shuffle( xx_yy, col2, _VECTORMATH_SHUF_YCXY );
+    zz_xx_yy_zz = spu_shuffle( xx_yy, col2, _VECTORMATH_SHUF_CXYC );
+
+    /* radicand = 1 + (diagDiff in slots x, y, z; diagSum in slot w) */
+    diagSum = spu_add( spu_add( xx_yy_zz_xx, yy_zz_xx_yy ), zz_xx_yy_zz );
+    diagDiff = spu_sub( spu_sub( xx_yy_zz_xx, yy_zz_xx_yy ), zz_xx_yy_zz );
+    radicand = spu_add( spu_sel( diagDiff, diagSum, select_w ), spu_splats(1.0f) );
+    invSqrt = rsqrtf4( radicand );
+
+    /* gather the off-diagonal elements (per the variable names) */
+    zy_xz_yx = spu_sel( col0, col1, select_z );
+    zy_xz_yx = spu_shuffle( zy_xz_yx, col2, _VECTORMATH_SHUF_ZAY0 );
+    yz_zx_xy = spu_sel( col0, col1, select_x );
+    yz_zx_xy = spu_shuffle( yz_zx_xy, col2, _VECTORMATH_SHUF_BZX0 );
+
+    sum = spu_add( zy_xz_yx, yz_zx_xy );
+    diff = spu_sub( zy_xz_yx, yz_zx_xy );
+
+    /* scale = 0.5 / sqrt(radicand) per slot */
+    scale = spu_mul( invSqrt, spu_splats(0.5f) );
+    /* candidate k gets radicand in slot k and sum/diff terms elsewhere, */
+    /* then is multiplied by slot k of scale                             */
+    res0 = spu_shuffle( sum, diff, _VECTORMATH_SHUF_0ZYA );
+    res1 = spu_shuffle( sum, diff, _VECTORMATH_SHUF_Z0XB );
+    res2 = spu_shuffle( sum, diff, _VECTORMATH_SHUF_YX0C );
+    res3 = diff;
+    res0 = spu_sel( res0, radicand, select_x );
+    res1 = spu_sel( res1, radicand, select_y );
+    res2 = spu_sel( res2, radicand, select_z );
+    res3 = spu_sel( res3, radicand, select_w );
+    res0 = spu_mul( res0, spu_shuffle( scale, scale, shuffle_xxxx ) );
+    res1 = spu_mul( res1, spu_shuffle( scale, scale, shuffle_yyyy ) );
+    res2 = spu_mul( res2, spu_shuffle( scale, scale, shuffle_zzzz ) );
+    res3 = spu_mul( res3, spu_shuffle( scale, scale, shuffle_wwww ) );
+
+    /* determine case and select answer */
+
+    /* broadcast the three diagonal elements for the comparisons */
+    xx = spu_shuffle( col0, col0, shuffle_xxxx );
+    yy = spu_shuffle( col1, col1, shuffle_yyyy );
+    zz = spu_shuffle( col2, col2, shuffle_zzzz );
+    /* res0 by default; res1 if yy > xx; res2 if zz exceeds both xx and yy; */
+    /* res3 overrides all of them when the first slot of diagSum is > 0     */
+    res = spu_sel( res0, res1, spu_cmpgt( yy, xx ) );
+    res = spu_sel( res, res2, spu_and( spu_cmpgt( zz, xx ), spu_cmpgt( zz, yy ) ) );
+    res = spu_sel( res, res3, spu_cmpgt( spu_shuffle( diagSum, diagSum, shuffle_xxxx ), spu_splats(0.0f) ) );
+    mVec128 = res;
+}
+
+inline const Matrix3 outer( Vector3 tfrm0, Vector3 tfrm1 ) // outer product of two 3-vectors: each Matrix3 argument is tfrm0 scaled by one element of tfrm1
+{
+    return Matrix3(
+        ( tfrm0 * tfrm1.getX( ) ), // first constructor argument (presumably column 0): tfrm0 * tfrm1.x
+        ( tfrm0 * tfrm1.getY( ) ), // second: tfrm0 * tfrm1.y
+        ( tfrm0 * tfrm1.getZ( ) ) // third: tfrm0 * tfrm1.z
+    );
+}
+
+inline const Matrix4 outer( Vector4 tfrm0, Vector4 tfrm1 ) // outer product of two 4-vectors: each Matrix4 argument is tfrm0 scaled by one element of tfrm1
+{
+    return Matrix4(
+        ( tfrm0 * tfrm1.getX( ) ), // first constructor argument (presumably column 0): tfrm0 * tfrm1.x
+        ( tfrm0 * tfrm1.getY( ) ), // second: tfrm0 * tfrm1.y
+        ( tfrm0 * tfrm1.getZ( ) ), // third: tfrm0 * tfrm1.z
+        ( tfrm0 * tfrm1.getW( ) ) // fourth: tfrm0 * tfrm1.w
+    );
+}
+
+inline const Vector3 rowMul( Vector3 vec, const Matrix3 & mat ) // accumulates mcol0*vec.x + mcol1*vec.y + mcol2*vec.z over shuffled copies of mat's columns
+{
+    vec_float4 tmp0, tmp1, mcol0, mcol1, mcol2, res;
+    vec_float4 xxxx, yyyy, zzzz;
+    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203); // shuffle pattern: bytes 0-3 in every word, i.e. broadcast x
+    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607); // broadcast y
+    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b); // broadcast z
+    tmp0 = spu_shuffle( mat.getCol0().get128(), mat.getCol2().get128(), _VECTORMATH_SHUF_XAYB ); // interleave col0 and col2; by the macro name: (c0.x, c2.x, c0.y, c2.y) - TODO confirm against the macro definition
+    tmp1 = spu_shuffle( mat.getCol0().get128(), mat.getCol2().get128(), _VECTORMATH_SHUF_ZCWD ); // by the macro name: (c0.z, c2.z, c0.w, c2.w)
+    xxxx = spu_shuffle( vec.get128(), vec.get128(), shuffle_xxxx ); // vec.x in all lanes
+    mcol0 = spu_shuffle( tmp0, mat.getCol1().get128(), _VECTORMATH_SHUF_XAYB ); // merge in col1; by the macro names mcol0..2 look like rows 0..2 of mat (a transpose) - TODO confirm
+    mcol1 = spu_shuffle( tmp0, mat.getCol1().get128(), _VECTORMATH_SHUF_ZBW0 );
+    mcol2 = spu_shuffle( tmp1, mat.getCol1().get128(), _VECTORMATH_SHUF_XCY0 );
+    yyyy = spu_shuffle( vec.get128(), vec.get128(), shuffle_yyyy ); // vec.y in all lanes
+    res = spu_mul( mcol0, xxxx ); // res = mcol0 * vec.x
+    zzzz = spu_shuffle( vec.get128(), vec.get128(), shuffle_zzzz ); // vec.z in all lanes
+    res = spu_madd( mcol1, yyyy, res ); // res += mcol1 * vec.y
+    res = spu_madd( mcol2, zzzz, res ); // res += mcol2 * vec.z
+    return Vector3( res );
+}
+
+inline const Matrix3 crossMatrix( Vector3 vec ) // builds a Matrix3 from shuffles of vec and -vec; by the macro names this is the skew-symmetric cross-product matrix of vec - TODO confirm
+{
+    vec_float4 neg, res0, res1, res2;
+    neg = negatef4( vec.get128() ); // negated copy of vec, second shuffle source below
+    res0 = spu_shuffle( vec.get128(), neg, _VECTORMATH_SHUF_0ZB0 ); // by the macro name: ( 0,  z, -y, 0 )
+    res1 = spu_shuffle( vec.get128(), neg, _VECTORMATH_SHUF_C0X0 ); // by the macro name: ( -z, 0,  x, 0 )
+    res2 = spu_shuffle( vec.get128(), neg, _VECTORMATH_SHUF_YA00 ); // by the macro name: (  y, -x, 0, 0 )
+    return Matrix3(
+        Vector3( res0 ), // passed as the first, second and third Matrix3 constructor arguments
+        Vector3( res1 ),
+        Vector3( res2 )
+    );
+}
+
+inline const Matrix3 crossMatrixMul( Vector3 vec, const Matrix3 & mat ) // returns a Matrix3 built from cross( vec, column ) for each column of mat
+{
+    return Matrix3( cross( vec, mat.getCol0() ), cross( vec, mat.getCol1() ), cross( vec, mat.getCol2() ) ); // one cross product per column, passed in column order 0, 1, 2
+}
+
+} // namespace Aos
+} // namespace Vectormath
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/spu/cpp/mat_soa.h b/Extras/vectormathlibrary/include/vectormath/spu/cpp/mat_soa.h
index 8c884d593..ef43e7b00 100644
--- a/Extras/vectormathlibrary/include/vectormath/spu/cpp/mat_soa.h
+++ b/Extras/vectormathlibrary/include/vectormath/spu/cpp/mat_soa.h
@@ -1,1744 +1,1744 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_MAT_SOA_CPP_H
-#define _VECTORMATH_MAT_SOA_CPP_H
-
-namespace Vectormath {
-namespace Soa {
-
-//-----------------------------------------------------------------------------
-// Constants
-
-#define _VECTORMATH_PI_OVER_2 1.570796327f
-
-//-----------------------------------------------------------------------------
-// Definitions
-
-inline Matrix3::Matrix3( const Matrix3 & mat )
-{
-    mCol0 = mat.mCol0;
-    mCol1 = mat.mCol1;
-    mCol2 = mat.mCol2;
-}
-
-inline Matrix3::Matrix3( vec_float4 scalar )
-{
-    mCol0 = Vector3( scalar );
-    mCol1 = Vector3( scalar );
-    mCol2 = Vector3( scalar );
-}
-
-inline Matrix3::Matrix3( const Quat & unitQuat )
-{
-    vec_float4 qx, qy, qz, qw, qx2, qy2, qz2, qxqx2, qyqy2, qzqz2, qxqy2, qyqz2, qzqw2, qxqz2, qyqw2, qxqw2;
-    qx = unitQuat.getX();
-    qy = unitQuat.getY();
-    qz = unitQuat.getZ();
-    qw = unitQuat.getW();
-    qx2 = spu_add( qx, qx );
-    qy2 = spu_add( qy, qy );
-    qz2 = spu_add( qz, qz );
-    qxqx2 = spu_mul( qx, qx2 );
-    qxqy2 = spu_mul( qx, qy2 );
-    qxqz2 = spu_mul( qx, qz2 );
-    qxqw2 = spu_mul( qw, qx2 );
-    qyqy2 = spu_mul( qy, qy2 );
-    qyqz2 = spu_mul( qy, qz2 );
-    qyqw2 = spu_mul( qw, qy2 );
-    qzqz2 = spu_mul( qz, qz2 );
-    qzqw2 = spu_mul( qw, qz2 );
-    mCol0 = Vector3( spu_sub( spu_sub( spu_splats(1.0f), qyqy2 ), qzqz2 ), spu_add( qxqy2, qzqw2 ), spu_sub( qxqz2, qyqw2 ) );
-    mCol1 = Vector3( spu_sub( qxqy2, qzqw2 ), spu_sub( spu_sub( spu_splats(1.0f), qxqx2 ), qzqz2 ), spu_add( qyqz2, qxqw2 ) );
-    mCol2 = Vector3( spu_add( qxqz2, qyqw2 ), spu_sub( qyqz2, qxqw2 ), spu_sub( spu_sub( spu_splats(1.0f), qxqx2 ), qyqy2 ) );
-}
-
-inline Matrix3::Matrix3( const Vector3 & _col0, const Vector3 & _col1, const Vector3 & _col2 )
-{
-    mCol0 = _col0;
-    mCol1 = _col1;
-    mCol2 = _col2;
-}
-
-inline Matrix3::Matrix3( const Aos::Matrix3 & mat )
-{
-    mCol0 = Vector3( mat.getCol0() );
-    mCol1 = Vector3( mat.getCol1() );
-    mCol2 = Vector3( mat.getCol2() );
-}
-
-inline Matrix3::Matrix3( const Aos::Matrix3 & mat0, const Aos::Matrix3 & mat1, const Aos::Matrix3 & mat2, const Aos::Matrix3 & mat3 )
-{
-    mCol0 = Vector3( mat0.getCol0(), mat1.getCol0(), mat2.getCol0(), mat3.getCol0() );
-    mCol1 = Vector3( mat0.getCol1(), mat1.getCol1(), mat2.getCol1(), mat3.getCol1() );
-    mCol2 = Vector3( mat0.getCol2(), mat1.getCol2(), mat2.getCol2(), mat3.getCol2() );
-}
-
-inline void Matrix3::get4Aos( Aos::Matrix3 & result0, Aos::Matrix3 & result1, Aos::Matrix3 & result2, Aos::Matrix3 & result3 ) const
-{
-    Aos::Vector3 tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3;
-    mCol0.get4Aos( tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3 );
-    result0.setCol0( tmpV3_0 );
-    result1.setCol0( tmpV3_1 );
-    result2.setCol0( tmpV3_2 );
-    result3.setCol0( tmpV3_3 );
-    mCol1.get4Aos( tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3 );
-    result0.setCol1( tmpV3_0 );
-    result1.setCol1( tmpV3_1 );
-    result2.setCol1( tmpV3_2 );
-    result3.setCol1( tmpV3_3 );
-    mCol2.get4Aos( tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3 );
-    result0.setCol2( tmpV3_0 );
-    result1.setCol2( tmpV3_1 );
-    result2.setCol2( tmpV3_2 );
-    result3.setCol2( tmpV3_3 );
-}
-
-inline Matrix3 & Matrix3::setCol0( const Vector3 & _col0 )
-{
-    mCol0 = _col0;
-    return *this;
-}
-
-inline Matrix3 & Matrix3::setCol1( const Vector3 & _col1 )
-{
-    mCol1 = _col1;
-    return *this;
-}
-
-inline Matrix3 & Matrix3::setCol2( const Vector3 & _col2 )
-{
-    mCol2 = _col2;
-    return *this;
-}
-
-inline Matrix3 & Matrix3::setCol( int col, const Vector3 & vec )
-{
-    *(&mCol0 + col) = vec;
-    return *this;
-}
-
-inline Matrix3 & Matrix3::setRow( int row, const Vector3 & vec )
-{
-    mCol0.setElem( row, vec.getElem( 0 ) );
-    mCol1.setElem( row, vec.getElem( 1 ) );
-    mCol2.setElem( row, vec.getElem( 2 ) );
-    return *this;
-}
-
-inline Matrix3 & Matrix3::setElem( int col, int row, vec_float4 val )
-{
-    Vector3 tmpV3_0;
-    tmpV3_0 = this->getCol( col );
-    tmpV3_0.setElem( row, val );
-    this->setCol( col, tmpV3_0 );
-    return *this;
-}
-
-inline vec_float4 Matrix3::getElem( int col, int row ) const
-{
-    return this->getCol( col ).getElem( row );
-}
-
-inline const Vector3 Matrix3::getCol0( ) const
-{
-    return mCol0;
-}
-
-inline const Vector3 Matrix3::getCol1( ) const
-{
-    return mCol1;
-}
-
-inline const Vector3 Matrix3::getCol2( ) const
-{
-    return mCol2;
-}
-
-inline const Vector3 Matrix3::getCol( int col ) const
-{
-    return *(&mCol0 + col);
-}
-
-inline const Vector3 Matrix3::getRow( int row ) const
-{
-    return Vector3( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ) );
-}
-
-inline Vector3 & Matrix3::operator []( int col )
-{
-    return *(&mCol0 + col);
-}
-
-inline const Vector3 Matrix3::operator []( int col ) const
-{
-    return *(&mCol0 + col);
-}
-
-inline Matrix3 & Matrix3::operator =( const Matrix3 & mat )
-{
-    mCol0 = mat.mCol0;
-    mCol1 = mat.mCol1;
-    mCol2 = mat.mCol2;
-    return *this;
-}
-
-inline const Matrix3 transpose( const Matrix3 & mat )
-{
-    return Matrix3(
-        Vector3( mat.getCol0().getX(), mat.getCol1().getX(), mat.getCol2().getX() ),
-        Vector3( mat.getCol0().getY(), mat.getCol1().getY(), mat.getCol2().getY() ),
-        Vector3( mat.getCol0().getZ(), mat.getCol1().getZ(), mat.getCol2().getZ() )
-    );
-}
-
-inline const Matrix3 inverse( const Matrix3 & mat )
-{
-    Vector3 tmp0, tmp1, tmp2;
-    vec_float4 detinv;
-    tmp0 = cross( mat.getCol1(), mat.getCol2() );
-    tmp1 = cross( mat.getCol2(), mat.getCol0() );
-    tmp2 = cross( mat.getCol0(), mat.getCol1() );
-    detinv = recipf4( dot( mat.getCol2(), tmp2 ) );
-    return Matrix3(
-        Vector3( spu_mul( tmp0.getX(), detinv ), spu_mul( tmp1.getX(), detinv ), spu_mul( tmp2.getX(), detinv ) ),
-        Vector3( spu_mul( tmp0.getY(), detinv ), spu_mul( tmp1.getY(), detinv ), spu_mul( tmp2.getY(), detinv ) ),
-        Vector3( spu_mul( tmp0.getZ(), detinv ), spu_mul( tmp1.getZ(), detinv ), spu_mul( tmp2.getZ(), detinv ) )
-    );
-}
-
-inline vec_float4 determinant( const Matrix3 & mat )
-{
-    return dot( mat.getCol2(), cross( mat.getCol0(), mat.getCol1() ) );
-}
-
-inline const Matrix3 Matrix3::operator +( const Matrix3 & mat ) const
-{
-    return Matrix3(
-        ( mCol0 + mat.mCol0 ),
-        ( mCol1 + mat.mCol1 ),
-        ( mCol2 + mat.mCol2 )
-    );
-}
-
-inline const Matrix3 Matrix3::operator -( const Matrix3 & mat ) const
-{
-    return Matrix3(
-        ( mCol0 - mat.mCol0 ),
-        ( mCol1 - mat.mCol1 ),
-        ( mCol2 - mat.mCol2 )
-    );
-}
-
-inline Matrix3 & Matrix3::operator +=( const Matrix3 & mat )
-{
-    *this = *this + mat;
-    return *this;
-}
-
-inline Matrix3 & Matrix3::operator -=( const Matrix3 & mat )
-{
-    *this = *this - mat;
-    return *this;
-}
-
-inline const Matrix3 Matrix3::operator -( ) const
-{
-    return Matrix3(
-        ( -mCol0 ),
-        ( -mCol1 ),
-        ( -mCol2 )
-    );
-}
-
-inline const Matrix3 absPerElem( const Matrix3 & mat )
-{
-    return Matrix3(
-        absPerElem( mat.getCol0() ),
-        absPerElem( mat.getCol1() ),
-        absPerElem( mat.getCol2() )
-    );
-}
-
-inline const Matrix3 Matrix3::operator *( vec_float4 scalar ) const
-{
-    return Matrix3(
-        ( mCol0 * scalar ),
-        ( mCol1 * scalar ),
-        ( mCol2 * scalar )
-    );
-}
-
-inline Matrix3 & Matrix3::operator *=( vec_float4 scalar )
-{
-    *this = *this * scalar;
-    return *this;
-}
-
-inline const Matrix3 operator *( vec_float4 scalar, const Matrix3 & mat )
-{
-    return mat * scalar;
-}
-
-inline const Vector3 Matrix3::operator *( const Vector3 & vec ) const
-{
-    return Vector3(
-        spu_add( spu_add( spu_mul( mCol0.getX(), vec.getX() ), spu_mul( mCol1.getX(), vec.getY() ) ), spu_mul( mCol2.getX(), vec.getZ() ) ),
-        spu_add( spu_add( spu_mul( mCol0.getY(), vec.getX() ), spu_mul( mCol1.getY(), vec.getY() ) ), spu_mul( mCol2.getY(), vec.getZ() ) ),
-        spu_add( spu_add( spu_mul( mCol0.getZ(), vec.getX() ), spu_mul( mCol1.getZ(), vec.getY() ) ), spu_mul( mCol2.getZ(), vec.getZ() ) )
-    );
-}
-
-inline const Matrix3 Matrix3::operator *( const Matrix3 & mat ) const
-{
-    return Matrix3(
-        ( *this * mat.mCol0 ),
-        ( *this * mat.mCol1 ),
-        ( *this * mat.mCol2 )
-    );
-}
-
-inline Matrix3 & Matrix3::operator *=( const Matrix3 & mat )
-{
-    *this = *this * mat;
-    return *this;
-}
-
-inline const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 )
-{
-    return Matrix3(
-        mulPerElem( mat0.getCol0(), mat1.getCol0() ),
-        mulPerElem( mat0.getCol1(), mat1.getCol1() ),
-        mulPerElem( mat0.getCol2(), mat1.getCol2() )
-    );
-}
-
-inline const Matrix3 Matrix3::identity( )
-{
-    return Matrix3(
-        Vector3::xAxis( ),
-        Vector3::yAxis( ),
-        Vector3::zAxis( )
-    );
-}
-
-inline const Matrix3 Matrix3::rotationX( vec_float4 radians )
-{
-    vec_float4 s, c;
-    sincosf4( radians, &s, &c );
-    return Matrix3(
-        Vector3::xAxis( ),
-        Vector3( spu_splats(0.0f), c, s ),
-        Vector3( spu_splats(0.0f), negatef4( s ), c )
-    );
-}
-
-inline const Matrix3 Matrix3::rotationY( vec_float4 radians )
-{
-    vec_float4 s, c;
-    sincosf4( radians, &s, &c );
-    return Matrix3(
-        Vector3( c, spu_splats(0.0f), negatef4( s ) ),
-        Vector3::yAxis( ),
-        Vector3( s, spu_splats(0.0f), c )
-    );
-}
-
-inline const Matrix3 Matrix3::rotationZ( vec_float4 radians )
-{
-    vec_float4 s, c;
-    sincosf4( radians, &s, &c );
-    return Matrix3(
-        Vector3( c, s, spu_splats(0.0f) ),
-        Vector3( negatef4( s ), c, spu_splats(0.0f) ),
-        Vector3::zAxis( )
-    );
-}
-
-inline const Matrix3 Matrix3::rotationZYX( const Vector3 & radiansXYZ )
-{
-    vec_float4 sX, cX, sY, cY, sZ, cZ, tmp0, tmp1;
-    sincosf4( radiansXYZ.getX(), &sX, &cX );
-    sincosf4( radiansXYZ.getY(), &sY, &cY );
-    sincosf4( radiansXYZ.getZ(), &sZ, &cZ );
-    tmp0 = spu_mul( cZ, sY );
-    tmp1 = spu_mul( sZ, sY );
-    return Matrix3(
-        Vector3( spu_mul( cZ, cY ), spu_mul( sZ, cY ), negatef4( sY ) ),
-        Vector3( spu_sub( spu_mul( tmp0, sX ), spu_mul( sZ, cX ) ), spu_add( spu_mul( tmp1, sX ), spu_mul( cZ, cX ) ), spu_mul( cY, sX ) ),
-        Vector3( spu_add( spu_mul( tmp0, cX ), spu_mul( sZ, sX ) ), spu_sub( spu_mul( tmp1, cX ), spu_mul( cZ, sX ) ), spu_mul( cY, cX ) )
-    );
-}
-
-inline const Matrix3 Matrix3::rotation( vec_float4 radians, const Vector3 & unitVec )
-{
-    vec_float4 x, y, z, s, c, oneMinusC, xy, yz, zx;
-    sincosf4( radians, &s, &c );
-    x = unitVec.getX();
-    y = unitVec.getY();
-    z = unitVec.getZ();
-    xy = spu_mul( x, y );
-    yz = spu_mul( y, z );
-    zx = spu_mul( z, x );
-    oneMinusC = spu_sub( spu_splats(1.0f), c );
-    return Matrix3(
-        Vector3( spu_add( spu_mul( spu_mul( x, x ), oneMinusC ), c ), spu_add( spu_mul( xy, oneMinusC ), spu_mul( z, s ) ), spu_sub( spu_mul( zx, oneMinusC ), spu_mul( y, s ) ) ),
-        Vector3( spu_sub( spu_mul( xy, oneMinusC ), spu_mul( z, s ) ), spu_add( spu_mul( spu_mul( y, y ), oneMinusC ), c ), spu_add( spu_mul( yz, oneMinusC ), spu_mul( x, s ) ) ),
-        Vector3( spu_add( spu_mul( zx, oneMinusC ), spu_mul( y, s ) ), spu_sub( spu_mul( yz, oneMinusC ), spu_mul( x, s ) ), spu_add( spu_mul( spu_mul( z, z ), oneMinusC ), c ) )
-    );
-}
-
-inline const Matrix3 Matrix3::rotation( const Quat & unitQuat )
-{
-    return Matrix3( unitQuat );
-}
-
-inline const Matrix3 Matrix3::scale( const Vector3 & scaleVec )
-{
-    return Matrix3(
-        Vector3( scaleVec.getX(), spu_splats(0.0f), spu_splats(0.0f) ),
-        Vector3( spu_splats(0.0f), scaleVec.getY(), spu_splats(0.0f) ),
-        Vector3( spu_splats(0.0f), spu_splats(0.0f), scaleVec.getZ() )
-    );
-}
-
-inline const Matrix3 appendScale( const Matrix3 & mat, const Vector3 & scaleVec )
-{
-    return Matrix3(
-        ( mat.getCol0() * scaleVec.getX( ) ),
-        ( mat.getCol1() * scaleVec.getY( ) ),
-        ( mat.getCol2() * scaleVec.getZ( ) )
-    );
-}
-
-inline const Matrix3 prependScale( const Vector3 & scaleVec, const Matrix3 & mat )
-{
-    return Matrix3(
-        mulPerElem( mat.getCol0(), scaleVec ),
-        mulPerElem( mat.getCol1(), scaleVec ),
-        mulPerElem( mat.getCol2(), scaleVec )
-    );
-}
-
-inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, vec_uint4 select1 )
-{
-    return Matrix3(
-        select( mat0.getCol0(), mat1.getCol0(), select1 ),
-        select( mat0.getCol1(), mat1.getCol1(), select1 ),
-        select( mat0.getCol2(), mat1.getCol2(), select1 )
-    );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( const Matrix3 & mat )
-{
-    Aos::Matrix3 mat0, mat1, mat2, mat3;
-    mat.get4Aos( mat0, mat1, mat2, mat3 );
-    printf("slot 0:\n");
-    print( mat0 );
-    printf("slot 1:\n");
-    print( mat1 );
-    printf("slot 2:\n");
-    print( mat2 );
-    printf("slot 3:\n");
-    print( mat3 );
-}
-
-inline void print( const Matrix3 & mat, const char * name )
-{
-    printf("%s:\n", name);
-    print( mat );
-}
-
-#endif
-
-inline Matrix4::Matrix4( const Matrix4 & mat )
-{
-    mCol0 = mat.mCol0;
-    mCol1 = mat.mCol1;
-    mCol2 = mat.mCol2;
-    mCol3 = mat.mCol3;
-}
-
-inline Matrix4::Matrix4( vec_float4 scalar )
-{
-    mCol0 = Vector4( scalar );
-    mCol1 = Vector4( scalar );
-    mCol2 = Vector4( scalar );
-    mCol3 = Vector4( scalar );
-}
-
-inline Matrix4::Matrix4( const Transform3 & mat )
-{
-    mCol0 = Vector4( mat.getCol0(), spu_splats(0.0f) );
-    mCol1 = Vector4( mat.getCol1(), spu_splats(0.0f) );
-    mCol2 = Vector4( mat.getCol2(), spu_splats(0.0f) );
-    mCol3 = Vector4( mat.getCol3(), spu_splats(1.0f) );
-}
-
-inline Matrix4::Matrix4( const Vector4 & _col0, const Vector4 & _col1, const Vector4 & _col2, const Vector4 & _col3 )
-{
-    mCol0 = _col0;
-    mCol1 = _col1;
-    mCol2 = _col2;
-    mCol3 = _col3;
-}
-
-inline Matrix4::Matrix4( const Matrix3 & mat, const Vector3 & translateVec )
-{
-    mCol0 = Vector4( mat.getCol0(), spu_splats(0.0f) );
-    mCol1 = Vector4( mat.getCol1(), spu_splats(0.0f) );
-    mCol2 = Vector4( mat.getCol2(), spu_splats(0.0f) );
-    mCol3 = Vector4( translateVec, spu_splats(1.0f) );
-}
-
-inline Matrix4::Matrix4( const Quat & unitQuat, const Vector3 & translateVec )
-{
-    Matrix3 mat;
-    mat = Matrix3( unitQuat );
-    mCol0 = Vector4( mat.getCol0(), spu_splats(0.0f) );
-    mCol1 = Vector4( mat.getCol1(), spu_splats(0.0f) );
-    mCol2 = Vector4( mat.getCol2(), spu_splats(0.0f) );
-    mCol3 = Vector4( translateVec, spu_splats(1.0f) );
-}
-
-inline Matrix4::Matrix4( const Aos::Matrix4 & mat )
-{
-    mCol0 = Vector4( mat.getCol0() );
-    mCol1 = Vector4( mat.getCol1() );
-    mCol2 = Vector4( mat.getCol2() );
-    mCol3 = Vector4( mat.getCol3() );
-}
-
-inline Matrix4::Matrix4( const Aos::Matrix4 & mat0, const Aos::Matrix4 & mat1, const Aos::Matrix4 & mat2, const Aos::Matrix4 & mat3 )
-{
-    mCol0 = Vector4( mat0.getCol0(), mat1.getCol0(), mat2.getCol0(), mat3.getCol0() );
-    mCol1 = Vector4( mat0.getCol1(), mat1.getCol1(), mat2.getCol1(), mat3.getCol1() );
-    mCol2 = Vector4( mat0.getCol2(), mat1.getCol2(), mat2.getCol2(), mat3.getCol2() );
-    mCol3 = Vector4( mat0.getCol3(), mat1.getCol3(), mat2.getCol3(), mat3.getCol3() );
-}
-
-inline void Matrix4::get4Aos( Aos::Matrix4 & result0, Aos::Matrix4 & result1, Aos::Matrix4 & result2, Aos::Matrix4 & result3 ) const
-{
-    Aos::Vector4 tmpV4_0, tmpV4_1, tmpV4_2, tmpV4_3;
-    mCol0.get4Aos( tmpV4_0, tmpV4_1, tmpV4_2, tmpV4_3 );
-    result0.setCol0( tmpV4_0 );
-    result1.setCol0( tmpV4_1 );
-    result2.setCol0( tmpV4_2 );
-    result3.setCol0( tmpV4_3 );
-    mCol1.get4Aos( tmpV4_0, tmpV4_1, tmpV4_2, tmpV4_3 );
-    result0.setCol1( tmpV4_0 );
-    result1.setCol1( tmpV4_1 );
-    result2.setCol1( tmpV4_2 );
-    result3.setCol1( tmpV4_3 );
-    mCol2.get4Aos( tmpV4_0, tmpV4_1, tmpV4_2, tmpV4_3 );
-    result0.setCol2( tmpV4_0 );
-    result1.setCol2( tmpV4_1 );
-    result2.setCol2( tmpV4_2 );
-    result3.setCol2( tmpV4_3 );
-    mCol3.get4Aos( tmpV4_0, tmpV4_1, tmpV4_2, tmpV4_3 );
-    result0.setCol3( tmpV4_0 );
-    result1.setCol3( tmpV4_1 );
-    result2.setCol3( tmpV4_2 );
-    result3.setCol3( tmpV4_3 );
-}
-
-inline Matrix4 & Matrix4::setCol0( const Vector4 & _col0 )
-{
-    mCol0 = _col0;
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setCol1( const Vector4 & _col1 )
-{
-    mCol1 = _col1;
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setCol2( const Vector4 & _col2 )
-{
-    mCol2 = _col2;
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setCol3( const Vector4 & _col3 )
-{
-    mCol3 = _col3;
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setCol( int col, const Vector4 & vec )
-{
-    *(&mCol0 + col) = vec;
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setRow( int row, const Vector4 & vec )
-{
-    mCol0.setElem( row, vec.getElem( 0 ) );
-    mCol1.setElem( row, vec.getElem( 1 ) );
-    mCol2.setElem( row, vec.getElem( 2 ) );
-    mCol3.setElem( row, vec.getElem( 3 ) );
-    return *this;
-}
-
-inline Matrix4 & Matrix4::setElem( int col, int row, vec_float4 val )
-{
-    Vector4 tmpV3_0;
-    tmpV3_0 = this->getCol( col );
-    tmpV3_0.setElem( row, val );
-    this->setCol( col, tmpV3_0 );
-    return *this;
-}
-
-inline vec_float4 Matrix4::getElem( int col, int row ) const
-{
-    return this->getCol( col ).getElem( row );
-}
-
-inline const Vector4 Matrix4::getCol0( ) const
-{
-    return mCol0;
-}
-
-inline const Vector4 Matrix4::getCol1( ) const
-{
-    return mCol1;
-}
-
-inline const Vector4 Matrix4::getCol2( ) const
-{
-    return mCol2;
-}
-
-inline const Vector4 Matrix4::getCol3( ) const
-{
-    return mCol3;
-}
-
-inline const Vector4 Matrix4::getCol( int col ) const
-{
-    return *(&mCol0 + col);
-}
-
-inline const Vector4 Matrix4::getRow( int row ) const
-{
-    return Vector4( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ), mCol3.getElem( row ) );
-}
-
-inline Vector4 & Matrix4::operator []( int col )
-{
-    return *(&mCol0 + col);
-}
-
-inline const Vector4 Matrix4::operator []( int col ) const
-{
-    return *(&mCol0 + col);
-}
-
-inline Matrix4 & Matrix4::operator =( const Matrix4 & mat )
-{
-    mCol0 = mat.mCol0;
-    mCol1 = mat.mCol1;
-    mCol2 = mat.mCol2;
-    mCol3 = mat.mCol3;
-    return *this;
-}
-
-inline const Matrix4 transpose( const Matrix4 & mat )
-{
-    return Matrix4(
-        Vector4( mat.getCol0().getX(), mat.getCol1().getX(), mat.getCol2().getX(), mat.getCol3().getX() ),
-        Vector4( mat.getCol0().getY(), mat.getCol1().getY(), mat.getCol2().getY(), mat.getCol3().getY() ),
-        Vector4( mat.getCol0().getZ(), mat.getCol1().getZ(), mat.getCol2().getZ(), mat.getCol3().getZ() ),
-        Vector4( mat.getCol0().getW(), mat.getCol1().getW(), mat.getCol2().getW(), mat.getCol3().getW() )
-    );
-}
-
-inline const Matrix4 inverse( const Matrix4 & mat )
-{
-    Vector4 res0, res1, res2, res3;
-    vec_float4 mA, mB, mC, mD, mE, mF, mG, mH, mI, mJ, mK, mL, mM, mN, mO, mP, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, detInv;
-    mA = mat.getCol0().getX();
-    mB = mat.getCol0().getY();
-    mC = mat.getCol0().getZ();
-    mD = mat.getCol0().getW();
-    mE = mat.getCol1().getX();
-    mF = mat.getCol1().getY();
-    mG = mat.getCol1().getZ();
-    mH = mat.getCol1().getW();
-    mI = mat.getCol2().getX();
-    mJ = mat.getCol2().getY();
-    mK = mat.getCol2().getZ();
-    mL = mat.getCol2().getW();
-    mM = mat.getCol3().getX();
-    mN = mat.getCol3().getY();
-    mO = mat.getCol3().getZ();
-    mP = mat.getCol3().getW();
-    tmp0 = spu_sub( spu_mul( mK, mD ), spu_mul( mC, mL ) );
-    tmp1 = spu_sub( spu_mul( mO, mH ), spu_mul( mG, mP ) );
-    tmp2 = spu_sub( spu_mul( mB, mK ), spu_mul( mJ, mC ) );
-    tmp3 = spu_sub( spu_mul( mF, mO ), spu_mul( mN, mG ) );
-    tmp4 = spu_sub( spu_mul( mJ, mD ), spu_mul( mB, mL ) );
-    tmp5 = spu_sub( spu_mul( mN, mH ), spu_mul( mF, mP ) );
-    res0.setX( spu_sub( spu_sub( spu_mul( mJ, tmp1 ), spu_mul( mL, tmp3 ) ), spu_mul( mK, tmp5 ) ) );
-    res0.setY( spu_sub( spu_sub( spu_mul( mN, tmp0 ), spu_mul( mP, tmp2 ) ), spu_mul( mO, tmp4 ) ) );
-    res0.setZ( spu_sub( spu_add( spu_mul( mD, tmp3 ), spu_mul( mC, tmp5 ) ), spu_mul( mB, tmp1 ) ) );
-    res0.setW( spu_sub( spu_add( spu_mul( mH, tmp2 ), spu_mul( mG, tmp4 ) ), spu_mul( mF, tmp0 ) ) );
-    detInv = recipf4( spu_add( spu_add( spu_add( spu_mul( mA, res0.getX() ), spu_mul( mE, res0.getY() ) ), spu_mul( mI, res0.getZ() ) ), spu_mul( mM, res0.getW() ) ) );
-    res1.setX( spu_mul( mI, tmp1 ) );
-    res1.setY( spu_mul( mM, tmp0 ) );
-    res1.setZ( spu_mul( mA, tmp1 ) );
-    res1.setW( spu_mul( mE, tmp0 ) );
-    res3.setX( spu_mul( mI, tmp3 ) );
-    res3.setY( spu_mul( mM, tmp2 ) );
-    res3.setZ( spu_mul( mA, tmp3 ) );
-    res3.setW( spu_mul( mE, tmp2 ) );
-    res2.setX( spu_mul( mI, tmp5 ) );
-    res2.setY( spu_mul( mM, tmp4 ) );
-    res2.setZ( spu_mul( mA, tmp5 ) );
-    res2.setW( spu_mul( mE, tmp4 ) );
-    tmp0 = spu_sub( spu_mul( mI, mB ), spu_mul( mA, mJ ) );
-    tmp1 = spu_sub( spu_mul( mM, mF ), spu_mul( mE, mN ) );
-    tmp2 = spu_sub( spu_mul( mI, mD ), spu_mul( mA, mL ) );
-    tmp3 = spu_sub( spu_mul( mM, mH ), spu_mul( mE, mP ) );
-    tmp4 = spu_sub( spu_mul( mI, mC ), spu_mul( mA, mK ) );
-    tmp5 = spu_sub( spu_mul( mM, mG ), spu_mul( mE, mO ) );
-    res2.setX( spu_add( spu_sub( spu_mul( mL, tmp1 ), spu_mul( mJ, tmp3 ) ), res2.getX() ) );
-    res2.setY( spu_add( spu_sub( spu_mul( mP, tmp0 ), spu_mul( mN, tmp2 ) ), res2.getY() ) );
-    res2.setZ( spu_sub( spu_sub( spu_mul( mB, tmp3 ), spu_mul( mD, tmp1 ) ), res2.getZ() ) );
-    res2.setW( spu_sub( spu_sub( spu_mul( mF, tmp2 ), spu_mul( mH, tmp0 ) ), res2.getW() ) );
-    res3.setX( spu_add( spu_sub( spu_mul( mJ, tmp5 ), spu_mul( mK, tmp1 ) ), res3.getX() ) );
-    res3.setY( spu_add( spu_sub( spu_mul( mN, tmp4 ), spu_mul( mO, tmp0 ) ), res3.getY() ) );
-    res3.setZ( spu_sub( spu_sub( spu_mul( mC, tmp1 ), spu_mul( mB, tmp5 ) ), res3.getZ() ) );
-    res3.setW( spu_sub( spu_sub( spu_mul( mG, tmp0 ), spu_mul( mF, tmp4 ) ), res3.getW() ) );
-    res1.setX( spu_sub( spu_sub( spu_mul( mK, tmp3 ), spu_mul( mL, tmp5 ) ), res1.getX() ) );
-    res1.setY( spu_sub( spu_sub( spu_mul( mO, tmp2 ), spu_mul( mP, tmp4 ) ), res1.getY() ) );
-    res1.setZ( spu_add( spu_sub( spu_mul( mD, tmp5 ), spu_mul( mC, tmp3 ) ), res1.getZ() ) );
-    res1.setW( spu_add( spu_sub( spu_mul( mH, tmp4 ), spu_mul( mG, tmp2 ) ), res1.getW() ) );
-    return Matrix4(
-        ( res0 * detInv ),
-        ( res1 * detInv ),
-        ( res2 * detInv ),
-        ( res3 * detInv )
-    );
-}
-
-inline const Matrix4 affineInverse( const Matrix4 & mat )
-{
-    Transform3 affineMat;
-    affineMat.setCol0( mat.getCol0().getXYZ( ) );
-    affineMat.setCol1( mat.getCol1().getXYZ( ) );
-    affineMat.setCol2( mat.getCol2().getXYZ( ) );
-    affineMat.setCol3( mat.getCol3().getXYZ( ) );
-    return Matrix4( inverse( affineMat ) );
-}
-
-inline const Matrix4 orthoInverse( const Matrix4 & mat )
-{
-    Transform3 affineMat;
-    affineMat.setCol0( mat.getCol0().getXYZ( ) );
-    affineMat.setCol1( mat.getCol1().getXYZ( ) );
-    affineMat.setCol2( mat.getCol2().getXYZ( ) );
-    affineMat.setCol3( mat.getCol3().getXYZ( ) );
-    return Matrix4( orthoInverse( affineMat ) );
-}
-
-inline vec_float4 determinant( const Matrix4 & mat )
-{
-    vec_float4 dx, dy, dz, dw, mA, mB, mC, mD, mE, mF, mG, mH, mI, mJ, mK, mL, mM, mN, mO, mP, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
-    mA = mat.getCol0().getX();
-    mB = mat.getCol0().getY();
-    mC = mat.getCol0().getZ();
-    mD = mat.getCol0().getW();
-    mE = mat.getCol1().getX();
-    mF = mat.getCol1().getY();
-    mG = mat.getCol1().getZ();
-    mH = mat.getCol1().getW();
-    mI = mat.getCol2().getX();
-    mJ = mat.getCol2().getY();
-    mK = mat.getCol2().getZ();
-    mL = mat.getCol2().getW();
-    mM = mat.getCol3().getX();
-    mN = mat.getCol3().getY();
-    mO = mat.getCol3().getZ();
-    mP = mat.getCol3().getW();
-    tmp0 = spu_sub( spu_mul( mK, mD ), spu_mul( mC, mL ) );
-    tmp1 = spu_sub( spu_mul( mO, mH ), spu_mul( mG, mP ) );
-    tmp2 = spu_sub( spu_mul( mB, mK ), spu_mul( mJ, mC ) );
-    tmp3 = spu_sub( spu_mul( mF, mO ), spu_mul( mN, mG ) );
-    tmp4 = spu_sub( spu_mul( mJ, mD ), spu_mul( mB, mL ) );
-    tmp5 = spu_sub( spu_mul( mN, mH ), spu_mul( mF, mP ) );
-    dx = spu_sub( spu_sub( spu_mul( mJ, tmp1 ), spu_mul( mL, tmp3 ) ), spu_mul( mK, tmp5 ) );
-    dy = spu_sub( spu_sub( spu_mul( mN, tmp0 ), spu_mul( mP, tmp2 ) ), spu_mul( mO, tmp4 ) );
-    dz = spu_sub( spu_add( spu_mul( mD, tmp3 ), spu_mul( mC, tmp5 ) ), spu_mul( mB, tmp1 ) );
-    dw = spu_sub( spu_add( spu_mul( mH, tmp2 ), spu_mul( mG, tmp4 ) ), spu_mul( mF, tmp0 ) );
-    return spu_add( spu_add( spu_add( spu_mul( mA, dx ), spu_mul( mE, dy ) ), spu_mul( mI, dz ) ), spu_mul( mM, dw ) );
-}
-
-inline const Matrix4 Matrix4::operator +( const Matrix4 & mat ) const
-{
-    return Matrix4(
-        ( mCol0 + mat.mCol0 ),
-        ( mCol1 + mat.mCol1 ),
-        ( mCol2 + mat.mCol2 ),
-        ( mCol3 + mat.mCol3 )
-    );
-}
-
-inline const Matrix4 Matrix4::operator -( const Matrix4 & mat ) const
-{
-    return Matrix4(
-        ( mCol0 - mat.mCol0 ),
-        ( mCol1 - mat.mCol1 ),
-        ( mCol2 - mat.mCol2 ),
-        ( mCol3 - mat.mCol3 )
-    );
-}
-
-inline Matrix4 & Matrix4::operator +=( const Matrix4 & mat )
-{
-    *this = *this + mat;
-    return *this;
-}
-
-inline Matrix4 & Matrix4::operator -=( const Matrix4 & mat )
-{
-    *this = *this - mat;
-    return *this;
-}
-
-inline const Matrix4 Matrix4::operator -( ) const
-{
-    return Matrix4(
-        ( -mCol0 ),
-        ( -mCol1 ),
-        ( -mCol2 ),
-        ( -mCol3 )
-    );
-}
-
-inline const Matrix4 absPerElem( const Matrix4 & mat )
-{
-    return Matrix4(
-        absPerElem( mat.getCol0() ),
-        absPerElem( mat.getCol1() ),
-        absPerElem( mat.getCol2() ),
-        absPerElem( mat.getCol3() )
-    );
-}
-
-inline const Matrix4 Matrix4::operator *( vec_float4 scalar ) const
-{
-    return Matrix4(
-        ( mCol0 * scalar ),
-        ( mCol1 * scalar ),
-        ( mCol2 * scalar ),
-        ( mCol3 * scalar )
-    );
-}
-
-inline Matrix4 & Matrix4::operator *=( vec_float4 scalar )
-{
-    *this = *this * scalar;
-    return *this;
-}
-
-inline const Matrix4 operator *( vec_float4 scalar, const Matrix4 & mat )
-{
-    return mat * scalar;
-}
-
-inline const Vector4 Matrix4::operator *( const Vector4 & vec ) const
-{
-    return Vector4(
-        spu_add( spu_add( spu_add( spu_mul( mCol0.getX(), vec.getX() ), spu_mul( mCol1.getX(), vec.getY() ) ), spu_mul( mCol2.getX(), vec.getZ() ) ), spu_mul( mCol3.getX(), vec.getW() ) ),
-        spu_add( spu_add( spu_add( spu_mul( mCol0.getY(), vec.getX() ), spu_mul( mCol1.getY(), vec.getY() ) ), spu_mul( mCol2.getY(), vec.getZ() ) ), spu_mul( mCol3.getY(), vec.getW() ) ),
-        spu_add( spu_add( spu_add( spu_mul( mCol0.getZ(), vec.getX() ), spu_mul( mCol1.getZ(), vec.getY() ) ), spu_mul( mCol2.getZ(), vec.getZ() ) ), spu_mul( mCol3.getZ(), vec.getW() ) ),
-        spu_add( spu_add( spu_add( spu_mul( mCol0.getW(), vec.getX() ), spu_mul( mCol1.getW(), vec.getY() ) ), spu_mul( mCol2.getW(), vec.getZ() ) ), spu_mul( mCol3.getW(), vec.getW() ) )
-    );
-}
-
-inline const Vector4 Matrix4::operator *( const Vector3 & vec ) const
-{
-    return Vector4(
-        spu_add( spu_add( spu_mul( mCol0.getX(), vec.getX() ), spu_mul( mCol1.getX(), vec.getY() ) ), spu_mul( mCol2.getX(), vec.getZ() ) ),
-        spu_add( spu_add( spu_mul( mCol0.getY(), vec.getX() ), spu_mul( mCol1.getY(), vec.getY() ) ), spu_mul( mCol2.getY(), vec.getZ() ) ),
-        spu_add( spu_add( spu_mul( mCol0.getZ(), vec.getX() ), spu_mul( mCol1.getZ(), vec.getY() ) ), spu_mul( mCol2.getZ(), vec.getZ() ) ),
-        spu_add( spu_add( spu_mul( mCol0.getW(), vec.getX() ), spu_mul( mCol1.getW(), vec.getY() ) ), spu_mul( mCol2.getW(), vec.getZ() ) )
-    );
-}
-
-inline const Vector4 Matrix4::operator *( const Point3 & pnt ) const
-{
-    return Vector4(
-        spu_add( spu_add( spu_add( spu_mul( mCol0.getX(), pnt.getX() ), spu_mul( mCol1.getX(), pnt.getY() ) ), spu_mul( mCol2.getX(), pnt.getZ() ) ), mCol3.getX() ),
-        spu_add( spu_add( spu_add( spu_mul( mCol0.getY(), pnt.getX() ), spu_mul( mCol1.getY(), pnt.getY() ) ), spu_mul( mCol2.getY(), pnt.getZ() ) ), mCol3.getY() ),
-        spu_add( spu_add( spu_add( spu_mul( mCol0.getZ(), pnt.getX() ), spu_mul( mCol1.getZ(), pnt.getY() ) ), spu_mul( mCol2.getZ(), pnt.getZ() ) ), mCol3.getZ() ),
-        spu_add( spu_add( spu_add( spu_mul( mCol0.getW(), pnt.getX() ), spu_mul( mCol1.getW(), pnt.getY() ) ), spu_mul( mCol2.getW(), pnt.getZ() ) ), mCol3.getW() )
-    );
-}
-
-inline const Matrix4 Matrix4::operator *( const Matrix4 & mat ) const
-{
-    return Matrix4(
-        ( *this * mat.mCol0 ),
-        ( *this * mat.mCol1 ),
-        ( *this * mat.mCol2 ),
-        ( *this * mat.mCol3 )
-    );
-}
-
-inline Matrix4 & Matrix4::operator *=( const Matrix4 & mat )
-{
-    *this = *this * mat;
-    return *this;
-}
-
-inline const Matrix4 Matrix4::operator *( const Transform3 & tfrm ) const
-{
-    return Matrix4(
-        ( *this * tfrm.getCol0() ),
-        ( *this * tfrm.getCol1() ),
-        ( *this * tfrm.getCol2() ),
-        ( *this * Point3( tfrm.getCol3() ) )
-    );
-}
-
-inline Matrix4 & Matrix4::operator *=( const Transform3 & tfrm )
-{
-    *this = *this * tfrm;
-    return *this;
-}
-
-inline const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 )
-{
-    return Matrix4(
-        mulPerElem( mat0.getCol0(), mat1.getCol0() ),
-        mulPerElem( mat0.getCol1(), mat1.getCol1() ),
-        mulPerElem( mat0.getCol2(), mat1.getCol2() ),
-        mulPerElem( mat0.getCol3(), mat1.getCol3() )
-    );
-}
-
-inline const Matrix4 Matrix4::identity( )
-{
-    return Matrix4(
-        Vector4::xAxis( ),
-        Vector4::yAxis( ),
-        Vector4::zAxis( ),
-        Vector4::wAxis( )
-    );
-}
-
-inline Matrix4 & Matrix4::setUpper3x3( const Matrix3 & mat3 )
-{
-    mCol0.setXYZ( mat3.getCol0() );
-    mCol1.setXYZ( mat3.getCol1() );
-    mCol2.setXYZ( mat3.getCol2() );
-    return *this;
-}
-
-inline const Matrix3 Matrix4::getUpper3x3( ) const
-{
-    return Matrix3(
-        mCol0.getXYZ( ),
-        mCol1.getXYZ( ),
-        mCol2.getXYZ( )
-    );
-}
-
-inline Matrix4 & Matrix4::setTranslation( const Vector3 & translateVec )
-{
-    mCol3.setXYZ( translateVec );
-    return *this;
-}
-
-inline const Vector3 Matrix4::getTranslation( ) const
-{
-    return mCol3.getXYZ( );
-}
-
-inline const Matrix4 Matrix4::rotationX( vec_float4 radians )
-{
-    vec_float4 s, c;
-    sincosf4( radians, &s, &c );
-    return Matrix4(
-        Vector4::xAxis( ),
-        Vector4( spu_splats(0.0f), c, s, spu_splats(0.0f) ),
-        Vector4( spu_splats(0.0f), negatef4( s ), c, spu_splats(0.0f) ),
-        Vector4::wAxis( )
-    );
-}
-
-inline const Matrix4 Matrix4::rotationY( vec_float4 radians )
-{
-    vec_float4 s, c;
-    sincosf4( radians, &s, &c );
-    return Matrix4(
-        Vector4( c, spu_splats(0.0f), negatef4( s ), spu_splats(0.0f) ),
-        Vector4::yAxis( ),
-        Vector4( s, spu_splats(0.0f), c, spu_splats(0.0f) ),
-        Vector4::wAxis( )
-    );
-}
-
-inline const Matrix4 Matrix4::rotationZ( vec_float4 radians )
-{
-    vec_float4 s, c;
-    sincosf4( radians, &s, &c );
-    return Matrix4(
-        Vector4( c, s, spu_splats(0.0f), spu_splats(0.0f) ),
-        Vector4( negatef4( s ), c, spu_splats(0.0f), spu_splats(0.0f) ),
-        Vector4::zAxis( ),
-        Vector4::wAxis( )
-    );
-}
-
-inline const Matrix4 Matrix4::rotationZYX( const Vector3 & radiansXYZ )
-{
-    vec_float4 sX, cX, sY, cY, sZ, cZ, tmp0, tmp1;
-    sincosf4( radiansXYZ.getX(), &sX, &cX );
-    sincosf4( radiansXYZ.getY(), &sY, &cY );
-    sincosf4( radiansXYZ.getZ(), &sZ, &cZ );
-    tmp0 = spu_mul( cZ, sY );
-    tmp1 = spu_mul( sZ, sY );
-    return Matrix4(
-        Vector4( spu_mul( cZ, cY ), spu_mul( sZ, cY ), negatef4( sY ), spu_splats(0.0f) ),
-        Vector4( spu_sub( spu_mul( tmp0, sX ), spu_mul( sZ, cX ) ), spu_add( spu_mul( tmp1, sX ), spu_mul( cZ, cX ) ), spu_mul( cY, sX ), spu_splats(0.0f) ),
-        Vector4( spu_add( spu_mul( tmp0, cX ), spu_mul( sZ, sX ) ), spu_sub( spu_mul( tmp1, cX ), spu_mul( cZ, sX ) ), spu_mul( cY, cX ), spu_splats(0.0f) ),
-        Vector4::wAxis( )
-    );
-}
-
-inline const Matrix4 Matrix4::rotation( vec_float4 radians, const Vector3 & unitVec )
-{
-    vec_float4 x, y, z, s, c, oneMinusC, xy, yz, zx;
-    sincosf4( radians, &s, &c );
-    x = unitVec.getX();
-    y = unitVec.getY();
-    z = unitVec.getZ();
-    xy = spu_mul( x, y );
-    yz = spu_mul( y, z );
-    zx = spu_mul( z, x );
-    oneMinusC = spu_sub( spu_splats(1.0f), c );
-    return Matrix4(
-        Vector4( spu_add( spu_mul( spu_mul( x, x ), oneMinusC ), c ), spu_add( spu_mul( xy, oneMinusC ), spu_mul( z, s ) ), spu_sub( spu_mul( zx, oneMinusC ), spu_mul( y, s ) ), spu_splats(0.0f) ),
-        Vector4( spu_sub( spu_mul( xy, oneMinusC ), spu_mul( z, s ) ), spu_add( spu_mul( spu_mul( y, y ), oneMinusC ), c ), spu_add( spu_mul( yz, oneMinusC ), spu_mul( x, s ) ), spu_splats(0.0f) ),
-        Vector4( spu_add( spu_mul( zx, oneMinusC ), spu_mul( y, s ) ), spu_sub( spu_mul( yz, oneMinusC ), spu_mul( x, s ) ), spu_add( spu_mul( spu_mul( z, z ), oneMinusC ), c ), spu_splats(0.0f) ),
-        Vector4::wAxis( )
-    );
-}
-
-inline const Matrix4 Matrix4::rotation( const Quat & unitQuat )
-{
-    return Matrix4( Transform3::rotation( unitQuat ) );
-}
-
-inline const Matrix4 Matrix4::scale( const Vector3 & scaleVec )
-{
-    return Matrix4(
-        Vector4( scaleVec.getX(), spu_splats(0.0f), spu_splats(0.0f), spu_splats(0.0f) ),
-        Vector4( spu_splats(0.0f), scaleVec.getY(), spu_splats(0.0f), spu_splats(0.0f) ),
-        Vector4( spu_splats(0.0f), spu_splats(0.0f), scaleVec.getZ(), spu_splats(0.0f) ),
-        Vector4::wAxis( )
-    );
-}
-
-inline const Matrix4 appendScale( const Matrix4 & mat, const Vector3 & scaleVec )
-{
-    return Matrix4(
-        ( mat.getCol0() * scaleVec.getX( ) ),
-        ( mat.getCol1() * scaleVec.getY( ) ),
-        ( mat.getCol2() * scaleVec.getZ( ) ),
-        mat.getCol3()
-    );
-}
-
-inline const Matrix4 prependScale( const Vector3 & scaleVec, const Matrix4 & mat )
-{
-    Vector4 scale4;
-    scale4 = Vector4( scaleVec, spu_splats(1.0f) );
-    return Matrix4(
-        mulPerElem( mat.getCol0(), scale4 ),
-        mulPerElem( mat.getCol1(), scale4 ),
-        mulPerElem( mat.getCol2(), scale4 ),
-        mulPerElem( mat.getCol3(), scale4 )
-    );
-}
-
-inline const Matrix4 Matrix4::translation( const Vector3 & translateVec )
-{
-    return Matrix4(
-        Vector4::xAxis( ),
-        Vector4::yAxis( ),
-        Vector4::zAxis( ),
-        Vector4( translateVec, spu_splats(1.0f) )
-    );
-}
-
-inline const Matrix4 Matrix4::lookAt( const Point3 & eyePos, const Point3 & lookAtPos, const Vector3 & upVec )
-{
-    Matrix4 m4EyeFrame;
-    Vector3 v3X, v3Y, v3Z;
-    v3Y = normalize( upVec );
-    v3Z = normalize( ( eyePos - lookAtPos ) );
-    v3X = normalize( cross( v3Y, v3Z ) );
-    v3Y = cross( v3Z, v3X );
-    m4EyeFrame = Matrix4( Vector4( v3X ), Vector4( v3Y ), Vector4( v3Z ), Vector4( eyePos ) );
-    return orthoInverse( m4EyeFrame );
-}
-
-inline const Matrix4 Matrix4::perspective( vec_float4 fovyRadians, vec_float4 aspect, vec_float4 zNear, vec_float4 zFar )
-{
-    vec_float4 f, rangeInv;
-    f = tanf4( spu_sub( spu_splats( _VECTORMATH_PI_OVER_2 ), spu_mul( spu_splats(0.5f), fovyRadians ) ) );
-    rangeInv = recipf4( spu_sub( zNear, zFar ) );
-    return Matrix4(
-        Vector4( divf4( f, aspect ), spu_splats(0.0f), spu_splats(0.0f), spu_splats(0.0f) ),
-        Vector4( spu_splats(0.0f), f, spu_splats(0.0f), spu_splats(0.0f) ),
-        Vector4( spu_splats(0.0f), spu_splats(0.0f), spu_mul( spu_add( zNear, zFar ), rangeInv ), spu_splats(-1.0f) ),
-        Vector4( spu_splats(0.0f), spu_splats(0.0f), spu_mul( spu_mul( spu_mul( zNear, zFar ), rangeInv ), spu_splats(2.0f) ), spu_splats(0.0f) )
-    );
-}
-
-inline const Matrix4 Matrix4::frustum( vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar )
-{
-    vec_float4 sum_rl, sum_tb, sum_nf, inv_rl, inv_tb, inv_nf, n2;
-    sum_rl = spu_add( right, left );
-    sum_tb = spu_add( top, bottom );
-    sum_nf = spu_add( zNear, zFar );
-    inv_rl = recipf4( spu_sub( right, left ) );
-    inv_tb = recipf4( spu_sub( top, bottom ) );
-    inv_nf = recipf4( spu_sub( zNear, zFar ) );
-    n2 = spu_add( zNear, zNear );
-    return Matrix4(
-        Vector4( spu_mul( n2, inv_rl ), spu_splats(0.0f), spu_splats(0.0f), spu_splats(0.0f) ),
-        Vector4( spu_splats(0.0f), spu_mul( n2, inv_tb ), spu_splats(0.0f), spu_splats(0.0f) ),
-        Vector4( spu_mul( sum_rl, inv_rl ), spu_mul( sum_tb, inv_tb ), spu_mul( sum_nf, inv_nf ), spu_splats(-1.0f) ),
-        Vector4( spu_splats(0.0f), spu_splats(0.0f), spu_mul( spu_mul( n2, inv_nf ), zFar ), spu_splats(0.0f) )
-    );
-}
-
-inline const Matrix4 Matrix4::orthographic( vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar )
-{
-    vec_float4 sum_rl, sum_tb, sum_nf, inv_rl, inv_tb, inv_nf;
-    sum_rl = spu_add( right, left );
-    sum_tb = spu_add( top, bottom );
-    sum_nf = spu_add( zNear, zFar );
-    inv_rl = recipf4( spu_sub( right, left ) );
-    inv_tb = recipf4( spu_sub( top, bottom ) );
-    inv_nf = recipf4( spu_sub( zNear, zFar ) );
-    return Matrix4(
-        Vector4( spu_add( inv_rl, inv_rl ), spu_splats(0.0f), spu_splats(0.0f), spu_splats(0.0f) ),
-        Vector4( spu_splats(0.0f), spu_add( inv_tb, inv_tb ), spu_splats(0.0f), spu_splats(0.0f) ),
-        Vector4( spu_splats(0.0f), spu_splats(0.0f), spu_add( inv_nf, inv_nf ), spu_splats(0.0f) ),
-        Vector4( spu_mul( negatef4( sum_rl ), inv_rl ), spu_mul( negatef4( sum_tb ), inv_tb ), spu_mul( sum_nf, inv_nf ), spu_splats(1.0f) )
-    );
-}
-
-inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, vec_uint4 select1 )
-{
-    return Matrix4(
-        select( mat0.getCol0(), mat1.getCol0(), select1 ),
-        select( mat0.getCol1(), mat1.getCol1(), select1 ),
-        select( mat0.getCol2(), mat1.getCol2(), select1 ),
-        select( mat0.getCol3(), mat1.getCol3(), select1 )
-    );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( const Matrix4 & mat )
-{
-    Aos::Matrix4 mat0, mat1, mat2, mat3;
-    mat.get4Aos( mat0, mat1, mat2, mat3 );
-    printf("slot 0:\n");
-    print( mat0 );
-    printf("slot 1:\n");
-    print( mat1 );
-    printf("slot 2:\n");
-    print( mat2 );
-    printf("slot 3:\n");
-    print( mat3 );
-}
-
-inline void print( const Matrix4 & mat, const char * name )
-{
-    printf("%s:\n", name);
-    print( mat );
-}
-
-#endif
-
-inline Transform3::Transform3( const Transform3 & tfrm )
-{
-    mCol0 = tfrm.mCol0;
-    mCol1 = tfrm.mCol1;
-    mCol2 = tfrm.mCol2;
-    mCol3 = tfrm.mCol3;
-}
-
-inline Transform3::Transform3( vec_float4 scalar )
-{
-    mCol0 = Vector3( scalar );
-    mCol1 = Vector3( scalar );
-    mCol2 = Vector3( scalar );
-    mCol3 = Vector3( scalar );
-}
-
-inline Transform3::Transform3( const Vector3 & _col0, const Vector3 & _col1, const Vector3 & _col2, const Vector3 & _col3 )
-{
-    mCol0 = _col0;
-    mCol1 = _col1;
-    mCol2 = _col2;
-    mCol3 = _col3;
-}
-
-inline Transform3::Transform3( const Matrix3 & tfrm, const Vector3 & translateVec )
-{
-    this->setUpper3x3( tfrm );
-    this->setTranslation( translateVec );
-}
-
-inline Transform3::Transform3( const Quat & unitQuat, const Vector3 & translateVec )
-{
-    this->setUpper3x3( Matrix3( unitQuat ) );
-    this->setTranslation( translateVec );
-}
-
-inline Transform3::Transform3( const Aos::Transform3 & tfrm )
-{
-    mCol0 = Vector3( tfrm.getCol0() );
-    mCol1 = Vector3( tfrm.getCol1() );
-    mCol2 = Vector3( tfrm.getCol2() );
-    mCol3 = Vector3( tfrm.getCol3() );
-}
-
-inline Transform3::Transform3( const Aos::Transform3 & tfrm0, const Aos::Transform3 & tfrm1, const Aos::Transform3 & tfrm2, const Aos::Transform3 & tfrm3 )
-{
-    mCol0 = Vector3( tfrm0.getCol0(), tfrm1.getCol0(), tfrm2.getCol0(), tfrm3.getCol0() );
-    mCol1 = Vector3( tfrm0.getCol1(), tfrm1.getCol1(), tfrm2.getCol1(), tfrm3.getCol1() );
-    mCol2 = Vector3( tfrm0.getCol2(), tfrm1.getCol2(), tfrm2.getCol2(), tfrm3.getCol2() );
-    mCol3 = Vector3( tfrm0.getCol3(), tfrm1.getCol3(), tfrm2.getCol3(), tfrm3.getCol3() );
-}
-
-inline void Transform3::get4Aos( Aos::Transform3 & result0, Aos::Transform3 & result1, Aos::Transform3 & result2, Aos::Transform3 & result3 ) const
-{
-    Aos::Vector3 tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3;
-    mCol0.get4Aos( tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3 );
-    result0.setCol0( tmpV3_0 );
-    result1.setCol0( tmpV3_1 );
-    result2.setCol0( tmpV3_2 );
-    result3.setCol0( tmpV3_3 );
-    mCol1.get4Aos( tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3 );
-    result0.setCol1( tmpV3_0 );
-    result1.setCol1( tmpV3_1 );
-    result2.setCol1( tmpV3_2 );
-    result3.setCol1( tmpV3_3 );
-    mCol2.get4Aos( tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3 );
-    result0.setCol2( tmpV3_0 );
-    result1.setCol2( tmpV3_1 );
-    result2.setCol2( tmpV3_2 );
-    result3.setCol2( tmpV3_3 );
-    mCol3.get4Aos( tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3 );
-    result0.setCol3( tmpV3_0 );
-    result1.setCol3( tmpV3_1 );
-    result2.setCol3( tmpV3_2 );
-    result3.setCol3( tmpV3_3 );
-}
-
-inline Transform3 & Transform3::setCol0( const Vector3 & _col0 )
-{
-    mCol0 = _col0;
-    return *this;
-}
-
-inline Transform3 & Transform3::setCol1( const Vector3 & _col1 )
-{
-    mCol1 = _col1;
-    return *this;
-}
-
-inline Transform3 & Transform3::setCol2( const Vector3 & _col2 )
-{
-    mCol2 = _col2;
-    return *this;
-}
-
-inline Transform3 & Transform3::setCol3( const Vector3 & _col3 )
-{
-    mCol3 = _col3;
-    return *this;
-}
-
-inline Transform3 & Transform3::setCol( int col, const Vector3 & vec )
-{
-    *(&mCol0 + col) = vec;
-    return *this;
-}
-
-inline Transform3 & Transform3::setRow( int row, const Vector4 & vec )
-{
-    mCol0.setElem( row, vec.getElem( 0 ) );
-    mCol1.setElem( row, vec.getElem( 1 ) );
-    mCol2.setElem( row, vec.getElem( 2 ) );
-    mCol3.setElem( row, vec.getElem( 3 ) );
-    return *this;
-}
-
-inline Transform3 & Transform3::setElem( int col, int row, vec_float4 val )
-{
-    Vector3 tmpV3_0;
-    tmpV3_0 = this->getCol( col );
-    tmpV3_0.setElem( row, val );
-    this->setCol( col, tmpV3_0 );
-    return *this;
-}
-
-inline vec_float4 Transform3::getElem( int col, int row ) const
-{
-    return this->getCol( col ).getElem( row );
-}
-
-inline const Vector3 Transform3::getCol0( ) const
-{
-    return mCol0;
-}
-
-inline const Vector3 Transform3::getCol1( ) const
-{
-    return mCol1;
-}
-
-inline const Vector3 Transform3::getCol2( ) const
-{
-    return mCol2;
-}
-
-inline const Vector3 Transform3::getCol3( ) const
-{
-    return mCol3;
-}
-
-inline const Vector3 Transform3::getCol( int col ) const
-{
-    return *(&mCol0 + col);
-}
-
-inline const Vector4 Transform3::getRow( int row ) const
-{
-    return Vector4( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ), mCol3.getElem( row ) );
-}
-
-inline Vector3 & Transform3::operator []( int col )
-{
-    return *(&mCol0 + col);
-}
-
-inline const Vector3 Transform3::operator []( int col ) const
-{
-    return *(&mCol0 + col);
-}
-
-inline Transform3 & Transform3::operator =( const Transform3 & tfrm )
-{
-    mCol0 = tfrm.mCol0;
-    mCol1 = tfrm.mCol1;
-    mCol2 = tfrm.mCol2;
-    mCol3 = tfrm.mCol3;
-    return *this;
-}
-
-inline const Transform3 inverse( const Transform3 & tfrm )
-{
-    Vector3 tmp0, tmp1, tmp2, inv0, inv1, inv2;
-    vec_float4 detinv;
-    tmp0 = cross( tfrm.getCol1(), tfrm.getCol2() );
-    tmp1 = cross( tfrm.getCol2(), tfrm.getCol0() );
-    tmp2 = cross( tfrm.getCol0(), tfrm.getCol1() );
-    detinv = recipf4( dot( tfrm.getCol2(), tmp2 ) );
-    inv0 = Vector3( spu_mul( tmp0.getX(), detinv ), spu_mul( tmp1.getX(), detinv ), spu_mul( tmp2.getX(), detinv ) );
-    inv1 = Vector3( spu_mul( tmp0.getY(), detinv ), spu_mul( tmp1.getY(), detinv ), spu_mul( tmp2.getY(), detinv ) );
-    inv2 = Vector3( spu_mul( tmp0.getZ(), detinv ), spu_mul( tmp1.getZ(), detinv ), spu_mul( tmp2.getZ(), detinv ) );
-    return Transform3(
-        inv0,
-        inv1,
-        inv2,
-        Vector3( ( -( ( inv0 * tfrm.getCol3().getX() ) + ( ( inv1 * tfrm.getCol3().getY() ) + ( inv2 * tfrm.getCol3().getZ() ) ) ) ) )
-    );
-}
-
-inline const Transform3 orthoInverse( const Transform3 & tfrm )
-{
-    Vector3 inv0, inv1, inv2;
-    inv0 = Vector3( tfrm.getCol0().getX(), tfrm.getCol1().getX(), tfrm.getCol2().getX() );
-    inv1 = Vector3( tfrm.getCol0().getY(), tfrm.getCol1().getY(), tfrm.getCol2().getY() );
-    inv2 = Vector3( tfrm.getCol0().getZ(), tfrm.getCol1().getZ(), tfrm.getCol2().getZ() );
-    return Transform3(
-        inv0,
-        inv1,
-        inv2,
-        Vector3( ( -( ( inv0 * tfrm.getCol3().getX() ) + ( ( inv1 * tfrm.getCol3().getY() ) + ( inv2 * tfrm.getCol3().getZ() ) ) ) ) )
-    );
-}
-
-inline const Transform3 absPerElem( const Transform3 & tfrm )
-{
-    return Transform3(
-        absPerElem( tfrm.getCol0() ),
-        absPerElem( tfrm.getCol1() ),
-        absPerElem( tfrm.getCol2() ),
-        absPerElem( tfrm.getCol3() )
-    );
-}
-
-inline const Vector3 Transform3::operator *( const Vector3 & vec ) const
-{
-    return Vector3(
-        spu_add( spu_add( spu_mul( mCol0.getX(), vec.getX() ), spu_mul( mCol1.getX(), vec.getY() ) ), spu_mul( mCol2.getX(), vec.getZ() ) ),
-        spu_add( spu_add( spu_mul( mCol0.getY(), vec.getX() ), spu_mul( mCol1.getY(), vec.getY() ) ), spu_mul( mCol2.getY(), vec.getZ() ) ),
-        spu_add( spu_add( spu_mul( mCol0.getZ(), vec.getX() ), spu_mul( mCol1.getZ(), vec.getY() ) ), spu_mul( mCol2.getZ(), vec.getZ() ) )
-    );
-}
-
-inline const Point3 Transform3::operator *( const Point3 & pnt ) const
-{
-    return Point3(
-        spu_add( spu_add( spu_add( spu_mul( mCol0.getX(), pnt.getX() ), spu_mul( mCol1.getX(), pnt.getY() ) ), spu_mul( mCol2.getX(), pnt.getZ() ) ), mCol3.getX() ),
-        spu_add( spu_add( spu_add( spu_mul( mCol0.getY(), pnt.getX() ), spu_mul( mCol1.getY(), pnt.getY() ) ), spu_mul( mCol2.getY(), pnt.getZ() ) ), mCol3.getY() ),
-        spu_add( spu_add( spu_add( spu_mul( mCol0.getZ(), pnt.getX() ), spu_mul( mCol1.getZ(), pnt.getY() ) ), spu_mul( mCol2.getZ(), pnt.getZ() ) ), mCol3.getZ() )
-    );
-}
-
-inline const Transform3 Transform3::operator *( const Transform3 & tfrm ) const
-{
-    return Transform3(
-        ( *this * tfrm.mCol0 ),
-        ( *this * tfrm.mCol1 ),
-        ( *this * tfrm.mCol2 ),
-        Vector3( ( *this * Point3( tfrm.mCol3 ) ) )
-    );
-}
-
-inline Transform3 & Transform3::operator *=( const Transform3 & tfrm )
-{
-    *this = *this * tfrm;
-    return *this;
-}
-
-inline const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 )
-{
-    return Transform3(
-        mulPerElem( tfrm0.getCol0(), tfrm1.getCol0() ),
-        mulPerElem( tfrm0.getCol1(), tfrm1.getCol1() ),
-        mulPerElem( tfrm0.getCol2(), tfrm1.getCol2() ),
-        mulPerElem( tfrm0.getCol3(), tfrm1.getCol3() )
-    );
-}
-
-inline const Transform3 Transform3::identity( )
-{
-    return Transform3(
-        Vector3::xAxis( ),
-        Vector3::yAxis( ),
-        Vector3::zAxis( ),
-        Vector3( spu_splats(0.0f) )
-    );
-}
-
-inline Transform3 & Transform3::setUpper3x3( const Matrix3 & tfrm )
-{
-    mCol0 = tfrm.getCol0();
-    mCol1 = tfrm.getCol1();
-    mCol2 = tfrm.getCol2();
-    return *this;
-}
-
-inline const Matrix3 Transform3::getUpper3x3( ) const
-{
-    return Matrix3( mCol0, mCol1, mCol2 );
-}
-
-inline Transform3 & Transform3::setTranslation( const Vector3 & translateVec )
-{
-    mCol3 = translateVec;
-    return *this;
-}
-
-inline const Vector3 Transform3::getTranslation( ) const
-{
-    return mCol3;
-}
-
-inline const Transform3 Transform3::rotationX( vec_float4 radians )
-{
-    vec_float4 s, c;
-    sincosf4( radians, &s, &c );
-    return Transform3(
-        Vector3::xAxis( ),
-        Vector3( spu_splats(0.0f), c, s ),
-        Vector3( spu_splats(0.0f), negatef4( s ), c ),
-        Vector3( spu_splats(0.0f) )
-    );
-}
-
-inline const Transform3 Transform3::rotationY( vec_float4 radians )
-{
-    vec_float4 s, c;
-    sincosf4( radians, &s, &c );
-    return Transform3(
-        Vector3( c, spu_splats(0.0f), negatef4( s ) ),
-        Vector3::yAxis( ),
-        Vector3( s, spu_splats(0.0f), c ),
-        Vector3( spu_splats(0.0f) )
-    );
-}
-
-inline const Transform3 Transform3::rotationZ( vec_float4 radians )
-{
-    vec_float4 s, c;
-    sincosf4( radians, &s, &c );
-    return Transform3(
-        Vector3( c, s, spu_splats(0.0f) ),
-        Vector3( negatef4( s ), c, spu_splats(0.0f) ),
-        Vector3::zAxis( ),
-        Vector3( spu_splats(0.0f) )
-    );
-}
-
-inline const Transform3 Transform3::rotationZYX( const Vector3 & radiansXYZ )
-{
-    vec_float4 sX, cX, sY, cY, sZ, cZ, tmp0, tmp1;
-    sincosf4( radiansXYZ.getX(), &sX, &cX );
-    sincosf4( radiansXYZ.getY(), &sY, &cY );
-    sincosf4( radiansXYZ.getZ(), &sZ, &cZ );
-    tmp0 = spu_mul( cZ, sY );
-    tmp1 = spu_mul( sZ, sY );
-    return Transform3(
-        Vector3( spu_mul( cZ, cY ), spu_mul( sZ, cY ), negatef4( sY ) ),
-        Vector3( spu_sub( spu_mul( tmp0, sX ), spu_mul( sZ, cX ) ), spu_add( spu_mul( tmp1, sX ), spu_mul( cZ, cX ) ), spu_mul( cY, sX ) ),
-        Vector3( spu_add( spu_mul( tmp0, cX ), spu_mul( sZ, sX ) ), spu_sub( spu_mul( tmp1, cX ), spu_mul( cZ, sX ) ), spu_mul( cY, cX ) ),
-        Vector3( spu_splats(0.0f) )
-    );
-}
-
-inline const Transform3 Transform3::rotation( vec_float4 radians, const Vector3 & unitVec )
-{
-    return Transform3( Matrix3::rotation( radians, unitVec ), Vector3( spu_splats(0.0f) ) );
-}
-
-inline const Transform3 Transform3::rotation( const Quat & unitQuat )
-{
-    return Transform3( Matrix3( unitQuat ), Vector3( spu_splats(0.0f) ) );
-}
-
-inline const Transform3 Transform3::scale( const Vector3 & scaleVec )
-{
-    return Transform3(
-        Vector3( scaleVec.getX(), spu_splats(0.0f), spu_splats(0.0f) ),
-        Vector3( spu_splats(0.0f), scaleVec.getY(), spu_splats(0.0f) ),
-        Vector3( spu_splats(0.0f), spu_splats(0.0f), scaleVec.getZ() ),
-        Vector3( spu_splats(0.0f) )
-    );
-}
-
-inline const Transform3 appendScale( const Transform3 & tfrm, const Vector3 & scaleVec )
-{
-    return Transform3(
-        ( tfrm.getCol0() * scaleVec.getX( ) ),
-        ( tfrm.getCol1() * scaleVec.getY( ) ),
-        ( tfrm.getCol2() * scaleVec.getZ( ) ),
-        tfrm.getCol3()
-    );
-}
-
-inline const Transform3 prependScale( const Vector3 & scaleVec, const Transform3 & tfrm )
-{
-    return Transform3(
-        mulPerElem( tfrm.getCol0(), scaleVec ),
-        mulPerElem( tfrm.getCol1(), scaleVec ),
-        mulPerElem( tfrm.getCol2(), scaleVec ),
-        mulPerElem( tfrm.getCol3(), scaleVec )
-    );
-}
-
-inline const Transform3 Transform3::translation( const Vector3 & translateVec )
-{
-    return Transform3(
-        Vector3::xAxis( ),
-        Vector3::yAxis( ),
-        Vector3::zAxis( ),
-        translateVec
-    );
-}
-
-inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, vec_uint4 select1 )
-{
-    return Transform3(
-        select( tfrm0.getCol0(), tfrm1.getCol0(), select1 ),
-        select( tfrm0.getCol1(), tfrm1.getCol1(), select1 ),
-        select( tfrm0.getCol2(), tfrm1.getCol2(), select1 ),
-        select( tfrm0.getCol3(), tfrm1.getCol3(), select1 )
-    );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( const Transform3 & tfrm )
-{
-    Aos::Transform3 mat0, mat1, mat2, mat3;
-    tfrm.get4Aos( mat0, mat1, mat2, mat3 );
-    printf("slot 0:\n");
-    print( mat0 );
-    printf("slot 1:\n");
-    print( mat1 );
-    printf("slot 2:\n");
-    print( mat2 );
-    printf("slot 3:\n");
-    print( mat3 );
-}
-
-inline void print( const Transform3 & tfrm, const char * name )
-{
-    printf("%s:\n", name);
-    print( tfrm );
-}
-
-#endif
-
-inline Quat::Quat( const Matrix3 & tfrm )
-{
-    vec_float4 trace, radicand, scale, xx, yx, zx, xy, yy, zy, xz, yz, zz, tmpx, tmpy, tmpz, tmpw, qx, qy, qz, qw;
-    vec_uint4 negTrace, ZgtX, ZgtY, YgtX;
-    vec_uint4 largestXorY, largestYorZ, largestZorX;
-
-    xx = tfrm.getCol0().getX();
-    yx = tfrm.getCol0().getY();
-    zx = tfrm.getCol0().getZ();
-    xy = tfrm.getCol1().getX();
-    yy = tfrm.getCol1().getY();
-    zy = tfrm.getCol1().getZ();
-    xz = tfrm.getCol2().getX();
-    yz = tfrm.getCol2().getY();
-    zz = tfrm.getCol2().getZ();
-
-    trace = spu_add( spu_add( xx, yy ), zz );
-
-    negTrace = spu_cmpgt( spu_splats(0.0f), trace );
-    ZgtX = spu_cmpgt( zz, xx );
-    ZgtY = spu_cmpgt( zz, yy );
-    YgtX = spu_cmpgt( yy, xx );
-    largestXorY = spu_and( negTrace, spu_nand( ZgtX, ZgtY ) );
-    largestYorZ = spu_and( negTrace, spu_or( YgtX, ZgtX ) );
-    largestZorX = spu_and( negTrace, spu_orc( ZgtY, YgtX ) );
-    
-    zz = spu_sel( zz, negatef4(zz), largestXorY );
-    xy = spu_sel( xy, negatef4(xy), largestXorY );
-    xx = spu_sel( xx, negatef4(xx), largestYorZ );
-    yz = spu_sel( yz, negatef4(yz), largestYorZ );
-    yy = spu_sel( yy, negatef4(yy), largestZorX );
-    zx = spu_sel( zx, negatef4(zx), largestZorX );
-
-    radicand = spu_add( spu_add( spu_add( xx, yy ), zz ), spu_splats(1.0f) );
-    scale = spu_mul( spu_splats(0.5f), rsqrtf4( radicand ) );
-
-    tmpx = spu_mul( spu_sub( zy, yz ), scale );
-    tmpy = spu_mul( spu_sub( xz, zx ), scale );
-    tmpz = spu_mul( spu_sub( yx, xy ), scale );
-    tmpw = spu_mul( radicand, scale );
-    qx = tmpx;
-    qy = tmpy;
-    qz = tmpz;
-    qw = tmpw;
-
-    qx = spu_sel( qx, tmpw, largestXorY );
-    qy = spu_sel( qy, tmpz, largestXorY );
-    qz = spu_sel( qz, tmpy, largestXorY );
-    qw = spu_sel( qw, tmpx, largestXorY );
-    tmpx = qx;
-    tmpz = qz;
-    qx = spu_sel( qx, qy, largestYorZ );
-    qy = spu_sel( qy, tmpx, largestYorZ );
-    qz = spu_sel( qz, qw, largestYorZ );
-    qw = spu_sel( qw, tmpz, largestYorZ );
-
-    mX = qx;
-    mY = qy;
-    mZ = qz;
-    mW = qw;
-}
-
-inline const Matrix3 outer( const Vector3 & tfrm0, const Vector3 & tfrm1 )
-{
-    return Matrix3(
-        ( tfrm0 * tfrm1.getX( ) ),
-        ( tfrm0 * tfrm1.getY( ) ),
-        ( tfrm0 * tfrm1.getZ( ) )
-    );
-}
-
-inline const Matrix4 outer( const Vector4 & tfrm0, const Vector4 & tfrm1 )
-{
-    return Matrix4(
-        ( tfrm0 * tfrm1.getX( ) ),
-        ( tfrm0 * tfrm1.getY( ) ),
-        ( tfrm0 * tfrm1.getZ( ) ),
-        ( tfrm0 * tfrm1.getW( ) )
-    );
-}
-
-inline const Vector3 rowMul( const Vector3 & vec, const Matrix3 & mat )
-{
-    return Vector3(
-        spu_add( spu_add( spu_mul( vec.getX(), mat.getCol0().getX() ), spu_mul( vec.getY(), mat.getCol0().getY() ) ), spu_mul( vec.getZ(), mat.getCol0().getZ() ) ),
-        spu_add( spu_add( spu_mul( vec.getX(), mat.getCol1().getX() ), spu_mul( vec.getY(), mat.getCol1().getY() ) ), spu_mul( vec.getZ(), mat.getCol1().getZ() ) ),
-        spu_add( spu_add( spu_mul( vec.getX(), mat.getCol2().getX() ), spu_mul( vec.getY(), mat.getCol2().getY() ) ), spu_mul( vec.getZ(), mat.getCol2().getZ() ) )
-    );
-}
-
-inline const Matrix3 crossMatrix( const Vector3 & vec )
-{
-    return Matrix3(
-        Vector3( spu_splats(0.0f), vec.getZ(), negatef4( vec.getY() ) ),
-        Vector3( negatef4( vec.getZ() ), spu_splats(0.0f), vec.getX() ),
-        Vector3( vec.getY(), negatef4( vec.getX() ), spu_splats(0.0f) )
-    );
-}
-
-inline const Matrix3 crossMatrixMul( const Vector3 & vec, const Matrix3 & mat )
-{
-    return Matrix3( cross( vec, mat.getCol0() ), cross( vec, mat.getCol1() ), cross( vec, mat.getCol2() ) );
-}
-
-} // namespace Soa
-} // namespace Vectormath
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_MAT_SOA_CPP_H
+#define _VECTORMATH_MAT_SOA_CPP_H
+
+namespace Vectormath {
+namespace Soa {
+
+//-----------------------------------------------------------------------------
+// Constants
+
+#define _VECTORMATH_PI_OVER_2 1.570796327f
+
+//-----------------------------------------------------------------------------
+// Definitions
+
+// Copy constructor: each column is copy-initialized from the matching column of mat.
+inline Matrix3::Matrix3( const Matrix3 & mat )
+    : mCol0( mat.mCol0 ),
+      mCol1( mat.mCol1 ),
+      mCol2( mat.mCol2 )
+{
+}
+
+// Builds every column from Vector3( scalar ).
+inline Matrix3::Matrix3( vec_float4 scalar )
+    : mCol0( scalar ),
+      mCol1( scalar ),
+      mCol2( scalar )
+{
+}
+
+inline Matrix3::Matrix3( const Quat & unitQuat )  // Builds the three columns from the x/y/z/w components of unitQuat; the quaternion is not normalized or checked here.
+{
+    vec_float4 qx, qy, qz, qw, qx2, qy2, qz2, qxqx2, qyqy2, qzqz2, qxqy2, qyqz2, qzqw2, qxqz2, qyqw2, qxqw2;
+    qx = unitQuat.getX();
+    qy = unitQuat.getY();
+    qz = unitQuat.getZ();
+    qw = unitQuat.getW();
+    qx2 = spu_add( qx, qx );  // qx2/qy2/qz2: each component added to itself (2*q)
+    qy2 = spu_add( qy, qy );
+    qz2 = spu_add( qz, qz );
+    qxqx2 = spu_mul( qx, qx2 );  // products of one component with a doubled component
+    qxqy2 = spu_mul( qx, qy2 );
+    qxqz2 = spu_mul( qx, qz2 );
+    qxqw2 = spu_mul( qw, qx2 );  // the *qw2 terms are computed as qw times the doubled x/y/z component
+    qyqy2 = spu_mul( qy, qy2 );
+    qyqz2 = spu_mul( qy, qz2 );
+    qyqw2 = spu_mul( qw, qy2 );
+    qzqz2 = spu_mul( qz, qz2 );
+    qzqw2 = spu_mul( qw, qz2 );
+    mCol0 = Vector3( spu_sub( spu_sub( spu_splats(1.0f), qyqy2 ), qzqz2 ), spu_add( qxqy2, qzqw2 ), spu_sub( qxqz2, qyqw2 ) );  // diagonal entry: 1 - 2y^2 - 2z^2
+    mCol1 = Vector3( spu_sub( qxqy2, qzqw2 ), spu_sub( spu_sub( spu_splats(1.0f), qxqx2 ), qzqz2 ), spu_add( qyqz2, qxqw2 ) );  // diagonal entry: 1 - 2x^2 - 2z^2
+    mCol2 = Vector3( spu_add( qxqz2, qyqw2 ), spu_sub( qyqz2, qxqw2 ), spu_sub( spu_sub( spu_splats(1.0f), qxqx2 ), qyqy2 ) );  // diagonal entry: 1 - 2x^2 - 2y^2
+}
+
+// Builds the matrix directly from its three column vectors.
+inline Matrix3::Matrix3( const Vector3 & _col0, const Vector3 & _col1, const Vector3 & _col2 )
+    : mCol0( _col0 ),
+      mCol1( _col1 ),
+      mCol2( _col2 )
+{
+}
+
+// Builds each SoA column from the matching column of a single Aos::Matrix3.
+inline Matrix3::Matrix3( const Aos::Matrix3 & mat )
+    : mCol0( mat.getCol0() ),
+      mCol1( mat.getCol1() ),
+      mCol2( mat.getCol2() )
+{
+}
+
+// Builds each SoA column from the matching columns of four Aos::Matrix3 inputs.
+inline Matrix3::Matrix3( const Aos::Matrix3 & mat0, const Aos::Matrix3 & mat1, const Aos::Matrix3 & mat2, const Aos::Matrix3 & mat3 )
+    : mCol0( mat0.getCol0(), mat1.getCol0(), mat2.getCol0(), mat3.getCol0() ),
+      mCol1( mat0.getCol1(), mat1.getCol1(), mat2.getCol1(), mat3.getCol1() ),
+      mCol2( mat0.getCol2(), mat1.getCol2(), mat2.getCol2(), mat3.getCol2() )
+{
+}
+
+// Writes this matrix into four Aos::Matrix3 results, one column at a time:
+// each column's get4Aos() yields four Aos::Vector3 values, and the k-th value
+// is stored as that column of result<k>.
+inline void Matrix3::get4Aos( Aos::Matrix3 & result0, Aos::Matrix3 & result1, Aos::Matrix3 & result2, Aos::Matrix3 & result3 ) const
+{
+    Aos::Vector3 slot0, slot1, slot2, slot3;
+
+    // column 0
+    mCol0.get4Aos( slot0, slot1, slot2, slot3 );
+    result0.setCol0( slot0 );
+    result1.setCol0( slot1 );
+    result2.setCol0( slot2 );
+    result3.setCol0( slot3 );
+
+    // column 1
+    mCol1.get4Aos( slot0, slot1, slot2, slot3 );
+    result0.setCol1( slot0 );
+    result1.setCol1( slot1 );
+    result2.setCol1( slot2 );
+    result3.setCol1( slot3 );
+
+    // column 2
+    mCol2.get4Aos( slot0, slot1, slot2, slot3 );
+    result0.setCol2( slot0 );
+    result1.setCol2( slot1 );
+    result2.setCol2( slot2 );
+    result3.setCol2( slot3 );
+}
+
+// Replaces column 0 and returns this matrix so calls can be chained.
+inline Matrix3 & Matrix3::setCol0( const Vector3 & _col0 )
+{
+    Matrix3 & self = *this;
+    self.mCol0 = _col0;
+    return self;
+}
+
+// Replaces column 1 and returns this matrix so calls can be chained.
+inline Matrix3 & Matrix3::setCol1( const Vector3 & _col1 )
+{
+    Matrix3 & self = *this;
+    self.mCol1 = _col1;
+    return self;
+}
+
+// Replaces column 2 and returns this matrix so calls can be chained.
+inline Matrix3 & Matrix3::setCol2( const Vector3 & _col2 )
+{
+    Matrix3 & self = *this;
+    self.mCol2 = _col2;
+    return self;
+}
+
+// Replaces the column selected by col. The column is reached by offsetting
+// from the address of mCol0; col is not range-checked.
+inline Matrix3 & Matrix3::setCol( int col, const Vector3 & vec )
+{
+    Vector3 * const firstCol = &mCol0;
+    firstCol[col] = vec;
+    return *this;
+}
+
+inline Matrix3 & Matrix3::setRow( int row, const Vector3 & vec )  // Writes element k of vec into element `row` of column k (k = 0..2); returns *this.
+{
+    mCol0.setElem( row, vec.getElem( 0 ) );  // each element of vec is read immediately before its write, not up front
+    mCol1.setElem( row, vec.getElem( 1 ) );
+    mCol2.setElem( row, vec.getElem( 2 ) );
+    return *this;
+}
+
+// Sets one element: copies out the selected column, updates its `row`
+// element, then stores the column back.
+inline Matrix3 & Matrix3::setElem( int col, int row, vec_float4 val )
+{
+    Vector3 column = this->getCol( col );
+    column.setElem( row, val );
+    this->setCol( col, column );
+    return *this;
+}
+
+// Reads one element: fetch the column, then the requested row of it.
+inline vec_float4 Matrix3::getElem( int col, int row ) const
+{
+    const Vector3 column = this->getCol( col );
+    return column.getElem( row );
+}
+
+// Returns a copy of column 0.
+inline const Vector3 Matrix3::getCol0( ) const
+{
+    return this->mCol0;
+}
+
+// Returns a copy of column 1.
+inline const Vector3 Matrix3::getCol1( ) const
+{
+    return this->mCol1;
+}
+
+// Returns a copy of column 2.
+inline const Vector3 Matrix3::getCol2( ) const
+{
+    return this->mCol2;
+}
+
+// Returns a copy of the column selected by col. The column is reached by
+// offsetting from the address of mCol0; col is not range-checked.
+inline const Vector3 Matrix3::getCol( int col ) const
+{
+    const Vector3 * const firstCol = &mCol0;
+    return firstCol[col];
+}
+
+// Gathers element `row` of each column into a new Vector3.
+inline const Vector3 Matrix3::getRow( int row ) const
+{
+    const vec_float4 fromCol0 = mCol0.getElem( row );
+    const vec_float4 fromCol1 = mCol1.getElem( row );
+    const vec_float4 fromCol2 = mCol2.getElem( row );
+    return Vector3( fromCol0, fromCol1, fromCol2 );
+}
+
+// Mutable column access. The column is reached by offsetting from the
+// address of mCol0; col is not range-checked.
+inline Vector3 & Matrix3::operator []( int col )
+{
+    Vector3 * const firstCol = &mCol0;
+    return firstCol[col];
+}
+
+// Read-only column access; returns a copy of the selected column.
+// col is not range-checked.
+inline const Vector3 Matrix3::operator []( int col ) const
+{
+    const Vector3 * const firstCol = &mCol0;
+    return firstCol[col];
+}
+
+// Copy assignment: overwrites each column with the matching column of mat.
+inline Matrix3 & Matrix3::operator =( const Matrix3 & mat )
+{
+    Matrix3 & self = *this;
+    self.mCol0 = mat.mCol0;
+    self.mCol1 = mat.mCol1;
+    self.mCol2 = mat.mCol2;
+    return self;
+}
+
+// Transpose: column k of the result collects component k (X, Y, Z) of each
+// input column.
+inline const Matrix3 transpose( const Matrix3 & mat )
+{
+    const Vector3 c0 = mat.getCol0();
+    const Vector3 c1 = mat.getCol1();
+    const Vector3 c2 = mat.getCol2();
+    const Vector3 rowX( c0.getX(), c1.getX(), c2.getX() );
+    const Vector3 rowY( c0.getY(), c1.getY(), c2.getY() );
+    const Vector3 rowZ( c0.getZ(), c1.getZ(), c2.getZ() );
+    return Matrix3( rowX, rowY, rowZ );
+}
+
+inline const Matrix3 inverse( const Matrix3 & mat )  // Inverse via cross products of the columns scaled by the reciprocal determinant; no check for a zero determinant.
+{
+    Vector3 tmp0, tmp1, tmp2;
+    vec_float4 detinv;
+    tmp0 = cross( mat.getCol1(), mat.getCol2() );
+    tmp1 = cross( mat.getCol2(), mat.getCol0() );
+    tmp2 = cross( mat.getCol0(), mat.getCol1() );
+    detinv = recipf4( dot( mat.getCol2(), tmp2 ) );  // same dot/cross expression that determinant( Matrix3 ) returns, passed through recipf4
+    return Matrix3(
+        Vector3( spu_mul( tmp0.getX(), detinv ), spu_mul( tmp1.getX(), detinv ), spu_mul( tmp2.getX(), detinv ) ),  // result columns take the X, Y, Z components of tmp0..tmp2, i.e. tmp0..tmp2 become rows
+        Vector3( spu_mul( tmp0.getY(), detinv ), spu_mul( tmp1.getY(), detinv ), spu_mul( tmp2.getY(), detinv ) ),
+        Vector3( spu_mul( tmp0.getZ(), detinv ), spu_mul( tmp1.getZ(), detinv ), spu_mul( tmp2.getZ(), detinv ) )
+    );
+}
+
+// Determinant as a triple product: dot( col2, cross( col0, col1 ) ).
+inline vec_float4 determinant( const Matrix3 & mat )
+{
+    const Vector3 col0CrossCol1 = cross( mat.getCol0(), mat.getCol1() );
+    return dot( mat.getCol2(), col0CrossCol1 );
+}
+
+// Column-wise sum of this matrix and mat.
+inline const Matrix3 Matrix3::operator +( const Matrix3 & mat ) const
+{
+    const Vector3 sum0 = mCol0 + mat.mCol0;
+    const Vector3 sum1 = mCol1 + mat.mCol1;
+    const Vector3 sum2 = mCol2 + mat.mCol2;
+    return Matrix3( sum0, sum1, sum2 );
+}
+
+// Column-wise difference of this matrix and mat.
+inline const Matrix3 Matrix3::operator -( const Matrix3 & mat ) const
+{
+    const Vector3 diff0 = mCol0 - mat.mCol0;
+    const Vector3 diff1 = mCol1 - mat.mCol1;
+    const Vector3 diff2 = mCol2 - mat.mCol2;
+    return Matrix3( diff0, diff1, diff2 );
+}
+
+// In-place addition, implemented through operator + and assignment.
+inline Matrix3 & Matrix3::operator +=( const Matrix3 & mat )
+{
+    return *this = *this + mat;
+}
+
+// In-place subtraction, implemented through operator - and assignment.
+inline Matrix3 & Matrix3::operator -=( const Matrix3 & mat )
+{
+    return *this = *this - mat;
+}
+
+// Unary minus: negates each column.
+inline const Matrix3 Matrix3::operator -( ) const
+{
+    const Vector3 neg0 = -mCol0;
+    const Vector3 neg1 = -mCol1;
+    const Vector3 neg2 = -mCol2;
+    return Matrix3( neg0, neg1, neg2 );
+}
+
+// Applies the Vector3 absPerElem to each column of mat.
+inline const Matrix3 absPerElem( const Matrix3 & mat )
+{
+    const Vector3 abs0 = absPerElem( mat.getCol0() );
+    const Vector3 abs1 = absPerElem( mat.getCol1() );
+    const Vector3 abs2 = absPerElem( mat.getCol2() );
+    return Matrix3( abs0, abs1, abs2 );
+}
+
+// Multiplies each column by scalar.
+inline const Matrix3 Matrix3::operator *( vec_float4 scalar ) const
+{
+    const Vector3 scaled0 = mCol0 * scalar;
+    const Vector3 scaled1 = mCol1 * scalar;
+    const Vector3 scaled2 = mCol2 * scalar;
+    return Matrix3( scaled0, scaled1, scaled2 );
+}
+
+// In-place scalar multiply, implemented through operator * and assignment.
+inline Matrix3 & Matrix3::operator *=( vec_float4 scalar )
+{
+    return *this = *this * scalar;
+}
+
+// scalar * matrix: forwards to Matrix3::operator *( vec_float4 ).
+inline const Matrix3 operator *( vec_float4 scalar, const Matrix3 & mat )
+{
+    const Matrix3 scaled = mat * scalar;
+    return scaled;
+}
+
+inline const Vector3 Matrix3::operator *( const Vector3 & vec ) const  // Matrix * vector: each result component sums the three column components weighted by vec's X, Y, Z.
+{
+    return Vector3(
+        spu_add( spu_add( spu_mul( mCol0.getX(), vec.getX() ), spu_mul( mCol1.getX(), vec.getY() ) ), spu_mul( mCol2.getX(), vec.getZ() ) ),  // result X; summed as (col0 + col1) + col2
+        spu_add( spu_add( spu_mul( mCol0.getY(), vec.getX() ), spu_mul( mCol1.getY(), vec.getY() ) ), spu_mul( mCol2.getY(), vec.getZ() ) ),  // result Y
+        spu_add( spu_add( spu_mul( mCol0.getZ(), vec.getX() ), spu_mul( mCol1.getZ(), vec.getY() ) ), spu_mul( mCol2.getZ(), vec.getZ() ) )  // result Z
+    );
+}
+
+// Matrix product: each result column is this matrix times the matching
+// column of mat.
+inline const Matrix3 Matrix3::operator *( const Matrix3 & mat ) const
+{
+    const Vector3 prod0 = *this * mat.mCol0;
+    const Vector3 prod1 = *this * mat.mCol1;
+    const Vector3 prod2 = *this * mat.mCol2;
+    return Matrix3( prod0, prod1, prod2 );
+}
+
+// In-place matrix multiply (this = this * mat).
+inline Matrix3 & Matrix3::operator *=( const Matrix3 & mat )
+{
+    return *this = *this * mat;
+}
+
+// Applies the Vector3 mulPerElem to each pair of matching columns.
+inline const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 )
+{
+    const Vector3 prod0 = mulPerElem( mat0.getCol0(), mat1.getCol0() );
+    const Vector3 prod1 = mulPerElem( mat0.getCol1(), mat1.getCol1() );
+    const Vector3 prod2 = mulPerElem( mat0.getCol2(), mat1.getCol2() );
+    return Matrix3( prod0, prod1, prod2 );
+}
+
+// Matrix whose columns are Vector3::xAxis(), yAxis() and zAxis().
+inline const Matrix3 Matrix3::identity( )
+{
+    const Vector3 axisX = Vector3::xAxis( );
+    const Vector3 axisY = Vector3::yAxis( );
+    const Vector3 axisZ = Vector3::zAxis( );
+    return Matrix3( axisX, axisY, axisZ );
+}
+
+// Rotation about the x axis: column 0 is the x axis, the other two columns
+// are built from the sine and cosine of radians.
+inline const Matrix3 Matrix3::rotationX( vec_float4 radians )
+{
+    vec_float4 sinA, cosA;
+    sincosf4( radians, &sinA, &cosA );
+    const vec_float4 zero = spu_splats(0.0f);
+    return Matrix3(
+        Vector3::xAxis( ),
+        Vector3( zero, cosA, sinA ),
+        Vector3( zero, negatef4( sinA ), cosA )
+    );
+}
+
+// Rotation about the y axis: column 1 is the y axis, the other two columns
+// are built from the sine and cosine of radians.
+inline const Matrix3 Matrix3::rotationY( vec_float4 radians )
+{
+    vec_float4 sinA, cosA;
+    sincosf4( radians, &sinA, &cosA );
+    const vec_float4 zero = spu_splats(0.0f);
+    return Matrix3(
+        Vector3( cosA, zero, negatef4( sinA ) ),
+        Vector3::yAxis( ),
+        Vector3( sinA, zero, cosA )
+    );
+}
+
+// Rotation about the z axis: column 2 is the z axis, the other two columns
+// are built from the sine and cosine of radians.
+inline const Matrix3 Matrix3::rotationZ( vec_float4 radians )
+{
+    vec_float4 sinA, cosA;
+    sincosf4( radians, &sinA, &cosA );
+    const vec_float4 zero = spu_splats(0.0f);
+    return Matrix3(
+        Vector3( cosA, sinA, zero ),
+        Vector3( negatef4( sinA ), cosA, zero ),
+        Vector3::zAxis( )
+    );
+}
+
+inline const Matrix3 Matrix3::rotationZYX( const Vector3 & radiansXYZ )  // Builds a rotation from three angles taken from the X, Y and Z components of radiansXYZ.
+{
+    vec_float4 sX, cX, sY, cY, sZ, cZ, tmp0, tmp1;
+    sincosf4( radiansXYZ.getX(), &sX, &cX );  // sine/cosine of each angle
+    sincosf4( radiansXYZ.getY(), &sY, &cY );
+    sincosf4( radiansXYZ.getZ(), &sZ, &cZ );
+    tmp0 = spu_mul( cZ, sY );  // shared sub-products reused by columns 1 and 2
+    tmp1 = spu_mul( sZ, sY );
+    return Matrix3(
+        Vector3( spu_mul( cZ, cY ), spu_mul( sZ, cY ), negatef4( sY ) ),
+        Vector3( spu_sub( spu_mul( tmp0, sX ), spu_mul( sZ, cX ) ), spu_add( spu_mul( tmp1, sX ), spu_mul( cZ, cX ) ), spu_mul( cY, sX ) ),
+        Vector3( spu_add( spu_mul( tmp0, cX ), spu_mul( sZ, sX ) ), spu_sub( spu_mul( tmp1, cX ), spu_mul( cZ, sX ) ), spu_mul( cY, cX ) )
+    );
+}
+
+inline const Matrix3 Matrix3::rotation( vec_float4 radians, const Vector3 & unitVec )  // Rotation by radians about the axis unitVec; unitVec is used as given (not normalized here).
+{
+    vec_float4 x, y, z, s, c, oneMinusC, xy, yz, zx;
+    sincosf4( radians, &s, &c );
+    x = unitVec.getX();
+    y = unitVec.getY();
+    z = unitVec.getZ();
+    xy = spu_mul( x, y );  // pairwise axis products shared by the off-diagonal terms
+    yz = spu_mul( y, z );
+    zx = spu_mul( z, x );
+    oneMinusC = spu_sub( spu_splats(1.0f), c );
+    return Matrix3(
+        Vector3( spu_add( spu_mul( spu_mul( x, x ), oneMinusC ), c ), spu_add( spu_mul( xy, oneMinusC ), spu_mul( z, s ) ), spu_sub( spu_mul( zx, oneMinusC ), spu_mul( y, s ) ) ),  // diagonal entries have the form a*a*(1-c) + c
+        Vector3( spu_sub( spu_mul( xy, oneMinusC ), spu_mul( z, s ) ), spu_add( spu_mul( spu_mul( y, y ), oneMinusC ), c ), spu_add( spu_mul( yz, oneMinusC ), spu_mul( x, s ) ) ),
+        Vector3( spu_add( spu_mul( zx, oneMinusC ), spu_mul( y, s ) ), spu_sub( spu_mul( yz, oneMinusC ), spu_mul( x, s ) ), spu_add( spu_mul( spu_mul( z, z ), oneMinusC ), c ) )
+    );
+}
+
+// Rotation from a quaternion: forwards to the Matrix3( const Quat & ) constructor.
+inline const Matrix3 Matrix3::rotation( const Quat & unitQuat )
+{
+    const Matrix3 fromQuat( unitQuat );
+    return fromQuat;
+}
+
+// Diagonal matrix with scaleVec's X, Y, Z on the diagonal and zero elsewhere.
+inline const Matrix3 Matrix3::scale( const Vector3 & scaleVec )
+{
+    const vec_float4 zero = spu_splats(0.0f);
+    return Matrix3(
+        Vector3( scaleVec.getX(), zero, zero ),
+        Vector3( zero, scaleVec.getY(), zero ),
+        Vector3( zero, zero, scaleVec.getZ() )
+    );
+}
+
+// Multiplies column 0/1/2 of mat by the X/Y/Z component of scaleVec.
+inline const Matrix3 appendScale( const Matrix3 & mat, const Vector3 & scaleVec )
+{
+    const Vector3 scaled0 = mat.getCol0() * scaleVec.getX( );
+    const Vector3 scaled1 = mat.getCol1() * scaleVec.getY( );
+    const Vector3 scaled2 = mat.getCol2() * scaleVec.getZ( );
+    return Matrix3( scaled0, scaled1, scaled2 );
+}
+
+// Applies mulPerElem( column, scaleVec ) to every column of mat.
+inline const Matrix3 prependScale( const Vector3 & scaleVec, const Matrix3 & mat )
+{
+    const Vector3 scaled0 = mulPerElem( mat.getCol0(), scaleVec );
+    const Vector3 scaled1 = mulPerElem( mat.getCol1(), scaleVec );
+    const Vector3 scaled2 = mulPerElem( mat.getCol2(), scaleVec );
+    return Matrix3( scaled0, scaled1, scaled2 );
+}
+
+// Applies the Vector3 select to each pair of matching columns, passing the
+// same select1 mask to all three.
+inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, vec_uint4 select1 )
+{
+    const Vector3 picked0 = select( mat0.getCol0(), mat1.getCol0(), select1 );
+    const Vector3 picked1 = select( mat0.getCol1(), mat1.getCol1(), select1 );
+    const Vector3 picked2 = select( mat0.getCol2(), mat1.getCol2(), select1 );
+    return Matrix3( picked0, picked1, picked2 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+// Debug helper: splits mat into four Aos::Matrix3 values with get4Aos() and
+// prints each one under a "slot N:" heading.
+inline void print( const Matrix3 & mat )
+{
+    Aos::Matrix3 slot0, slot1, slot2, slot3;
+    mat.get4Aos( slot0, slot1, slot2, slot3 );
+
+    printf("slot 0:\n");
+    print( slot0 );
+
+    printf("slot 1:\n");
+    print( slot1 );
+
+    printf("slot 2:\n");
+    print( slot2 );
+
+    printf("slot 3:\n");
+    print( slot3 );
+}
+
+inline void print( const Matrix3 & mat, const char * name )  // Debug helper: prints "<name>:" on its own line, then the matrix via print( mat ).
+{
+    printf("%s:\n", name);
+    print( mat );
+}
+
+#endif
+
+// Copy constructor: each column is copy-initialized from the matching column of mat.
+inline Matrix4::Matrix4( const Matrix4 & mat )
+    : mCol0( mat.mCol0 ),
+      mCol1( mat.mCol1 ),
+      mCol2( mat.mCol2 ),
+      mCol3( mat.mCol3 )
+{
+}
+
+// Builds every column from Vector4( scalar ).
+inline Matrix4::Matrix4( vec_float4 scalar )
+    : mCol0( scalar ),
+      mCol1( scalar ),
+      mCol2( scalar ),
+      mCol3( scalar )
+{
+}
+
+// Extends a Transform3 to 4x4: columns 0..2 get a fourth component of 0 and
+// column 3 gets a fourth component of 1.
+inline Matrix4::Matrix4( const Transform3 & mat )
+    : mCol0( mat.getCol0(), spu_splats(0.0f) ),
+      mCol1( mat.getCol1(), spu_splats(0.0f) ),
+      mCol2( mat.getCol2(), spu_splats(0.0f) ),
+      mCol3( mat.getCol3(), spu_splats(1.0f) )
+{
+}
+
+// Builds the matrix directly from its four column vectors.
+inline Matrix4::Matrix4( const Vector4 & _col0, const Vector4 & _col1, const Vector4 & _col2, const Vector4 & _col3 )
+    : mCol0( _col0 ),
+      mCol1( _col1 ),
+      mCol2( _col2 ),
+      mCol3( _col3 )
+{
+}
+
+// Builds a 4x4 from a 3x3 and a translation: the 3x3 columns get a fourth
+// component of 0, and translateVec becomes column 3 with a fourth component of 1.
+inline Matrix4::Matrix4( const Matrix3 & mat, const Vector3 & translateVec )
+    : mCol0( mat.getCol0(), spu_splats(0.0f) ),
+      mCol1( mat.getCol1(), spu_splats(0.0f) ),
+      mCol2( mat.getCol2(), spu_splats(0.0f) ),
+      mCol3( translateVec, spu_splats(1.0f) )
+{
+}
+
+// Builds a 4x4 from a quaternion and a translation: the quaternion is first
+// converted with Matrix3( unitQuat ); its columns get a fourth component of 0,
+// and translateVec becomes column 3 with a fourth component of 1.
+inline Matrix4::Matrix4( const Quat & unitQuat, const Vector3 & translateVec )
+{
+    const Matrix3 rot( unitQuat );
+    const vec_float4 zero = spu_splats(0.0f);
+    mCol0 = Vector4( rot.getCol0(), zero );
+    mCol1 = Vector4( rot.getCol1(), zero );
+    mCol2 = Vector4( rot.getCol2(), zero );
+    mCol3 = Vector4( translateVec, spu_splats(1.0f) );
+}
+
+// Builds each SoA column from the matching column of a single Aos::Matrix4.
+inline Matrix4::Matrix4( const Aos::Matrix4 & mat )
+    : mCol0( mat.getCol0() ),
+      mCol1( mat.getCol1() ),
+      mCol2( mat.getCol2() ),
+      mCol3( mat.getCol3() )
+{
+}
+
+// Builds each SoA column from the matching columns of four Aos::Matrix4 inputs.
+inline Matrix4::Matrix4( const Aos::Matrix4 & mat0, const Aos::Matrix4 & mat1, const Aos::Matrix4 & mat2, const Aos::Matrix4 & mat3 )
+    : mCol0( mat0.getCol0(), mat1.getCol0(), mat2.getCol0(), mat3.getCol0() ),
+      mCol1( mat0.getCol1(), mat1.getCol1(), mat2.getCol1(), mat3.getCol1() ),
+      mCol2( mat0.getCol2(), mat1.getCol2(), mat2.getCol2(), mat3.getCol2() ),
+      mCol3( mat0.getCol3(), mat1.getCol3(), mat2.getCol3(), mat3.getCol3() )
+{
+}
+
+// Writes this matrix into four Aos::Matrix4 results, one column at a time:
+// each column's get4Aos() yields four Aos::Vector4 values, and the k-th value
+// is stored as that column of result<k>.
+inline void Matrix4::get4Aos( Aos::Matrix4 & result0, Aos::Matrix4 & result1, Aos::Matrix4 & result2, Aos::Matrix4 & result3 ) const
+{
+    Aos::Vector4 slot0, slot1, slot2, slot3;
+
+    // column 0
+    mCol0.get4Aos( slot0, slot1, slot2, slot3 );
+    result0.setCol0( slot0 );
+    result1.setCol0( slot1 );
+    result2.setCol0( slot2 );
+    result3.setCol0( slot3 );
+
+    // column 1
+    mCol1.get4Aos( slot0, slot1, slot2, slot3 );
+    result0.setCol1( slot0 );
+    result1.setCol1( slot1 );
+    result2.setCol1( slot2 );
+    result3.setCol1( slot3 );
+
+    // column 2
+    mCol2.get4Aos( slot0, slot1, slot2, slot3 );
+    result0.setCol2( slot0 );
+    result1.setCol2( slot1 );
+    result2.setCol2( slot2 );
+    result3.setCol2( slot3 );
+
+    // column 3
+    mCol3.get4Aos( slot0, slot1, slot2, slot3 );
+    result0.setCol3( slot0 );
+    result1.setCol3( slot1 );
+    result2.setCol3( slot2 );
+    result3.setCol3( slot3 );
+}
+
+// Replaces column 0 and returns this matrix so calls can be chained.
+inline Matrix4 & Matrix4::setCol0( const Vector4 & _col0 )
+{
+    Matrix4 & self = *this;
+    self.mCol0 = _col0;
+    return self;
+}
+
+// Replaces column 1 and returns this matrix so calls can be chained.
+inline Matrix4 & Matrix4::setCol1( const Vector4 & _col1 )
+{
+    Matrix4 & self = *this;
+    self.mCol1 = _col1;
+    return self;
+}
+
+// Replaces column 2 and returns this matrix so calls can be chained.
+inline Matrix4 & Matrix4::setCol2( const Vector4 & _col2 )
+{
+    Matrix4 & self = *this;
+    self.mCol2 = _col2;
+    return self;
+}
+
+// Replaces column 3 and returns this matrix so calls can be chained.
+inline Matrix4 & Matrix4::setCol3( const Vector4 & _col3 )
+{
+    Matrix4 & self = *this;
+    self.mCol3 = _col3;
+    return self;
+}
+
+// Replaces the column selected by col. The column is reached by offsetting
+// from the address of mCol0; col is not range-checked.
+inline Matrix4 & Matrix4::setCol( int col, const Vector4 & vec )
+{
+    Vector4 * const firstCol = &mCol0;
+    firstCol[col] = vec;
+    return *this;
+}
+
+inline Matrix4 & Matrix4::setRow( int row, const Vector4 & vec )  // Writes element k of vec into element `row` of column k (k = 0..3); returns *this.
+{
+    mCol0.setElem( row, vec.getElem( 0 ) );  // each element of vec is read immediately before its write, not up front
+    mCol1.setElem( row, vec.getElem( 1 ) );
+    mCol2.setElem( row, vec.getElem( 2 ) );
+    mCol3.setElem( row, vec.getElem( 3 ) );
+    return *this;
+}
+
+// Sets one element: copies out the selected column, updates its `row`
+// element, then stores the column back.
+inline Matrix4 & Matrix4::setElem( int col, int row, vec_float4 val )
+{
+    Vector4 column = this->getCol( col );
+    column.setElem( row, val );
+    this->setCol( col, column );
+    return *this;
+}
+
+// Reads one element: fetch the column, then the requested row of it.
+inline vec_float4 Matrix4::getElem( int col, int row ) const
+{
+    const Vector4 column = this->getCol( col );
+    return column.getElem( row );
+}
+
+// Returns a copy of column 0.
+inline const Vector4 Matrix4::getCol0( ) const
+{
+    return this->mCol0;
+}
+
+// Returns a copy of column 1.
+inline const Vector4 Matrix4::getCol1( ) const
+{
+    return this->mCol1;
+}
+
+// Returns a copy of column 2.
+inline const Vector4 Matrix4::getCol2( ) const
+{
+    return this->mCol2;
+}
+
+// Returns a copy of column 3.
+inline const Vector4 Matrix4::getCol3( ) const
+{
+    return this->mCol3;
+}
+
+// Returns a copy of the column selected by col. The column is reached by
+// offsetting from the address of mCol0; col is not range-checked.
+inline const Vector4 Matrix4::getCol( int col ) const
+{
+    const Vector4 * const firstCol = &mCol0;
+    return firstCol[col];
+}
+
+// Gathers element `row` of each column into a new Vector4.
+inline const Vector4 Matrix4::getRow( int row ) const
+{
+    const vec_float4 fromCol0 = mCol0.getElem( row );
+    const vec_float4 fromCol1 = mCol1.getElem( row );
+    const vec_float4 fromCol2 = mCol2.getElem( row );
+    const vec_float4 fromCol3 = mCol3.getElem( row );
+    return Vector4( fromCol0, fromCol1, fromCol2, fromCol3 );
+}
+
+// Mutable column access. The column is reached by offsetting from the
+// address of mCol0; col is not range-checked.
+inline Vector4 & Matrix4::operator []( int col )
+{
+    Vector4 * const firstCol = &mCol0;
+    return firstCol[col];
+}
+
+// Read-only column access; returns a copy of the selected column.
+// col is not range-checked.
+inline const Vector4 Matrix4::operator []( int col ) const
+{
+    const Vector4 * const firstCol = &mCol0;
+    return firstCol[col];
+}
+
+// Copy assignment: overwrites each column with the matching column of mat.
+inline Matrix4 & Matrix4::operator =( const Matrix4 & mat )
+{
+    Matrix4 & self = *this;
+    self.mCol0 = mat.mCol0;
+    self.mCol1 = mat.mCol1;
+    self.mCol2 = mat.mCol2;
+    self.mCol3 = mat.mCol3;
+    return self;
+}
+
+// Transpose: column k of the result collects component k (X, Y, Z, W) of
+// each input column.
+inline const Matrix4 transpose( const Matrix4 & mat )
+{
+    const Vector4 c0 = mat.getCol0();
+    const Vector4 c1 = mat.getCol1();
+    const Vector4 c2 = mat.getCol2();
+    const Vector4 c3 = mat.getCol3();
+    const Vector4 rowX( c0.getX(), c1.getX(), c2.getX(), c3.getX() );
+    const Vector4 rowY( c0.getY(), c1.getY(), c2.getY(), c3.getY() );
+    const Vector4 rowZ( c0.getZ(), c1.getZ(), c2.getZ(), c3.getZ() );
+    const Vector4 rowW( c0.getW(), c1.getW(), c2.getW(), c3.getW() );
+    return Matrix4( rowX, rowY, rowZ, rowW );
+}
+
+inline const Matrix4 inverse( const Matrix4 & mat )  // General 4x4 inverse: builds four result columns from products of 2x2 sub-determinants, then scales them by the reciprocal determinant; no check for a zero determinant.
+{
+    Vector4 res0, res1, res2, res3;
+    vec_float4 mA, mB, mC, mD, mE, mF, mG, mH, mI, mJ, mK, mL, mM, mN, mO, mP, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, detInv;
+    mA = mat.getCol0().getX();  // mA..mD = column 0, mE..mH = column 1, mI..mL = column 2, mM..mP = column 3
+    mB = mat.getCol0().getY();
+    mC = mat.getCol0().getZ();
+    mD = mat.getCol0().getW();
+    mE = mat.getCol1().getX();
+    mF = mat.getCol1().getY();
+    mG = mat.getCol1().getZ();
+    mH = mat.getCol1().getW();
+    mI = mat.getCol2().getX();
+    mJ = mat.getCol2().getY();
+    mK = mat.getCol2().getZ();
+    mL = mat.getCol2().getW();
+    mM = mat.getCol3().getX();
+    mN = mat.getCol3().getY();
+    mO = mat.getCol3().getZ();
+    mP = mat.getCol3().getW();
+    tmp0 = spu_sub( spu_mul( mK, mD ), spu_mul( mC, mL ) );  // first set of two-term differences of element products
+    tmp1 = spu_sub( spu_mul( mO, mH ), spu_mul( mG, mP ) );
+    tmp2 = spu_sub( spu_mul( mB, mK ), spu_mul( mJ, mC ) );
+    tmp3 = spu_sub( spu_mul( mF, mO ), spu_mul( mN, mG ) );
+    tmp4 = spu_sub( spu_mul( mJ, mD ), spu_mul( mB, mL ) );
+    tmp5 = spu_sub( spu_mul( mN, mH ), spu_mul( mF, mP ) );
+    res0.setX( spu_sub( spu_sub( spu_mul( mJ, tmp1 ), spu_mul( mL, tmp3 ) ), spu_mul( mK, tmp5 ) ) );  // res0 is complete here and also feeds the determinant below
+    res0.setY( spu_sub( spu_sub( spu_mul( mN, tmp0 ), spu_mul( mP, tmp2 ) ), spu_mul( mO, tmp4 ) ) );
+    res0.setZ( spu_sub( spu_add( spu_mul( mD, tmp3 ), spu_mul( mC, tmp5 ) ), spu_mul( mB, tmp1 ) ) );
+    res0.setW( spu_sub( spu_add( spu_mul( mH, tmp2 ), spu_mul( mG, tmp4 ) ), spu_mul( mF, tmp0 ) ) );
+    detInv = recipf4( spu_add( spu_add( spu_add( spu_mul( mA, res0.getX() ), spu_mul( mE, res0.getY() ) ), spu_mul( mI, res0.getZ() ) ), spu_mul( mM, res0.getW() ) ) );  // reciprocal of mA*res0.x + mE*res0.y + mI*res0.z + mM*res0.w
+    res1.setX( spu_mul( mI, tmp1 ) );  // partial terms for res1..res3, using the first tmp set before it is overwritten
+    res1.setY( spu_mul( mM, tmp0 ) );
+    res1.setZ( spu_mul( mA, tmp1 ) );
+    res1.setW( spu_mul( mE, tmp0 ) );
+    res3.setX( spu_mul( mI, tmp3 ) );
+    res3.setY( spu_mul( mM, tmp2 ) );
+    res3.setZ( spu_mul( mA, tmp3 ) );
+    res3.setW( spu_mul( mE, tmp2 ) );
+    res2.setX( spu_mul( mI, tmp5 ) );
+    res2.setY( spu_mul( mM, tmp4 ) );
+    res2.setZ( spu_mul( mA, tmp5 ) );
+    res2.setW( spu_mul( mE, tmp4 ) );
+    tmp0 = spu_sub( spu_mul( mI, mB ), spu_mul( mA, mJ ) );  // tmp0..tmp5 are reused for a second set of two-term differences
+    tmp1 = spu_sub( spu_mul( mM, mF ), spu_mul( mE, mN ) );
+    tmp2 = spu_sub( spu_mul( mI, mD ), spu_mul( mA, mL ) );
+    tmp3 = spu_sub( spu_mul( mM, mH ), spu_mul( mE, mP ) );
+    tmp4 = spu_sub( spu_mul( mI, mC ), spu_mul( mA, mK ) );
+    tmp5 = spu_sub( spu_mul( mM, mG ), spu_mul( mE, mO ) );
+    res2.setX( spu_add( spu_sub( spu_mul( mL, tmp1 ), spu_mul( mJ, tmp3 ) ), res2.getX() ) );  // finish res2, res3, res1 by combining the new terms with the stored partial terms
+    res2.setY( spu_add( spu_sub( spu_mul( mP, tmp0 ), spu_mul( mN, tmp2 ) ), res2.getY() ) );
+    res2.setZ( spu_sub( spu_sub( spu_mul( mB, tmp3 ), spu_mul( mD, tmp1 ) ), res2.getZ() ) );
+    res2.setW( spu_sub( spu_sub( spu_mul( mF, tmp2 ), spu_mul( mH, tmp0 ) ), res2.getW() ) );
+    res3.setX( spu_add( spu_sub( spu_mul( mJ, tmp5 ), spu_mul( mK, tmp1 ) ), res3.getX() ) );
+    res3.setY( spu_add( spu_sub( spu_mul( mN, tmp4 ), spu_mul( mO, tmp0 ) ), res3.getY() ) );
+    res3.setZ( spu_sub( spu_sub( spu_mul( mC, tmp1 ), spu_mul( mB, tmp5 ) ), res3.getZ() ) );
+    res3.setW( spu_sub( spu_sub( spu_mul( mG, tmp0 ), spu_mul( mF, tmp4 ) ), res3.getW() ) );
+    res1.setX( spu_sub( spu_sub( spu_mul( mK, tmp3 ), spu_mul( mL, tmp5 ) ), res1.getX() ) );
+    res1.setY( spu_sub( spu_sub( spu_mul( mO, tmp2 ), spu_mul( mP, tmp4 ) ), res1.getY() ) );
+    res1.setZ( spu_add( spu_sub( spu_mul( mD, tmp5 ), spu_mul( mC, tmp3 ) ), res1.getZ() ) );
+    res1.setW( spu_add( spu_sub( spu_mul( mH, tmp4 ), spu_mul( mG, tmp2 ) ), res1.getW() ) );
+    return Matrix4(
+        ( res0 * detInv ),  // every result column is scaled by the reciprocal determinant
+        ( res1 * detInv ),
+        ( res2 * detInv ),
+        ( res3 * detInv )
+    );
+}
+
+// Inverts mat through Transform3: copies the XYZ part of each column into a
+// Transform3, inverts that, and converts the result back to a Matrix4.
+// The W components of mat are not read.
+inline const Matrix4 affineInverse( const Matrix4 & mat )
+{
+    Transform3 xyzPart;
+    xyzPart.setCol0( mat.getCol0().getXYZ( ) );
+    xyzPart.setCol1( mat.getCol1().getXYZ( ) );
+    xyzPart.setCol2( mat.getCol2().getXYZ( ) );
+    xyzPart.setCol3( mat.getCol3().getXYZ( ) );
+    const Transform3 inverted = inverse( xyzPart );
+    return Matrix4( inverted );
+}
+
+// Inverts mat through Transform3: copies the XYZ part of each column into a
+// Transform3, applies the Transform3 orthoInverse, and converts the result
+// back to a Matrix4. The W components of mat are not read.
+inline const Matrix4 orthoInverse( const Matrix4 & mat )
+{
+    Transform3 xyzPart;
+    xyzPart.setCol0( mat.getCol0().getXYZ( ) );
+    xyzPart.setCol1( mat.getCol1().getXYZ( ) );
+    xyzPart.setCol2( mat.getCol2().getXYZ( ) );
+    xyzPart.setCol3( mat.getCol3().getXYZ( ) );
+    const Transform3 inverted = orthoInverse( xyzPart );
+    return Matrix4( inverted );
+}
+
+inline vec_float4 determinant( const Matrix4 & mat )  // 4x4 determinant; uses the same tmp0..tmp5 and dx..dw expressions that inverse( Matrix4 ) uses for res0 and detInv.
+{
+    vec_float4 dx, dy, dz, dw, mA, mB, mC, mD, mE, mF, mG, mH, mI, mJ, mK, mL, mM, mN, mO, mP, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
+    mA = mat.getCol0().getX();  // mA..mD = column 0, mE..mH = column 1, mI..mL = column 2, mM..mP = column 3
+    mB = mat.getCol0().getY();
+    mC = mat.getCol0().getZ();
+    mD = mat.getCol0().getW();
+    mE = mat.getCol1().getX();
+    mF = mat.getCol1().getY();
+    mG = mat.getCol1().getZ();
+    mH = mat.getCol1().getW();
+    mI = mat.getCol2().getX();
+    mJ = mat.getCol2().getY();
+    mK = mat.getCol2().getZ();
+    mL = mat.getCol2().getW();
+    mM = mat.getCol3().getX();
+    mN = mat.getCol3().getY();
+    mO = mat.getCol3().getZ();
+    mP = mat.getCol3().getW();
+    tmp0 = spu_sub( spu_mul( mK, mD ), spu_mul( mC, mL ) );  // two-term differences of element products
+    tmp1 = spu_sub( spu_mul( mO, mH ), spu_mul( mG, mP ) );
+    tmp2 = spu_sub( spu_mul( mB, mK ), spu_mul( mJ, mC ) );
+    tmp3 = spu_sub( spu_mul( mF, mO ), spu_mul( mN, mG ) );
+    tmp4 = spu_sub( spu_mul( mJ, mD ), spu_mul( mB, mL ) );
+    tmp5 = spu_sub( spu_mul( mN, mH ), spu_mul( mF, mP ) );
+    dx = spu_sub( spu_sub( spu_mul( mJ, tmp1 ), spu_mul( mL, tmp3 ) ), spu_mul( mK, tmp5 ) );  // dx..dw are the factors paired with mA, mE, mI, mM in the final sum
+    dy = spu_sub( spu_sub( spu_mul( mN, tmp0 ), spu_mul( mP, tmp2 ) ), spu_mul( mO, tmp4 ) );
+    dz = spu_sub( spu_add( spu_mul( mD, tmp3 ), spu_mul( mC, tmp5 ) ), spu_mul( mB, tmp1 ) );
+    dw = spu_sub( spu_add( spu_mul( mH, tmp2 ), spu_mul( mG, tmp4 ) ), spu_mul( mF, tmp0 ) );
+    return spu_add( spu_add( spu_add( spu_mul( mA, dx ), spu_mul( mE, dy ) ), spu_mul( mI, dz ) ), spu_mul( mM, dw ) );
+}
+
+// Column-wise sum of this matrix and mat.
+inline const Matrix4 Matrix4::operator +( const Matrix4 & mat ) const
+{
+    const Vector4 sum0 = mCol0 + mat.mCol0;
+    const Vector4 sum1 = mCol1 + mat.mCol1;
+    const Vector4 sum2 = mCol2 + mat.mCol2;
+    const Vector4 sum3 = mCol3 + mat.mCol3;
+    return Matrix4( sum0, sum1, sum2, sum3 );
+}
+
+// Column-wise difference of this matrix and mat.
+inline const Matrix4 Matrix4::operator -( const Matrix4 & mat ) const
+{
+    const Vector4 diff0 = mCol0 - mat.mCol0;
+    const Vector4 diff1 = mCol1 - mat.mCol1;
+    const Vector4 diff2 = mCol2 - mat.mCol2;
+    const Vector4 diff3 = mCol3 - mat.mCol3;
+    return Matrix4( diff0, diff1, diff2, diff3 );
+}
+
+// In-place addition, implemented through operator + and assignment.
+inline Matrix4 & Matrix4::operator +=( const Matrix4 & mat )
+{
+    return *this = *this + mat;
+}
+
+// In-place subtraction, implemented through operator - and assignment.
+inline Matrix4 & Matrix4::operator -=( const Matrix4 & mat )
+{
+    return *this = *this - mat;
+}
+
+// Unary minus: negates each column.
+inline const Matrix4 Matrix4::operator -( ) const
+{
+    const Vector4 neg0 = -mCol0;
+    const Vector4 neg1 = -mCol1;
+    const Vector4 neg2 = -mCol2;
+    const Vector4 neg3 = -mCol3;
+    return Matrix4( neg0, neg1, neg2, neg3 );
+}
+
+// Applies the Vector4 absPerElem to each column of mat.
+inline const Matrix4 absPerElem( const Matrix4 & mat )
+{
+    const Vector4 abs0 = absPerElem( mat.getCol0() );
+    const Vector4 abs1 = absPerElem( mat.getCol1() );
+    const Vector4 abs2 = absPerElem( mat.getCol2() );
+    const Vector4 abs3 = absPerElem( mat.getCol3() );
+    return Matrix4( abs0, abs1, abs2, abs3 );
+}
+
+// Multiplies each column by scalar.
+inline const Matrix4 Matrix4::operator *( vec_float4 scalar ) const
+{
+    const Vector4 scaled0 = mCol0 * scalar;
+    const Vector4 scaled1 = mCol1 * scalar;
+    const Vector4 scaled2 = mCol2 * scalar;
+    const Vector4 scaled3 = mCol3 * scalar;
+    return Matrix4( scaled0, scaled1, scaled2, scaled3 );
+}
+
+// In-place scalar multiply, implemented through operator * and assignment.
+inline Matrix4 & Matrix4::operator *=( vec_float4 scalar )
+{
+    return *this = *this * scalar;
+}
+
+// scalar * matrix: forwards to Matrix4::operator *( vec_float4 ).
+inline const Matrix4 operator *( vec_float4 scalar, const Matrix4 & mat )
+{
+    const Matrix4 scaled = mat * scalar;
+    return scaled;
+}
+
+inline const Vector4 Matrix4::operator *( const Vector4 & vec ) const  // Matrix * 4-vector: each result component sums the four column components weighted by vec's X, Y, Z, W.
+{
+    return Vector4(
+        spu_add( spu_add( spu_add( spu_mul( mCol0.getX(), vec.getX() ), spu_mul( mCol1.getX(), vec.getY() ) ), spu_mul( mCol2.getX(), vec.getZ() ) ), spu_mul( mCol3.getX(), vec.getW() ) ),  // result X; summed as ((col0 + col1) + col2) + col3
+        spu_add( spu_add( spu_add( spu_mul( mCol0.getY(), vec.getX() ), spu_mul( mCol1.getY(), vec.getY() ) ), spu_mul( mCol2.getY(), vec.getZ() ) ), spu_mul( mCol3.getY(), vec.getW() ) ),  // result Y
+        spu_add( spu_add( spu_add( spu_mul( mCol0.getZ(), vec.getX() ), spu_mul( mCol1.getZ(), vec.getY() ) ), spu_mul( mCol2.getZ(), vec.getZ() ) ), spu_mul( mCol3.getZ(), vec.getW() ) ),  // result Z
+        spu_add( spu_add( spu_add( spu_mul( mCol0.getW(), vec.getX() ), spu_mul( mCol1.getW(), vec.getY() ) ), spu_mul( mCol2.getW(), vec.getZ() ) ), spu_mul( mCol3.getW(), vec.getW() ) )  // result W
+    );
+}
+
+inline const Vector4 Matrix4::operator *( const Vector3 & vec ) const  // Matrix * 3-vector: only columns 0..2 contribute; column 3 is not used.
+{
+    return Vector4(
+        spu_add( spu_add( spu_mul( mCol0.getX(), vec.getX() ), spu_mul( mCol1.getX(), vec.getY() ) ), spu_mul( mCol2.getX(), vec.getZ() ) ),  // result X
+        spu_add( spu_add( spu_mul( mCol0.getY(), vec.getX() ), spu_mul( mCol1.getY(), vec.getY() ) ), spu_mul( mCol2.getY(), vec.getZ() ) ),  // result Y
+        spu_add( spu_add( spu_mul( mCol0.getZ(), vec.getX() ), spu_mul( mCol1.getZ(), vec.getY() ) ), spu_mul( mCol2.getZ(), vec.getZ() ) ),  // result Z
+        spu_add( spu_add( spu_mul( mCol0.getW(), vec.getX() ), spu_mul( mCol1.getW(), vec.getY() ) ), spu_mul( mCol2.getW(), vec.getZ() ) )  // result W
+    );
+}
+
+inline const Vector4 Matrix4::operator *( const Point3 & pnt ) const  // Matrix * point: like the Vector3 product, but column 3 is added unscaled to each component.
+{
+    return Vector4(
+        spu_add( spu_add( spu_add( spu_mul( mCol0.getX(), pnt.getX() ), spu_mul( mCol1.getX(), pnt.getY() ) ), spu_mul( mCol2.getX(), pnt.getZ() ) ), mCol3.getX() ),  // result X
+        spu_add( spu_add( spu_add( spu_mul( mCol0.getY(), pnt.getX() ), spu_mul( mCol1.getY(), pnt.getY() ) ), spu_mul( mCol2.getY(), pnt.getZ() ) ), mCol3.getY() ),  // result Y
+        spu_add( spu_add( spu_add( spu_mul( mCol0.getZ(), pnt.getX() ), spu_mul( mCol1.getZ(), pnt.getY() ) ), spu_mul( mCol2.getZ(), pnt.getZ() ) ), mCol3.getZ() ),  // result Z
+        spu_add( spu_add( spu_add( spu_mul( mCol0.getW(), pnt.getX() ), spu_mul( mCol1.getW(), pnt.getY() ) ), spu_mul( mCol2.getW(), pnt.getZ() ) ), mCol3.getW() )  // result W
+    );
+}
+
+// Matrix product: each result column is this matrix times the matching
+// column of mat.
+inline const Matrix4 Matrix4::operator *( const Matrix4 & mat ) const
+{
+    const Vector4 prod0 = *this * mat.mCol0;
+    const Vector4 prod1 = *this * mat.mCol1;
+    const Vector4 prod2 = *this * mat.mCol2;
+    const Vector4 prod3 = *this * mat.mCol3;
+    return Matrix4( prod0, prod1, prod2, prod3 );
+}
+
+// In-place matrix multiply (this = this * mat).
+inline Matrix4 & Matrix4::operator *=( const Matrix4 & mat )
+{
+    return *this = *this * mat;
+}
+
+// Product with a Transform3: columns 0..2 of tfrm are multiplied as they are
+// returned by getCol0..2(), while column 3 is wrapped in Point3 first so the
+// Point3 overload of operator * is used for it.
+inline const Matrix4 Matrix4::operator *( const Transform3 & tfrm ) const
+{
+    const Vector4 prod0 = *this * tfrm.getCol0();
+    const Vector4 prod1 = *this * tfrm.getCol1();
+    const Vector4 prod2 = *this * tfrm.getCol2();
+    const Vector4 prod3 = *this * Point3( tfrm.getCol3() );
+    return Matrix4( prod0, prod1, prod2, prod3 );
+}
+
+// In-place multiply by a Transform3 (this = this * tfrm).
+inline Matrix4 & Matrix4::operator *=( const Transform3 & tfrm )
+{
+    return *this = *this * tfrm;
+}
+
+// Applies the Vector4 mulPerElem to each pair of matching columns.
+inline const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 )
+{
+    const Vector4 prod0 = mulPerElem( mat0.getCol0(), mat1.getCol0() );
+    const Vector4 prod1 = mulPerElem( mat0.getCol1(), mat1.getCol1() );
+    const Vector4 prod2 = mulPerElem( mat0.getCol2(), mat1.getCol2() );
+    const Vector4 prod3 = mulPerElem( mat0.getCol3(), mat1.getCol3() );
+    return Matrix4( prod0, prod1, prod2, prod3 );
+}
+
+// Matrix whose columns are Vector4::xAxis(), yAxis(), zAxis() and wAxis().
+inline const Matrix4 Matrix4::identity( )
+{
+    const Vector4 axisX = Vector4::xAxis( );
+    const Vector4 axisY = Vector4::yAxis( );
+    const Vector4 axisZ = Vector4::zAxis( );
+    const Vector4 axisW = Vector4::wAxis( );
+    return Matrix4( axisX, axisY, axisZ, axisW );
+}
+
+// Overwrites the XYZ part of columns 0..2 with the columns of mat3.
+// Column 3 is not modified here.
+inline Matrix4 & Matrix4::setUpper3x3( const Matrix3 & mat3 )
+{
+    Matrix4 & self = *this;
+    self.mCol0.setXYZ( mat3.getCol0() );
+    self.mCol1.setXYZ( mat3.getCol1() );
+    self.mCol2.setXYZ( mat3.getCol2() );
+    return self;
+}
+
+// Returns a Matrix3 built from the XYZ part of columns 0..2.
+inline const Matrix3 Matrix4::getUpper3x3( ) const
+{
+    const Vector3 xyz0 = mCol0.getXYZ( );
+    const Vector3 xyz1 = mCol1.getXYZ( );
+    const Vector3 xyz2 = mCol2.getXYZ( );
+    return Matrix3( xyz0, xyz1, xyz2 );
+}
+
+// Overwrites the XYZ part of column 3 with translateVec.
+inline Matrix4 & Matrix4::setTranslation( const Vector3 & translateVec )
+{
+    Matrix4 & self = *this;
+    self.mCol3.setXYZ( translateVec );
+    return self;
+}
+
+// Returns the XYZ part of column 3.
+inline const Vector3 Matrix4::getTranslation( ) const
+{
+    const Vector3 translation = mCol3.getXYZ( );
+    return translation;
+}
+
+// Rotation about the x axis: columns 0 and 3 are the x and w axes, the other
+// two columns are built from the sine and cosine of radians.
+inline const Matrix4 Matrix4::rotationX( vec_float4 radians )
+{
+    vec_float4 sinA, cosA;
+    sincosf4( radians, &sinA, &cosA );
+    const vec_float4 zero = spu_splats(0.0f);
+    return Matrix4(
+        Vector4::xAxis( ),
+        Vector4( zero, cosA, sinA, zero ),
+        Vector4( zero, negatef4( sinA ), cosA, zero ),
+        Vector4::wAxis( )
+    );
+}
+
+// Builds the matrix with columns ( c, 0, -s, 0 ), yAxis, ( s, 0, c, 0 ), wAxis,
+// where s and c are the outputs of sincosf4( radians ).
+inline const Matrix4 Matrix4::rotationY( vec_float4 radians )
+{
+    vec_float4 sinA, cosA;
+    sincosf4( radians, &sinA, &cosA );
+    const vec_float4 zero = spu_splats(0.0f);
+    const Vector4 col0( cosA, zero, negatef4( sinA ), zero );
+    const Vector4 col2( sinA, zero, cosA, zero );
+    return Matrix4( col0, Vector4::yAxis( ), col2, Vector4::wAxis( ) );
+}
+
+// Builds the matrix with columns ( c, s, 0, 0 ), ( -s, c, 0, 0 ), zAxis, wAxis,
+// where s and c are the outputs of sincosf4( radians ).
+inline const Matrix4 Matrix4::rotationZ( vec_float4 radians )
+{
+    vec_float4 sinA, cosA;
+    sincosf4( radians, &sinA, &cosA );
+    const vec_float4 zero = spu_splats(0.0f);
+    const Vector4 col0( cosA, sinA, zero, zero );
+    const Vector4 col1( negatef4( sinA ), cosA, zero, zero );
+    return Matrix4( col0, col1, Vector4::zAxis( ), Vector4::wAxis( ) );
+}
+
+// Builds a 4x4 matrix from three angles packed in radiansXYZ (X, Y and Z
+// components are each passed to sincosf4). The fourth column is wAxis and the
+// w element of the first three columns is zero.
+// NOTE(review): the name suggests the product rotZ * rotY * rotX - confirm
+// against the header documentation.
+inline const Matrix4 Matrix4::rotationZYX( const Vector3 & radiansXYZ )
+{
+    vec_float4 sX, cX, sY, cY, sZ, cZ, tmp0, tmp1;
+    sincosf4( radiansXYZ.getX(), &sX, &cX );
+    sincosf4( radiansXYZ.getY(), &sY, &cY );
+    sincosf4( radiansXYZ.getZ(), &sZ, &cZ );
+    // Products shared by the second and third columns.
+    tmp0 = spu_mul( cZ, sY );
+    tmp1 = spu_mul( sZ, sY );
+    return Matrix4(
+        // ( cZ*cY, sZ*cY, -sY, 0 )
+        Vector4( spu_mul( cZ, cY ), spu_mul( sZ, cY ), negatef4( sY ), spu_splats(0.0f) ),
+        // ( cZ*sY*sX - sZ*cX, sZ*sY*sX + cZ*cX, cY*sX, 0 )
+        Vector4( spu_sub( spu_mul( tmp0, sX ), spu_mul( sZ, cX ) ), spu_add( spu_mul( tmp1, sX ), spu_mul( cZ, cX ) ), spu_mul( cY, sX ), spu_splats(0.0f) ),
+        // ( cZ*sY*cX + sZ*sX, sZ*sY*cX - cZ*sX, cY*cX, 0 )
+        Vector4( spu_add( spu_mul( tmp0, cX ), spu_mul( sZ, sX ) ), spu_sub( spu_mul( tmp1, cX ), spu_mul( cZ, sX ) ), spu_mul( cY, cX ), spu_splats(0.0f) ),
+        Vector4::wAxis( )
+    );
+}
+
+// Builds a 4x4 matrix from an angle and an axis vector. With (x, y, z) the
+// components of unitVec, s/c the outputs of sincosf4( radians ) and
+// t = 1 - c, the columns are:
+//   ( x*x*t + c,   x*y*t + z*s, z*x*t - y*s, 0 )
+//   ( x*y*t - z*s, y*y*t + c,   y*z*t + x*s, 0 )
+//   ( z*x*t + y*s, y*z*t - x*s, z*z*t + c,   0 )
+//   wAxis
+// unitVec is used as given; no normalization happens in this function.
+inline const Matrix4 Matrix4::rotation( vec_float4 radians, const Vector3 & unitVec )
+{
+    vec_float4 x, y, z, s, c, oneMinusC, xy, yz, zx;
+    sincosf4( radians, &s, &c );
+    x = unitVec.getX();
+    y = unitVec.getY();
+    z = unitVec.getZ();
+    // Pairwise axis products reused by the off-diagonal terms.
+    xy = spu_mul( x, y );
+    yz = spu_mul( y, z );
+    zx = spu_mul( z, x );
+    oneMinusC = spu_sub( spu_splats(1.0f), c );
+    return Matrix4(
+        Vector4( spu_add( spu_mul( spu_mul( x, x ), oneMinusC ), c ), spu_add( spu_mul( xy, oneMinusC ), spu_mul( z, s ) ), spu_sub( spu_mul( zx, oneMinusC ), spu_mul( y, s ) ), spu_splats(0.0f) ),
+        Vector4( spu_sub( spu_mul( xy, oneMinusC ), spu_mul( z, s ) ), spu_add( spu_mul( spu_mul( y, y ), oneMinusC ), c ), spu_add( spu_mul( yz, oneMinusC ), spu_mul( x, s ) ), spu_splats(0.0f) ),
+        Vector4( spu_add( spu_mul( zx, oneMinusC ), spu_mul( y, s ) ), spu_sub( spu_mul( yz, oneMinusC ), spu_mul( x, s ) ), spu_add( spu_mul( spu_mul( z, z ), oneMinusC ), c ), spu_splats(0.0f) ),
+        Vector4::wAxis( )
+    );
+}
+
+// Builds a 4x4 matrix from a quaternion by converting the result of
+// Transform3::rotation( unitQuat ) to a Matrix4.
+inline const Matrix4 Matrix4::rotation( const Quat & unitQuat )
+{
+    const Transform3 rotationTfrm = Transform3::rotation( unitQuat );
+    return Matrix4( rotationTfrm );
+}
+
+// Builds the matrix with scaleVec's X, Y and Z on the first three diagonal
+// entries, zeros elsewhere in those columns, and wAxis as the fourth column.
+inline const Matrix4 Matrix4::scale( const Vector3 & scaleVec )
+{
+    const vec_float4 zero = spu_splats(0.0f);
+    const Vector4 col0( scaleVec.getX(), zero, zero, zero );
+    const Vector4 col1( zero, scaleVec.getY(), zero, zero );
+    const Vector4 col2( zero, zero, scaleVec.getZ(), zero );
+    return Matrix4( col0, col1, col2, Vector4::wAxis( ) );
+}
+
+// Returns mat with its first three columns multiplied by scaleVec's X, Y and Z
+// respectively; the fourth column is copied unchanged.
+inline const Matrix4 appendScale( const Matrix4 & mat, const Vector3 & scaleVec )
+{
+    const Vector4 col0 = mat.getCol0() * scaleVec.getX( );
+    const Vector4 col1 = mat.getCol1() * scaleVec.getY( );
+    const Vector4 col2 = mat.getCol2() * scaleVec.getZ( );
+    return Matrix4( col0, col1, col2, mat.getCol3() );
+}
+
+// Returns mat with every column multiplied per element by
+// ( scaleVec.x, scaleVec.y, scaleVec.z, 1 ).
+inline const Matrix4 prependScale( const Vector3 & scaleVec, const Matrix4 & mat )
+{
+    const Vector4 rowScale( scaleVec, spu_splats(1.0f) );
+    const Vector4 col0 = mulPerElem( mat.getCol0(), rowScale );
+    const Vector4 col1 = mulPerElem( mat.getCol1(), rowScale );
+    const Vector4 col2 = mulPerElem( mat.getCol2(), rowScale );
+    const Vector4 col3 = mulPerElem( mat.getCol3(), rowScale );
+    return Matrix4( col0, col1, col2, col3 );
+}
+
+// Builds the matrix with columns xAxis, yAxis, zAxis and
+// ( translateVec, 1 ).
+inline const Matrix4 Matrix4::translation( const Vector3 & translateVec )
+{
+    const Vector4 lastCol( translateVec, spu_splats(1.0f) );
+    return Matrix4( Vector4::xAxis( ), Vector4::yAxis( ), Vector4::zAxis( ), lastCol );
+}
+
+// Builds a frame at eyePos whose Z axis is normalize( eyePos - lookAtPos ),
+// whose X axis is normalize( cross( normalize( upVec ), Z ) ) and whose Y axis
+// is cross( Z, X ), then returns orthoInverse of that frame.
+inline const Matrix4 Matrix4::lookAt( const Point3 & eyePos, const Point3 & lookAtPos, const Vector3 & upVec )
+{
+    Vector3 axisY = normalize( upVec );
+    const Vector3 axisZ = normalize( ( eyePos - lookAtPos ) );
+    const Vector3 axisX = normalize( cross( axisY, axisZ ) );
+    // Recompute Y from Z and X, replacing the normalized up vector.
+    axisY = cross( axisZ, axisX );
+    const Matrix4 eyeFrame = Matrix4( Vector4( axisX ), Vector4( axisY ), Vector4( axisZ ), Vector4( eyePos ) );
+    return orthoInverse( eyeFrame );
+}
+
+// Builds a projection matrix from a vertical field of view, aspect ratio and
+// near/far distances. With f = tanf4( PI/2 - fovyRadians/2 ) and
+// r = recipf4( zNear - zFar ), the columns are:
+//   ( f/aspect, 0, 0, 0 ), ( 0, f, 0, 0 ),
+//   ( 0, 0, (zNear+zFar)*r, -1 ), ( 0, 0, zNear*zFar*r*2, 0 )
+inline const Matrix4 Matrix4::perspective( vec_float4 fovyRadians, vec_float4 aspect, vec_float4 zNear, vec_float4 zFar )
+{
+    const vec_float4 zero = spu_splats(0.0f);
+    const vec_float4 halfFovy = spu_mul( spu_splats(0.5f), fovyRadians );
+    const vec_float4 f = tanf4( spu_sub( spu_splats( _VECTORMATH_PI_OVER_2 ), halfFovy ) );
+    const vec_float4 rangeInv = recipf4( spu_sub( zNear, zFar ) );
+    const vec_float4 zScale = spu_mul( spu_add( zNear, zFar ), rangeInv );
+    const vec_float4 zOffset = spu_mul( spu_mul( spu_mul( zNear, zFar ), rangeInv ), spu_splats(2.0f) );
+    const Vector4 col0( divf4( f, aspect ), zero, zero, zero );
+    const Vector4 col1( zero, f, zero, zero );
+    const Vector4 col2( zero, zero, zScale, spu_splats(-1.0f) );
+    const Vector4 col3( zero, zero, zOffset, zero );
+    return Matrix4( col0, col1, col2, col3 );
+}
+
+// Builds a projection matrix from left/right/bottom/top/near/far values.
+// Reciprocals of ( right - left ), ( top - bottom ) and ( zNear - zFar ) are
+// taken with recipf4; the columns are:
+//   ( 2n/(r-l), 0, 0, 0 ), ( 0, 2n/(t-b), 0, 0 ),
+//   ( (r+l)/(r-l), (t+b)/(t-b), (n+f)/(n-f), -1 ), ( 0, 0, 2n*f/(n-f), 0 )
+inline const Matrix4 Matrix4::frustum( vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar )
+{
+    const vec_float4 zero = spu_splats(0.0f);
+    const vec_float4 widthSum = spu_add( right, left );
+    const vec_float4 heightSum = spu_add( top, bottom );
+    const vec_float4 depthSum = spu_add( zNear, zFar );
+    const vec_float4 widthInv = recipf4( spu_sub( right, left ) );
+    const vec_float4 heightInv = recipf4( spu_sub( top, bottom ) );
+    const vec_float4 depthInv = recipf4( spu_sub( zNear, zFar ) );
+    const vec_float4 twoNear = spu_add( zNear, zNear );
+    const Vector4 col0( spu_mul( twoNear, widthInv ), zero, zero, zero );
+    const Vector4 col1( zero, spu_mul( twoNear, heightInv ), zero, zero );
+    const Vector4 col2( spu_mul( widthSum, widthInv ), spu_mul( heightSum, heightInv ), spu_mul( depthSum, depthInv ), spu_splats(-1.0f) );
+    const Vector4 col3( zero, zero, spu_mul( spu_mul( twoNear, depthInv ), zFar ), zero );
+    return Matrix4( col0, col1, col2, col3 );
+}
+
+// Builds an orthographic projection matrix from left/right/bottom/top/near/far
+// values. Reciprocals of ( right - left ), ( top - bottom ) and
+// ( zNear - zFar ) are taken with recipf4; the columns are:
+//   ( 2/(r-l), 0, 0, 0 ), ( 0, 2/(t-b), 0, 0 ), ( 0, 0, 2/(n-f), 0 ),
+//   ( -(r+l)/(r-l), -(t+b)/(t-b), (n+f)/(n-f), 1 )
+inline const Matrix4 Matrix4::orthographic( vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar )
+{
+    const vec_float4 zero = spu_splats(0.0f);
+    const vec_float4 widthSum = spu_add( right, left );
+    const vec_float4 heightSum = spu_add( top, bottom );
+    const vec_float4 depthSum = spu_add( zNear, zFar );
+    const vec_float4 widthInv = recipf4( spu_sub( right, left ) );
+    const vec_float4 heightInv = recipf4( spu_sub( top, bottom ) );
+    const vec_float4 depthInv = recipf4( spu_sub( zNear, zFar ) );
+    const Vector4 col0( spu_add( widthInv, widthInv ), zero, zero, zero );
+    const Vector4 col1( zero, spu_add( heightInv, heightInv ), zero, zero );
+    const Vector4 col2( zero, zero, spu_add( depthInv, depthInv ), zero );
+    const Vector4 col3( spu_mul( negatef4( widthSum ), widthInv ), spu_mul( negatef4( heightSum ), heightInv ), spu_mul( depthSum, depthInv ), spu_splats(1.0f) );
+    return Matrix4( col0, col1, col2, col3 );
+}
+
+// Applies the Vector4 select( ) to each pair of matching columns of mat0 and
+// mat1, using the same select1 mask for all four columns.
+inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, vec_uint4 select1 )
+{
+    const Vector4 col0 = select( mat0.getCol0(), mat1.getCol0(), select1 );
+    const Vector4 col1 = select( mat0.getCol1(), mat1.getCol1(), select1 );
+    const Vector4 col2 = select( mat0.getCol2(), mat1.getCol2(), select1 );
+    const Vector4 col3 = select( mat0.getCol3(), mat1.getCol3(), select1 );
+    return Matrix4( col0, col1, col2, col3 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+// Debug helper: splits mat into four Aos::Matrix4 values with get4Aos and
+// prints each one under a "slot N:" heading.
+inline void print( const Matrix4 & mat )
+{
+    Aos::Matrix4 slot0, slot1, slot2, slot3;
+    mat.get4Aos( slot0, slot1, slot2, slot3 );
+
+    printf("slot 0:\n");
+    print( slot0 );
+
+    printf("slot 1:\n");
+    print( slot1 );
+
+    printf("slot 2:\n");
+    print( slot2 );
+
+    printf("slot 3:\n");
+    print( slot3 );
+}
+
+// Debug helper: prints name followed by ':' and a newline, then prints mat
+// with the single-argument print( ).
+inline void print( const Matrix4 & mat, const char * name )
+{
+    printf("%s:\n", name);
+    print( mat );
+}
+
+#endif
+
+// Copy constructor: copies all four columns of tfrm.
+inline Transform3::Transform3( const Transform3 & tfrm )
+    : mCol0( tfrm.mCol0 ),
+      mCol1( tfrm.mCol1 ),
+      mCol2( tfrm.mCol2 ),
+      mCol3( tfrm.mCol3 )
+{
+}
+
+// Sets every column to Vector3( scalar ).
+inline Transform3::Transform3( vec_float4 scalar )
+    : mCol0( scalar ),
+      mCol1( scalar ),
+      mCol2( scalar ),
+      mCol3( scalar )
+{
+}
+
+// Constructs the transform from its four columns.
+inline Transform3::Transform3( const Vector3 & _col0, const Vector3 & _col1, const Vector3 & _col2, const Vector3 & _col3 )
+    : mCol0( _col0 ),
+      mCol1( _col1 ),
+      mCol2( _col2 ),
+      mCol3( _col3 )
+{
+}
+
+// Constructs the transform from a 3x3 matrix (first three columns) and a
+// translation vector (fourth column).
+inline Transform3::Transform3( const Matrix3 & tfrm, const Vector3 & translateVec )
+{
+    // Both setters return *this, so the calls can be chained.
+    this->setUpper3x3( tfrm ).setTranslation( translateVec );
+}
+
+// Constructs the transform from a quaternion, converted with
+// Matrix3( unitQuat ) for the first three columns, and a translation vector
+// for the fourth column.
+inline Transform3::Transform3( const Quat & unitQuat, const Vector3 & translateVec )
+{
+    const Matrix3 rotationMat( unitQuat );
+    this->setUpper3x3( rotationMat ).setTranslation( translateVec );
+}
+
+// Constructs each column from the matching column of a single
+// Aos::Transform3, using the Vector3 constructor that takes one Aos column.
+inline Transform3::Transform3( const Aos::Transform3 & tfrm )
+    : mCol0( tfrm.getCol0() ),
+      mCol1( tfrm.getCol1() ),
+      mCol2( tfrm.getCol2() ),
+      mCol3( tfrm.getCol3() )
+{
+}
+
+// Constructs each column from the matching columns of four Aos::Transform3
+// values (tfrm0..tfrm3, passed in that order to the Vector3 constructor).
+inline Transform3::Transform3( const Aos::Transform3 & tfrm0, const Aos::Transform3 & tfrm1, const Aos::Transform3 & tfrm2, const Aos::Transform3 & tfrm3 )
+    : mCol0( tfrm0.getCol0(), tfrm1.getCol0(), tfrm2.getCol0(), tfrm3.getCol0() ),
+      mCol1( tfrm0.getCol1(), tfrm1.getCol1(), tfrm2.getCol1(), tfrm3.getCol1() ),
+      mCol2( tfrm0.getCol2(), tfrm1.getCol2(), tfrm2.getCol2(), tfrm3.getCol2() ),
+      mCol3( tfrm0.getCol3(), tfrm1.getCol3(), tfrm2.getCol3(), tfrm3.getCol3() )
+{
+}
+
+// Splits this transform into four Aos::Transform3 results. Each column is
+// split with Vector3::get4Aos and the four pieces are stored as the matching
+// column of result0..result3.
+inline void Transform3::get4Aos( Aos::Transform3 & result0, Aos::Transform3 & result1, Aos::Transform3 & result2, Aos::Transform3 & result3 ) const
+{
+    Aos::Vector3 piece0, piece1, piece2, piece3;
+
+    // Column 0
+    mCol0.get4Aos( piece0, piece1, piece2, piece3 );
+    result0.setCol0( piece0 );
+    result1.setCol0( piece1 );
+    result2.setCol0( piece2 );
+    result3.setCol0( piece3 );
+
+    // Column 1
+    mCol1.get4Aos( piece0, piece1, piece2, piece3 );
+    result0.setCol1( piece0 );
+    result1.setCol1( piece1 );
+    result2.setCol1( piece2 );
+    result3.setCol1( piece3 );
+
+    // Column 2
+    mCol2.get4Aos( piece0, piece1, piece2, piece3 );
+    result0.setCol2( piece0 );
+    result1.setCol2( piece1 );
+    result2.setCol2( piece2 );
+    result3.setCol2( piece3 );
+
+    // Column 3
+    mCol3.get4Aos( piece0, piece1, piece2, piece3 );
+    result0.setCol3( piece0 );
+    result1.setCol3( piece1 );
+    result2.setCol3( piece2 );
+    result3.setCol3( piece3 );
+}
+
+// Replaces column 0 and returns *this.
+inline Transform3 & Transform3::setCol0( const Vector3 & _col0 )
+{
+    this->mCol0 = _col0;
+    return *this;
+}
+
+// Replaces column 1 and returns *this.
+inline Transform3 & Transform3::setCol1( const Vector3 & _col1 )
+{
+    this->mCol1 = _col1;
+    return *this;
+}
+
+// Replaces column 2 and returns *this.
+inline Transform3 & Transform3::setCol2( const Vector3 & _col2 )
+{
+    this->mCol2 = _col2;
+    return *this;
+}
+
+// Replaces column 3 and returns *this.
+inline Transform3 & Transform3::setCol3( const Vector3 & _col3 )
+{
+    this->mCol3 = _col3;
+    return *this;
+}
+
+// Replaces the column at index col, addressed as an offset from mCol0, and
+// returns *this.
+// NOTE(review): col is not range-checked here; presumably callers pass 0..3.
+inline Transform3 & Transform3::setCol( int col, const Vector3 & vec )
+{
+    ( &mCol0 )[ col ] = vec;
+    return *this;
+}
+
+// Replaces one row: element `row` of columns 0..3 receives elements 0..3 of
+// vec respectively. Returns *this.
+inline Transform3 & Transform3::setRow( int row, const Vector4 & vec )
+{
+    const vec_float4 elem0 = vec.getElem( 0 );
+    const vec_float4 elem1 = vec.getElem( 1 );
+    const vec_float4 elem2 = vec.getElem( 2 );
+    const vec_float4 elem3 = vec.getElem( 3 );
+    this->mCol0.setElem( row, elem0 );
+    this->mCol1.setElem( row, elem1 );
+    this->mCol2.setElem( row, elem2 );
+    this->mCol3.setElem( row, elem3 );
+    return *this;
+}
+
+// Replaces the element at (col, row): copies the column out, updates the
+// element in the copy, and writes the column back. Returns *this.
+inline Transform3 & Transform3::setElem( int col, int row, vec_float4 val )
+{
+    Vector3 column = this->getCol( col );
+    column.setElem( row, val );
+    return this->setCol( col, column );
+}
+
+// Returns the element at (col, row).
+inline vec_float4 Transform3::getElem( int col, int row ) const
+{
+    const Vector3 column = this->getCol( col );
+    return column.getElem( row );
+}
+
+// Returns a copy of column 0.
+inline const Vector3 Transform3::getCol0( ) const
+{
+    return this->mCol0;
+}
+
+// Returns a copy of column 1.
+inline const Vector3 Transform3::getCol1( ) const
+{
+    return this->mCol1;
+}
+
+// Returns a copy of column 2.
+inline const Vector3 Transform3::getCol2( ) const
+{
+    return this->mCol2;
+}
+
+// Returns a copy of column 3.
+inline const Vector3 Transform3::getCol3( ) const
+{
+    return this->mCol3;
+}
+
+// Returns a copy of the column at index col, addressed as an offset from
+// mCol0.
+// NOTE(review): col is not range-checked here; presumably callers pass 0..3.
+inline const Vector3 Transform3::getCol( int col ) const
+{
+    return ( &mCol0 )[ col ];
+}
+
+// Returns the Vector4 made of element `row` from columns 0, 1, 2 and 3.
+inline const Vector4 Transform3::getRow( int row ) const
+{
+    const vec_float4 elem0 = mCol0.getElem( row );
+    const vec_float4 elem1 = mCol1.getElem( row );
+    const vec_float4 elem2 = mCol2.getElem( row );
+    const vec_float4 elem3 = mCol3.getElem( row );
+    return Vector4( elem0, elem1, elem2, elem3 );
+}
+
+// Returns a mutable reference to the column at index col, addressed as an
+// offset from mCol0.
+// NOTE(review): col is not range-checked here; presumably callers pass 0..3.
+inline Vector3 & Transform3::operator []( int col )
+{
+    return ( &mCol0 )[ col ];
+}
+
+// Returns a copy of the column at index col, addressed as an offset from
+// mCol0.
+// NOTE(review): col is not range-checked here; presumably callers pass 0..3.
+inline const Vector3 Transform3::operator []( int col ) const
+{
+    return ( &mCol0 )[ col ];
+}
+
+// Copies all four columns from tfrm and returns *this.
+inline Transform3 & Transform3::operator =( const Transform3 & tfrm )
+{
+    // Each setColN stores its argument and returns *this, so the four
+    // assignments run in order 0, 1, 2, 3.
+    return this->setCol0( tfrm.mCol0 )
+                .setCol1( tfrm.mCol1 )
+                .setCol2( tfrm.mCol2 )
+                .setCol3( tfrm.mCol3 );
+}
+
+// Computes the inverse of a 3x4 transform.
+// The upper 3x3 part is inverted from the cross products of its columns,
+// scaled by the reciprocal of dot( col2, cross( col0, col1 ) ). The
+// fourth column is the negated product of that inverse with tfrm's fourth
+// column.
+// NOTE(review): no check for a zero determinant is visible here; the result
+// then depends on what recipf4 returns for zero.
+inline const Transform3 inverse( const Transform3 & tfrm )
+{
+    Vector3 tmp0, tmp1, tmp2, inv0, inv1, inv2;
+    vec_float4 detinv;
+    // Cross products of column pairs.
+    tmp0 = cross( tfrm.getCol1(), tfrm.getCol2() );
+    tmp1 = cross( tfrm.getCol2(), tfrm.getCol0() );
+    tmp2 = cross( tfrm.getCol0(), tfrm.getCol1() );
+    // Reciprocal of col2 . ( col0 x col1 ).
+    detinv = recipf4( dot( tfrm.getCol2(), tmp2 ) );
+    // Column i of the inverse gathers component i of tmp0, tmp1, tmp2,
+    // each scaled by detinv.
+    inv0 = Vector3( spu_mul( tmp0.getX(), detinv ), spu_mul( tmp1.getX(), detinv ), spu_mul( tmp2.getX(), detinv ) );
+    inv1 = Vector3( spu_mul( tmp0.getY(), detinv ), spu_mul( tmp1.getY(), detinv ), spu_mul( tmp2.getY(), detinv ) );
+    inv2 = Vector3( spu_mul( tmp0.getZ(), detinv ), spu_mul( tmp1.getZ(), detinv ), spu_mul( tmp2.getZ(), detinv ) );
+    return Transform3(
+        inv0,
+        inv1,
+        inv2,
+        // -( inv0 * t.x + inv1 * t.y + inv2 * t.z ), with t = tfrm.getCol3()
+        Vector3( ( -( ( inv0 * tfrm.getCol3().getX() ) + ( ( inv1 * tfrm.getCol3().getY() ) + ( inv2 * tfrm.getCol3().getZ() ) ) ) ) )
+    );
+}
+
+// Inverse computed by transposing the upper 3x3 part (no determinant is
+// used) and setting the fourth column to the negated product of that
+// transpose with tfrm's fourth column.
+// NOTE(review): presumably only valid when the upper 3x3 is orthonormal, as
+// the name suggests - confirm with callers.
+inline const Transform3 orthoInverse( const Transform3 & tfrm )
+{
+    const Vector3 col0 = tfrm.getCol0();
+    const Vector3 col1 = tfrm.getCol1();
+    const Vector3 col2 = tfrm.getCol2();
+    const Vector3 trans = tfrm.getCol3();
+
+    // Columns of the transposed 3x3 part.
+    const Vector3 inv0( col0.getX(), col1.getX(), col2.getX() );
+    const Vector3 inv1( col0.getY(), col1.getY(), col2.getY() );
+    const Vector3 inv2( col0.getZ(), col1.getZ(), col2.getZ() );
+
+    const Vector3 invTrans = -( ( inv0 * trans.getX() ) + ( ( inv1 * trans.getY() ) + ( inv2 * trans.getZ() ) ) );
+    return Transform3( inv0, inv1, inv2, invTrans );
+}
+
+// Applies the Vector3 absPerElem( ) to each of the four columns of tfrm.
+inline const Transform3 absPerElem( const Transform3 & tfrm )
+{
+    const Vector3 col0 = absPerElem( tfrm.getCol0() );
+    const Vector3 col1 = absPerElem( tfrm.getCol1() );
+    const Vector3 col2 = absPerElem( tfrm.getCol2() );
+    const Vector3 col3 = absPerElem( tfrm.getCol3() );
+    return Transform3( col0, col1, col2, col3 );
+}
+
+// Multiplies the first three columns by a Vector3:
+// result = mCol0 * vec.x + mCol1 * vec.y + mCol2 * vec.z.
+// The fourth column is not used.
+inline const Vector3 Transform3::operator *( const Vector3 & vec ) const
+{
+    const vec_float4 vx = vec.getX();
+    const vec_float4 vy = vec.getY();
+    const vec_float4 vz = vec.getZ();
+    const vec_float4 outX = spu_add( spu_add( spu_mul( mCol0.getX(), vx ), spu_mul( mCol1.getX(), vy ) ), spu_mul( mCol2.getX(), vz ) );
+    const vec_float4 outY = spu_add( spu_add( spu_mul( mCol0.getY(), vx ), spu_mul( mCol1.getY(), vy ) ), spu_mul( mCol2.getY(), vz ) );
+    const vec_float4 outZ = spu_add( spu_add( spu_mul( mCol0.getZ(), vx ), spu_mul( mCol1.getZ(), vy ) ), spu_mul( mCol2.getZ(), vz ) );
+    return Vector3( outX, outY, outZ );
+}
+
+// Transforms a Point3:
+// result = mCol0 * pnt.x + mCol1 * pnt.y + mCol2 * pnt.z + mCol3.
+inline const Point3 Transform3::operator *( const Point3 & pnt ) const
+{
+    const vec_float4 px = pnt.getX();
+    const vec_float4 py = pnt.getY();
+    const vec_float4 pz = pnt.getZ();
+    const vec_float4 outX = spu_add( spu_add( spu_add( spu_mul( mCol0.getX(), px ), spu_mul( mCol1.getX(), py ) ), spu_mul( mCol2.getX(), pz ) ), mCol3.getX() );
+    const vec_float4 outY = spu_add( spu_add( spu_add( spu_mul( mCol0.getY(), px ), spu_mul( mCol1.getY(), py ) ), spu_mul( mCol2.getY(), pz ) ), mCol3.getY() );
+    const vec_float4 outZ = spu_add( spu_add( spu_add( spu_mul( mCol0.getZ(), px ), spu_mul( mCol1.getZ(), py ) ), spu_mul( mCol2.getZ(), pz ) ), mCol3.getZ() );
+    return Point3( outX, outY, outZ );
+}
+
+// Composes two transforms. The first three columns of tfrm are multiplied as
+// Vector3 values; its fourth column is multiplied as a Point3 (so this
+// transform's own fourth column is added) and converted back to Vector3.
+inline const Transform3 Transform3::operator *( const Transform3 & tfrm ) const
+{
+    const Vector3 col0 = *this * tfrm.mCol0;
+    const Vector3 col1 = *this * tfrm.mCol1;
+    const Vector3 col2 = *this * tfrm.mCol2;
+    const Vector3 col3 = Vector3( ( *this * Point3( tfrm.mCol3 ) ) );
+    return Transform3( col0, col1, col2, col3 );
+}
+
+// Replaces this transform with the product ( *this * tfrm ) and returns a
+// reference to this transform.
+inline Transform3 & Transform3::operator *=( const Transform3 & tfrm )
+{
+    // operator = returns *this, so its result can be returned directly.
+    return ( *this = *this * tfrm );
+}
+
+// Column-by-column mulPerElem of two transforms: column i of the result is
+// mulPerElem( tfrm0 column i, tfrm1 column i ).
+inline const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 )
+{
+    const Vector3 col0 = mulPerElem( tfrm0.getCol0(), tfrm1.getCol0() );
+    const Vector3 col1 = mulPerElem( tfrm0.getCol1(), tfrm1.getCol1() );
+    const Vector3 col2 = mulPerElem( tfrm0.getCol2(), tfrm1.getCol2() );
+    const Vector3 col3 = mulPerElem( tfrm0.getCol3(), tfrm1.getCol3() );
+    return Transform3( col0, col1, col2, col3 );
+}
+
+// Returns the transform whose first three columns are Vector3::xAxis, yAxis
+// and zAxis and whose fourth column is all zeros.
+inline const Transform3 Transform3::identity( )
+{
+    const Vector3 noTranslation( spu_splats(0.0f) );
+    return Transform3( Vector3::xAxis( ), Vector3::yAxis( ), Vector3::zAxis( ), noTranslation );
+}
+
+// Copies the three columns of tfrm into the first three columns of this
+// transform. The fourth column is not touched. Returns *this.
+inline Transform3 & Transform3::setUpper3x3( const Matrix3 & tfrm )
+{
+    return this->setCol0( tfrm.getCol0() )
+                .setCol1( tfrm.getCol1() )
+                .setCol2( tfrm.getCol2() );
+}
+
+// Returns a Matrix3 built from the first three columns.
+inline const Matrix3 Transform3::getUpper3x3( ) const
+{
+    const Matrix3 upper( mCol0, mCol1, mCol2 );
+    return upper;
+}
+
+// Stores translateVec as the fourth column and returns *this.
+inline Transform3 & Transform3::setTranslation( const Vector3 & translateVec )
+{
+    return this->setCol3( translateVec );
+}
+
+// Returns a copy of the fourth column.
+inline const Vector3 Transform3::getTranslation( ) const
+{
+    return this->getCol3();
+}
+
+// Builds the transform with columns xAxis, ( 0, c, s ), ( 0, -s, c ) and a
+// zero fourth column, where s and c are the outputs of sincosf4( radians ).
+inline const Transform3 Transform3::rotationX( vec_float4 radians )
+{
+    vec_float4 sinA, cosA;
+    sincosf4( radians, &sinA, &cosA );
+    const vec_float4 zero = spu_splats(0.0f);
+    const Vector3 col1( zero, cosA, sinA );
+    const Vector3 col2( zero, negatef4( sinA ), cosA );
+    const Vector3 noTranslation( spu_splats(0.0f) );
+    return Transform3( Vector3::xAxis( ), col1, col2, noTranslation );
+}
+
+// Builds the transform with columns ( c, 0, -s ), yAxis, ( s, 0, c ) and a
+// zero fourth column, where s and c are the outputs of sincosf4( radians ).
+inline const Transform3 Transform3::rotationY( vec_float4 radians )
+{
+    vec_float4 sinA, cosA;
+    sincosf4( radians, &sinA, &cosA );
+    const vec_float4 zero = spu_splats(0.0f);
+    const Vector3 col0( cosA, zero, negatef4( sinA ) );
+    const Vector3 col2( sinA, zero, cosA );
+    const Vector3 noTranslation( spu_splats(0.0f) );
+    return Transform3( col0, Vector3::yAxis( ), col2, noTranslation );
+}
+
+// Builds the transform with columns ( c, s, 0 ), ( -s, c, 0 ), zAxis and a
+// zero fourth column, where s and c are the outputs of sincosf4( radians ).
+inline const Transform3 Transform3::rotationZ( vec_float4 radians )
+{
+    vec_float4 sinA, cosA;
+    sincosf4( radians, &sinA, &cosA );
+    const vec_float4 zero = spu_splats(0.0f);
+    const Vector3 col0( cosA, sinA, zero );
+    const Vector3 col1( negatef4( sinA ), cosA, zero );
+    const Vector3 noTranslation( spu_splats(0.0f) );
+    return Transform3( col0, col1, Vector3::zAxis( ), noTranslation );
+}
+
+// Builds a transform from three angles packed in radiansXYZ (X, Y and Z
+// components are each passed to sincosf4). The fourth column is all zeros.
+// NOTE(review): the name suggests the product rotZ * rotY * rotX - confirm
+// against the header documentation.
+inline const Transform3 Transform3::rotationZYX( const Vector3 & radiansXYZ )
+{
+    vec_float4 sX, cX, sY, cY, sZ, cZ, tmp0, tmp1;
+    sincosf4( radiansXYZ.getX(), &sX, &cX );
+    sincosf4( radiansXYZ.getY(), &sY, &cY );
+    sincosf4( radiansXYZ.getZ(), &sZ, &cZ );
+    // Products shared by the second and third columns.
+    tmp0 = spu_mul( cZ, sY );
+    tmp1 = spu_mul( sZ, sY );
+    return Transform3(
+        // ( cZ*cY, sZ*cY, -sY )
+        Vector3( spu_mul( cZ, cY ), spu_mul( sZ, cY ), negatef4( sY ) ),
+        // ( cZ*sY*sX - sZ*cX, sZ*sY*sX + cZ*cX, cY*sX )
+        Vector3( spu_sub( spu_mul( tmp0, sX ), spu_mul( sZ, cX ) ), spu_add( spu_mul( tmp1, sX ), spu_mul( cZ, cX ) ), spu_mul( cY, sX ) ),
+        // ( cZ*sY*cX + sZ*sX, sZ*sY*cX - cZ*sX, cY*cX )
+        Vector3( spu_add( spu_mul( tmp0, cX ), spu_mul( sZ, sX ) ), spu_sub( spu_mul( tmp1, cX ), spu_mul( cZ, sX ) ), spu_mul( cY, cX ) ),
+        Vector3( spu_splats(0.0f) )
+    );
+}
+
+// Builds a transform whose first three columns come from
+// Matrix3::rotation( radians, unitVec ) and whose fourth column is all zeros.
+inline const Transform3 Transform3::rotation( vec_float4 radians, const Vector3 & unitVec )
+{
+    const Matrix3 rotationMat = Matrix3::rotation( radians, unitVec );
+    const Vector3 noTranslation( spu_splats(0.0f) );
+    return Transform3( rotationMat, noTranslation );
+}
+
+// Builds a transform whose first three columns come from Matrix3( unitQuat )
+// and whose fourth column is all zeros.
+inline const Transform3 Transform3::rotation( const Quat & unitQuat )
+{
+    const Matrix3 rotationMat( unitQuat );
+    const Vector3 noTranslation( spu_splats(0.0f) );
+    return Transform3( rotationMat, noTranslation );
+}
+
+// Builds the transform with scaleVec's X, Y and Z on the diagonal of the
+// first three columns, zeros elsewhere, and a zero fourth column.
+inline const Transform3 Transform3::scale( const Vector3 & scaleVec )
+{
+    const vec_float4 zero = spu_splats(0.0f);
+    const Vector3 col0( scaleVec.getX(), zero, zero );
+    const Vector3 col1( zero, scaleVec.getY(), zero );
+    const Vector3 col2( zero, zero, scaleVec.getZ() );
+    const Vector3 noTranslation( spu_splats(0.0f) );
+    return Transform3( col0, col1, col2, noTranslation );
+}
+
+// Returns tfrm with its first three columns multiplied by scaleVec's X, Y and
+// Z respectively; the fourth column is copied unchanged.
+inline const Transform3 appendScale( const Transform3 & tfrm, const Vector3 & scaleVec )
+{
+    const Vector3 col0 = tfrm.getCol0() * scaleVec.getX( );
+    const Vector3 col1 = tfrm.getCol1() * scaleVec.getY( );
+    const Vector3 col2 = tfrm.getCol2() * scaleVec.getZ( );
+    return Transform3( col0, col1, col2, tfrm.getCol3() );
+}
+
+// Returns tfrm with every column, including the fourth, multiplied per
+// element by scaleVec.
+inline const Transform3 prependScale( const Vector3 & scaleVec, const Transform3 & tfrm )
+{
+    const Vector3 col0 = mulPerElem( tfrm.getCol0(), scaleVec );
+    const Vector3 col1 = mulPerElem( tfrm.getCol1(), scaleVec );
+    const Vector3 col2 = mulPerElem( tfrm.getCol2(), scaleVec );
+    const Vector3 col3 = mulPerElem( tfrm.getCol3(), scaleVec );
+    return Transform3( col0, col1, col2, col3 );
+}
+
+// Builds the transform with columns xAxis, yAxis, zAxis and translateVec.
+inline const Transform3 Transform3::translation( const Vector3 & translateVec )
+{
+    const Vector3 axisX = Vector3::xAxis( );
+    const Vector3 axisY = Vector3::yAxis( );
+    const Vector3 axisZ = Vector3::zAxis( );
+    return Transform3( axisX, axisY, axisZ, translateVec );
+}
+
+// Applies the Vector3 select( ) to each pair of matching columns of tfrm0 and
+// tfrm1, using the same select1 mask for all four columns.
+inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, vec_uint4 select1 )
+{
+    const Vector3 col0 = select( tfrm0.getCol0(), tfrm1.getCol0(), select1 );
+    const Vector3 col1 = select( tfrm0.getCol1(), tfrm1.getCol1(), select1 );
+    const Vector3 col2 = select( tfrm0.getCol2(), tfrm1.getCol2(), select1 );
+    const Vector3 col3 = select( tfrm0.getCol3(), tfrm1.getCol3(), select1 );
+    return Transform3( col0, col1, col2, col3 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+// Debug helper: splits tfrm into four Aos::Transform3 values with get4Aos and
+// prints each one under a "slot N:" heading.
+inline void print( const Transform3 & tfrm )
+{
+    Aos::Transform3 slot0, slot1, slot2, slot3;
+    tfrm.get4Aos( slot0, slot1, slot2, slot3 );
+
+    printf("slot 0:\n");
+    print( slot0 );
+
+    printf("slot 1:\n");
+    print( slot1 );
+
+    printf("slot 2:\n");
+    print( slot2 );
+
+    printf("slot 3:\n");
+    print( slot3 );
+}
+
+// Debug helper: prints name followed by ':' and a newline, then prints tfrm
+// with the single-argument print( ).
+inline void print( const Transform3 & tfrm, const char * name )
+{
+    printf("%s:\n", name);
+    print( tfrm );
+}
+
+#endif
+
+// Constructs a quaternion from a 3x3 matrix without branching: comparison
+// masks pick one of four cases and spu_sel applies the matching sign flips
+// and component swaps.
+// NOTE(review): this looks like the usual "largest diagonal element"
+// rotation-matrix-to-quaternion conversion; presumably tfrm must be a pure
+// rotation - confirm against the header documentation.
+inline Quat::Quat( const Matrix3 & tfrm )
+{
+    vec_float4 trace, radicand, scale, xx, yx, zx, xy, yy, zy, xz, yz, zz, tmpx, tmpy, tmpz, tmpw, qx, qy, qz, qw;
+    vec_uint4 negTrace, ZgtX, ZgtY, YgtX;
+    vec_uint4 largestXorY, largestYorZ, largestZorX;
+
+    // Matrix elements: first letter is the component (row), second letter is
+    // the column it was read from (x = col0, y = col1, z = col2).
+    xx = tfrm.getCol0().getX();
+    yx = tfrm.getCol0().getY();
+    zx = tfrm.getCol0().getZ();
+    xy = tfrm.getCol1().getX();
+    yy = tfrm.getCol1().getY();
+    zy = tfrm.getCol1().getZ();
+    xz = tfrm.getCol2().getX();
+    yz = tfrm.getCol2().getY();
+    zz = tfrm.getCol2().getZ();
+
+    trace = spu_add( spu_add( xx, yy ), zz );
+
+    // Masks: trace < 0, and pairwise comparisons of the diagonal elements.
+    negTrace = spu_cmpgt( spu_splats(0.0f), trace );
+    ZgtX = spu_cmpgt( zz, xx );
+    ZgtY = spu_cmpgt( zz, yy );
+    YgtX = spu_cmpgt( yy, xx );
+    // All three masks are cleared when the trace is not negative.
+    // largestXorY: not ( zz > xx and zz > yy )
+    // largestYorZ: ( yy > xx ) or ( zz > xx )
+    // largestZorX: ( zz > yy ) or not ( yy > xx )
+    largestXorY = spu_and( negTrace, spu_nand( ZgtX, ZgtY ) );
+    largestYorZ = spu_and( negTrace, spu_or( YgtX, ZgtX ) );
+    largestZorX = spu_and( negTrace, spu_orc( ZgtY, YgtX ) );
+    
+    // Negate selected elements where the matching mask is set.
+    zz = spu_sel( zz, negatef4(zz), largestXorY );
+    xy = spu_sel( xy, negatef4(xy), largestXorY );
+    xx = spu_sel( xx, negatef4(xx), largestYorZ );
+    yz = spu_sel( yz, negatef4(yz), largestYorZ );
+    yy = spu_sel( yy, negatef4(yy), largestZorX );
+    zx = spu_sel( zx, negatef4(zx), largestZorX );
+
+    // radicand = xx + yy + zz + 1 using the sign-adjusted diagonal;
+    // scale = 0.5 * rsqrtf4( radicand ).
+    radicand = spu_add( spu_add( spu_add( xx, yy ), zz ), spu_splats(1.0f) );
+    scale = spu_mul( spu_splats(0.5f), rsqrtf4( radicand ) );
+
+    // Candidate components, before the case-dependent reordering below.
+    tmpx = spu_mul( spu_sub( zy, yz ), scale );
+    tmpy = spu_mul( spu_sub( xz, zx ), scale );
+    tmpz = spu_mul( spu_sub( yx, xy ), scale );
+    tmpw = spu_mul( radicand, scale );
+    qx = tmpx;
+    qy = tmpy;
+    qz = tmpz;
+    qw = tmpw;
+
+    // Where largestXorY is set: swap x <-> w and y <-> z.
+    qx = spu_sel( qx, tmpw, largestXorY );
+    qy = spu_sel( qy, tmpz, largestXorY );
+    qz = spu_sel( qz, tmpy, largestXorY );
+    qw = spu_sel( qw, tmpx, largestXorY );
+    // Save x and z before they are overwritten by the second swap.
+    tmpx = qx;
+    tmpz = qz;
+    // Where largestYorZ is set: swap x <-> y and z <-> w.
+    qx = spu_sel( qx, qy, largestYorZ );
+    qy = spu_sel( qy, tmpx, largestYorZ );
+    qz = spu_sel( qz, qw, largestYorZ );
+    qw = spu_sel( qw, tmpz, largestYorZ );
+
+    mX = qx;
+    mY = qy;
+    mZ = qz;
+    mW = qw;
+}
+
+// Outer product of two 3-D vectors: column i of the result is tfrm0 scaled by
+// component i of tfrm1.
+inline const Matrix3 outer( const Vector3 & tfrm0, const Vector3 & tfrm1 )
+{
+    const Vector3 col0 = tfrm0 * tfrm1.getX( );
+    const Vector3 col1 = tfrm0 * tfrm1.getY( );
+    const Vector3 col2 = tfrm0 * tfrm1.getZ( );
+    return Matrix3( col0, col1, col2 );
+}
+
+// Outer product of two 4-D vectors: column i of the result is tfrm0 scaled by
+// component i of tfrm1.
+inline const Matrix4 outer( const Vector4 & tfrm0, const Vector4 & tfrm1 )
+{
+    const Vector4 col0 = tfrm0 * tfrm1.getX( );
+    const Vector4 col1 = tfrm0 * tfrm1.getY( );
+    const Vector4 col2 = tfrm0 * tfrm1.getZ( );
+    const Vector4 col3 = tfrm0 * tfrm1.getW( );
+    return Matrix4( col0, col1, col2, col3 );
+}
+
+// Multiplies vec, treated as a row vector, by mat: component i of the result
+// is vec.x * col_i.x + vec.y * col_i.y + vec.z * col_i.z.
+inline const Vector3 rowMul( const Vector3 & vec, const Matrix3 & mat )
+{
+    const vec_float4 vx = vec.getX();
+    const vec_float4 vy = vec.getY();
+    const vec_float4 vz = vec.getZ();
+    const Vector3 col0 = mat.getCol0();
+    const Vector3 col1 = mat.getCol1();
+    const Vector3 col2 = mat.getCol2();
+    const vec_float4 outX = spu_add( spu_add( spu_mul( vx, col0.getX() ), spu_mul( vy, col0.getY() ) ), spu_mul( vz, col0.getZ() ) );
+    const vec_float4 outY = spu_add( spu_add( spu_mul( vx, col1.getX() ), spu_mul( vy, col1.getY() ) ), spu_mul( vz, col1.getZ() ) );
+    const vec_float4 outZ = spu_add( spu_add( spu_mul( vx, col2.getX() ), spu_mul( vy, col2.getY() ) ), spu_mul( vz, col2.getZ() ) );
+    return Vector3( outX, outY, outZ );
+}
+
+// Builds the matrix with columns ( 0, z, -y ), ( -z, 0, x ), ( y, -x, 0 ),
+// where ( x, y, z ) are the components of vec.
+// NOTE(review): presumably crossMatrix( v ) * w equals cross( v, w ) -
+// confirm against the header documentation.
+inline const Matrix3 crossMatrix( const Vector3 & vec )
+{
+    const vec_float4 zero = spu_splats(0.0f);
+    const Vector3 col0( zero, vec.getZ(), negatef4( vec.getY() ) );
+    const Vector3 col1( negatef4( vec.getZ() ), zero, vec.getX() );
+    const Vector3 col2( vec.getY(), negatef4( vec.getX() ), zero );
+    return Matrix3( col0, col1, col2 );
+}
+
+// Returns the matrix whose column i is cross( vec, mat column i ).
+inline const Matrix3 crossMatrixMul( const Vector3 & vec, const Matrix3 & mat )
+{
+    const Vector3 col0 = cross( vec, mat.getCol0() );
+    const Vector3 col1 = cross( vec, mat.getCol1() );
+    const Vector3 col2 = cross( vec, mat.getCol2() );
+    return Matrix3( col0, col1, col2 );
+}
+
+} // namespace Soa
+} // namespace Vectormath
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/spu/cpp/quat_aos.h b/Extras/vectormathlibrary/include/vectormath/spu/cpp/quat_aos.h
index a41bc69aa..88f30de00 100644
--- a/Extras/vectormathlibrary/include/vectormath/spu/cpp/quat_aos.h
+++ b/Extras/vectormathlibrary/include/vectormath/spu/cpp/quat_aos.h
@@ -1,417 +1,417 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_QUAT_AOS_CPP_H
-#define _VECTORMATH_QUAT_AOS_CPP_H
-//-----------------------------------------------------------------------------
-// Definitions
-
-#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
-#define _VECTORMATH_INTERNAL_FUNCTIONS
-
-#endif
-
-namespace Vectormath {
-namespace Aos {
-
-inline Quat::Quat( float _x, float _y, float _z, float _w )
-{
-    mVec128 = (vec_float4){ _x, _y, _z, _w };
-}
-
-inline Quat::Quat( Vector3 xyz, float _w )
-{
-    mVec128 = spu_shuffle( xyz.get128(), spu_promote( _w, 0 ), _VECTORMATH_SHUF_XYZA );
-}
-
-inline Quat::Quat( Vector4 vec )
-{
-    mVec128 = vec.get128();
-}
-
-inline Quat::Quat( float scalar )
-{
-    mVec128 = spu_splats( scalar );
-}
-
-inline Quat::Quat( vec_float4 vf4 )
-{
-    mVec128 = vf4;
-}
-
-inline const Quat Quat::identity( )
-{
-    return Quat( _VECTORMATH_UNIT_0001 );
-}
-
-inline const Quat lerp( float t, Quat quat0, Quat quat1 )
-{
-    return ( quat0 + ( ( quat1 - quat0 ) * t ) );
-}
-
-inline const Quat slerp( float t, Quat unitQuat0, Quat unitQuat1 )
-{
-    Quat start;
-    vec_float4 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
-    vec_uint4 selectMask;
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
-    cosAngle = _vmathVfDot4( unitQuat0.get128(), unitQuat1.get128() );
-    cosAngle = spu_shuffle( cosAngle, cosAngle, shuffle_xxxx );
-    selectMask = (vec_uint4)spu_cmpgt( spu_splats(0.0f), cosAngle );
-    cosAngle = spu_sel( cosAngle, negatef4( cosAngle ), selectMask );
-    start = Quat( spu_sel( unitQuat0.get128(), negatef4( unitQuat0.get128() ), selectMask ) );
-    selectMask = (vec_uint4)spu_cmpgt( spu_splats(_VECTORMATH_SLERP_TOL), cosAngle );
-    angle = acosf4( cosAngle );
-    tttt = spu_splats(t);
-    oneMinusT = spu_sub( spu_splats(1.0f), tttt );
-    angles = spu_sel( spu_splats(1.0f), oneMinusT, (vec_uint4)spu_maskb(0x0f00) );
-    angles = spu_sel( angles, tttt, (vec_uint4)spu_maskb(0x00f0) );
-    angles = spu_mul( angles, angle );
-    sines = sinf4( angles );
-    scales = divf4( sines, spu_shuffle( sines, sines, shuffle_xxxx ) );
-    scale0 = spu_sel( oneMinusT, spu_shuffle( scales, scales, shuffle_yyyy ), selectMask );
-    scale1 = spu_sel( tttt, spu_shuffle( scales, scales, shuffle_zzzz ), selectMask );
-    return Quat( spu_madd( start.get128(), scale0, spu_mul( unitQuat1.get128(), scale1 ) ) );
-}
-
-inline const Quat squad( float t, Quat unitQuat0, Quat unitQuat1, Quat unitQuat2, Quat unitQuat3 )
-{
-    Quat tmp0, tmp1;
-    tmp0 = slerp( t, unitQuat0, unitQuat3 );
-    tmp1 = slerp( t, unitQuat1, unitQuat2 );
-    return slerp( ( ( 2.0f * t ) * ( 1.0f - t ) ), tmp0, tmp1 );
-}
-
-inline vec_float4 Quat::get128( ) const
-{
-    return mVec128;
-}
-
-inline Quat & Quat::operator =( Quat quat )
-{
-    mVec128 = quat.mVec128;
-    return *this;
-}
-
-inline Quat & Quat::setXYZ( Vector3 vec )
-{
-    mVec128 = spu_sel( vec.get128(), mVec128, (vec_uint4)spu_maskb(0x000f) );
-    return *this;
-}
-
-inline const Vector3 Quat::getXYZ( ) const
-{
-    return Vector3( mVec128 );
-}
-
-inline Quat & Quat::setX( float _x )
-{
-    mVec128 = spu_insert( _x, mVec128, 0 );
-    return *this;
-}
-
-inline float Quat::getX( ) const
-{
-    return spu_extract( mVec128, 0 );
-}
-
-inline Quat & Quat::setY( float _y )
-{
-    mVec128 = spu_insert( _y, mVec128, 1 );
-    return *this;
-}
-
-inline float Quat::getY( ) const
-{
-    return spu_extract( mVec128, 1 );
-}
-
-inline Quat & Quat::setZ( float _z )
-{
-    mVec128 = spu_insert( _z, mVec128, 2 );
-    return *this;
-}
-
-inline float Quat::getZ( ) const
-{
-    return spu_extract( mVec128, 2 );
-}
-
-inline Quat & Quat::setW( float _w )
-{
-    mVec128 = spu_insert( _w, mVec128, 3 );
-    return *this;
-}
-
-inline float Quat::getW( ) const
-{
-    return spu_extract( mVec128, 3 );
-}
-
-inline Quat & Quat::setElem( int idx, float value )
-{
-    mVec128 = spu_insert( value, mVec128, idx );
-    return *this;
-}
-
-inline float Quat::getElem( int idx ) const
-{
-    return spu_extract( mVec128, idx );
-}
-
-inline VecIdx Quat::operator []( int idx )
-{
-    return VecIdx( mVec128, idx );
-}
-
-inline float Quat::operator []( int idx ) const
-{
-    return spu_extract( mVec128, idx );
-}
-
-inline const Quat Quat::operator +( Quat quat ) const
-{
-    return Quat( spu_add( mVec128, quat.mVec128 ) );
-}
-
-inline const Quat Quat::operator -( Quat quat ) const
-{
-    return Quat( spu_sub( mVec128, quat.mVec128 ) );
-}
-
-inline const Quat Quat::operator *( float scalar ) const
-{
-    return Quat( spu_mul( mVec128, spu_splats(scalar) ) );
-}
-
-inline Quat & Quat::operator +=( Quat quat )
-{
-    *this = *this + quat;
-    return *this;
-}
-
-inline Quat & Quat::operator -=( Quat quat )
-{
-    *this = *this - quat;
-    return *this;
-}
-
-inline Quat & Quat::operator *=( float scalar )
-{
-    *this = *this * scalar;
-    return *this;
-}
-
-inline const Quat Quat::operator /( float scalar ) const
-{
-    return Quat( divf4( mVec128, spu_splats(scalar) ) );
-}
-
-inline Quat & Quat::operator /=( float scalar )
-{
-    *this = *this / scalar;
-    return *this;
-}
-
-inline const Quat Quat::operator -( ) const
-{
-    return Quat( negatef4( mVec128 ) );
-}
-
-inline const Quat operator *( float scalar, Quat quat )
-{
-    return quat * scalar;
-}
-
-inline float dot( Quat quat0, Quat quat1 )
-{
-    return spu_extract( _vmathVfDot4( quat0.get128(), quat1.get128() ), 0 );
-}
-
-inline float norm( Quat quat )
-{
-    return spu_extract( _vmathVfDot4( quat.get128(), quat.get128() ), 0 );
-}
-
-inline float length( Quat quat )
-{
-    return sqrtf( norm( quat ) );
-}
-
-inline const Quat normalize( Quat quat )
-{
-    vec_float4 dot = _vmathVfDot4( quat.get128(), quat.get128() );
-    return Quat( spu_mul( quat.get128(), rsqrtf4( dot ) ) );
-}
-
-inline const Quat Quat::rotation( Vector3 unitVec0, Vector3 unitVec1 )
-{
-    Vector3 crossVec;
-    vec_float4 cosAngle, cosAngleX2Plus2, recipCosHalfAngleX2, cosHalfAngleX2, res;
-    cosAngle = _vmathVfDot3( unitVec0.get128(), unitVec1.get128() );
-    cosAngle = spu_shuffle( cosAngle, cosAngle, (vec_uchar16)spu_splats(0x00010203) );
-    cosAngleX2Plus2 = spu_madd( cosAngle, spu_splats(2.0f), spu_splats(2.0f) );
-    recipCosHalfAngleX2 = rsqrtf4( cosAngleX2Plus2 );
-    cosHalfAngleX2 = spu_mul( recipCosHalfAngleX2, cosAngleX2Plus2 );
-    crossVec = cross( unitVec0, unitVec1 );
-    res = spu_mul( crossVec.get128(), recipCosHalfAngleX2 );
-    res = spu_sel( res, spu_mul( cosHalfAngleX2, spu_splats(0.5f) ), (vec_uint4)spu_maskb(0x000f) );
-    return Quat( res );
-}
-
-inline const Quat Quat::rotation( float radians, Vector3 unitVec )
-{
-    vec_float4 s, c, angle, res;
-    angle = spu_mul( spu_splats(radians), spu_splats(0.5f) );
-    sincosf4( angle, &s, &c );
-    res = spu_sel( spu_mul( unitVec.get128(), s ), c, (vec_uint4)spu_maskb(0x000f) );
-    return Quat( res );
-}
-
-inline const Quat Quat::rotationX( float radians )
-{
-    vec_float4 s, c, angle, res;
-    angle = spu_mul( spu_splats(radians), spu_splats(0.5f) );
-    sincosf4( angle, &s, &c );
-    res = spu_sel( spu_splats(0.0f), s, (vec_uint4)spu_maskb(0xf000) );
-    res = spu_sel( res, c, (vec_uint4)spu_maskb(0x000f) );
-    return Quat( res );
-}
-
-inline const Quat Quat::rotationY( float radians )
-{
-    vec_float4 s, c, angle, res;
-    angle = spu_mul( spu_splats(radians), spu_splats(0.5f) );
-    sincosf4( angle, &s, &c );
-    res = spu_sel( spu_splats(0.0f), s, (vec_uint4)spu_maskb(0x0f00) );
-    res = spu_sel( res, c, (vec_uint4)spu_maskb(0x000f) );
-    return Quat( res );
-}
-
-inline const Quat Quat::rotationZ( float radians )
-{
-    vec_float4 s, c, angle, res;
-    angle = spu_mul( spu_splats(radians), spu_splats(0.5f) );
-    sincosf4( angle, &s, &c );
-    res = spu_sel( spu_splats(0.0f), s, (vec_uint4)spu_maskb(0x00f0) );
-    res = spu_sel( res, c, (vec_uint4)spu_maskb(0x000f) );
-    return Quat( res );
-}
-
-inline const Quat Quat::operator *( Quat quat ) const
-{
-    vec_float4 ldata, rdata, qv, tmp0, tmp1, tmp2, tmp3;
-    vec_float4 product, l_wxyz, r_wxyz, xy, qw;
-    ldata = mVec128;
-    rdata = quat.mVec128;
-    vec_uchar16 shuffle_wwww = (vec_uchar16)spu_splats((int)0x0c0d0e0f);
-    tmp0 = spu_shuffle( ldata, ldata, _VECTORMATH_SHUF_YZXW );
-    tmp1 = spu_shuffle( rdata, rdata, _VECTORMATH_SHUF_ZXYW );
-    tmp2 = spu_shuffle( ldata, ldata, _VECTORMATH_SHUF_ZXYW );
-    tmp3 = spu_shuffle( rdata, rdata, _VECTORMATH_SHUF_YZXW );
-    qv = spu_mul( spu_shuffle( ldata, ldata, shuffle_wwww ), rdata );
-    qv = spu_madd( spu_shuffle( rdata, rdata, shuffle_wwww ), ldata, qv );
-    qv = spu_madd( tmp0, tmp1, qv );
-    qv = spu_nmsub( tmp2, tmp3, qv );
-    product = spu_mul( ldata, rdata );
-    l_wxyz = spu_rlqwbyte( ldata, 12 );
-    r_wxyz = spu_rlqwbyte( rdata, 12 );
-    qw = spu_nmsub( l_wxyz, r_wxyz, product );
-    xy = spu_madd( l_wxyz, r_wxyz, product );
-    qw = spu_sub( qw, spu_rlqwbyte( xy, 8 ) );
-    return Quat( spu_sel( qv, qw, (vec_uint4)spu_maskb( 0x000f ) ) );
-}
-
-inline Quat & Quat::operator *=( Quat quat )
-{
-    *this = *this * quat;
-    return *this;
-}
-
-inline const Vector3 rotate( Quat quat, Vector3 vec )
-{
-    vec_float4 qdata, vdata, product, tmp0, tmp1, tmp2, tmp3, wwww, qv, qw, res;
-    qdata = quat.get128();
-    vdata = vec.get128();
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_wwww = (vec_uchar16)spu_splats((int)0x0c0d0e0f);
-    tmp0 = spu_shuffle( qdata, qdata, _VECTORMATH_SHUF_YZXW );
-    tmp1 = spu_shuffle( vdata, vdata, _VECTORMATH_SHUF_ZXYW );
-    tmp2 = spu_shuffle( qdata, qdata, _VECTORMATH_SHUF_ZXYW );
-    tmp3 = spu_shuffle( vdata, vdata, _VECTORMATH_SHUF_YZXW );
-    wwww = spu_shuffle( qdata, qdata, shuffle_wwww );
-    qv = spu_mul( wwww, vdata );
-    qv = spu_madd( tmp0, tmp1, qv );
-    qv = spu_nmsub( tmp2, tmp3, qv );
-    product = spu_mul( qdata, vdata );
-    qw = spu_madd( spu_rlqwbyte( qdata, 4 ), spu_rlqwbyte( vdata, 4 ), product );
-    qw = spu_add( spu_rlqwbyte( product, 8 ), qw );
-    tmp1 = spu_shuffle( qv, qv, _VECTORMATH_SHUF_ZXYW );
-    tmp3 = spu_shuffle( qv, qv, _VECTORMATH_SHUF_YZXW );
-    res = spu_mul( spu_shuffle( qw, qw, shuffle_xxxx ), qdata );
-    res = spu_madd( wwww, qv, res );
-    res = spu_madd( tmp0, tmp1, res );
-    res = spu_nmsub( tmp2, tmp3, res );
-    return Vector3( res );
-}
-
-inline const Quat conj( Quat quat )
-{
-    return Quat( spu_xor( quat.get128(), ((vec_float4)(vec_int4){0x80000000,0x80000000,0x80000000,0}) ) );
-}
-
-inline const Quat select( Quat quat0, Quat quat1, bool select1 )
-{
-    return Quat( spu_sel( quat0.get128(), quat1.get128(), spu_splats( (unsigned int)-(select1 > 0) ) ) );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( Quat quat )
-{
-    union { vec_float4 v; float s[4]; } tmp;
-    tmp.v = quat.get128();
-    printf( "( %f %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] );
-}
-
-inline void print( Quat quat, const char * name )
-{
-    union { vec_float4 v; float s[4]; } tmp;
-    tmp.v = quat.get128();
-    printf( "%s: ( %f %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] );
-}
-
-#endif
-
-} // namespace Aos
-} // namespace Vectormath
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_QUAT_AOS_CPP_H
+#define _VECTORMATH_QUAT_AOS_CPP_H
+//-----------------------------------------------------------------------------
+// Definitions
+
+#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
+#define _VECTORMATH_INTERNAL_FUNCTIONS
+
+#endif
+
+namespace Vectormath {
+namespace Aos {
+
+inline Quat::Quat( float _x, float _y, float _z, float _w )
+{ // Constructs a quaternion from four scalar components.
+    mVec128 = (vec_float4){ _x, _y, _z, _w }; // lanes 0..3 hold x, y, z, w in that order
+}
+
+inline Quat::Quat( Vector3 xyz, float _w )
+{ // Constructs a quaternion from a Vector3 and a separate scalar _w.
+    mVec128 = spu_shuffle( xyz.get128(), spu_promote( _w, 0 ), _VECTORMATH_SHUF_XYZA ); // presumably x,y,z from xyz and lane 0 of the promoted _w as w -- confirm the shuffle macro
+}
+
+inline Quat::Quat( Vector4 vec )
+{ // Constructs a quaternion by copying the 128-bit value held by a Vector4.
+    mVec128 = vec.get128();
+}
+
+inline Quat::Quat( float scalar )
+{ // Constructs a quaternion with the same scalar in every component.
+    mVec128 = spu_splats( scalar ); // replicate scalar into all four lanes
+}
+
+inline Quat::Quat( vec_float4 vf4 )
+{ // Constructs a quaternion directly from a raw vec_float4; the value is stored unchanged.
+    mVec128 = vf4;
+}
+
+inline const Quat Quat::identity( )
+{ // Returns a quaternion built from the constant _VECTORMATH_UNIT_0001.
+    return Quat( _VECTORMATH_UNIT_0001 ); // presumably ( 0, 0, 0, 1 ) -- constant is defined outside this header, confirm
+}
+
+inline const Quat lerp( float t, Quat quat0, Quat quat1 )
+{ // Linear interpolation: yields quat0 at t = 0 and quat1 at t = 1; the result is not renormalized here.
+    return ( quat0 + ( ( quat1 - quat0 ) * t ) );
+}
+
+inline const Quat slerp( float t, Quat unitQuat0, Quat unitQuat1 )
+{ // Spherical linear interpolation between two quaternions; parameter names indicate unit-length inputs are expected.
+    Quat start;
+    vec_float4 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
+    vec_uint4 selectMask;
+    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203); // shuffle pattern: bytes 0-3 (lane x) into every lane
+    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607); // shuffle pattern: bytes 4-7 (lane y) into every lane
+    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b); // shuffle pattern: bytes 8-11 (lane z) into every lane
+    cosAngle = _vmathVfDot4( unitQuat0.get128(), unitQuat1.get128() ); // 4-component dot product (helper defined elsewhere)
+    cosAngle = spu_shuffle( cosAngle, cosAngle, shuffle_xxxx ); // broadcast lane 0 of the dot result
+    selectMask = (vec_uint4)spu_cmpgt( spu_splats(0.0f), cosAngle ); // mask set where cosAngle < 0
+    cosAngle = spu_sel( cosAngle, negatef4( cosAngle ), selectMask ); // where the mask is set, use negatef4( cosAngle ) instead
+    start = Quat( spu_sel( unitQuat0.get128(), negatef4( unitQuat0.get128() ), selectMask ) ); // same substitution for unitQuat0, so start and unitQuat1 have non-negative dot
+    selectMask = (vec_uint4)spu_cmpgt( spu_splats(_VECTORMATH_SLERP_TOL), cosAngle ); // mask set where cosAngle < _VECTORMATH_SLERP_TOL
+    angle = acosf4( cosAngle );
+    tttt = spu_splats(t);
+    oneMinusT = spu_sub( spu_splats(1.0f), tttt );
+    angles = spu_sel( spu_splats(1.0f), oneMinusT, (vec_uint4)spu_maskb(0x0f00) ); // lane y = 1 - t, other lanes = 1
+    angles = spu_sel( angles, tttt, (vec_uint4)spu_maskb(0x00f0) ); // lane z = t
+    angles = spu_mul( angles, angle ); // ( angle, (1-t)*angle, t*angle, angle )
+    sines = sinf4( angles );
+    scales = divf4( sines, spu_shuffle( sines, sines, shuffle_xxxx ) ); // every sine divided by lane x, i.e. by sin( angle )
+    scale0 = spu_sel( oneMinusT, spu_shuffle( scales, scales, shuffle_yyyy ), selectMask ); // sine-ratio weight where mask set, otherwise plain 1 - t
+    scale1 = spu_sel( tttt, spu_shuffle( scales, scales, shuffle_zzzz ), selectMask ); // sine-ratio weight where mask set, otherwise plain t
+    return Quat( spu_madd( start.get128(), scale0, spu_mul( unitQuat1.get128(), scale1 ) ) ); // start * scale0 + unitQuat1 * scale1
+}
+
+inline const Quat squad( float t, Quat unitQuat0, Quat unitQuat1, Quat unitQuat2, Quat unitQuat3 )
+{ // Spherical quadrangle interpolation composed of three slerp calls.
+    Quat tmp0, tmp1;
+    tmp0 = slerp( t, unitQuat0, unitQuat3 ); // interpolate between the first and last arguments
+    tmp1 = slerp( t, unitQuat1, unitQuat2 ); // interpolate between the two middle arguments
+    return slerp( ( ( 2.0f * t ) * ( 1.0f - t ) ), tmp0, tmp1 ); // blend the two results with parameter 2t(1 - t)
+}
+
+inline vec_float4 Quat::get128( ) const
+{ // Returns the underlying 128-bit vector by value.
+    return mVec128;
+}
+
+inline Quat & Quat::operator =( Quat quat )
+{ // Assignment: copies the other quaternion's 128-bit value.
+    mVec128 = quat.mVec128;
+    return *this; // allows chained assignment
+}
+
+inline Quat & Quat::setXYZ( Vector3 vec )
+{ // Replaces the x, y, z components with those of vec while keeping the current w.
+    mVec128 = spu_sel( vec.get128(), mVec128, (vec_uint4)spu_maskb(0x000f) ); // mask covers bytes 12-15: lane w comes from mVec128, the rest from vec
+    return *this;
+}
+
+inline const Vector3 Quat::getXYZ( ) const
+{ // Returns a Vector3 constructed from the quaternion's 128-bit value.
+    return Vector3( mVec128 ); // the whole register is passed, including lane w
+}
+
+inline Quat & Quat::setX( float _x )
+{ // Sets the x component; the other components are unchanged.
+    mVec128 = spu_insert( _x, mVec128, 0 ); // write lane 0
+    return *this;
+}
+
+inline float Quat::getX( ) const
+{ // Returns the x component as a scalar float.
+    return spu_extract( mVec128, 0 ); // read lane 0
+}
+
+inline Quat & Quat::setY( float _y )
+{ // Sets the y component; the other components are unchanged.
+    mVec128 = spu_insert( _y, mVec128, 1 ); // write lane 1
+    return *this;
+}
+
+inline float Quat::getY( ) const
+{ // Returns the y component as a scalar float.
+    return spu_extract( mVec128, 1 ); // read lane 1
+}
+
+inline Quat & Quat::setZ( float _z )
+{ // Sets the z component; the other components are unchanged.
+    mVec128 = spu_insert( _z, mVec128, 2 ); // write lane 2
+    return *this;
+}
+
+inline float Quat::getZ( ) const
+{ // Returns the z component as a scalar float.
+    return spu_extract( mVec128, 2 ); // read lane 2
+}
+
+inline Quat & Quat::setW( float _w )
+{ // Sets the w component; the other components are unchanged.
+    mVec128 = spu_insert( _w, mVec128, 3 ); // write lane 3
+    return *this;
+}
+
+inline float Quat::getW( ) const
+{ // Returns the w component as a scalar float.
+    return spu_extract( mVec128, 3 ); // read lane 3
+}
+
+inline Quat & Quat::setElem( int idx, float value )
+{ // Sets the component selected by idx to value.
+    mVec128 = spu_insert( value, mVec128, idx ); // idx is forwarded to spu_insert without a range check in this function
+    return *this;
+}
+
+inline float Quat::getElem( int idx ) const
+{ // Returns the component selected by idx as a scalar float.
+    return spu_extract( mVec128, idx ); // idx is forwarded to spu_extract without a range check in this function
+}
+
+inline VecIdx Quat::operator []( int idx )
+{ // Non-const subscript: returns a VecIdx built from this quaternion's storage and idx.
+    return VecIdx( mVec128, idx ); // presumably a proxy that permits assignment to the element -- confirm against VecIdx
+}
+
+inline float Quat::operator []( int idx ) const
+{ // Const subscript: returns the component selected by idx by value.
+    return spu_extract( mVec128, idx ); // idx is forwarded to spu_extract without a range check in this function
+}
+
+inline const Quat Quat::operator +( Quat quat ) const
+{ // Component-wise sum of two quaternions.
+    return Quat( spu_add( mVec128, quat.mVec128 ) );
+}
+
+inline const Quat Quat::operator -( Quat quat ) const
+{ // Component-wise difference: this minus quat.
+    return Quat( spu_sub( mVec128, quat.mVec128 ) );
+}
+
+inline const Quat Quat::operator *( float scalar ) const
+{ // Multiplies every component by scalar.
+    return Quat( spu_mul( mVec128, spu_splats(scalar) ) ); // scalar is replicated to all lanes before the multiply
+}
+
+inline Quat & Quat::operator +=( Quat quat )
+{ // In-place component-wise addition, implemented via operator + and assignment.
+    *this = *this + quat;
+    return *this;
+}
+
+inline Quat & Quat::operator -=( Quat quat )
+{ // In-place component-wise subtraction, implemented via operator - and assignment.
+    *this = *this - quat;
+    return *this;
+}
+
+inline Quat & Quat::operator *=( float scalar )
+{ // In-place multiplication of every component by scalar, implemented via operator * and assignment.
+    *this = *this * scalar;
+    return *this;
+}
+
+inline const Quat Quat::operator /( float scalar ) const
+{ // Divides every component by scalar using divf4; no check for scalar == 0 is made here.
+    return Quat( divf4( mVec128, spu_splats(scalar) ) ); // scalar is replicated to all lanes before the divide
+}
+
+inline Quat & Quat::operator /=( float scalar )
+{ // In-place division of every component by scalar, implemented via operator / and assignment.
+    *this = *this / scalar;
+    return *this;
+}
+
+inline const Quat Quat::operator -( ) const
+{ // Unary minus: returns a quaternion built from negatef4 applied to all four components.
+    return Quat( negatef4( mVec128 ) ); // negatef4 is defined outside this header; presumably flips the sign of each lane
+}
+
+inline const Quat operator *( float scalar, Quat quat )
+{ // Scalar-on-the-left multiplication; forwards to Quat::operator *( float ).
+    return quat * scalar;
+}
+
+inline float dot( Quat quat0, Quat quat1 )
+{ // Returns the 4-component dot product of two quaternions as a scalar.
+    return spu_extract( _vmathVfDot4( quat0.get128(), quat1.get128() ), 0 ); // result is read from lane 0 of the helper's output
+}
+
+inline float norm( Quat quat )
+{ // Returns the dot product of quat with itself (no square root is taken here).
+    return spu_extract( _vmathVfDot4( quat.get128(), quat.get128() ), 0 ); // result is read from lane 0 of the helper's output
+}
+
+inline float length( Quat quat )
+{ // Returns the square root of norm( quat ).
+    return sqrtf( norm( quat ) );
+}
+
+inline const Quat normalize( Quat quat )
+{ // Scales quat by the reciprocal square root of its dot product with itself.
+    vec_float4 dot = _vmathVfDot4( quat.get128(), quat.get128() ); // local named dot shadows the free function dot() inside this body
+    return Quat( spu_mul( quat.get128(), rsqrtf4( dot ) ) ); // NOTE(review): no guard for an all-zero quat is visible here
+}
+
+inline const Quat Quat::rotation( Vector3 unitVec0, Vector3 unitVec1 )
+{ // Builds a rotation quaternion from two direction vectors; parameter names indicate unit-length inputs are expected.
+    Vector3 crossVec;
+    vec_float4 cosAngle, cosAngleX2Plus2, recipCosHalfAngleX2, cosHalfAngleX2, res;
+    cosAngle = _vmathVfDot3( unitVec0.get128(), unitVec1.get128() ); // 3-component dot product (helper defined elsewhere)
+    cosAngle = spu_shuffle( cosAngle, cosAngle, (vec_uchar16)spu_splats(0x00010203) ); // broadcast lane 0 to every lane
+    cosAngleX2Plus2 = spu_madd( cosAngle, spu_splats(2.0f), spu_splats(2.0f) ); // 2 * cosAngle + 2
+    recipCosHalfAngleX2 = rsqrtf4( cosAngleX2Plus2 ); // NOTE(review): input is 0 when cosAngle == -1; no guard is visible here
+    cosHalfAngleX2 = spu_mul( recipCosHalfAngleX2, cosAngleX2Plus2 ); // x * rsqrt( x ), i.e. sqrt( 2 * cosAngle + 2 )
+    crossVec = cross( unitVec0, unitVec1 );
+    res = spu_mul( crossVec.get128(), recipCosHalfAngleX2 ); // vector part: cross product scaled by the reciprocal
+    res = spu_sel( res, spu_mul( cosHalfAngleX2, spu_splats(0.5f) ), (vec_uint4)spu_maskb(0x000f) ); // lane w = 0.5 * cosHalfAngleX2
+    return Quat( res );
+}
+
+inline const Quat Quat::rotation( float radians, Vector3 unitVec )
+{ // Builds a rotation quaternion from an angle in radians and an axis: ( unitVec * sin( radians/2 ), cos( radians/2 ) ).
+    vec_float4 s, c, angle, res;
+    angle = spu_mul( spu_splats(radians), spu_splats(0.5f) ); // half angle in every lane
+    sincosf4( angle, &s, &c ); // s and c receive the sine and cosine of the half angle
+    res = spu_sel( spu_mul( unitVec.get128(), s ), c, (vec_uint4)spu_maskb(0x000f) ); // lanes x,y,z = axis * s; lane w = c
+    return Quat( res );
+}
+
+inline const Quat Quat::rotationX( float radians )
+{ // Builds the quaternion ( sin( radians/2 ), 0, 0, cos( radians/2 ) ).
+    vec_float4 s, c, angle, res;
+    angle = spu_mul( spu_splats(radians), spu_splats(0.5f) ); // half angle in every lane
+    sincosf4( angle, &s, &c ); // s and c receive the sine and cosine of the half angle
+    res = spu_sel( spu_splats(0.0f), s, (vec_uint4)spu_maskb(0xf000) ); // lane x = s, other lanes = 0
+    res = spu_sel( res, c, (vec_uint4)spu_maskb(0x000f) ); // lane w = c
+    return Quat( res );
+}
+
+inline const Quat Quat::rotationY( float radians )
+{ // Builds the quaternion ( 0, sin( radians/2 ), 0, cos( radians/2 ) ).
+    vec_float4 s, c, angle, res;
+    angle = spu_mul( spu_splats(radians), spu_splats(0.5f) ); // half angle in every lane
+    sincosf4( angle, &s, &c ); // s and c receive the sine and cosine of the half angle
+    res = spu_sel( spu_splats(0.0f), s, (vec_uint4)spu_maskb(0x0f00) ); // lane y = s, other lanes = 0
+    res = spu_sel( res, c, (vec_uint4)spu_maskb(0x000f) ); // lane w = c
+    return Quat( res );
+}
+
+inline const Quat Quat::rotationZ( float radians )
+{ // Builds the quaternion ( 0, 0, sin( radians/2 ), cos( radians/2 ) ).
+    vec_float4 s, c, angle, res;
+    angle = spu_mul( spu_splats(radians), spu_splats(0.5f) ); // half angle in every lane
+    sincosf4( angle, &s, &c ); // s and c receive the sine and cosine of the half angle
+    res = spu_sel( spu_splats(0.0f), s, (vec_uint4)spu_maskb(0x00f0) ); // lane z = s, other lanes = 0
+    res = spu_sel( res, c, (vec_uint4)spu_maskb(0x000f) ); // lane w = c
+    return Quat( res );
+}
+
+inline const Quat Quat::operator *( Quat quat ) const
+{ // Quaternion product ( this * quat ): vector part is assembled in qv, scalar part in lane w of qw.
+    vec_float4 ldata, rdata, qv, tmp0, tmp1, tmp2, tmp3;
+    vec_float4 product, l_wxyz, r_wxyz, xy, qw;
+    ldata = mVec128; // left operand
+    rdata = quat.mVec128; // right operand
+    vec_uchar16 shuffle_wwww = (vec_uchar16)spu_splats((int)0x0c0d0e0f); // shuffle pattern: bytes 12-15 (lane w) into every lane
+    tmp0 = spu_shuffle( ldata, ldata, _VECTORMATH_SHUF_YZXW ); // left reordered, per the macro name, to ( y, z, x, w ) -- macro defined elsewhere
+    tmp1 = spu_shuffle( rdata, rdata, _VECTORMATH_SHUF_ZXYW ); // right reordered, per the macro name, to ( z, x, y, w )
+    tmp2 = spu_shuffle( ldata, ldata, _VECTORMATH_SHUF_ZXYW ); // left reordered to ( z, x, y, w )
+    tmp3 = spu_shuffle( rdata, rdata, _VECTORMATH_SHUF_YZXW ); // right reordered to ( y, z, x, w )
+    qv = spu_mul( spu_shuffle( ldata, ldata, shuffle_wwww ), rdata ); // l.w * r
+    qv = spu_madd( spu_shuffle( rdata, rdata, shuffle_wwww ), ldata, qv ); // + r.w * l
+    qv = spu_madd( tmp0, tmp1, qv ); // + tmp0 * tmp1
+    qv = spu_nmsub( tmp2, tmp3, qv ); // - tmp2 * tmp3; the last two terms form the cross product of the x,y,z parts
+    product = spu_mul( ldata, rdata ); // component-wise l * r
+    l_wxyz = spu_rlqwbyte( ldata, 12 ); // rotate left 12 bytes: ( w, x, y, z )
+    r_wxyz = spu_rlqwbyte( rdata, 12 ); // rotate left 12 bytes: ( w, x, y, z )
+    qw = spu_nmsub( l_wxyz, r_wxyz, product ); // lane w: l.w*r.w - l.z*r.z
+    xy = spu_madd( l_wxyz, r_wxyz, product ); // lane y: l.x*r.x + l.y*r.y
+    qw = spu_sub( qw, spu_rlqwbyte( xy, 8 ) ); // rotating xy by 8 bytes moves lane y to lane w; lane w becomes l.w*r.w - l.x*r.x - l.y*r.y - l.z*r.z
+    return Quat( spu_sel( qv, qw, (vec_uint4)spu_maskb( 0x000f ) ) ); // lanes x,y,z from qv, lane w from qw
+}
+
+inline Quat & Quat::operator *=( Quat quat )
+{ // In-place quaternion product with quat on the right, implemented via operator * and assignment.
+    *this = *this * quat;
+    return *this;
+}
+
+inline const Vector3 rotate( Quat quat, Vector3 vec )
+{ // Rotates vec by quat; presumably expects a unit quaternion (not checked here -- confirm with callers).
+    vec_float4 qdata, vdata, product, tmp0, tmp1, tmp2, tmp3, wwww, qv, qw, res;
+    qdata = quat.get128();
+    vdata = vec.get128();
+    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203); // shuffle pattern: bytes 0-3 (lane x) into every lane
+    vec_uchar16 shuffle_wwww = (vec_uchar16)spu_splats((int)0x0c0d0e0f); // shuffle pattern: bytes 12-15 (lane w) into every lane
+    tmp0 = spu_shuffle( qdata, qdata, _VECTORMATH_SHUF_YZXW ); // quat reordered, per the macro name, to ( y, z, x, w ) -- macro defined elsewhere
+    tmp1 = spu_shuffle( vdata, vdata, _VECTORMATH_SHUF_ZXYW ); // vec reordered, per the macro name, to ( z, x, y, w )
+    tmp2 = spu_shuffle( qdata, qdata, _VECTORMATH_SHUF_ZXYW ); // quat reordered to ( z, x, y, w )
+    tmp3 = spu_shuffle( vdata, vdata, _VECTORMATH_SHUF_YZXW ); // vec reordered to ( y, z, x, w )
+    wwww = spu_shuffle( qdata, qdata, shuffle_wwww ); // quat.w in every lane
+    qv = spu_mul( wwww, vdata ); // q.w * v
+    qv = spu_madd( tmp0, tmp1, qv ); // + tmp0 * tmp1
+    qv = spu_nmsub( tmp2, tmp3, qv ); // - tmp2 * tmp3; together: q.w * v + cross( q.xyz, v )
+    product = spu_mul( qdata, vdata ); // component-wise q * v
+    qw = spu_madd( spu_rlqwbyte( qdata, 4 ), spu_rlqwbyte( vdata, 4 ), product ); // lane x: q.y*v.y + q.x*v.x
+    qw = spu_add( spu_rlqwbyte( product, 8 ), qw ); // lane x: + q.z*v.z, i.e. the 3-component dot of q.xyz and v
+    tmp1 = spu_shuffle( qv, qv, _VECTORMATH_SHUF_ZXYW ); // tmp1 reused: qv reordered to ( z, x, y, w )
+    tmp3 = spu_shuffle( qv, qv, _VECTORMATH_SHUF_YZXW ); // tmp3 reused: qv reordered to ( y, z, x, w )
+    res = spu_mul( spu_shuffle( qw, qw, shuffle_xxxx ), qdata ); // dot( q.xyz, v ) * q
+    res = spu_madd( wwww, qv, res ); // + q.w * qv
+    res = spu_madd( tmp0, tmp1, res ); // + tmp0 * tmp1
+    res = spu_nmsub( tmp2, tmp3, res ); // - tmp2 * tmp3; last two terms form cross( q.xyz, qv )
+    return Vector3( res ); // the whole register is passed to Vector3, including lane w
+}
+
+inline const Quat conj( Quat quat )
+{ // Conjugate: flips the sign of x, y and z and leaves w unchanged.
+    return Quat( spu_xor( quat.get128(), ((vec_float4)(vec_int4){0x80000000,0x80000000,0x80000000,0}) ) ); // XOR toggles the IEEE sign bit in lanes 0-2 only
+}
+
+inline const Quat select( Quat quat0, Quat quat1, bool select1 )
+{ // Branch-free choice: returns quat1 when select1 is true, otherwise quat0.
+    return Quat( spu_sel( quat0.get128(), quat1.get128(), spu_splats( (unsigned int)-(select1 > 0) ) ) ); // mask is all ones when select1 is true, all zeros otherwise
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+inline void print( Quat quat )
+{ // Debug helper (compiled only when _VECTORMATH_DEBUG is defined): prints the four components on one line via printf.
+    union { vec_float4 v; float s[4]; } tmp; // union gives scalar access to the four lanes
+    tmp.v = quat.get128();
+    printf( "( %f %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] ); // order: x y z w
+}
+
+inline void print( Quat quat, const char * name )
+{ // Debug helper (compiled only when _VECTORMATH_DEBUG is defined): prints name, then the four components, via printf.
+    union { vec_float4 v; float s[4]; } tmp; // union gives scalar access to the four lanes
+    tmp.v = quat.get128();
+    printf( "%s: ( %f %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] ); // name is passed to %s as-is; order: x y z w
+}
+
+#endif
+
+} // namespace Aos
+} // namespace Vectormath
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/spu/cpp/quat_soa.h b/Extras/vectormathlibrary/include/vectormath/spu/cpp/quat_soa.h
index 88c2884fa..675457f04 100644
--- a/Extras/vectormathlibrary/include/vectormath/spu/cpp/quat_soa.h
+++ b/Extras/vectormathlibrary/include/vectormath/spu/cpp/quat_soa.h
@@ -1,483 +1,483 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_QUAT_SOA_CPP_H
-#define _VECTORMATH_QUAT_SOA_CPP_H
-//-----------------------------------------------------------------------------
-// Definitions
-
-#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
-#define _VECTORMATH_INTERNAL_FUNCTIONS
-
-#endif
-
-namespace Vectormath {
-namespace Soa {
-
-inline Quat::Quat( const Quat & quat )
-{
-    mX = quat.mX;
-    mY = quat.mY;
-    mZ = quat.mZ;
-    mW = quat.mW;
-}
-
-inline Quat::Quat( vec_float4 _x, vec_float4 _y, vec_float4 _z, vec_float4 _w )
-{
-    mX = _x;
-    mY = _y;
-    mZ = _z;
-    mW = _w;
-}
-
-inline Quat::Quat( const Vector3 & xyz, vec_float4 _w )
-{
-    this->setXYZ( xyz );
-    this->setW( _w );
-}
-
-inline Quat::Quat( const Vector4 & vec )
-{
-    mX = vec.getX();
-    mY = vec.getY();
-    mZ = vec.getZ();
-    mW = vec.getW();
-}
-
-inline Quat::Quat( vec_float4 scalar )
-{
-    mX = scalar;
-    mY = scalar;
-    mZ = scalar;
-    mW = scalar;
-}
-
-inline Quat::Quat( Aos::Quat quat )
-{
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
-    vec_uchar16 shuffle_wwww = (vec_uchar16)spu_splats((int)0x0c0d0e0f);
-    vec_float4 vec128 = quat.get128();
-    mX = spu_shuffle( vec128, vec128, shuffle_xxxx );
-    mY = spu_shuffle( vec128, vec128, shuffle_yyyy );
-    mZ = spu_shuffle( vec128, vec128, shuffle_zzzz );
-    mW = spu_shuffle( vec128, vec128, shuffle_wwww );
-}
-
-inline Quat::Quat( Aos::Quat quat0, Aos::Quat quat1, Aos::Quat quat2, Aos::Quat quat3 )
-{
-    vec_float4 tmp0, tmp1, tmp2, tmp3;
-    tmp0 = spu_shuffle( quat0.get128(), quat2.get128(), _VECTORMATH_SHUF_XAYB );
-    tmp1 = spu_shuffle( quat1.get128(), quat3.get128(), _VECTORMATH_SHUF_XAYB );
-    tmp2 = spu_shuffle( quat0.get128(), quat2.get128(), _VECTORMATH_SHUF_ZCWD );
-    tmp3 = spu_shuffle( quat1.get128(), quat3.get128(), _VECTORMATH_SHUF_ZCWD );
-    mX = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_XAYB );
-    mY = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_ZCWD );
-    mZ = spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_XAYB );
-    mW = spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_ZCWD );
-}
-
-inline const Quat Quat::identity( )
-{
-    return Quat( spu_splats(0.0f), spu_splats(0.0f), spu_splats(0.0f), spu_splats(1.0f) );
-}
-
-inline const Quat lerp( vec_float4 t, const Quat & quat0, const Quat & quat1 )
-{
-    return ( quat0 + ( ( quat1 - quat0 ) * t ) );
-}
-
-inline const Quat slerp( vec_float4 t, const Quat & unitQuat0, const Quat & unitQuat1 )
-{
-    Quat start;
-    vec_float4 recipSinAngle, scale0, scale1, cosAngle, angle;
-    vec_uint4 selectMask;
-    cosAngle = dot( unitQuat0, unitQuat1 );
-    selectMask = (vec_uint4)spu_cmpgt( spu_splats(0.0f), cosAngle );
-    cosAngle = spu_sel( cosAngle, negatef4( cosAngle ), selectMask );
-    start.setX( spu_sel( unitQuat0.getX(), negatef4( unitQuat0.getX() ), selectMask ) );
-    start.setY( spu_sel( unitQuat0.getY(), negatef4( unitQuat0.getY() ), selectMask ) );
-    start.setZ( spu_sel( unitQuat0.getZ(), negatef4( unitQuat0.getZ() ), selectMask ) );
-    start.setW( spu_sel( unitQuat0.getW(), negatef4( unitQuat0.getW() ), selectMask ) );
-    selectMask = (vec_uint4)spu_cmpgt( spu_splats(_VECTORMATH_SLERP_TOL), cosAngle );
-    angle = acosf4( cosAngle );
-    recipSinAngle = recipf4( sinf4( angle ) );
-    scale0 = spu_sel( spu_sub( spu_splats(1.0f), t ), spu_mul( sinf4( spu_mul( spu_sub( spu_splats(1.0f), t ), angle ) ), recipSinAngle ), selectMask );
-    scale1 = spu_sel( t, spu_mul( sinf4( spu_mul( t, angle ) ), recipSinAngle ), selectMask );
-    return ( ( start * scale0 ) + ( unitQuat1 * scale1 ) );
-}
-
-inline const Quat squad( vec_float4 t, const Quat & unitQuat0, const Quat & unitQuat1, const Quat & unitQuat2, const Quat & unitQuat3 )
-{
-    Quat tmp0, tmp1;
-    tmp0 = slerp( t, unitQuat0, unitQuat3 );
-    tmp1 = slerp( t, unitQuat1, unitQuat2 );
-    return slerp( spu_mul( spu_mul( spu_splats(2.0f), t ), spu_sub( spu_splats(1.0f), t ) ), tmp0, tmp1 );
-}
-
-inline void Quat::get4Aos( Aos::Quat & result0, Aos::Quat & result1, Aos::Quat & result2, Aos::Quat & result3 ) const
-{
-    vec_float4 tmp0, tmp1, tmp2, tmp3;
-    tmp0 = spu_shuffle( mX, mZ, _VECTORMATH_SHUF_XAYB );
-    tmp1 = spu_shuffle( mY, mW, _VECTORMATH_SHUF_XAYB );
-    tmp2 = spu_shuffle( mX, mZ, _VECTORMATH_SHUF_ZCWD );
-    tmp3 = spu_shuffle( mY, mW, _VECTORMATH_SHUF_ZCWD );
-    result0 = Aos::Quat( spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_XAYB ) );
-    result1 = Aos::Quat( spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_ZCWD ) );
-    result2 = Aos::Quat( spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_XAYB ) );
-    result3 = Aos::Quat( spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_ZCWD ) );
-}
-
-inline Quat & Quat::operator =( const Quat & quat )
-{
-    mX = quat.mX;
-    mY = quat.mY;
-    mZ = quat.mZ;
-    mW = quat.mW;
-    return *this;
-}
-
-inline Quat & Quat::setXYZ( const Vector3 & vec )
-{
-    mX = vec.getX();
-    mY = vec.getY();
-    mZ = vec.getZ();
-    return *this;
-}
-
-inline const Vector3 Quat::getXYZ( ) const
-{
-    return Vector3( mX, mY, mZ );
-}
-
-inline Quat & Quat::setX( vec_float4 _x )
-{
-    mX = _x;
-    return *this;
-}
-
-inline vec_float4 Quat::getX( ) const
-{
-    return mX;
-}
-
-inline Quat & Quat::setY( vec_float4 _y )
-{
-    mY = _y;
-    return *this;
-}
-
-inline vec_float4 Quat::getY( ) const
-{
-    return mY;
-}
-
-inline Quat & Quat::setZ( vec_float4 _z )
-{
-    mZ = _z;
-    return *this;
-}
-
-inline vec_float4 Quat::getZ( ) const
-{
-    return mZ;
-}
-
-inline Quat & Quat::setW( vec_float4 _w )
-{
-    mW = _w;
-    return *this;
-}
-
-inline vec_float4 Quat::getW( ) const
-{
-    return mW;
-}
-
-inline Quat & Quat::setElem( int idx, vec_float4 value )
-{
-    *(&mX + idx) = value;
-    return *this;
-}
-
-inline vec_float4 Quat::getElem( int idx ) const
-{
-    return *(&mX + idx);
-}
-
-inline Quat::vec_float4_t & Quat::operator []( int idx )
-{
-    return *(&mX + idx);
-}
-
-inline vec_float4 Quat::operator []( int idx ) const
-{
-    return *(&mX + idx);
-}
-
-inline const Quat Quat::operator +( const Quat & quat ) const
-{
-    return Quat(
-        spu_add( mX, quat.mX ),
-        spu_add( mY, quat.mY ),
-        spu_add( mZ, quat.mZ ),
-        spu_add( mW, quat.mW )
-    );
-}
-
-inline const Quat Quat::operator -( const Quat & quat ) const
-{
-    return Quat(
-        spu_sub( mX, quat.mX ),
-        spu_sub( mY, quat.mY ),
-        spu_sub( mZ, quat.mZ ),
-        spu_sub( mW, quat.mW )
-    );
-}
-
-inline const Quat Quat::operator *( vec_float4 scalar ) const
-{
-    return Quat(
-        spu_mul( mX, scalar ),
-        spu_mul( mY, scalar ),
-        spu_mul( mZ, scalar ),
-        spu_mul( mW, scalar )
-    );
-}
-
-inline Quat & Quat::operator +=( const Quat & quat )
-{
-    *this = *this + quat;
-    return *this;
-}
-
-inline Quat & Quat::operator -=( const Quat & quat )
-{
-    *this = *this - quat;
-    return *this;
-}
-
-inline Quat & Quat::operator *=( vec_float4 scalar )
-{
-    *this = *this * scalar;
-    return *this;
-}
-
-inline const Quat Quat::operator /( vec_float4 scalar ) const
-{
-    return Quat(
-        divf4( mX, scalar ),
-        divf4( mY, scalar ),
-        divf4( mZ, scalar ),
-        divf4( mW, scalar )
-    );
-}
-
-inline Quat & Quat::operator /=( vec_float4 scalar )
-{
-    *this = *this / scalar;
-    return *this;
-}
-
-inline const Quat Quat::operator -( ) const
-{
-    return Quat(
-        negatef4( mX ),
-        negatef4( mY ),
-        negatef4( mZ ),
-        negatef4( mW )
-    );
-}
-
-inline const Quat operator *( vec_float4 scalar, const Quat & quat )
-{
-    return quat * scalar;
-}
-
-inline vec_float4 dot( const Quat & quat0, const Quat & quat1 )
-{
-    vec_float4 result;
-    result = spu_mul( quat0.getX(), quat1.getX() );
-    result = spu_add( result, spu_mul( quat0.getY(), quat1.getY() ) );
-    result = spu_add( result, spu_mul( quat0.getZ(), quat1.getZ() ) );
-    result = spu_add( result, spu_mul( quat0.getW(), quat1.getW() ) );
-    return result;
-}
-
-inline vec_float4 norm( const Quat & quat )
-{
-    vec_float4 result;
-    result = spu_mul( quat.getX(), quat.getX() );
-    result = spu_add( result, spu_mul( quat.getY(), quat.getY() ) );
-    result = spu_add( result, spu_mul( quat.getZ(), quat.getZ() ) );
-    result = spu_add( result, spu_mul( quat.getW(), quat.getW() ) );
-    return result;
-}
-
-inline vec_float4 length( const Quat & quat )
-{
-    return sqrtf4( norm( quat ) );
-}
-
-inline const Quat normalize( const Quat & quat )
-{
-    vec_float4 lenSqr, lenInv;
-    lenSqr = norm( quat );
-    lenInv = rsqrtf4( lenSqr );
-    return Quat(
-        spu_mul( quat.getX(), lenInv ),
-        spu_mul( quat.getY(), lenInv ),
-        spu_mul( quat.getZ(), lenInv ),
-        spu_mul( quat.getW(), lenInv )
-    );
-}
-
-inline const Quat Quat::rotation( const Vector3 & unitVec0, const Vector3 & unitVec1 )
-{
-    vec_float4 cosHalfAngleX2, recipCosHalfAngleX2;
-    cosHalfAngleX2 = sqrtf4( spu_mul( spu_splats(2.0f), spu_add( spu_splats(1.0f), dot( unitVec0, unitVec1 ) ) ) );
-    recipCosHalfAngleX2 = recipf4( cosHalfAngleX2 );
-    return Quat( ( cross( unitVec0, unitVec1 ) * recipCosHalfAngleX2 ), spu_mul( cosHalfAngleX2, spu_splats(0.5f) ) );
-}
-
-inline const Quat Quat::rotation( vec_float4 radians, const Vector3 & unitVec )
-{
-    vec_float4 s, c, angle;
-    angle = spu_mul( radians, spu_splats(0.5f) );
-    sincosf4( angle, &s, &c );
-    return Quat( ( unitVec * s ), c );
-}
-
-inline const Quat Quat::rotationX( vec_float4 radians )
-{
-    vec_float4 s, c, angle;
-    angle = spu_mul( radians, spu_splats(0.5f) );
-    sincosf4( angle, &s, &c );
-    return Quat( s, spu_splats(0.0f), spu_splats(0.0f), c );
-}
-
-inline const Quat Quat::rotationY( vec_float4 radians )
-{
-    vec_float4 s, c, angle;
-    angle = spu_mul( radians, spu_splats(0.5f) );
-    sincosf4( angle, &s, &c );
-    return Quat( spu_splats(0.0f), s, spu_splats(0.0f), c );
-}
-
-inline const Quat Quat::rotationZ( vec_float4 radians )
-{
-    vec_float4 s, c, angle;
-    angle = spu_mul( radians, spu_splats(0.5f) );
-    sincosf4( angle, &s, &c );
-    return Quat( spu_splats(0.0f), spu_splats(0.0f), s, c );
-}
-
-inline const Quat Quat::operator *( const Quat & quat ) const
-{
-    return Quat(
-        spu_sub( spu_add( spu_add( spu_mul( mW, quat.mX ), spu_mul( mX, quat.mW ) ), spu_mul( mY, quat.mZ ) ), spu_mul( mZ, quat.mY ) ),
-        spu_sub( spu_add( spu_add( spu_mul( mW, quat.mY ), spu_mul( mY, quat.mW ) ), spu_mul( mZ, quat.mX ) ), spu_mul( mX, quat.mZ ) ),
-        spu_sub( spu_add( spu_add( spu_mul( mW, quat.mZ ), spu_mul( mZ, quat.mW ) ), spu_mul( mX, quat.mY ) ), spu_mul( mY, quat.mX ) ),
-        spu_sub( spu_sub( spu_sub( spu_mul( mW, quat.mW ), spu_mul( mX, quat.mX ) ), spu_mul( mY, quat.mY ) ), spu_mul( mZ, quat.mZ ) )
-    );
-}
-
-inline Quat & Quat::operator *=( const Quat & quat )
-{
-    *this = *this * quat;
-    return *this;
-}
-
-inline const Vector3 rotate( const Quat & quat, const Vector3 & vec )
-{
-    vec_float4 tmpX, tmpY, tmpZ, tmpW;
-    tmpX = spu_sub( spu_add( spu_mul( quat.getW(), vec.getX() ), spu_mul( quat.getY(), vec.getZ() ) ), spu_mul( quat.getZ(), vec.getY() ) );
-    tmpY = spu_sub( spu_add( spu_mul( quat.getW(), vec.getY() ), spu_mul( quat.getZ(), vec.getX() ) ), spu_mul( quat.getX(), vec.getZ() ) );
-    tmpZ = spu_sub( spu_add( spu_mul( quat.getW(), vec.getZ() ), spu_mul( quat.getX(), vec.getY() ) ), spu_mul( quat.getY(), vec.getX() ) );
-    tmpW = spu_add( spu_add( spu_mul( quat.getX(), vec.getX() ), spu_mul( quat.getY(), vec.getY() ) ), spu_mul( quat.getZ(), vec.getZ() ) );
-    return Vector3(
-        spu_add( spu_sub( spu_add( spu_mul( tmpW, quat.getX() ), spu_mul( tmpX, quat.getW() ) ), spu_mul( tmpY, quat.getZ() ) ), spu_mul( tmpZ, quat.getY() ) ),
-        spu_add( spu_sub( spu_add( spu_mul( tmpW, quat.getY() ), spu_mul( tmpY, quat.getW() ) ), spu_mul( tmpZ, quat.getX() ) ), spu_mul( tmpX, quat.getZ() ) ),
-        spu_add( spu_sub( spu_add( spu_mul( tmpW, quat.getZ() ), spu_mul( tmpZ, quat.getW() ) ), spu_mul( tmpX, quat.getY() ) ), spu_mul( tmpY, quat.getX() ) )
-    );
-}
-
-inline const Quat conj( const Quat & quat )
-{
-    return Quat( negatef4( quat.getX() ), negatef4( quat.getY() ), negatef4( quat.getZ() ), quat.getW() );
-}
-
-inline const Quat select( const Quat & quat0, const Quat & quat1, vec_uint4 select1 )
-{
-    return Quat(
-        spu_sel( quat0.getX(), quat1.getX(), select1 ),
-        spu_sel( quat0.getY(), quat1.getY(), select1 ),
-        spu_sel( quat0.getZ(), quat1.getZ(), select1 ),
-        spu_sel( quat0.getW(), quat1.getW(), select1 )
-    );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( const Quat & quat )
-{
-    Aos::Quat vec0, vec1, vec2, vec3;
-    quat.get4Aos( vec0, vec1, vec2, vec3 );
-    printf("slot 0:\n");
-    print( vec0 );
-    printf("slot 1:\n");
-    print( vec1 );
-    printf("slot 2:\n");
-    print( vec2 );
-    printf("slot 3:\n");
-    print( vec3 );
-}
-
-inline void print( const Quat & quat, const char * name )
-{
-    Aos::Quat vec0, vec1, vec2, vec3;
-    printf( "%s:\n", name );
-    quat.get4Aos( vec0, vec1, vec2, vec3 );
-    printf("slot 0:\n");
-    print( vec0 );
-    printf("slot 1:\n");
-    print( vec1 );
-    printf("slot 2:\n");
-    print( vec2 );
-    printf("slot 3:\n");
-    print( vec3 );
-}
-
-#endif
-
-} // namespace Soa
-} // namespace Vectormath
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_QUAT_SOA_CPP_H
+#define _VECTORMATH_QUAT_SOA_CPP_H
+//-----------------------------------------------------------------------------
+// Definitions
+
+#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
+#define _VECTORMATH_INTERNAL_FUNCTIONS
+
+#endif
+
+namespace Vectormath {
+namespace Soa {
+
+inline Quat::Quat( const Quat & quat )
+{ // Copy constructor: copies the four component vectors mX, mY, mZ and mW from quat.
+    mX = quat.mX;
+    mY = quat.mY;
+    mZ = quat.mZ;
+    mW = quat.mW;
+}
+
+inline Quat::Quat( vec_float4 _x, vec_float4 _y, vec_float4 _z, vec_float4 _w )
+{ // Construct from four vec_float4 values, stored unchanged as the x, y, z and w component vectors.
+    mX = _x;
+    mY = _y;
+    mZ = _z;
+    mW = _w;
+}
+
+inline Quat::Quat( const Vector3 & xyz, vec_float4 _w )
+{ // Construct from a Vector3 plus a separate w vector, delegating to the setters.
+    this->setXYZ( xyz ); // fills mX, mY, mZ from xyz
+    this->setW( _w );
+}
+
+inline Quat::Quat( const Vector4 & vec )
+{ // Construct from a Vector4 by copying its x, y, z and w components through the getters.
+    mX = vec.getX();
+    mY = vec.getY();
+    mZ = vec.getZ();
+    mW = vec.getW();
+}
+
+inline Quat::Quat( vec_float4 scalar )
+{ // Construct with all four component vectors set to the same vec_float4.
+    mX = scalar;
+    mY = scalar;
+    mZ = scalar;
+    mW = scalar;
+}
+
+inline Quat::Quat( Aos::Quat quat )
+{ // Replicate one AoS quaternion: each component vector receives the matching 32-bit word of quat copied into all four words.
+    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203); // byte indices 0-3 (word 0) repeated in every word
+    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607); // byte indices 4-7 (word 1) repeated in every word
+    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b); // byte indices 8-11 (word 2) repeated in every word
+    vec_uchar16 shuffle_wwww = (vec_uchar16)spu_splats((int)0x0c0d0e0f); // byte indices 12-15 (word 3) repeated in every word
+    vec_float4 vec128 = quat.get128();
+    mX = spu_shuffle( vec128, vec128, shuffle_xxxx ); // both shuffle inputs are the same vector, so only the byte indices matter
+    mY = spu_shuffle( vec128, vec128, shuffle_yyyy );
+    mZ = spu_shuffle( vec128, vec128, shuffle_zzzz );
+    mW = spu_shuffle( vec128, vec128, shuffle_wwww );
+}
+
+inline Quat::Quat( Aos::Quat quat0, Aos::Quat quat1, Aos::Quat quat2, Aos::Quat quat3 )
+{ // Transpose four AoS quaternions into SoA form. NOTE(review): lane layouts below assume _VECTORMATH_SHUF_XAYB / _ZCWD pick words in the order their names spell; the macro definitions are outside this view.
+    vec_float4 tmp0, tmp1, tmp2, tmp3;
+    tmp0 = spu_shuffle( quat0.get128(), quat2.get128(), _VECTORMATH_SHUF_XAYB ); // ( q0.x, q2.x, q0.y, q2.y )
+    tmp1 = spu_shuffle( quat1.get128(), quat3.get128(), _VECTORMATH_SHUF_XAYB ); // ( q1.x, q3.x, q1.y, q3.y )
+    tmp2 = spu_shuffle( quat0.get128(), quat2.get128(), _VECTORMATH_SHUF_ZCWD ); // ( q0.z, q2.z, q0.w, q2.w )
+    tmp3 = spu_shuffle( quat1.get128(), quat3.get128(), _VECTORMATH_SHUF_ZCWD ); // ( q1.z, q3.z, q1.w, q3.w )
+    mX = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_XAYB ); // ( q0.x, q1.x, q2.x, q3.x )
+    mY = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_ZCWD ); // ( q0.y, q1.y, q2.y, q3.y )
+    mZ = spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_XAYB ); // ( q0.z, q1.z, q2.z, q3.z )
+    mW = spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_ZCWD ); // ( q0.w, q1.w, q2.w, q3.w )
+}
+
+inline const Quat Quat::identity( )
+{ // Returns a Quat whose x, y, z vectors are splatted 0.0f and whose w vector is splatted 1.0f.
+    return Quat( spu_splats(0.0f), spu_splats(0.0f), spu_splats(0.0f), spu_splats(1.0f) );
+}
+
+inline const Quat lerp( vec_float4 t, const Quat & quat0, const Quat & quat1 )
+{ // Linear interpolation quat0 + ( quat1 - quat0 ) * t; t is a vector, so each word of t weights its own slot. The result is not renormalized here.
+    return ( quat0 + ( ( quat1 - quat0 ) * t ) );
+}
+
+inline const Quat slerp( vec_float4 t, const Quat & unitQuat0, const Quat & unitQuat1 )
+{ // Spherical interpolation between two quaternions (parameter names indicate unit-length inputs), with linear weights used as a fallback.
+    Quat start;
+    vec_float4 recipSinAngle, scale0, scale1, cosAngle, angle;
+    vec_uint4 selectMask;
+    cosAngle = dot( unitQuat0, unitQuat1 );
+    selectMask = (vec_uint4)spu_cmpgt( spu_splats(0.0f), cosAngle ); // mask of slots where the dot product is negative
+    cosAngle = spu_sel( cosAngle, negatef4( cosAngle ), selectMask ); // those slots use -cosAngle, making cosAngle non-negative
+    start.setX( spu_sel( unitQuat0.getX(), negatef4( unitQuat0.getX() ), selectMask ) ); // start = unitQuat0, negated in the masked slots
+    start.setY( spu_sel( unitQuat0.getY(), negatef4( unitQuat0.getY() ), selectMask ) );
+    start.setZ( spu_sel( unitQuat0.getZ(), negatef4( unitQuat0.getZ() ), selectMask ) );
+    start.setW( spu_sel( unitQuat0.getW(), negatef4( unitQuat0.getW() ), selectMask ) );
+    selectMask = (vec_uint4)spu_cmpgt( spu_splats(_VECTORMATH_SLERP_TOL), cosAngle ); // mask reused: slots where cosAngle < _VECTORMATH_SLERP_TOL take the sine-based weights
+    angle = acosf4( cosAngle );
+    recipSinAngle = recipf4( sinf4( angle ) ); // evaluated for every slot, including slots that end up keeping the linear weights
+    scale0 = spu_sel( spu_sub( spu_splats(1.0f), t ), spu_mul( sinf4( spu_mul( spu_sub( spu_splats(1.0f), t ), angle ) ), recipSinAngle ), selectMask ); // sin((1-t)*angle)/sin(angle), else linear ( 1 - t )
+    scale1 = spu_sel( t, spu_mul( sinf4( spu_mul( t, angle ) ), recipSinAngle ), selectMask ); // sin(t*angle)/sin(angle), else linear t
+    return ( ( start * scale0 ) + ( unitQuat1 * scale1 ) );
+}
+
+inline const Quat squad( vec_float4 t, const Quat & unitQuat0, const Quat & unitQuat1, const Quat & unitQuat2, const Quat & unitQuat3 )
+{ // Built from three slerp calls: two at parameter t, then one between their results at parameter 2*t*(1-t).
+    Quat tmp0, tmp1;
+    tmp0 = slerp( t, unitQuat0, unitQuat3 ); // outer pair (0 and 3)
+    tmp1 = slerp( t, unitQuat1, unitQuat2 ); // inner pair (1 and 2)
+    return slerp( spu_mul( spu_mul( spu_splats(2.0f), t ), spu_sub( spu_splats(1.0f), t ) ), tmp0, tmp1 );
+}
+
+inline void Quat::get4Aos( Aos::Quat & result0, Aos::Quat & result1, Aos::Quat & result2, Aos::Quat & result3 ) const
+{ // Transpose SoA back into four AoS quaternions (result<i> is slot i). NOTE(review): lane layouts assume _VECTORMATH_SHUF_XAYB / _ZCWD pick words in the order their names spell; definitions are outside this view.
+    vec_float4 tmp0, tmp1, tmp2, tmp3;
+    tmp0 = spu_shuffle( mX, mZ, _VECTORMATH_SHUF_XAYB ); // ( x0, z0, x1, z1 )
+    tmp1 = spu_shuffle( mY, mW, _VECTORMATH_SHUF_XAYB ); // ( y0, w0, y1, w1 )
+    tmp2 = spu_shuffle( mX, mZ, _VECTORMATH_SHUF_ZCWD ); // ( x2, z2, x3, z3 )
+    tmp3 = spu_shuffle( mY, mW, _VECTORMATH_SHUF_ZCWD ); // ( y2, w2, y3, w3 )
+    result0 = Aos::Quat( spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_XAYB ) ); // ( x0, y0, z0, w0 )
+    result1 = Aos::Quat( spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_ZCWD ) ); // ( x1, y1, z1, w1 )
+    result2 = Aos::Quat( spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_XAYB ) ); // ( x2, y2, z2, w2 )
+    result3 = Aos::Quat( spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_ZCWD ) ); // ( x3, y3, z3, w3 )
+}
+
+inline Quat & Quat::operator =( const Quat & quat )
+{ // Assignment: copies mX, mY, mZ and mW from quat and returns *this so assignments can be chained.
+    mX = quat.mX;
+    mY = quat.mY;
+    mZ = quat.mZ;
+    mW = quat.mW;
+    return *this;
+}
+
+inline Quat & Quat::setXYZ( const Vector3 & vec )
+{ // Overwrites the x, y and z component vectors from vec; mW is left untouched. Returns *this.
+    mX = vec.getX();
+    mY = vec.getY();
+    mZ = vec.getZ();
+    return *this;
+}
+
+inline const Vector3 Quat::getXYZ( ) const
+{ // Returns the x, y and z component vectors packaged as a Vector3 (w is dropped).
+    return Vector3( mX, mY, mZ );
+}
+
+inline Quat & Quat::setX( vec_float4 _x )
+{ // Replaces the x component vector; returns *this.
+    mX = _x;
+    return *this;
+}
+
+inline vec_float4 Quat::getX( ) const
+{ // Returns the x component vector by value.
+    return mX;
+}
+
+inline Quat & Quat::setY( vec_float4 _y )
+{ // Replaces the y component vector; returns *this.
+    mY = _y;
+    return *this;
+}
+
+inline vec_float4 Quat::getY( ) const
+{ // Returns the y component vector by value.
+    return mY;
+}
+
+inline Quat & Quat::setZ( vec_float4 _z )
+{ // Replaces the z component vector; returns *this.
+    mZ = _z;
+    return *this;
+}
+
+inline vec_float4 Quat::getZ( ) const
+{ // Returns the z component vector by value.
+    return mZ;
+}
+
+inline Quat & Quat::setW( vec_float4 _w )
+{ // Replaces the w component vector; returns *this.
+    mW = _w;
+    return *this;
+}
+
+inline vec_float4 Quat::getW( ) const
+{ // Returns the w component vector by value.
+    return mW;
+}
+
+inline Quat & Quat::setElem( int idx, vec_float4 value )
+{ // Stores value into the component selected by idx; idx is not range-checked. Returns *this.
+    *(&mX + idx) = value; // indexes from the address of mX; presumably relies on mX, mY, mZ, mW being declared contiguously (class declaration not visible here - confirm)
+    return *this;
+}
+
+inline vec_float4 Quat::getElem( int idx ) const
+{ // Returns the component selected by idx; idx is not range-checked.
+    return *(&mX + idx); // indexes from the address of mX; presumably relies on contiguous member layout (class declaration not visible here - confirm)
+}
+
+inline Quat::vec_float4_t & Quat::operator []( int idx )
+{ // Mutable subscript: returns a reference to the component selected by idx; idx is not range-checked.
+    return *(&mX + idx); // indexes from the address of mX; presumably relies on contiguous member layout (class declaration not visible here - confirm)
+}
+
+inline vec_float4 Quat::operator []( int idx ) const
+{ // Const subscript: returns a copy of the component selected by idx; idx is not range-checked.
+    return *(&mX + idx); // indexes from the address of mX; presumably relies on contiguous member layout (class declaration not visible here - confirm)
+}
+
+inline const Quat Quat::operator +( const Quat & quat ) const
+{ // Component-wise sum of *this and quat, returned as a new Quat.
+    return Quat(
+        spu_add( mX, quat.mX ),
+        spu_add( mY, quat.mY ),
+        spu_add( mZ, quat.mZ ),
+        spu_add( mW, quat.mW )
+    );
+}
+
+inline const Quat Quat::operator -( const Quat & quat ) const
+{ // Component-wise difference *this - quat, returned as a new Quat.
+    return Quat(
+        spu_sub( mX, quat.mX ),
+        spu_sub( mY, quat.mY ),
+        spu_sub( mZ, quat.mZ ),
+        spu_sub( mW, quat.mW )
+    );
+}
+
+inline const Quat Quat::operator *( vec_float4 scalar ) const
+{ // Multiplies every component vector by scalar; scalar is itself a vector, so each slot can carry its own factor.
+    return Quat(
+        spu_mul( mX, scalar ),
+        spu_mul( mY, scalar ),
+        spu_mul( mZ, scalar ),
+        spu_mul( mW, scalar )
+    );
+}
+
+inline Quat & Quat::operator +=( const Quat & quat )
+{ // In-place add, implemented through the binary operator + and assignment; returns *this.
+    *this = *this + quat;
+    return *this;
+}
+
+inline Quat & Quat::operator -=( const Quat & quat )
+{ // In-place subtract, implemented through the binary operator - and assignment; returns *this.
+    *this = *this - quat;
+    return *this;
+}
+
+inline Quat & Quat::operator *=( vec_float4 scalar )
+{ // In-place scale, implemented through operator *( vec_float4 ) and assignment; returns *this.
+    *this = *this * scalar;
+    return *this;
+}
+
+inline const Quat Quat::operator /( vec_float4 scalar ) const
+{ // Divides every component vector by scalar using divf4 (defined outside this file); no check for zero divisors is made here.
+    return Quat(
+        divf4( mX, scalar ),
+        divf4( mY, scalar ),
+        divf4( mZ, scalar ),
+        divf4( mW, scalar )
+    );
+}
+
+inline Quat & Quat::operator /=( vec_float4 scalar )
+{ // In-place divide, implemented through operator /( vec_float4 ) and assignment; returns *this.
+    *this = *this / scalar;
+    return *this;
+}
+
+inline const Quat Quat::operator -( ) const
+{ // Unary minus: applies negatef4 to all four component vectors (including w, unlike conj).
+    return Quat(
+        negatef4( mX ),
+        negatef4( mY ),
+        negatef4( mZ ),
+        negatef4( mW )
+    );
+}
+
+inline const Quat operator *( vec_float4 scalar, const Quat & quat )
+{ // Free-function form so the scalar can appear on the left; forwards to Quat::operator *( vec_float4 ).
+    return quat * scalar;
+}
+
+inline vec_float4 dot( const Quat & quat0, const Quat & quat1 )
+{ // Four-component dot product x0*x1 + y0*y1 + z0*z1 + w0*w1, accumulated with element-wise vector ops so each slot gets its own result.
+    vec_float4 result;
+    result = spu_mul( quat0.getX(), quat1.getX() );
+    result = spu_add( result, spu_mul( quat0.getY(), quat1.getY() ) );
+    result = spu_add( result, spu_mul( quat0.getZ(), quat1.getZ() ) );
+    result = spu_add( result, spu_mul( quat0.getW(), quat1.getW() ) );
+    return result;
+}
+
+inline vec_float4 norm( const Quat & quat )
+{ // Sum of squares x*x + y*y + z*z + w*w; no square root is taken here (length() applies sqrtf4 to this value).
+    vec_float4 result;
+    result = spu_mul( quat.getX(), quat.getX() );
+    result = spu_add( result, spu_mul( quat.getY(), quat.getY() ) );
+    result = spu_add( result, spu_mul( quat.getZ(), quat.getZ() ) );
+    result = spu_add( result, spu_mul( quat.getW(), quat.getW() ) );
+    return result;
+}
+
+inline vec_float4 length( const Quat & quat )
+{ // Returns sqrtf4 of norm( quat ), i.e. the square root of the sum of squared components.
+    return sqrtf4( norm( quat ) );
+}
+
+inline const Quat normalize( const Quat & quat )
+{ // Scales every component by rsqrtf4( norm( quat ) ); no special handling of a zero norm is visible here.
+    vec_float4 lenSqr, lenInv;
+    lenSqr = norm( quat );
+    lenInv = rsqrtf4( lenSqr ); // reciprocal square root of the sum of squares (rsqrtf4 is defined outside this file)
+    return Quat(
+        spu_mul( quat.getX(), lenInv ),
+        spu_mul( quat.getY(), lenInv ),
+        spu_mul( quat.getZ(), lenInv ),
+        spu_mul( quat.getW(), lenInv )
+    );
+}
+
+inline const Quat Quat::rotation( const Vector3 & unitVec0, const Vector3 & unitVec1 )
+{ // Builds a quaternion from two vectors (named as unit-length inputs): xyz = cross( v0, v1 ) / s and w = s / 2, with s = sqrt( 2 * ( 1 + dot( v0, v1 ) ) ).
+    vec_float4 cosHalfAngleX2, recipCosHalfAngleX2;
+    cosHalfAngleX2 = sqrtf4( spu_mul( spu_splats(2.0f), spu_add( spu_splats(1.0f), dot( unitVec0, unitVec1 ) ) ) );
+    recipCosHalfAngleX2 = recipf4( cosHalfAngleX2 ); // NOTE(review): no guard for dot == -1, where cosHalfAngleX2 is 0 before this reciprocal
+    return Quat( ( cross( unitVec0, unitVec1 ) * recipCosHalfAngleX2 ), spu_mul( cosHalfAngleX2, spu_splats(0.5f) ) );
+}
+
+inline const Quat Quat::rotation( vec_float4 radians, const Vector3 & unitVec )
+{ // Builds a quaternion from an angle and a vector (named as a unit axis): xyz = unitVec * sin( radians / 2 ), w = cos( radians / 2 ).
+    vec_float4 s, c, angle;
+    angle = spu_mul( radians, spu_splats(0.5f) ); // half angle
+    sincosf4( angle, &s, &c ); // writes sine into s and cosine into c through the out-pointers
+    return Quat( ( unitVec * s ), c );
+}
+
+inline const Quat Quat::rotationX( vec_float4 radians )
+{ // Returns ( sin( radians / 2 ), 0, 0, cos( radians / 2 ) ): the half-angle sine goes in the x component.
+    vec_float4 s, c, angle;
+    angle = spu_mul( radians, spu_splats(0.5f) ); // half angle
+    sincosf4( angle, &s, &c );
+    return Quat( s, spu_splats(0.0f), spu_splats(0.0f), c );
+}
+
+inline const Quat Quat::rotationY( vec_float4 radians )
+{ // Returns ( 0, sin( radians / 2 ), 0, cos( radians / 2 ) ): the half-angle sine goes in the y component.
+    vec_float4 s, c, angle;
+    angle = spu_mul( radians, spu_splats(0.5f) ); // half angle
+    sincosf4( angle, &s, &c );
+    return Quat( spu_splats(0.0f), s, spu_splats(0.0f), c );
+}
+
+inline const Quat Quat::rotationZ( vec_float4 radians )
+{ // Returns ( 0, 0, sin( radians / 2 ), cos( radians / 2 ) ): the half-angle sine goes in the z component.
+    vec_float4 s, c, angle;
+    angle = spu_mul( radians, spu_splats(0.5f) ); // half angle
+    sincosf4( angle, &s, &c );
+    return Quat( spu_splats(0.0f), spu_splats(0.0f), s, c );
+}
+
+inline const Quat Quat::operator *( const Quat & quat ) const
+{ // Quaternion product ( *this ) * quat, written out per component; operand order matters because the cross terms change sign when swapped.
+    return Quat(
+        spu_sub( spu_add( spu_add( spu_mul( mW, quat.mX ), spu_mul( mX, quat.mW ) ), spu_mul( mY, quat.mZ ) ), spu_mul( mZ, quat.mY ) ), // x = w*qx + x*qw + y*qz - z*qy
+        spu_sub( spu_add( spu_add( spu_mul( mW, quat.mY ), spu_mul( mY, quat.mW ) ), spu_mul( mZ, quat.mX ) ), spu_mul( mX, quat.mZ ) ), // y = w*qy + y*qw + z*qx - x*qz
+        spu_sub( spu_add( spu_add( spu_mul( mW, quat.mZ ), spu_mul( mZ, quat.mW ) ), spu_mul( mX, quat.mY ) ), spu_mul( mY, quat.mX ) ), // z = w*qz + z*qw + x*qy - y*qx
+        spu_sub( spu_sub( spu_sub( spu_mul( mW, quat.mW ), spu_mul( mX, quat.mX ) ), spu_mul( mY, quat.mY ) ), spu_mul( mZ, quat.mZ ) ) // w = w*qw - x*qx - y*qy - z*qz
+    );
+}
+
+inline Quat & Quat::operator *=( const Quat & quat )
+{ // In-place quaternion product with quat on the right-hand side ( *this = *this * quat ); returns *this.
+    *this = *this * quat;
+    return *this;
+}
+
+inline const Vector3 rotate( const Quat & quat, const Vector3 & vec )
+{ // Applies quat to vec in two expanded steps; quat is not normalized here, so presumably a unit quaternion is expected - confirm with callers.
+    vec_float4 tmpX, tmpY, tmpZ, tmpW;
+    tmpX = spu_sub( spu_add( spu_mul( quat.getW(), vec.getX() ), spu_mul( quat.getY(), vec.getZ() ) ), spu_mul( quat.getZ(), vec.getY() ) ); // tmp.xyz = q.w * vec + cross( q.xyz, vec ), x component
+    tmpY = spu_sub( spu_add( spu_mul( quat.getW(), vec.getY() ), spu_mul( quat.getZ(), vec.getX() ) ), spu_mul( quat.getX(), vec.getZ() ) ); // y component of the same expression
+    tmpZ = spu_sub( spu_add( spu_mul( quat.getW(), vec.getZ() ), spu_mul( quat.getX(), vec.getY() ) ), spu_mul( quat.getY(), vec.getX() ) ); // z component of the same expression
+    tmpW = spu_add( spu_add( spu_mul( quat.getX(), vec.getX() ), spu_mul( quat.getY(), vec.getY() ) ), spu_mul( quat.getZ(), vec.getZ() ) ); // tmpW = dot( q.xyz, vec )
+    return Vector3(
+        spu_add( spu_sub( spu_add( spu_mul( tmpW, quat.getX() ), spu_mul( tmpX, quat.getW() ) ), spu_mul( tmpY, quat.getZ() ) ), spu_mul( tmpZ, quat.getY() ) ), // result = tmpW * q.xyz + q.w * tmp.xyz - cross( tmp.xyz, q.xyz ), x component
+        spu_add( spu_sub( spu_add( spu_mul( tmpW, quat.getY() ), spu_mul( tmpY, quat.getW() ) ), spu_mul( tmpZ, quat.getX() ) ), spu_mul( tmpX, quat.getZ() ) ), // y component
+        spu_add( spu_sub( spu_add( spu_mul( tmpW, quat.getZ() ), spu_mul( tmpZ, quat.getW() ) ), spu_mul( tmpX, quat.getY() ) ), spu_mul( tmpY, quat.getX() ) ) // z component
+    );
+}
+
+inline const Quat conj( const Quat & quat )
+{ // Conjugate: negates the x, y and z component vectors and passes w through unchanged.
+    return Quat( negatef4( quat.getX() ), negatef4( quat.getY() ), negatef4( quat.getZ() ), quat.getW() );
+}
+
+inline const Quat select( const Quat & quat0, const Quat & quat1, vec_uint4 select1 )
+{ // Applies the same bit mask select1 to all four components via spu_sel (per spu_sel: set mask bits take quat1, clear bits take quat0).
+    return Quat(
+        spu_sel( quat0.getX(), quat1.getX(), select1 ),
+        spu_sel( quat0.getY(), quat1.getY(), select1 ),
+        spu_sel( quat0.getZ(), quat1.getZ(), select1 ),
+        spu_sel( quat0.getW(), quat1.getW(), select1 )
+    );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+inline void print( const Quat & quat )
+{ // Debug helper (inside #ifdef _VECTORMATH_DEBUG): converts to four AoS quaternions and prints each under a "slot N:" label.
+    Aos::Quat vec0, vec1, vec2, vec3;
+    quat.get4Aos( vec0, vec1, vec2, vec3 ); // vecN receives slot N of every component
+    printf("slot 0:\n");
+    print( vec0 ); // print called on an Aos::Quat argument
+    printf("slot 1:\n");
+    print( vec1 );
+    printf("slot 2:\n");
+    print( vec2 );
+    printf("slot 3:\n");
+    print( vec3 );
+}
+
+inline void print( const Quat & quat, const char * name )
+{ // Debug helper (inside #ifdef _VECTORMATH_DEBUG): prints "name:" on its own line, then each of the four slots under a "slot N:" label.
+    Aos::Quat vec0, vec1, vec2, vec3;
+    printf( "%s:\n", name );
+    quat.get4Aos( vec0, vec1, vec2, vec3 ); // vecN receives slot N of every component
+    printf("slot 0:\n");
+    print( vec0 ); // print called on an Aos::Quat argument
+    printf("slot 1:\n");
+    print( vec1 );
+    printf("slot 2:\n");
+    print( vec2 );
+    printf("slot 3:\n");
+    print( vec3 );
+}
+
+#endif
+
+} // namespace Soa
+} // namespace Vectormath
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/spu/cpp/vec_aos.h b/Extras/vectormathlibrary/include/vectormath/spu/cpp/vec_aos.h
index c983f1817..8f677abeb 100644
--- a/Extras/vectormathlibrary/include/vectormath/spu/cpp/vec_aos.h
+++ b/Extras/vectormathlibrary/include/vectormath/spu/cpp/vec_aos.h
@@ -1,1167 +1,1167 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_VEC_AOS_CPP_H
-#define _VECTORMATH_VEC_AOS_CPP_H
-//-----------------------------------------------------------------------------
-// Constants
-// for shuffles, words are labeled [x,y,z,w] [a,b,c,d]
-
-#define _VECTORMATH_SHUF_X 0x00010203
-#define _VECTORMATH_SHUF_Y 0x04050607
-#define _VECTORMATH_SHUF_Z 0x08090a0b
-#define _VECTORMATH_SHUF_W 0x0c0d0e0f
-#define _VECTORMATH_SHUF_A 0x10111213
-#define _VECTORMATH_SHUF_B 0x14151617
-#define _VECTORMATH_SHUF_C 0x18191a1b
-#define _VECTORMATH_SHUF_D 0x1c1d1e1f
-#define _VECTORMATH_SHUF_0 0x80808080
-#define _VECTORMATH_SHUF_XYZA (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_A }
-#define _VECTORMATH_SHUF_ZXYW (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_W }
-#define _VECTORMATH_SHUF_YZXW (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_W }
-#define _VECTORMATH_SHUF_WABC (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_C }
-#define _VECTORMATH_SHUF_ZWAB (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B }
-#define _VECTORMATH_SHUF_XYZA (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_A }
-#define _VECTORMATH_SHUF_YZAB (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B }
-#define _VECTORMATH_SHUF_ZABC (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_C }
-#define _VECTORMATH_UNIT_1000 (vec_float4){ 1.0f, 0.0f, 0.0f, 0.0f }
-#define _VECTORMATH_UNIT_0100 (vec_float4){ 0.0f, 1.0f, 0.0f, 0.0f }
-#define _VECTORMATH_UNIT_0010 (vec_float4){ 0.0f, 0.0f, 1.0f, 0.0f }
-#define _VECTORMATH_UNIT_0001 (vec_float4){ 0.0f, 0.0f, 0.0f, 1.0f }
-#define _VECTORMATH_SLERP_TOL 0.999f
-
-//-----------------------------------------------------------------------------
-// Definitions
-
-#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
-#define _VECTORMATH_INTERNAL_FUNCTIONS
-
-static inline vec_float4 _vmathVfDot3( vec_float4 vec0, vec_float4 vec1 )
-{
-    vec_float4 result;
-    result = spu_mul( vec0, vec1 );
-    result = spu_madd( spu_rlqwbyte( vec0, 4 ), spu_rlqwbyte( vec1, 4 ), result );
-    return spu_madd( spu_rlqwbyte( vec0, 8 ), spu_rlqwbyte( vec1, 8 ), result );
-}
-
-static inline vec_float4 _vmathVfDot4( vec_float4 vec0, vec_float4 vec1 )
-{
-    vec_float4 result;
-    result = spu_mul( vec0, vec1 );
-    result = spu_madd( spu_rlqwbyte( vec0, 4 ), spu_rlqwbyte( vec1, 4 ), result );
-    return spu_add( spu_rlqwbyte( result, 8 ), result );
-}
-
-static inline vec_float4 _vmathVfCross( vec_float4 vec0, vec_float4 vec1 )
-{
-    vec_float4 tmp0, tmp1, tmp2, tmp3, result;
-    tmp0 = spu_shuffle( vec0, vec0, _VECTORMATH_SHUF_YZXW );
-    tmp1 = spu_shuffle( vec1, vec1, _VECTORMATH_SHUF_ZXYW );
-    tmp2 = spu_shuffle( vec0, vec0, _VECTORMATH_SHUF_ZXYW );
-    tmp3 = spu_shuffle( vec1, vec1, _VECTORMATH_SHUF_YZXW );
-    result = spu_mul( tmp0, tmp1 );
-    result = spu_nmsub( tmp2, tmp3, result );
-    return result;
-}
-
-static inline vec_uint4 _vmathVfToHalfFloatsUnpacked(vec_float4 v)
-{
-    vec_int4 bexp;
-    vec_uint4 mant, sign, hfloat;
-    vec_uint4 notZero, isInf;
-    const vec_uint4 hfloatInf = spu_splats(0x00007c00u);
-    const vec_uint4 mergeMant = spu_splats(0x000003ffu);
-    const vec_uint4 mergeSign = spu_splats(0x00008000u);
-
-    sign = spu_rlmask((vec_uint4)v, -16);
-    mant = spu_rlmask((vec_uint4)v, -13);
-    bexp = spu_and(spu_rlmask((vec_int4)v, -23), 0xff);
-
-    notZero = spu_cmpgt(bexp, 112);
-    isInf = spu_cmpgt(bexp, 142);
-
-    bexp = spu_add(bexp, -112);
-    bexp = spu_sl(bexp, 10);
-
-    hfloat = spu_sel((vec_uint4)bexp, mant, mergeMant);
-    hfloat = spu_sel(spu_splats(0u), hfloat, notZero);
-    hfloat = spu_sel(hfloat, hfloatInf, isInf);
-    hfloat = spu_sel(hfloat, sign, mergeSign);
-
-    return hfloat;
-}
-
-static inline vec_ushort8 _vmath2VfToHalfFloats(vec_float4 u, vec_float4 v)
-{
-    vec_uint4 hfloat_u, hfloat_v;
-    const vec_uchar16 pack = (vec_uchar16){2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31};
-    hfloat_u = _vmathVfToHalfFloatsUnpacked(u);
-    hfloat_v = _vmathVfToHalfFloatsUnpacked(v);
-    return (vec_ushort8)spu_shuffle(hfloat_u, hfloat_v, pack);
-}
-
-#endif
-
-namespace Vectormath {
-namespace Aos {
-
-inline VecIdx::operator float() const
-{
-    return spu_extract( ref, i );
-}
-
-inline float VecIdx::operator =( float scalar )
-{
-    ref = spu_insert( scalar, ref, i );
-    return scalar;
-}
-
-inline float VecIdx::operator =( const VecIdx& scalar )
-{
-    return *this = float(scalar);
-}
-
-inline float VecIdx::operator *=( float scalar )
-{
-    float tmp = spu_extract( ref, i ) * scalar;
-    ref = spu_insert( tmp, ref, i );
-    return tmp;
-}
-
-inline float VecIdx::operator /=( float scalar )
-{
-    float tmp = spu_extract( ref, i ) / scalar;
-    ref = spu_insert( tmp, ref, i );
-    return tmp;
-}
-
-inline float VecIdx::operator +=( float scalar )
-{
-    float tmp = spu_extract( ref, i ) + scalar;
-    ref = spu_insert( tmp, ref, i );
-    return tmp;
-}
-
-inline float VecIdx::operator -=( float scalar )
-{
-    float tmp = spu_extract( ref, i ) - scalar;
-    ref = spu_insert( tmp, ref, i );
-    return tmp;
-}
-
-inline Vector3::Vector3( float _x, float _y, float _z )
-{
-    mVec128 = (vec_float4){ _x, _y, _z, 0.0f  };
-}
-
-inline Vector3::Vector3( Point3 pnt )
-{
-    mVec128 = pnt.get128();
-}
-
-inline Vector3::Vector3( float scalar )
-{
-    mVec128 = spu_splats( scalar );
-}
-
-inline Vector3::Vector3( vec_float4 vf4 )
-{
-    mVec128 = vf4;
-}
-
-inline const Vector3 Vector3::xAxis( )
-{
-    return Vector3( _VECTORMATH_UNIT_1000 );
-}
-
-inline const Vector3 Vector3::yAxis( )
-{
-    return Vector3( _VECTORMATH_UNIT_0100 );
-}
-
-inline const Vector3 Vector3::zAxis( )
-{
-    return Vector3( _VECTORMATH_UNIT_0010 );
-}
-
-inline const Vector3 lerp( float t, Vector3 vec0, Vector3 vec1 )
-{
-    return ( vec0 + ( ( vec1 - vec0 ) * t ) );
-}
-
-inline const Vector3 slerp( float t, Vector3 unitVec0, Vector3 unitVec1 )
-{
-    vec_float4 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
-    vec_uint4 selectMask;
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
-    cosAngle = _vmathVfDot3( unitVec0.get128(), unitVec1.get128() );
-    cosAngle = spu_shuffle( cosAngle, cosAngle, shuffle_xxxx );
-    selectMask = (vec_uint4)spu_cmpgt( spu_splats(_VECTORMATH_SLERP_TOL), cosAngle );
-    angle = acosf4( cosAngle );
-    tttt = spu_splats(t);
-    oneMinusT = spu_sub( spu_splats(1.0f), tttt );
-    angles = spu_sel( spu_splats(1.0f), oneMinusT, (vec_uint4)spu_maskb(0x0f00) );
-    angles = spu_sel( angles, tttt, (vec_uint4)spu_maskb(0x00f0) );
-    angles = spu_mul( angles, angle );
-    sines = sinf4( angles );
-    scales = divf4( sines, spu_shuffle( sines, sines, shuffle_xxxx ) );
-    scale0 = spu_sel( oneMinusT, spu_shuffle( scales, scales, shuffle_yyyy ), selectMask );
-    scale1 = spu_sel( tttt, spu_shuffle( scales, scales, shuffle_zzzz ), selectMask );
-    return Vector3( spu_madd( unitVec0.get128(), scale0, spu_mul( unitVec1.get128(), scale1 ) ) );
-}
-
-inline vec_float4 Vector3::get128( ) const
-{
-    return mVec128;
-}
-
-inline void storeXYZ( Vector3 vec, vec_float4 * quad )
-{
-    vec_float4 dstVec = *quad;
-    vec_uint4 mask = (vec_uint4)spu_maskb(0x000f);
-    dstVec = spu_sel(vec.get128(), dstVec, mask);
-    *quad = dstVec;
-}
-
-inline void loadXYZArray( Vector3 & vec0, Vector3 & vec1, Vector3 & vec2, Vector3 & vec3, const vec_float4 * threeQuads )
-{
-    vec_float4 xyzx, yzxy, zxyz, xyz1, xyz2, xyz3;
-    xyzx = threeQuads[0];
-    yzxy = threeQuads[1];
-    zxyz = threeQuads[2];
-    xyz1 = spu_shuffle( xyzx, yzxy, _VECTORMATH_SHUF_WABC );
-    xyz2 = spu_shuffle( yzxy, zxyz, _VECTORMATH_SHUF_ZWAB );
-    xyz3 = spu_rlqwbyte( zxyz, 4 );
-    vec0 = Vector3( xyzx );
-    vec1 = Vector3( xyz1 );
-    vec2 = Vector3( xyz2 );
-    vec3 = Vector3( xyz3 );
-}
-
-inline void storeXYZArray( Vector3 vec0, Vector3 vec1, Vector3 vec2, Vector3 vec3, vec_float4 * threeQuads )
-{
-    vec_float4 xyzx, yzxy, zxyz;
-    xyzx = spu_shuffle( vec0.get128(), vec1.get128(), _VECTORMATH_SHUF_XYZA );
-    yzxy = spu_shuffle( vec1.get128(), vec2.get128(), _VECTORMATH_SHUF_YZAB );
-    zxyz = spu_shuffle( vec2.get128(), vec3.get128(), _VECTORMATH_SHUF_ZABC );
-    threeQuads[0] = xyzx;
-    threeQuads[1] = yzxy;
-    threeQuads[2] = zxyz;
-}
-
-inline void storeHalfFloats( Vector3 vec0, Vector3 vec1, Vector3 vec2, Vector3 vec3, Vector3 vec4, Vector3 vec5, Vector3 vec6, Vector3 vec7, vec_ushort8 * threeQuads )
-{
-    vec_float4 xyz0[3];
-    vec_float4 xyz1[3];
-    storeXYZArray( vec0, vec1, vec2, vec3, xyz0 );
-    storeXYZArray( vec4, vec5, vec6, vec7, xyz1 );
-    threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
-    threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
-    threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
-}
-
-inline Vector3 & Vector3::operator =( Vector3 vec )
-{
-    mVec128 = vec.mVec128;
-    return *this;
-}
-
-inline Vector3 & Vector3::setX( float _x )
-{
-    mVec128 = spu_insert( _x, mVec128, 0 );
-    return *this;
-}
-
-inline float Vector3::getX( ) const
-{
-    return spu_extract( mVec128, 0 );
-}
-
-inline Vector3 & Vector3::setY( float _y )
-{
-    mVec128 = spu_insert( _y, mVec128, 1 );
-    return *this;
-}
-
-inline float Vector3::getY( ) const
-{
-    return spu_extract( mVec128, 1 );
-}
-
-inline Vector3 & Vector3::setZ( float _z )
-{
-    mVec128 = spu_insert( _z, mVec128, 2 );
-    return *this;
-}
-
-inline float Vector3::getZ( ) const
-{
-    return spu_extract( mVec128, 2 );
-}
-
-inline Vector3 & Vector3::setElem( int idx, float value )
-{
-    mVec128 = spu_insert( value, mVec128, idx );
-    return *this;
-}
-
-inline float Vector3::getElem( int idx ) const
-{
-    return spu_extract( mVec128, idx );
-}
-
-inline VecIdx Vector3::operator []( int idx )
-{
-    return VecIdx( mVec128, idx );
-}
-
-inline float Vector3::operator []( int idx ) const
-{
-    return spu_extract( mVec128, idx );
-}
-
-inline const Vector3 Vector3::operator +( Vector3 vec ) const
-{
-    return Vector3( spu_add( mVec128, vec.mVec128 ) );
-}
-
-inline const Vector3 Vector3::operator -( Vector3 vec ) const
-{
-    return Vector3( spu_sub( mVec128, vec.mVec128 ) );
-}
-
-inline const Point3 Vector3::operator +( Point3 pnt ) const
-{
-    return Point3( spu_add( mVec128, pnt.get128() ) );
-}
-
-inline const Vector3 Vector3::operator *( float scalar ) const
-{
-    return Vector3( spu_mul( mVec128, spu_splats(scalar) ) );
-}
-
-inline Vector3 & Vector3::operator +=( Vector3 vec )
-{
-    *this = *this + vec;
-    return *this;
-}
-
-inline Vector3 & Vector3::operator -=( Vector3 vec )
-{
-    *this = *this - vec;
-    return *this;
-}
-
-inline Vector3 & Vector3::operator *=( float scalar )
-{
-    *this = *this * scalar;
-    return *this;
-}
-
-inline const Vector3 Vector3::operator /( float scalar ) const
-{
-    return Vector3( divf4( mVec128, spu_splats(scalar) ) );
-}
-
-inline Vector3 & Vector3::operator /=( float scalar )
-{
-    *this = *this / scalar;
-    return *this;
-}
-
-inline const Vector3 Vector3::operator -( ) const
-{
-    return Vector3( negatef4( mVec128 ) );
-}
-
-inline const Vector3 operator *( float scalar, Vector3 vec )
-{
-    return vec * scalar;
-}
-
-inline const Vector3 mulPerElem( Vector3 vec0, Vector3 vec1 )
-{
-    return Vector3( spu_mul( vec0.get128(), vec1.get128() ) );
-}
-
-inline const Vector3 divPerElem( Vector3 vec0, Vector3 vec1 )
-{
-    return Vector3( divf4( vec0.get128(), vec1.get128() ) );
-}
-
-inline const Vector3 recipPerElem( Vector3 vec )
-{
-    return Vector3( recipf4( vec.get128() ) );
-}
-
-inline const Vector3 sqrtPerElem( Vector3 vec )
-{
-    return Vector3( sqrtf4( vec.get128() ) );
-}
-
-inline const Vector3 rsqrtPerElem( Vector3 vec )
-{
-    return Vector3( rsqrtf4( vec.get128() ) );
-}
-
-inline const Vector3 absPerElem( Vector3 vec )
-{
-    return Vector3( fabsf4( vec.get128() ) );
-}
-
-inline const Vector3 copySignPerElem( Vector3 vec0, Vector3 vec1 )
-{
-    return Vector3( copysignf4( vec0.get128(), vec1.get128() ) );
-}
-
-inline const Vector3 maxPerElem( Vector3 vec0, Vector3 vec1 )
-{
-    return Vector3( fmaxf4( vec0.get128(), vec1.get128() ) );
-}
-
-inline float maxElem( Vector3 vec )
-{
-    vec_float4 result;
-    result = fmaxf4( spu_promote( spu_extract( vec.get128(), 1 ), 0 ), vec.get128() );
-    result = fmaxf4( spu_promote( spu_extract( vec.get128(), 2 ), 0 ), result );
-    return spu_extract( result, 0 );
-}
-
-inline const Vector3 minPerElem( Vector3 vec0, Vector3 vec1 )
-{
-    return Vector3( fminf4( vec0.get128(), vec1.get128() ) );
-}
-
-inline float minElem( Vector3 vec )
-{
-    vec_float4 result;
-    result = fminf4( spu_promote( spu_extract( vec.get128(), 1 ), 0 ), vec.get128() );
-    result = fminf4( spu_promote( spu_extract( vec.get128(), 2 ), 0 ), result );
-    return spu_extract( result, 0 );
-}
-
-inline float sum( Vector3 vec )
-{
-    return
-        spu_extract( vec.get128(), 0 ) +
-        spu_extract( vec.get128(), 1 ) +
-        spu_extract( vec.get128(), 2 );
-}
-
-inline float dot( Vector3 vec0, Vector3 vec1 )
-{
-    return spu_extract( _vmathVfDot3( vec0.get128(), vec1.get128() ), 0 );
-}
-
-inline float lengthSqr( Vector3 vec )
-{
-    return spu_extract( _vmathVfDot3( vec.get128(), vec.get128() ), 0 );
-}
-
-inline float length( Vector3 vec )
-{
-    return sqrtf( lengthSqr( vec ) );
-}
-
-inline const Vector3 normalize( Vector3 vec )
-{
-    vec_float4 dot = _vmathVfDot3( vec.get128(), vec.get128() );
-    dot = spu_shuffle( dot, dot, (vec_uchar16)spu_splats(0x00010203) );
-    return Vector3( spu_mul( vec.get128(), rsqrtf4( dot ) ) );
-}
-
-inline const Vector3 cross( Vector3 vec0, Vector3 vec1 )
-{
-    return Vector3( _vmathVfCross( vec0.get128(), vec1.get128() ) );
-}
-
-inline const Vector3 select( Vector3 vec0, Vector3 vec1, bool select1 )
-{
-    return Vector3( spu_sel( vec0.get128(), vec1.get128(), spu_splats( (unsigned int)-(select1 > 0) ) ) );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( Vector3 vec )
-{
-    union { vec_float4 v; float s[4]; } tmp;
-    tmp.v = vec.get128();
-    printf( "( %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2] );
-}
-
-inline void print( Vector3 vec, const char * name )
-{
-    union { vec_float4 v; float s[4]; } tmp;
-    tmp.v = vec.get128();
-    printf( "%s: ( %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2] );
-}
-
-#endif
-
-inline Vector4::Vector4( float _x, float _y, float _z, float _w )
-{
-    mVec128 = (vec_float4){ _x, _y, _z, _w };
-}
-
-inline Vector4::Vector4( Vector3 xyz, float _w )
-{
-    mVec128 = spu_shuffle( xyz.get128(), spu_promote( _w, 0 ), _VECTORMATH_SHUF_XYZA );
-}
-
-inline Vector4::Vector4( Vector3 vec )
-{
-    mVec128 = spu_sel( vec.get128(), spu_splats(0.0f), (vec_uint4)spu_maskb(0x000f) );
-}
-
-inline Vector4::Vector4( Point3 pnt )
-{
-    mVec128 = spu_sel( pnt.get128(), spu_splats(1.0f), (vec_uint4)spu_maskb(0x000f) );
-}
-
-inline Vector4::Vector4( Quat quat )
-{
-    mVec128 = quat.get128();
-}
-
-inline Vector4::Vector4( float scalar )
-{
-    mVec128 = spu_splats( scalar );
-}
-
-inline Vector4::Vector4( vec_float4 vf4 )
-{
-    mVec128 = vf4;
-}
-
-inline const Vector4 Vector4::xAxis( )
-{
-    return Vector4( _VECTORMATH_UNIT_1000 );
-}
-
-inline const Vector4 Vector4::yAxis( )
-{
-    return Vector4( _VECTORMATH_UNIT_0100 );
-}
-
-inline const Vector4 Vector4::zAxis( )
-{
-    return Vector4( _VECTORMATH_UNIT_0010 );
-}
-
-inline const Vector4 Vector4::wAxis( )
-{
-    return Vector4( _VECTORMATH_UNIT_0001 );
-}
-
-inline const Vector4 lerp( float t, Vector4 vec0, Vector4 vec1 )
-{
-    return ( vec0 + ( ( vec1 - vec0 ) * t ) );
-}
-
-inline const Vector4 slerp( float t, Vector4 unitVec0, Vector4 unitVec1 )
-{
-    vec_float4 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
-    vec_uint4 selectMask;
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
-    cosAngle = _vmathVfDot4( unitVec0.get128(), unitVec1.get128() );
-    cosAngle = spu_shuffle( cosAngle, cosAngle, shuffle_xxxx );
-    selectMask = (vec_uint4)spu_cmpgt( spu_splats(_VECTORMATH_SLERP_TOL), cosAngle );
-    angle = acosf4( cosAngle );
-    tttt = spu_splats(t);
-    oneMinusT = spu_sub( spu_splats(1.0f), tttt );
-    angles = spu_sel( spu_splats(1.0f), oneMinusT, (vec_uint4)spu_maskb(0x0f00) );
-    angles = spu_sel( angles, tttt, (vec_uint4)spu_maskb(0x00f0) );
-    angles = spu_mul( angles, angle );
-    sines = sinf4( angles );
-    scales = divf4( sines, spu_shuffle( sines, sines, shuffle_xxxx ) );
-    scale0 = spu_sel( oneMinusT, spu_shuffle( scales, scales, shuffle_yyyy ), selectMask );
-    scale1 = spu_sel( tttt, spu_shuffle( scales, scales, shuffle_zzzz ), selectMask );
-    return Vector4( spu_madd( unitVec0.get128(), scale0, spu_mul( unitVec1.get128(), scale1 ) ) );
-}
-
-inline vec_float4 Vector4::get128( ) const
-{
-    return mVec128;
-}
-
-inline void storeHalfFloats( Vector4 vec0, Vector4 vec1, Vector4 vec2, Vector4 vec3, vec_ushort8 * twoQuads )
-{
-    twoQuads[0] = _vmath2VfToHalfFloats(vec0.get128(), vec1.get128());
-    twoQuads[1] = _vmath2VfToHalfFloats(vec2.get128(), vec3.get128());
-}
-
-inline Vector4 & Vector4::operator =( Vector4 vec )
-{
-    mVec128 = vec.mVec128;
-    return *this;
-}
-
-inline Vector4 & Vector4::setXYZ( Vector3 vec )
-{
-    mVec128 = spu_sel( vec.get128(), mVec128, (vec_uint4)spu_maskb(0x000f) );
-    return *this;
-}
-
-inline const Vector3 Vector4::getXYZ( ) const
-{
-    return Vector3( mVec128 );
-}
-
-inline Vector4 & Vector4::setX( float _x )
-{
-    mVec128 = spu_insert( _x, mVec128, 0 );
-    return *this;
-}
-
-inline float Vector4::getX( ) const
-{
-    return spu_extract( mVec128, 0 );
-}
-
-inline Vector4 & Vector4::setY( float _y )
-{
-    mVec128 = spu_insert( _y, mVec128, 1 );
-    return *this;
-}
-
-inline float Vector4::getY( ) const
-{
-    return spu_extract( mVec128, 1 );
-}
-
-inline Vector4 & Vector4::setZ( float _z )
-{
-    mVec128 = spu_insert( _z, mVec128, 2 );
-    return *this;
-}
-
-inline float Vector4::getZ( ) const
-{
-    return spu_extract( mVec128, 2 );
-}
-
-inline Vector4 & Vector4::setW( float _w )
-{
-    mVec128 = spu_insert( _w, mVec128, 3 );
-    return *this;
-}
-
-inline float Vector4::getW( ) const
-{
-    return spu_extract( mVec128, 3 );
-}
-
-inline Vector4 & Vector4::setElem( int idx, float value )
-{
-    mVec128 = spu_insert( value, mVec128, idx );
-    return *this;
-}
-
-inline float Vector4::getElem( int idx ) const
-{
-    return spu_extract( mVec128, idx );
-}
-
-inline VecIdx Vector4::operator []( int idx )
-{
-    return VecIdx( mVec128, idx );
-}
-
-inline float Vector4::operator []( int idx ) const
-{
-    return spu_extract( mVec128, idx );
-}
-
-inline const Vector4 Vector4::operator +( Vector4 vec ) const
-{
-    return Vector4( spu_add( mVec128, vec.mVec128 ) );
-}
-
-inline const Vector4 Vector4::operator -( Vector4 vec ) const
-{
-    return Vector4( spu_sub( mVec128, vec.mVec128 ) );
-}
-
-inline const Vector4 Vector4::operator *( float scalar ) const
-{
-    return Vector4( spu_mul( mVec128, spu_splats(scalar) ) );
-}
-
-inline Vector4 & Vector4::operator +=( Vector4 vec )
-{
-    *this = *this + vec;
-    return *this;
-}
-
-inline Vector4 & Vector4::operator -=( Vector4 vec )
-{
-    *this = *this - vec;
-    return *this;
-}
-
-inline Vector4 & Vector4::operator *=( float scalar )
-{
-    *this = *this * scalar;
-    return *this;
-}
-
-inline const Vector4 Vector4::operator /( float scalar ) const
-{
-    return Vector4( divf4( mVec128, spu_splats(scalar) ) );
-}
-
-inline Vector4 & Vector4::operator /=( float scalar )
-{
-    *this = *this / scalar;
-    return *this;
-}
-
-inline const Vector4 Vector4::operator -( ) const
-{
-    return Vector4( negatef4( mVec128 ) );
-}
-
-inline const Vector4 operator *( float scalar, Vector4 vec )
-{
-    return vec * scalar;
-}
-
-inline const Vector4 mulPerElem( Vector4 vec0, Vector4 vec1 )
-{
-    return Vector4( spu_mul( vec0.get128(), vec1.get128() ) );
-}
-
-inline const Vector4 divPerElem( Vector4 vec0, Vector4 vec1 )
-{
-    return Vector4( divf4( vec0.get128(), vec1.get128() ) );
-}
-
-inline const Vector4 recipPerElem( Vector4 vec )
-{
-    return Vector4( recipf4( vec.get128() ) );
-}
-
-inline const Vector4 sqrtPerElem( Vector4 vec )
-{
-    return Vector4( sqrtf4( vec.get128() ) );
-}
-
-inline const Vector4 rsqrtPerElem( Vector4 vec )
-{
-    return Vector4( rsqrtf4( vec.get128() ) );
-}
-
-inline const Vector4 absPerElem( Vector4 vec )
-{
-    return Vector4( fabsf4( vec.get128() ) );
-}
-
-inline const Vector4 copySignPerElem( Vector4 vec0, Vector4 vec1 )
-{
-    return Vector4( copysignf4( vec0.get128(), vec1.get128() ) );
-}
-
-inline const Vector4 maxPerElem( Vector4 vec0, Vector4 vec1 )
-{
-    return Vector4( fmaxf4( vec0.get128(), vec1.get128() ) );
-}
-
-inline float maxElem( Vector4 vec )
-{
-    vec_float4 result;
-    result = fmaxf4( spu_promote( spu_extract( vec.get128(), 1 ), 0 ), vec.get128() );
-    result = fmaxf4( spu_promote( spu_extract( vec.get128(), 2 ), 0 ), result );
-    result = fmaxf4( spu_promote( spu_extract( vec.get128(), 3 ), 0 ), result );
-    return spu_extract( result, 0 );
-}
-
-inline const Vector4 minPerElem( Vector4 vec0, Vector4 vec1 )
-{
-    return Vector4( fminf4( vec0.get128(), vec1.get128() ) );
-}
-
-inline float minElem( Vector4 vec )
-{
-    vec_float4 result;
-    result = fminf4( spu_promote( spu_extract( vec.get128(), 1 ), 0 ), vec.get128() );
-    result = fminf4( spu_promote( spu_extract( vec.get128(), 2 ), 0 ), result );
-    result = fminf4( spu_promote( spu_extract( vec.get128(), 3 ), 0 ), result );
-    return spu_extract( result, 0 );
-}
-
-inline float sum( Vector4 vec )
-{
-    return
-        spu_extract( vec.get128(), 0 ) +
-        spu_extract( vec.get128(), 1 ) +
-        spu_extract( vec.get128(), 2 ) +
-        spu_extract( vec.get128(), 3 );
-}
-
-inline float dot( Vector4 vec0, Vector4 vec1 )
-{
-    return spu_extract( _vmathVfDot4( vec0.get128(), vec1.get128() ), 0 );
-}
-
-inline float lengthSqr( Vector4 vec )
-{
-    return spu_extract( _vmathVfDot4( vec.get128(), vec.get128() ), 0 );
-}
-
-inline float length( Vector4 vec )
-{
-    return sqrtf( lengthSqr( vec ) );
-}
-
-inline const Vector4 normalize( Vector4 vec )
-{
-    vec_float4 dot = _vmathVfDot4( vec.get128(), vec.get128() );
-    return Vector4( spu_mul( vec.get128(), rsqrtf4( dot ) ) );
-}
-
-inline const Vector4 select( Vector4 vec0, Vector4 vec1, bool select1 )
-{
-    return Vector4( spu_sel( vec0.get128(), vec1.get128(), spu_splats( (unsigned int)-(select1 > 0) ) ) );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( Vector4 vec )
-{
-    union { vec_float4 v; float s[4]; } tmp;
-    tmp.v = vec.get128();
-    printf( "( %f %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] );
-}
-
-inline void print( Vector4 vec, const char * name )
-{
-    union { vec_float4 v; float s[4]; } tmp;
-    tmp.v = vec.get128();
-    printf( "%s: ( %f %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] );
-}
-
-#endif
-
-inline Point3::Point3( float _x, float _y, float _z )
-{
-    mVec128 = (vec_float4){ _x, _y, _z, 0.0f  };
-}
-
-inline Point3::Point3( Vector3 vec )
-{
-    mVec128 = vec.get128();
-}
-
-inline Point3::Point3( float scalar )
-{
-    mVec128 = spu_splats( scalar );
-}
-
-inline Point3::Point3( vec_float4 vf4 )
-{
-    mVec128 = vf4;
-}
-
-inline const Point3 lerp( float t, Point3 pnt0, Point3 pnt1 )
-{
-    return ( pnt0 + ( ( pnt1 - pnt0 ) * t ) );
-}
-
-inline vec_float4 Point3::get128( ) const
-{
-    return mVec128;
-}
-
-inline void storeXYZ( Point3 pnt, vec_float4 * quad )
-{
-    vec_float4 dstVec = *quad;
-    vec_uint4 mask = (vec_uint4)spu_maskb(0x000f);
-    dstVec = spu_sel(pnt.get128(), dstVec, mask);
-    *quad = dstVec;
-}
-
-inline void loadXYZArray( Point3 & pnt0, Point3 & pnt1, Point3 & pnt2, Point3 & pnt3, const vec_float4 * threeQuads )
-{
-    vec_float4 xyzx, yzxy, zxyz, xyz1, xyz2, xyz3;
-    xyzx = threeQuads[0];
-    yzxy = threeQuads[1];
-    zxyz = threeQuads[2];
-    xyz1 = spu_shuffle( xyzx, yzxy, _VECTORMATH_SHUF_WABC );
-    xyz2 = spu_shuffle( yzxy, zxyz, _VECTORMATH_SHUF_ZWAB );
-    xyz3 = spu_rlqwbyte( zxyz, 4 );
-    pnt0 = Point3( xyzx );
-    pnt1 = Point3( xyz1 );
-    pnt2 = Point3( xyz2 );
-    pnt3 = Point3( xyz3 );
-}
-
-inline void storeXYZArray( Point3 pnt0, Point3 pnt1, Point3 pnt2, Point3 pnt3, vec_float4 * threeQuads )
-{
-    vec_float4 xyzx, yzxy, zxyz;
-    xyzx = spu_shuffle( pnt0.get128(), pnt1.get128(), _VECTORMATH_SHUF_XYZA );
-    yzxy = spu_shuffle( pnt1.get128(), pnt2.get128(), _VECTORMATH_SHUF_YZAB );
-    zxyz = spu_shuffle( pnt2.get128(), pnt3.get128(), _VECTORMATH_SHUF_ZABC );
-    threeQuads[0] = xyzx;
-    threeQuads[1] = yzxy;
-    threeQuads[2] = zxyz;
-}
-
-inline void storeHalfFloats( Point3 pnt0, Point3 pnt1, Point3 pnt2, Point3 pnt3, Point3 pnt4, Point3 pnt5, Point3 pnt6, Point3 pnt7, vec_ushort8 * threeQuads )
-{
-    vec_float4 xyz0[3];
-    vec_float4 xyz1[3];
-    storeXYZArray( pnt0, pnt1, pnt2, pnt3, xyz0 );
-    storeXYZArray( pnt4, pnt5, pnt6, pnt7, xyz1 );
-    threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
-    threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
-    threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
-}
-
-inline Point3 & Point3::operator =( Point3 pnt )
-{
-    mVec128 = pnt.mVec128;
-    return *this;
-}
-
-inline Point3 & Point3::setX( float _x )
-{
-    mVec128 = spu_insert( _x, mVec128, 0 );
-    return *this;
-}
-
-inline float Point3::getX( ) const
-{
-    return spu_extract( mVec128, 0 );
-}
-
-inline Point3 & Point3::setY( float _y )
-{
-    mVec128 = spu_insert( _y, mVec128, 1 );
-    return *this;
-}
-
-inline float Point3::getY( ) const
-{
-    return spu_extract( mVec128, 1 );
-}
-
-inline Point3 & Point3::setZ( float _z )
-{
-    mVec128 = spu_insert( _z, mVec128, 2 );
-    return *this;
-}
-
-inline float Point3::getZ( ) const
-{
-    return spu_extract( mVec128, 2 );
-}
-
-inline Point3 & Point3::setElem( int idx, float value )
-{
-    mVec128 = spu_insert( value, mVec128, idx );
-    return *this;
-}
-
-inline float Point3::getElem( int idx ) const
-{
-    return spu_extract( mVec128, idx );
-}
-
-inline VecIdx Point3::operator []( int idx )
-{
-    return VecIdx( mVec128, idx );
-}
-
-inline float Point3::operator []( int idx ) const
-{
-    return spu_extract( mVec128, idx );
-}
-
-inline const Vector3 Point3::operator -( Point3 pnt ) const
-{
-    return Vector3( spu_sub( mVec128, pnt.mVec128 ) );
-}
-
-inline const Point3 Point3::operator +( Vector3 vec ) const
-{
-    return Point3( spu_add( mVec128, vec.get128() ) );
-}
-
-inline const Point3 Point3::operator -( Vector3 vec ) const
-{
-    return Point3( spu_sub( mVec128, vec.get128() ) );
-}
-
-inline Point3 & Point3::operator +=( Vector3 vec )
-{
-    *this = *this + vec;
-    return *this;
-}
-
-inline Point3 & Point3::operator -=( Vector3 vec )
-{
-    *this = *this - vec;
-    return *this;
-}
-
-inline const Point3 mulPerElem( Point3 pnt0, Point3 pnt1 )
-{
-    return Point3( spu_mul( pnt0.get128(), pnt1.get128() ) );
-}
-
-inline const Point3 divPerElem( Point3 pnt0, Point3 pnt1 )
-{
-    return Point3( divf4( pnt0.get128(), pnt1.get128() ) );
-}
-
-inline const Point3 recipPerElem( Point3 pnt )
-{
-    return Point3( recipf4( pnt.get128() ) );
-}
-
-inline const Point3 sqrtPerElem( Point3 pnt )
-{
-    return Point3( sqrtf4( pnt.get128() ) );
-}
-
-inline const Point3 rsqrtPerElem( Point3 pnt )
-{
-    return Point3( rsqrtf4( pnt.get128() ) );
-}
-
-inline const Point3 absPerElem( Point3 pnt )
-{
-    return Point3( fabsf4( pnt.get128() ) );
-}
-
-inline const Point3 copySignPerElem( Point3 pnt0, Point3 pnt1 )
-{
-    return Point3( copysignf4( pnt0.get128(), pnt1.get128() ) );
-}
-
-inline const Point3 maxPerElem( Point3 pnt0, Point3 pnt1 )
-{
-    return Point3( fmaxf4( pnt0.get128(), pnt1.get128() ) );
-}
-
-inline float maxElem( Point3 pnt )
-{
-    vec_float4 result;
-    result = fmaxf4( spu_promote( spu_extract( pnt.get128(), 1 ), 0 ), pnt.get128() );
-    result = fmaxf4( spu_promote( spu_extract( pnt.get128(), 2 ), 0 ), result );
-    return spu_extract( result, 0 );
-}
-
-inline const Point3 minPerElem( Point3 pnt0, Point3 pnt1 )
-{
-    return Point3( fminf4( pnt0.get128(), pnt1.get128() ) );
-}
-
-inline float minElem( Point3 pnt )
-{
-    vec_float4 result;
-    result = fminf4( spu_promote( spu_extract( pnt.get128(), 1 ), 0 ), pnt.get128() );
-    result = fminf4( spu_promote( spu_extract( pnt.get128(), 2 ), 0 ), result );
-    return spu_extract( result, 0 );
-}
-
-inline float sum( Point3 pnt )
-{
-    return
-        spu_extract( pnt.get128(), 0 ) +
-        spu_extract( pnt.get128(), 1 ) +
-        spu_extract( pnt.get128(), 2 );
-}
-
-inline const Point3 scale( Point3 pnt, float scaleVal )
-{
-    return mulPerElem( pnt, Point3( scaleVal ) );
-}
-
-inline const Point3 scale( Point3 pnt, Vector3 scaleVec )
-{
-    return mulPerElem( pnt, Point3( scaleVec ) );
-}
-
-inline float projection( Point3 pnt, Vector3 unitVec )
-{
-    return spu_extract( _vmathVfDot3( pnt.get128(), unitVec.get128() ), 0 );
-}
-
-inline float distSqrFromOrigin( Point3 pnt )
-{
-    return lengthSqr( Vector3( pnt ) );
-}
-
-inline float distFromOrigin( Point3 pnt )
-{
-    return length( Vector3( pnt ) );
-}
-
-inline float distSqr( Point3 pnt0, Point3 pnt1 )
-{
-    return lengthSqr( ( pnt1 - pnt0 ) );
-}
-
-inline float dist( Point3 pnt0, Point3 pnt1 )
-{
-    return length( ( pnt1 - pnt0 ) );
-}
-
-inline const Point3 select( Point3 pnt0, Point3 pnt1, bool select1 )
-{
-    return Point3( spu_sel( pnt0.get128(), pnt1.get128(), spu_splats( (unsigned int)-(select1 > 0) ) ) );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( Point3 pnt )
-{
-    union { vec_float4 v; float s[4]; } tmp;
-    tmp.v = pnt.get128();
-    printf( "( %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2] );
-}
-
-inline void print( Point3 pnt, const char * name )
-{
-    union { vec_float4 v; float s[4]; } tmp;
-    tmp.v = pnt.get128();
-    printf( "%s: ( %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2] );
-}
-
-#endif
-
-} // namespace Aos
-} // namespace Vectormath
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_VEC_AOS_CPP_H
+#define _VECTORMATH_VEC_AOS_CPP_H
+//-----------------------------------------------------------------------------
+// Constants
+// for shuffles, words are labeled [x,y,z,w] [a,b,c,d]
+
+#define _VECTORMATH_SHUF_X 0x00010203 // control bytes 0x00-0x03: word x of spu_shuffle's first operand
+#define _VECTORMATH_SHUF_Y 0x04050607
+#define _VECTORMATH_SHUF_Z 0x08090a0b
+#define _VECTORMATH_SHUF_W 0x0c0d0e0f
+#define _VECTORMATH_SHUF_A 0x10111213 // control bytes 0x10-0x13: word a, the first word of the second operand
+#define _VECTORMATH_SHUF_B 0x14151617
+#define _VECTORMATH_SHUF_C 0x18191a1b
+#define _VECTORMATH_SHUF_D 0x1c1d1e1f
+#define _VECTORMATH_SHUF_0 0x80808080 // NOTE(review): presumably the shuffle control value that yields zero bytes - confirm against the SPU shufb encoding
+#define _VECTORMATH_SHUF_XYZA (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_A }
+#define _VECTORMATH_SHUF_ZXYW (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_W }
+#define _VECTORMATH_SHUF_YZXW (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_W }
+#define _VECTORMATH_SHUF_WABC (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_C }
+#define _VECTORMATH_SHUF_ZWAB (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B }
+#define _VECTORMATH_SHUF_XYZA (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_A } // NOTE(review): token-identical repeat of the _VECTORMATH_SHUF_XYZA definition a few lines up - legal but redundant
+#define _VECTORMATH_SHUF_YZAB (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B }
+#define _VECTORMATH_SHUF_ZABC (vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_C }
+#define _VECTORMATH_UNIT_1000 (vec_float4){ 1.0f, 0.0f, 0.0f, 0.0f } // unit basis vectors, returned by the xAxis()/yAxis()/zAxis()/wAxis() factories
+#define _VECTORMATH_UNIT_0100 (vec_float4){ 0.0f, 1.0f, 0.0f, 0.0f }
+#define _VECTORMATH_UNIT_0010 (vec_float4){ 0.0f, 0.0f, 1.0f, 0.0f }
+#define _VECTORMATH_UNIT_0001 (vec_float4){ 0.0f, 0.0f, 0.0f, 1.0f }
+#define _VECTORMATH_SLERP_TOL 0.999f // slerp() uses plain lerp weights unless the dot product of its inputs is below this
+
+//-----------------------------------------------------------------------------
+// Definitions
+
+#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
+#define _VECTORMATH_INTERNAL_FUNCTIONS
+
+static inline vec_float4 _vmathVfDot3( vec_float4 vec0, vec_float4 vec1 ) // 3-component dot product; only word 0 of the result is the x/y/z sum
+{
+    vec_float4 result;
+    result = spu_mul( vec0, vec1 ); // word 0 = x0*x1
+    result = spu_madd( spu_rlqwbyte( vec0, 4 ), spu_rlqwbyte( vec1, 4 ), result ); // rotate left one word so y sits in word 0: word 0 += y0*y1
+    return spu_madd( spu_rlqwbyte( vec0, 8 ), spu_rlqwbyte( vec1, 8 ), result ); // rotate left two words so z sits in word 0: word 0 += z0*z1
+}
+
+static inline vec_float4 _vmathVfDot4( vec_float4 vec0, vec_float4 vec1 ) // 4-component dot product; the sum ends up in every word of the result
+{
+    vec_float4 result;
+    result = spu_mul( vec0, vec1 ); // [ xx, yy, zz, ww ]
+    result = spu_madd( spu_rlqwbyte( vec0, 4 ), spu_rlqwbyte( vec1, 4 ), result ); // [ xx+yy, yy+zz, zz+ww, ww+xx ]
+    return spu_add( spu_rlqwbyte( result, 8 ), result ); // adding the two-word rotation yields xx+yy+zz+ww in all four words
+}
+
+static inline vec_float4 _vmathVfCross( vec_float4 vec0, vec_float4 vec1 ) // cross product of the x/y/z words of vec0 and vec1
+{
+    vec_float4 tmp0, tmp1, tmp2, tmp3, result;
+    tmp0 = spu_shuffle( vec0, vec0, _VECTORMATH_SHUF_YZXW ); // vec0 as ( y, z, x, w )
+    tmp1 = spu_shuffle( vec1, vec1, _VECTORMATH_SHUF_ZXYW ); // vec1 as ( z, x, y, w )
+    tmp2 = spu_shuffle( vec0, vec0, _VECTORMATH_SHUF_ZXYW ); // vec0 as ( z, x, y, w )
+    tmp3 = spu_shuffle( vec1, vec1, _VECTORMATH_SHUF_YZXW ); // vec1 as ( y, z, x, w )
+    result = spu_mul( tmp0, tmp1 ); // ( y0*z1, z0*x1, x0*y1, w0*w1 )
+    result = spu_nmsub( tmp2, tmp3, result ); // result - tmp2*tmp3, e.g. x = y0*z1 - z0*y1; the w word is w0*w1 - w0*w1
+    return result;
+}
+
+static inline vec_uint4 _vmathVfToHalfFloatsUnpacked(vec_float4 v) // converts four floats to 16-bit half-float bit patterns, one in the low half of each 32-bit word
+{
+    vec_int4 bexp;
+    vec_uint4 mant, sign, hfloat;
+    vec_uint4 notZero, isInf;
+    const vec_uint4 hfloatInf = spu_splats(0x00007c00u); // bits 10-14 set, mantissa bits clear: the value written on overflow
+    const vec_uint4 mergeMant = spu_splats(0x000003ffu); // low 10 bits: the half-float mantissa field
+    const vec_uint4 mergeSign = spu_splats(0x00008000u); // bit 15: the half-float sign bit
+
+    sign = spu_rlmask((vec_uint4)v, -16); // shift right by 16 so the float sign (bit 31) lands on bit 15
+    mant = spu_rlmask((vec_uint4)v, -13); // shift right by 13 so the top 10 mantissa bits land on bits 0-9 (lower bits are dropped)
+    bexp = spu_and(spu_rlmask((vec_int4)v, -23), 0xff); // isolate the 8-bit biased exponent
+
+    notZero = spu_cmpgt(bexp, 112); // 112 is the rebias amount used below; exponents at or under it are written as zero
+    isInf = spu_cmpgt(bexp, 142); // after rebias 142 maps to 30; anything larger is written as hfloatInf
+
+    bexp = spu_add(bexp, -112); // rebias the exponent for the half-float format
+    bexp = spu_sl(bexp, 10); // move it above the 10 mantissa bits
+
+    hfloat = spu_sel((vec_uint4)bexp, mant, mergeMant); // exponent field with the mantissa merged into the low 10 bits
+    hfloat = spu_sel(spu_splats(0u), hfloat, notZero); // flush too-small inputs to zero
+    hfloat = spu_sel(hfloat, hfloatInf, isInf); // too-large exponents (including 255) become 0x7c00
+    hfloat = spu_sel(hfloat, sign, mergeSign); // copy the sign bit in last, so zero and infinity keep their sign
+
+    return hfloat;
+}
+
+static inline vec_ushort8 _vmath2VfToHalfFloats(vec_float4 u, vec_float4 v) // converts eight floats to eight packed half-floats: u's four first, then v's four
+{
+    vec_uint4 hfloat_u, hfloat_v;
+    const vec_uchar16 pack = (vec_uchar16){2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31}; // bytes 2-3 of every word; indices 16 and up address the second shuffle operand
+    hfloat_u = _vmathVfToHalfFloatsUnpacked(u);
+    hfloat_v = _vmathVfToHalfFloatsUnpacked(v);
+    return (vec_ushort8)spu_shuffle(hfloat_u, hfloat_v, pack); // keep only the 16-bit result from each 32-bit word
+}
+
+#endif
+
+namespace Vectormath {
+namespace Aos {
+
+inline VecIdx::operator float() const
+{
+    return spu_extract( ref, i );
+}
+
+inline float VecIdx::operator =( float scalar )
+{
+    ref = spu_insert( scalar, ref, i );
+    return scalar;
+}
+
+inline float VecIdx::operator =( const VecIdx& scalar )
+{
+    return *this = float(scalar);
+}
+
+inline float VecIdx::operator *=( float scalar )
+{
+    float tmp = spu_extract( ref, i ) * scalar;
+    ref = spu_insert( tmp, ref, i );
+    return tmp;
+}
+
+inline float VecIdx::operator /=( float scalar )
+{
+    float tmp = spu_extract( ref, i ) / scalar;
+    ref = spu_insert( tmp, ref, i );
+    return tmp;
+}
+
+inline float VecIdx::operator +=( float scalar )
+{
+    float tmp = spu_extract( ref, i ) + scalar;
+    ref = spu_insert( tmp, ref, i );
+    return tmp;
+}
+
+inline float VecIdx::operator -=( float scalar )
+{
+    float tmp = spu_extract( ref, i ) - scalar;
+    ref = spu_insert( tmp, ref, i );
+    return tmp;
+}
+
+inline Vector3::Vector3( float _x, float _y, float _z )
+{
+    mVec128 = (vec_float4){ _x, _y, _z, 0.0f  };
+}
+
+inline Vector3::Vector3( Point3 pnt )
+{
+    mVec128 = pnt.get128();
+}
+
+inline Vector3::Vector3( float scalar )
+{
+    mVec128 = spu_splats( scalar );
+}
+
+inline Vector3::Vector3( vec_float4 vf4 )
+{
+    mVec128 = vf4;
+}
+
+inline const Vector3 Vector3::xAxis( )
+{
+    return Vector3( _VECTORMATH_UNIT_1000 );
+}
+
+inline const Vector3 Vector3::yAxis( )
+{
+    return Vector3( _VECTORMATH_UNIT_0100 );
+}
+
+inline const Vector3 Vector3::zAxis( )
+{
+    return Vector3( _VECTORMATH_UNIT_0010 );
+}
+
+inline const Vector3 lerp( float t, Vector3 vec0, Vector3 vec1 )
+{
+    return ( vec0 + ( ( vec1 - vec0 ) * t ) );
+}
+
+inline const Vector3 slerp( float t, Vector3 unitVec0, Vector3 unitVec1 ) // spherical interpolation: weights unitVec0 and unitVec1 by sin((1-t)a)/sin(a) and sin(t*a)/sin(a)
+{
+    vec_float4 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
+    vec_uint4 selectMask;
+    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203); // shuffle patterns that broadcast word x, y or z to all four words
+    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
+    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
+    cosAngle = _vmathVfDot3( unitVec0.get128(), unitVec1.get128() ); // dot product, valid in word 0 only
+    cosAngle = spu_shuffle( cosAngle, cosAngle, shuffle_xxxx ); // broadcast it to every word
+    selectMask = (vec_uint4)spu_cmpgt( spu_splats(_VECTORMATH_SLERP_TOL), cosAngle ); // all ones when cosAngle is below the tolerance
+    angle = acosf4( cosAngle );
+    tttt = spu_splats(t);
+    oneMinusT = spu_sub( spu_splats(1.0f), tttt );
+    angles = spu_sel( spu_splats(1.0f), oneMinusT, (vec_uint4)spu_maskb(0x0f00) ); // word 1 <- 1-t
+    angles = spu_sel( angles, tttt, (vec_uint4)spu_maskb(0x00f0) ); // word 2 <- t, giving [ 1, 1-t, t, 1 ]
+    angles = spu_mul( angles, angle ); // [ a, (1-t)*a, t*a, a ]
+    sines = sinf4( angles );
+    scales = divf4( sines, spu_shuffle( sines, sines, shuffle_xxxx ) ); // divide every sine by sin(a) from word 0
+    scale0 = spu_sel( oneMinusT, spu_shuffle( scales, scales, shuffle_yyyy ), selectMask ); // sin((1-t)*a)/sin(a), or 1-t when the mask is clear
+    scale1 = spu_sel( tttt, spu_shuffle( scales, scales, shuffle_zzzz ), selectMask ); // sin(t*a)/sin(a), or t when the mask is clear
+    return Vector3( spu_madd( unitVec0.get128(), scale0, spu_mul( unitVec1.get128(), scale1 ) ) ); // unitVec0*scale0 + unitVec1*scale1
+}
+
+inline vec_float4 Vector3::get128( ) const
+{
+    return mVec128;
+}
+
+inline void storeXYZ( Vector3 vec, vec_float4 * quad ) // writes x, y, z into *quad while keeping the quadword's existing fourth word
+{
+    vec_float4 dstVec = *quad;
+    vec_uint4 mask = (vec_uint4)spu_maskb(0x000f); // mask covering the last four bytes, i.e. word 3
+    dstVec = spu_sel(vec.get128(), dstVec, mask); // words 0-2 come from vec, word 3 from the old contents of *quad
+    *quad = dstVec;
+}
+
+inline void loadXYZArray( Vector3 & vec0, Vector3 & vec1, Vector3 & vec2, Vector3 & vec3, const vec_float4 * threeQuads ) // unpacks four tightly packed xyz triples (12 floats in 3 quadwords) into four vectors
+{
+    vec_float4 xyzx, yzxy, zxyz, xyz1, xyz2, xyz3;
+    xyzx = threeQuads[0]; // x0 y0 z0 x1
+    yzxy = threeQuads[1]; // y1 z1 x2 y2
+    zxyz = threeQuads[2]; // z2 x3 y3 z3
+    xyz1 = spu_shuffle( xyzx, yzxy, _VECTORMATH_SHUF_WABC ); // x1 y1 z1 x2
+    xyz2 = spu_shuffle( yzxy, zxyz, _VECTORMATH_SHUF_ZWAB ); // x2 y2 z2 x3
+    xyz3 = spu_rlqwbyte( zxyz, 4 ); // rotate left one word: x3 y3 z3 z2
+    vec0 = Vector3( xyzx ); // note: the fourth word of each output is a leftover neighbouring component, not cleared
+    vec1 = Vector3( xyz1 );
+    vec2 = Vector3( xyz2 );
+    vec3 = Vector3( xyz3 );
+}
+
+inline void storeXYZArray( Vector3 vec0, Vector3 vec1, Vector3 vec2, Vector3 vec3, vec_float4 * threeQuads ) // packs the xyz of four vectors into 12 consecutive floats (3 quadwords), dropping each w
+{
+    vec_float4 xyzx, yzxy, zxyz;
+    xyzx = spu_shuffle( vec0.get128(), vec1.get128(), _VECTORMATH_SHUF_XYZA ); // x0 y0 z0 x1
+    yzxy = spu_shuffle( vec1.get128(), vec2.get128(), _VECTORMATH_SHUF_YZAB ); // y1 z1 x2 y2
+    zxyz = spu_shuffle( vec2.get128(), vec3.get128(), _VECTORMATH_SHUF_ZABC ); // z2 x3 y3 z3
+    threeQuads[0] = xyzx;
+    threeQuads[1] = yzxy;
+    threeQuads[2] = zxyz;
+}
+
+inline void storeHalfFloats( Vector3 vec0, Vector3 vec1, Vector3 vec2, Vector3 vec3, Vector3 vec4, Vector3 vec5, Vector3 vec6, Vector3 vec7, vec_ushort8 * threeQuads ) // packs the xyz of eight vectors (24 floats) as 24 half-floats in three quadwords
+{
+    vec_float4 xyz0[3]; // packed xyz of vec0..vec3
+    vec_float4 xyz1[3]; // packed xyz of vec4..vec7
+    storeXYZArray( vec0, vec1, vec2, vec3, xyz0 );
+    storeXYZArray( vec4, vec5, vec6, vec7, xyz1 );
+    threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]); // each call converts two float quadwords into one quadword of eight halves
+    threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
+    threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
+}
+
+inline Vector3 & Vector3::operator =( Vector3 vec )
+{
+    mVec128 = vec.mVec128;
+    return *this;
+}
+
+inline Vector3 & Vector3::setX( float _x )
+{
+    mVec128 = spu_insert( _x, mVec128, 0 );
+    return *this;
+}
+
+inline float Vector3::getX( ) const
+{
+    return spu_extract( mVec128, 0 );
+}
+
+inline Vector3 & Vector3::setY( float _y )
+{
+    mVec128 = spu_insert( _y, mVec128, 1 );
+    return *this;
+}
+
+inline float Vector3::getY( ) const
+{
+    return spu_extract( mVec128, 1 );
+}
+
+inline Vector3 & Vector3::setZ( float _z )
+{
+    mVec128 = spu_insert( _z, mVec128, 2 );
+    return *this;
+}
+
+inline float Vector3::getZ( ) const
+{
+    return spu_extract( mVec128, 2 );
+}
+
+inline Vector3 & Vector3::setElem( int idx, float value )
+{
+    mVec128 = spu_insert( value, mVec128, idx );
+    return *this;
+}
+
+inline float Vector3::getElem( int idx ) const
+{
+    return spu_extract( mVec128, idx );
+}
+
+inline VecIdx Vector3::operator []( int idx )
+{
+    return VecIdx( mVec128, idx );
+}
+
+inline float Vector3::operator []( int idx ) const
+{
+    return spu_extract( mVec128, idx );
+}
+
+inline const Vector3 Vector3::operator +( Vector3 vec ) const
+{
+    return Vector3( spu_add( mVec128, vec.mVec128 ) );
+}
+
+inline const Vector3 Vector3::operator -( Vector3 vec ) const
+{
+    return Vector3( spu_sub( mVec128, vec.mVec128 ) );
+}
+
+inline const Point3 Vector3::operator +( Point3 pnt ) const
+{
+    return Point3( spu_add( mVec128, pnt.get128() ) );
+}
+
+inline const Vector3 Vector3::operator *( float scalar ) const
+{
+    return Vector3( spu_mul( mVec128, spu_splats(scalar) ) );
+}
+
+inline Vector3 & Vector3::operator +=( Vector3 vec )
+{
+    *this = *this + vec;
+    return *this;
+}
+
+inline Vector3 & Vector3::operator -=( Vector3 vec )
+{
+    *this = *this - vec;
+    return *this;
+}
+
+inline Vector3 & Vector3::operator *=( float scalar )
+{
+    *this = *this * scalar;
+    return *this;
+}
+
+inline const Vector3 Vector3::operator /( float scalar ) const
+{
+    return Vector3( divf4( mVec128, spu_splats(scalar) ) );
+}
+
+inline Vector3 & Vector3::operator /=( float scalar )
+{
+    *this = *this / scalar;
+    return *this;
+}
+
+inline const Vector3 Vector3::operator -( ) const
+{
+    return Vector3( negatef4( mVec128 ) );
+}
+
+inline const Vector3 operator *( float scalar, Vector3 vec )
+{
+    return vec * scalar;
+}
+
+inline const Vector3 mulPerElem( Vector3 vec0, Vector3 vec1 )
+{
+    return Vector3( spu_mul( vec0.get128(), vec1.get128() ) );
+}
+
+inline const Vector3 divPerElem( Vector3 vec0, Vector3 vec1 )
+{
+    return Vector3( divf4( vec0.get128(), vec1.get128() ) );
+}
+
+inline const Vector3 recipPerElem( Vector3 vec )
+{
+    return Vector3( recipf4( vec.get128() ) );
+}
+
+inline const Vector3 sqrtPerElem( Vector3 vec )
+{
+    return Vector3( sqrtf4( vec.get128() ) );
+}
+
+inline const Vector3 rsqrtPerElem( Vector3 vec )
+{
+    return Vector3( rsqrtf4( vec.get128() ) );
+}
+
+inline const Vector3 absPerElem( Vector3 vec )
+{
+    return Vector3( fabsf4( vec.get128() ) );
+}
+
+inline const Vector3 copySignPerElem( Vector3 vec0, Vector3 vec1 )
+{
+    return Vector3( copysignf4( vec0.get128(), vec1.get128() ) );
+}
+
+inline const Vector3 maxPerElem( Vector3 vec0, Vector3 vec1 )
+{
+    return Vector3( fmaxf4( vec0.get128(), vec1.get128() ) );
+}
+
+inline float maxElem( Vector3 vec ) // returns the largest of x, y and z; the fourth word never reaches word 0
+{
+    vec_float4 result;
+    result = fmaxf4( spu_promote( spu_extract( vec.get128(), 1 ), 0 ), vec.get128() ); // word 0 = max( y, x ); the other words are not read afterwards
+    result = fmaxf4( spu_promote( spu_extract( vec.get128(), 2 ), 0 ), result ); // word 0 = max( z, previous word 0 )
+    return spu_extract( result, 0 );
+}
+
+inline const Vector3 minPerElem( Vector3 vec0, Vector3 vec1 )
+{
+    return Vector3( fminf4( vec0.get128(), vec1.get128() ) );
+}
+
+inline float minElem( Vector3 vec ) // returns the smallest of x, y and z; the fourth word never reaches word 0
+{
+    vec_float4 result;
+    result = fminf4( spu_promote( spu_extract( vec.get128(), 1 ), 0 ), vec.get128() ); // word 0 = min( y, x ); the other words are not read afterwards
+    result = fminf4( spu_promote( spu_extract( vec.get128(), 2 ), 0 ), result ); // word 0 = min( z, previous word 0 )
+    return spu_extract( result, 0 );
+}
+
+inline float sum( Vector3 vec )
+{
+    return
+        spu_extract( vec.get128(), 0 ) +
+        spu_extract( vec.get128(), 1 ) +
+        spu_extract( vec.get128(), 2 );
+}
+
+inline float dot( Vector3 vec0, Vector3 vec1 )
+{
+    return spu_extract( _vmathVfDot3( vec0.get128(), vec1.get128() ), 0 );
+}
+
+inline float lengthSqr( Vector3 vec )
+{
+    return spu_extract( _vmathVfDot3( vec.get128(), vec.get128() ), 0 );
+}
+
+inline float length( Vector3 vec )
+{
+    return sqrtf( lengthSqr( vec ) );
+}
+
+inline const Vector3 normalize( Vector3 vec ) // multiplies vec by the reciprocal square root of its squared length; no zero-length guard is present
+{
+    vec_float4 dot = _vmathVfDot3( vec.get128(), vec.get128() ); // squared length, valid in word 0 only
+    dot = spu_shuffle( dot, dot, (vec_uchar16)spu_splats(0x00010203) ); // broadcast word 0 so every component gets the same scale
+    return Vector3( spu_mul( vec.get128(), rsqrtf4( dot ) ) );
+}
+
+inline const Vector3 cross( Vector3 vec0, Vector3 vec1 )
+{
+    return Vector3( _vmathVfCross( vec0.get128(), vec1.get128() ) );
+}
+
+inline const Vector3 select( Vector3 vec0, Vector3 vec1, bool select1 ) // returns vec1 when select1 is true and vec0 otherwise, using a bit mask instead of an if
+{
+    return Vector3( spu_sel( vec0.get128(), vec1.get128(), spu_splats( (unsigned int)-(select1 > 0) ) ) ); // -(1) converts to an all-ones mask (take vec1), -(0) to zero (take vec0)
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+inline void print( Vector3 vec )
+{
+    union { vec_float4 v; float s[4]; } tmp;
+    tmp.v = vec.get128();
+    printf( "( %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2] );
+}
+
+inline void print( Vector3 vec, const char * name )
+{
+    union { vec_float4 v; float s[4]; } tmp;
+    tmp.v = vec.get128();
+    printf( "%s: ( %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2] );
+}
+
+#endif
+
+inline Vector4::Vector4( float _x, float _y, float _z, float _w )
+{
+    mVec128 = (vec_float4){ _x, _y, _z, _w };
+}
+
+inline Vector4::Vector4( Vector3 xyz, float _w ) // builds ( xyz.x, xyz.y, xyz.z, _w )
+{
+    mVec128 = spu_shuffle( xyz.get128(), spu_promote( _w, 0 ), _VECTORMATH_SHUF_XYZA ); // A picks word 0 of the second operand, where spu_promote placed _w
+}
+
+inline Vector4::Vector4( Vector3 vec ) // copies x, y, z from vec and sets w to 0
+{
+    mVec128 = spu_sel( vec.get128(), spu_splats(0.0f), (vec_uint4)spu_maskb(0x000f) ); // the mask covers only the last word, which takes 0.0f
+}
+
+inline Vector4::Vector4( Point3 pnt ) // copies x, y, z from pnt and sets w to 1
+{
+    mVec128 = spu_sel( pnt.get128(), spu_splats(1.0f), (vec_uint4)spu_maskb(0x000f) ); // the mask covers only the last word, which takes 1.0f
+}
+
+inline Vector4::Vector4( Quat quat )
+{
+    mVec128 = quat.get128();
+}
+
+inline Vector4::Vector4( float scalar )
+{
+    mVec128 = spu_splats( scalar );
+}
+
+inline Vector4::Vector4( vec_float4 vf4 )
+{
+    mVec128 = vf4;
+}
+
+inline const Vector4 Vector4::xAxis( )
+{
+    return Vector4( _VECTORMATH_UNIT_1000 );
+}
+
+inline const Vector4 Vector4::yAxis( )
+{
+    return Vector4( _VECTORMATH_UNIT_0100 );
+}
+
+inline const Vector4 Vector4::zAxis( )
+{
+    return Vector4( _VECTORMATH_UNIT_0010 );
+}
+
+inline const Vector4 Vector4::wAxis( )
+{
+    return Vector4( _VECTORMATH_UNIT_0001 );
+}
+
+inline const Vector4 lerp( float t, Vector4 vec0, Vector4 vec1 )
+{
+    return ( vec0 + ( ( vec1 - vec0 ) * t ) );
+}
+
+inline const Vector4 slerp( float t, Vector4 unitVec0, Vector4 unitVec1 ) // spherical interpolation: weights unitVec0 and unitVec1 by sin((1-t)a)/sin(a) and sin(t*a)/sin(a)
+{
+    vec_float4 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
+    vec_uint4 selectMask;
+    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203); // shuffle patterns that broadcast word x, y or z to all four words
+    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
+    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
+    cosAngle = _vmathVfDot4( unitVec0.get128(), unitVec1.get128() ); // 4-component dot product
+    cosAngle = spu_shuffle( cosAngle, cosAngle, shuffle_xxxx ); // broadcast word 0 to every word
+    selectMask = (vec_uint4)spu_cmpgt( spu_splats(_VECTORMATH_SLERP_TOL), cosAngle ); // all ones when cosAngle is below the tolerance
+    angle = acosf4( cosAngle );
+    tttt = spu_splats(t);
+    oneMinusT = spu_sub( spu_splats(1.0f), tttt );
+    angles = spu_sel( spu_splats(1.0f), oneMinusT, (vec_uint4)spu_maskb(0x0f00) ); // word 1 <- 1-t
+    angles = spu_sel( angles, tttt, (vec_uint4)spu_maskb(0x00f0) ); // word 2 <- t, giving [ 1, 1-t, t, 1 ]
+    angles = spu_mul( angles, angle ); // [ a, (1-t)*a, t*a, a ]
+    sines = sinf4( angles );
+    scales = divf4( sines, spu_shuffle( sines, sines, shuffle_xxxx ) ); // divide every sine by sin(a) from word 0
+    scale0 = spu_sel( oneMinusT, spu_shuffle( scales, scales, shuffle_yyyy ), selectMask ); // sin((1-t)*a)/sin(a), or 1-t when the mask is clear
+    scale1 = spu_sel( tttt, spu_shuffle( scales, scales, shuffle_zzzz ), selectMask ); // sin(t*a)/sin(a), or t when the mask is clear
+    return Vector4( spu_madd( unitVec0.get128(), scale0, spu_mul( unitVec1.get128(), scale1 ) ) ); // unitVec0*scale0 + unitVec1*scale1
+}
+
+inline vec_float4 Vector4::get128( ) const
+{
+    return mVec128;
+}
+
+inline void storeHalfFloats( Vector4 vec0, Vector4 vec1, Vector4 vec2, Vector4 vec3, vec_ushort8 * twoQuads ) // stores four 4-vectors (16 floats) as 16 half-floats in two quadwords
+{
+    twoQuads[0] = _vmath2VfToHalfFloats(vec0.get128(), vec1.get128()); // vec0's four halves followed by vec1's
+    twoQuads[1] = _vmath2VfToHalfFloats(vec2.get128(), vec3.get128()); // vec2's four halves followed by vec3's
+}
+
+inline Vector4 & Vector4::operator =( Vector4 vec )
+{
+    mVec128 = vec.mVec128;
+    return *this;
+}
+
+inline Vector4 & Vector4::setXYZ( Vector3 vec ) // replaces x, y, z with those of vec and leaves w unchanged
+{
+    mVec128 = spu_sel( vec.get128(), mVec128, (vec_uint4)spu_maskb(0x000f) ); // the mask covers only the last word, which keeps the current value
+    return *this;
+}
+
+inline const Vector3 Vector4::getXYZ( ) const
+{
+    return Vector3( mVec128 );
+}
+
+inline Vector4 & Vector4::setX( float _x )
+{
+    mVec128 = spu_insert( _x, mVec128, 0 );
+    return *this;
+}
+
+inline float Vector4::getX( ) const
+{
+    return spu_extract( mVec128, 0 );
+}
+
+inline Vector4 & Vector4::setY( float _y )
+{
+    mVec128 = spu_insert( _y, mVec128, 1 );
+    return *this;
+}
+
+inline float Vector4::getY( ) const
+{
+    return spu_extract( mVec128, 1 );
+}
+
+inline Vector4 & Vector4::setZ( float _z )
+{
+    mVec128 = spu_insert( _z, mVec128, 2 );
+    return *this;
+}
+
+inline float Vector4::getZ( ) const
+{
+    return spu_extract( mVec128, 2 );
+}
+
+inline Vector4 & Vector4::setW( float _w )
+{
+    mVec128 = spu_insert( _w, mVec128, 3 );
+    return *this;
+}
+
+inline float Vector4::getW( ) const
+{
+    return spu_extract( mVec128, 3 );
+}
+
+inline Vector4 & Vector4::setElem( int idx, float value )
+{
+    mVec128 = spu_insert( value, mVec128, idx );
+    return *this;
+}
+
+inline float Vector4::getElem( int idx ) const
+{
+    return spu_extract( mVec128, idx );
+}
+
+inline VecIdx Vector4::operator []( int idx )
+{
+    return VecIdx( mVec128, idx );
+}
+
+inline float Vector4::operator []( int idx ) const
+{
+    return spu_extract( mVec128, idx );
+}
+
+inline const Vector4 Vector4::operator +( Vector4 vec ) const
+{
+    return Vector4( spu_add( mVec128, vec.mVec128 ) );
+}
+
+inline const Vector4 Vector4::operator -( Vector4 vec ) const
+{
+    return Vector4( spu_sub( mVec128, vec.mVec128 ) );
+}
+
+inline const Vector4 Vector4::operator *( float scalar ) const
+{
+    return Vector4( spu_mul( mVec128, spu_splats(scalar) ) );
+}
+
+inline Vector4 & Vector4::operator +=( Vector4 vec )
+{
+    *this = *this + vec;
+    return *this;
+}
+
+inline Vector4 & Vector4::operator -=( Vector4 vec )
+{
+    *this = *this - vec;
+    return *this;
+}
+
+inline Vector4 & Vector4::operator *=( float scalar )
+{
+    *this = *this * scalar;
+    return *this;
+}
+
+inline const Vector4 Vector4::operator /( float scalar ) const
+{
+    return Vector4( divf4( mVec128, spu_splats(scalar) ) );
+}
+
+inline Vector4 & Vector4::operator /=( float scalar )
+{
+    *this = *this / scalar;
+    return *this;
+}
+
+inline const Vector4 Vector4::operator -( ) const
+{
+    return Vector4( negatef4( mVec128 ) );
+}
+
+inline const Vector4 operator *( float scalar, Vector4 vec )
+{
+    return vec * scalar;
+}
+
+inline const Vector4 mulPerElem( Vector4 vec0, Vector4 vec1 )
+{
+    return Vector4( spu_mul( vec0.get128(), vec1.get128() ) );
+}
+
+inline const Vector4 divPerElem( Vector4 vec0, Vector4 vec1 )
+{
+    return Vector4( divf4( vec0.get128(), vec1.get128() ) );
+}
+
+inline const Vector4 recipPerElem( Vector4 vec )
+{
+    return Vector4( recipf4( vec.get128() ) );
+}
+
+inline const Vector4 sqrtPerElem( Vector4 vec )
+{
+    return Vector4( sqrtf4( vec.get128() ) );
+}
+
+inline const Vector4 rsqrtPerElem( Vector4 vec )
+{
+    return Vector4( rsqrtf4( vec.get128() ) );
+}
+
+inline const Vector4 absPerElem( Vector4 vec )
+{
+    return Vector4( fabsf4( vec.get128() ) );
+}
+
+inline const Vector4 copySignPerElem( Vector4 vec0, Vector4 vec1 )
+{
+    return Vector4( copysignf4( vec0.get128(), vec1.get128() ) );
+}
+
+inline const Vector4 maxPerElem( Vector4 vec0, Vector4 vec1 )
+{
+    return Vector4( fmaxf4( vec0.get128(), vec1.get128() ) );
+}
+
+inline float maxElem( Vector4 vec ) // returns the largest of x, y, z and w
+{
+    vec_float4 result;
+    result = fmaxf4( spu_promote( spu_extract( vec.get128(), 1 ), 0 ), vec.get128() ); // word 0 = max( y, x ); the other words are not read afterwards
+    result = fmaxf4( spu_promote( spu_extract( vec.get128(), 2 ), 0 ), result ); // word 0 = max( z, previous word 0 )
+    result = fmaxf4( spu_promote( spu_extract( vec.get128(), 3 ), 0 ), result ); // word 0 = max( w, previous word 0 )
+    return spu_extract( result, 0 );
+}
+
+inline const Vector4 minPerElem( Vector4 vec0, Vector4 vec1 )
+{
+    return Vector4( fminf4( vec0.get128(), vec1.get128() ) );
+}
+
+inline float minElem( Vector4 vec ) // returns the smallest of x, y, z and w
+{
+    vec_float4 result;
+    result = fminf4( spu_promote( spu_extract( vec.get128(), 1 ), 0 ), vec.get128() ); // word 0 = min( y, x ); the other words are not read afterwards
+    result = fminf4( spu_promote( spu_extract( vec.get128(), 2 ), 0 ), result ); // word 0 = min( z, previous word 0 )
+    result = fminf4( spu_promote( spu_extract( vec.get128(), 3 ), 0 ), result ); // word 0 = min( w, previous word 0 )
+    return spu_extract( result, 0 );
+}
+
+inline float sum( Vector4 vec )
+{
+    return
+        spu_extract( vec.get128(), 0 ) +
+        spu_extract( vec.get128(), 1 ) +
+        spu_extract( vec.get128(), 2 ) +
+        spu_extract( vec.get128(), 3 );
+}
+
+inline float dot( Vector4 vec0, Vector4 vec1 )
+{
+    return spu_extract( _vmathVfDot4( vec0.get128(), vec1.get128() ), 0 );
+}
+
+inline float lengthSqr( Vector4 vec )
+{
+    return spu_extract( _vmathVfDot4( vec.get128(), vec.get128() ), 0 );
+}
+
+inline float length( Vector4 vec )
+{
+    return sqrtf( lengthSqr( vec ) );
+}
+
+inline const Vector4 normalize( Vector4 vec ) // multiplies vec by the reciprocal square root of its squared length; no zero-length guard is present
+{
+    vec_float4 dot = _vmathVfDot4( vec.get128(), vec.get128() ); // _vmathVfDot4 leaves the sum in every word, so no broadcast shuffle is needed here
+    return Vector4( spu_mul( vec.get128(), rsqrtf4( dot ) ) );
+}
+
+inline const Vector4 select( Vector4 vec0, Vector4 vec1, bool select1 ) // returns vec1 when select1 is true and vec0 otherwise, using a bit mask instead of an if
+{
+    return Vector4( spu_sel( vec0.get128(), vec1.get128(), spu_splats( (unsigned int)-(select1 > 0) ) ) ); // -(1) converts to an all-ones mask (take vec1), -(0) to zero (take vec0)
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+inline void print( Vector4 vec )
+{
+    union { vec_float4 v; float s[4]; } tmp;
+    tmp.v = vec.get128();
+    printf( "( %f %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] );
+}
+
+inline void print( Vector4 vec, const char * name )
+{
+    union { vec_float4 v; float s[4]; } tmp;
+    tmp.v = vec.get128();
+    printf( "%s: ( %f %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] );
+}
+
+#endif
+
+inline Point3::Point3( float _x, float _y, float _z )
+{
+    mVec128 = (vec_float4){ _x, _y, _z, 0.0f  };
+}
+
+inline Point3::Point3( Vector3 vec )
+{
+    mVec128 = vec.get128();
+}
+
+inline Point3::Point3( float scalar )
+{
+    mVec128 = spu_splats( scalar );
+}
+
+inline Point3::Point3( vec_float4 vf4 )
+{
+    mVec128 = vf4;
+}
+
+inline const Point3 lerp( float t, Point3 pnt0, Point3 pnt1 )
+{
+    return ( pnt0 + ( ( pnt1 - pnt0 ) * t ) );
+}
+
+inline vec_float4 Point3::get128( ) const
+{
+    return mVec128;
+}
+
+inline void storeXYZ( Point3 pnt, vec_float4 * quad ) // writes x, y, z into *quad while keeping the quadword's existing fourth word
+{
+    vec_float4 dstVec = *quad;
+    vec_uint4 mask = (vec_uint4)spu_maskb(0x000f); // mask covering the last four bytes, i.e. word 3
+    dstVec = spu_sel(pnt.get128(), dstVec, mask); // words 0-2 come from pnt, word 3 from the old contents of *quad
+    *quad = dstVec;
+}
+
+inline void loadXYZArray( Point3 & pnt0, Point3 & pnt1, Point3 & pnt2, Point3 & pnt3, const vec_float4 * threeQuads ) // unpacks four tightly packed xyz triples (12 floats in 3 quadwords) into four points
+{
+    vec_float4 xyzx, yzxy, zxyz, xyz1, xyz2, xyz3;
+    xyzx = threeQuads[0]; // x0 y0 z0 x1
+    yzxy = threeQuads[1]; // y1 z1 x2 y2
+    zxyz = threeQuads[2]; // z2 x3 y3 z3
+    xyz1 = spu_shuffle( xyzx, yzxy, _VECTORMATH_SHUF_WABC ); // x1 y1 z1 x2
+    xyz2 = spu_shuffle( yzxy, zxyz, _VECTORMATH_SHUF_ZWAB ); // x2 y2 z2 x3
+    xyz3 = spu_rlqwbyte( zxyz, 4 ); // rotate left one word: x3 y3 z3 z2
+    pnt0 = Point3( xyzx ); // note: the fourth word of each output is a leftover neighbouring component, not cleared
+    pnt1 = Point3( xyz1 );
+    pnt2 = Point3( xyz2 );
+    pnt3 = Point3( xyz3 );
+}
+
+inline void storeXYZArray( Point3 pnt0, Point3 pnt1, Point3 pnt2, Point3 pnt3, vec_float4 * threeQuads ) // packs the xyz of four points into 12 consecutive floats (3 quadwords), dropping each fourth word
+{
+    vec_float4 xyzx, yzxy, zxyz;
+    xyzx = spu_shuffle( pnt0.get128(), pnt1.get128(), _VECTORMATH_SHUF_XYZA ); // x0 y0 z0 x1
+    yzxy = spu_shuffle( pnt1.get128(), pnt2.get128(), _VECTORMATH_SHUF_YZAB ); // y1 z1 x2 y2
+    zxyz = spu_shuffle( pnt2.get128(), pnt3.get128(), _VECTORMATH_SHUF_ZABC ); // z2 x3 y3 z3
+    threeQuads[0] = xyzx;
+    threeQuads[1] = yzxy;
+    threeQuads[2] = zxyz;
+}
+
+inline void storeHalfFloats( Point3 pnt0, Point3 pnt1, Point3 pnt2, Point3 pnt3, Point3 pnt4, Point3 pnt5, Point3 pnt6, Point3 pnt7, vec_ushort8 * threeQuads ) // packs the xyz of eight points (24 floats) as 24 half-floats in three quadwords
+{
+    vec_float4 xyz0[3]; // packed xyz of pnt0..pnt3
+    vec_float4 xyz1[3]; // packed xyz of pnt4..pnt7
+    storeXYZArray( pnt0, pnt1, pnt2, pnt3, xyz0 );
+    storeXYZArray( pnt4, pnt5, pnt6, pnt7, xyz1 );
+    threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]); // each call converts two float quadwords into one quadword of eight halves
+    threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
+    threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
+}
+
+inline Point3 & Point3::operator =( Point3 pnt )
+{
+    mVec128 = pnt.mVec128;
+    return *this;
+}
+
+inline Point3 & Point3::setX( float _x )
+{
+    mVec128 = spu_insert( _x, mVec128, 0 );
+    return *this;
+}
+
+inline float Point3::getX( ) const
+{
+    return spu_extract( mVec128, 0 );
+}
+
+inline Point3 & Point3::setY( float _y )
+{
+    mVec128 = spu_insert( _y, mVec128, 1 );
+    return *this;
+}
+
+inline float Point3::getY( ) const
+{
+    return spu_extract( mVec128, 1 );
+}
+
+inline Point3 & Point3::setZ( float _z )
+{
+    mVec128 = spu_insert( _z, mVec128, 2 );
+    return *this;
+}
+
+inline float Point3::getZ( ) const
+{
+    return spu_extract( mVec128, 2 );
+}
+
+inline Point3 & Point3::setElem( int idx, float value )
+{
+    mVec128 = spu_insert( value, mVec128, idx );
+    return *this;
+}
+
+inline float Point3::getElem( int idx ) const
+{
+    return spu_extract( mVec128, idx );
+}
+
+inline VecIdx Point3::operator []( int idx )
+{
+    return VecIdx( mVec128, idx );
+}
+
+inline float Point3::operator []( int idx ) const
+{
+    return spu_extract( mVec128, idx );
+}
+
+inline const Vector3 Point3::operator -( Point3 pnt ) const
+{
+    return Vector3( spu_sub( mVec128, pnt.mVec128 ) );
+}
+
+inline const Point3 Point3::operator +( Vector3 vec ) const
+{
+    return Point3( spu_add( mVec128, vec.get128() ) );
+}
+
+inline const Point3 Point3::operator -( Vector3 vec ) const
+{
+    return Point3( spu_sub( mVec128, vec.get128() ) );
+}
+
+inline Point3 & Point3::operator +=( Vector3 vec )
+{
+    *this = *this + vec;
+    return *this;
+}
+
+inline Point3 & Point3::operator -=( Vector3 vec )
+{
+    *this = *this - vec;
+    return *this;
+}
+
+inline const Point3 mulPerElem( Point3 pnt0, Point3 pnt1 )
+{
+    return Point3( spu_mul( pnt0.get128(), pnt1.get128() ) );
+}
+
+inline const Point3 divPerElem( Point3 pnt0, Point3 pnt1 )
+{
+    return Point3( divf4( pnt0.get128(), pnt1.get128() ) );
+}
+
+inline const Point3 recipPerElem( Point3 pnt )
+{
+    return Point3( recipf4( pnt.get128() ) );
+}
+
+inline const Point3 sqrtPerElem( Point3 pnt )
+{
+    return Point3( sqrtf4( pnt.get128() ) );
+}
+
+inline const Point3 rsqrtPerElem( Point3 pnt )
+{
+    return Point3( rsqrtf4( pnt.get128() ) );
+}
+
+inline const Point3 absPerElem( Point3 pnt )
+{
+    return Point3( fabsf4( pnt.get128() ) );
+}
+
+inline const Point3 copySignPerElem( Point3 pnt0, Point3 pnt1 )
+{
+    return Point3( copysignf4( pnt0.get128(), pnt1.get128() ) );
+}
+
+inline const Point3 maxPerElem( Point3 pnt0, Point3 pnt1 )
+{
+    return Point3( fmaxf4( pnt0.get128(), pnt1.get128() ) );
+}
+
+inline float maxElem( Point3 pnt ) // returns the largest of x, y and z; the fourth word never reaches word 0
+{
+    vec_float4 result;
+    result = fmaxf4( spu_promote( spu_extract( pnt.get128(), 1 ), 0 ), pnt.get128() ); // word 0 = max( y, x ); the other words are not read afterwards
+    result = fmaxf4( spu_promote( spu_extract( pnt.get128(), 2 ), 0 ), result ); // word 0 = max( z, previous word 0 )
+    return spu_extract( result, 0 );
+}
+
+inline const Point3 minPerElem( Point3 pnt0, Point3 pnt1 )
+{
+    return Point3( fminf4( pnt0.get128(), pnt1.get128() ) );
+}
+
+inline float minElem( Point3 pnt ) // returns the smallest of x, y and z; the fourth word never reaches word 0
+{
+    vec_float4 result;
+    result = fminf4( spu_promote( spu_extract( pnt.get128(), 1 ), 0 ), pnt.get128() ); // word 0 = min( y, x ); the other words are not read afterwards
+    result = fminf4( spu_promote( spu_extract( pnt.get128(), 2 ), 0 ), result ); // word 0 = min( z, previous word 0 )
+    return spu_extract( result, 0 );
+}
+
+inline float sum( Point3 pnt )
+{
+    return
+        spu_extract( pnt.get128(), 0 ) +
+        spu_extract( pnt.get128(), 1 ) +
+        spu_extract( pnt.get128(), 2 );
+}
+
+inline const Point3 scale( Point3 pnt, float scaleVal )
+{
+    return mulPerElem( pnt, Point3( scaleVal ) );
+}
+
+inline const Point3 scale( Point3 pnt, Vector3 scaleVec )
+{
+    return mulPerElem( pnt, Point3( scaleVec ) );
+}
+
+inline float projection( Point3 pnt, Vector3 unitVec ) // 3-component dot product of pnt's coordinates with unitVec
+{
+    return spu_extract( _vmathVfDot3( pnt.get128(), unitVec.get128() ), 0 ); // _vmathVfDot3 leaves the sum in word 0
+}
+
+inline float distSqrFromOrigin( Point3 pnt )
+{
+    return lengthSqr( Vector3( pnt ) );
+}
+
+inline float distFromOrigin( Point3 pnt )
+{
+    return length( Vector3( pnt ) );
+}
+
+inline float distSqr( Point3 pnt0, Point3 pnt1 )
+{
+    return lengthSqr( ( pnt1 - pnt0 ) );
+}
+
+inline float dist( Point3 pnt0, Point3 pnt1 )
+{
+    return length( ( pnt1 - pnt0 ) );
+}
+
+inline const Point3 select( Point3 pnt0, Point3 pnt1, bool select1 ) // returns pnt1 when select1 is true and pnt0 otherwise, using a bit mask instead of an if
+{
+    return Point3( spu_sel( pnt0.get128(), pnt1.get128(), spu_splats( (unsigned int)-(select1 > 0) ) ) ); // -(1) converts to an all-ones mask (take pnt1), -(0) to zero (take pnt0)
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+inline void print( Point3 pnt )
+{
+    union { vec_float4 v; float s[4]; } tmp;
+    tmp.v = pnt.get128();
+    printf( "( %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2] );
+}
+
+inline void print( Point3 pnt, const char * name )
+{
+    union { vec_float4 v; float s[4]; } tmp;
+    tmp.v = pnt.get128();
+    printf( "%s: ( %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2] );
+}
+
+#endif
+
+} // namespace Aos
+} // namespace Vectormath
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/spu/cpp/vec_soa.h b/Extras/vectormathlibrary/include/vectormath/spu/cpp/vec_soa.h
index 1ac657732..1e4e04b2d 100644
--- a/Extras/vectormathlibrary/include/vectormath/spu/cpp/vec_soa.h
+++ b/Extras/vectormathlibrary/include/vectormath/spu/cpp/vec_soa.h
@@ -1,1439 +1,1439 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_VEC_SOA_CPP_H
-#define _VECTORMATH_VEC_SOA_CPP_H
-//-----------------------------------------------------------------------------
-// Constants
-// for shuffles, words are labeled [x,y,z,w] [a,b,c,d]
-
-#define _VECTORMATH_SHUF_X 0x00010203
-#define _VECTORMATH_SHUF_Y 0x04050607
-#define _VECTORMATH_SHUF_Z 0x08090a0b
-#define _VECTORMATH_SHUF_W 0x0c0d0e0f
-#define _VECTORMATH_SHUF_A 0x10111213
-#define _VECTORMATH_SHUF_B 0x14151617
-#define _VECTORMATH_SHUF_C 0x18191a1b
-#define _VECTORMATH_SHUF_D 0x1c1d1e1f
-#define _VECTORMATH_SHUF_0 0x80808080
-#define _VECTORMATH_SHUF_XAYB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_B })
-#define _VECTORMATH_SHUF_ZCWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_D })
-#define _VECTORMATH_SHUF_ZBW0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_0 })
-#define _VECTORMATH_SHUF_XCY0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_0 })
-#define _VECTORMATH_SHUF_ZDW0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_D, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_0 })
-#define _VECTORMATH_SHUF_XAZC ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_C })
-#define _VECTORMATH_SHUF_ZDXB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_D, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_B })
-#define _VECTORMATH_SHUF_YBWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_D })
-#define _VECTORMATH_SHUF_XDZB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_D, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_B })
-#define _VECTORMATH_SHUF_YAWC ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_C })
-#define _VECTORMATH_SHUF_ZBXD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_D })
-#define _VECTORMATH_SHUF_XYCD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_D })
-#define _VECTORMATH_SLERP_TOL 0.999f
-
-//-----------------------------------------------------------------------------
-// Definitions
-
-#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
-#define _VECTORMATH_INTERNAL_FUNCTIONS
-
-#endif
-
-namespace Vectormath {
-namespace Soa {
-
-inline Vector3::Vector3( const Vector3 & vec )
-{
-    mX = vec.mX;
-    mY = vec.mY;
-    mZ = vec.mZ;
-}
-
-inline Vector3::Vector3( vec_float4 _x, vec_float4 _y, vec_float4 _z )
-{
-    mX = _x;
-    mY = _y;
-    mZ = _z;
-}
-
-inline Vector3::Vector3( const Point3 & pnt )
-{
-    mX = pnt.getX();
-    mY = pnt.getY();
-    mZ = pnt.getZ();
-}
-
-inline Vector3::Vector3( vec_float4 scalar )
-{
-    mX = scalar;
-    mY = scalar;
-    mZ = scalar;
-}
-
-inline Vector3::Vector3( Aos::Vector3 vec )
-{
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
-    vec_float4 vec128 = vec.get128();
-    mX = spu_shuffle( vec128, vec128, shuffle_xxxx );
-    mY = spu_shuffle( vec128, vec128, shuffle_yyyy );
-    mZ = spu_shuffle( vec128, vec128, shuffle_zzzz );
-}
-
-inline Vector3::Vector3( Aos::Vector3 vec0, Aos::Vector3 vec1, Aos::Vector3 vec2, Aos::Vector3 vec3 )
-{
-    vec_float4 tmp0, tmp1, tmp2, tmp3;
-    tmp0 = spu_shuffle( vec0.get128(), vec2.get128(), _VECTORMATH_SHUF_XAYB );
-    tmp1 = spu_shuffle( vec1.get128(), vec3.get128(), _VECTORMATH_SHUF_XAYB );
-    tmp2 = spu_shuffle( vec0.get128(), vec2.get128(), _VECTORMATH_SHUF_ZCWD );
-    tmp3 = spu_shuffle( vec1.get128(), vec3.get128(), _VECTORMATH_SHUF_ZCWD );
-    mX = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_XAYB );
-    mY = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_ZCWD );
-    mZ = spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_XAYB );
-}
-
-inline const Vector3 Vector3::xAxis( )
-{
-    return Vector3( spu_splats(1.0f), spu_splats(0.0f), spu_splats(0.0f) );
-}
-
-inline const Vector3 Vector3::yAxis( )
-{
-    return Vector3( spu_splats(0.0f), spu_splats(1.0f), spu_splats(0.0f) );
-}
-
-inline const Vector3 Vector3::zAxis( )
-{
-    return Vector3( spu_splats(0.0f), spu_splats(0.0f), spu_splats(1.0f) );
-}
-
-inline const Vector3 lerp( vec_float4 t, const Vector3 & vec0, const Vector3 & vec1 )
-{
-    return ( vec0 + ( ( vec1 - vec0 ) * t ) );
-}
-
-inline const Vector3 slerp( vec_float4 t, const Vector3 & unitVec0, const Vector3 & unitVec1 )
-{
-    vec_float4 recipSinAngle, scale0, scale1, cosAngle, angle;
-    vec_uint4 selectMask;
-    cosAngle = dot( unitVec0, unitVec1 );
-    selectMask = (vec_uint4)spu_cmpgt( spu_splats(_VECTORMATH_SLERP_TOL), cosAngle );
-    angle = acosf4( cosAngle );
-    recipSinAngle = recipf4( sinf4( angle ) );
-    scale0 = spu_sel( spu_sub( spu_splats(1.0f), t ), spu_mul( sinf4( spu_mul( spu_sub( spu_splats(1.0f), t ), angle ) ), recipSinAngle ), selectMask );
-    scale1 = spu_sel( t, spu_mul( sinf4( spu_mul( t, angle ) ), recipSinAngle ), selectMask );
-    return ( ( unitVec0 * scale0 ) + ( unitVec1 * scale1 ) );
-}
-
-inline void Vector3::get4Aos( Aos::Vector3 & result0, Aos::Vector3 & result1, Aos::Vector3 & result2, Aos::Vector3 & result3 ) const
-{
-    vec_float4 tmp0, tmp1;
-    tmp0 = spu_shuffle( mX, mZ, _VECTORMATH_SHUF_XAYB );
-    tmp1 = spu_shuffle( mX, mZ, _VECTORMATH_SHUF_ZCWD );
-    result0 = Aos::Vector3( spu_shuffle( tmp0, mY, _VECTORMATH_SHUF_XAYB ) );
-    result1 = Aos::Vector3( spu_shuffle( tmp0, mY, _VECTORMATH_SHUF_ZBW0 ) );
-    result2 = Aos::Vector3( spu_shuffle( tmp1, mY, _VECTORMATH_SHUF_XCY0 ) );
-    result3 = Aos::Vector3( spu_shuffle( tmp1, mY, _VECTORMATH_SHUF_ZDW0 ) );
-}
-
-inline void loadXYZArray( Vector3 & vec, const vec_float4 * threeQuads )
-{
-    vec_float4 xyxy, yzyz, zxzx, xyzx, yzxy, zxyz;
-    xyzx = threeQuads[0];
-    yzxy = threeQuads[1];
-    zxyz = threeQuads[2];
-    xyxy = spu_shuffle( xyzx, yzxy, _VECTORMATH_SHUF_XYCD );
-    zxzx = spu_shuffle( zxyz, xyzx, _VECTORMATH_SHUF_XYCD );
-    yzyz = spu_shuffle( yzxy, zxyz, _VECTORMATH_SHUF_XYCD );
-    vec.setX( spu_shuffle( xyxy, zxzx, _VECTORMATH_SHUF_XDZB ) );
-    vec.setY( spu_shuffle( xyxy, yzyz, _VECTORMATH_SHUF_YAWC ) );
-    vec.setZ( spu_shuffle( zxzx, yzyz, _VECTORMATH_SHUF_ZBXD ) );
-}
-
-inline void storeXYZArray( const Vector3 & vec, vec_float4 * threeQuads )
-{
-    vec_float4 xyzx, yzxy, zxyz, xyxy, zxzx, yzyz;
-    xyxy = spu_shuffle( vec.getX(), vec.getY(), _VECTORMATH_SHUF_XAZC );
-    zxzx = spu_shuffle( vec.getZ(), vec.getX(), _VECTORMATH_SHUF_ZDXB );
-    yzyz = spu_shuffle( vec.getY(), vec.getZ(), _VECTORMATH_SHUF_YBWD );
-    xyzx = spu_shuffle( xyxy, zxzx, _VECTORMATH_SHUF_XYCD );
-    yzxy = spu_shuffle( yzyz, xyxy, _VECTORMATH_SHUF_XYCD );
-    zxyz = spu_shuffle( zxzx, yzyz, _VECTORMATH_SHUF_XYCD );
-    threeQuads[0] = xyzx;
-    threeQuads[1] = yzxy;
-    threeQuads[2] = zxyz;
-}
-
-inline void storeHalfFloats( const Vector3 & vec0, const Vector3 & vec1, vec_ushort8 * threeQuads )
-{
-    vec_float4 xyz0[3];
-    vec_float4 xyz1[3];
-    storeXYZArray( vec0, xyz0 );
-    storeXYZArray( vec1, xyz1 );
-    threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
-    threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
-    threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
-}
-
-inline Vector3 & Vector3::operator =( const Vector3 & vec )
-{
-    mX = vec.mX;
-    mY = vec.mY;
-    mZ = vec.mZ;
-    return *this;
-}
-
-inline Vector3 & Vector3::setX( vec_float4 _x )
-{
-    mX = _x;
-    return *this;
-}
-
-inline vec_float4 Vector3::getX( ) const
-{
-    return mX;
-}
-
-inline Vector3 & Vector3::setY( vec_float4 _y )
-{
-    mY = _y;
-    return *this;
-}
-
-inline vec_float4 Vector3::getY( ) const
-{
-    return mY;
-}
-
-inline Vector3 & Vector3::setZ( vec_float4 _z )
-{
-    mZ = _z;
-    return *this;
-}
-
-inline vec_float4 Vector3::getZ( ) const
-{
-    return mZ;
-}
-
-inline Vector3 & Vector3::setElem( int idx, vec_float4 value )
-{
-    *(&mX + idx) = value;
-    return *this;
-}
-
-inline vec_float4 Vector3::getElem( int idx ) const
-{
-    return *(&mX + idx);
-}
-
-inline Vector3::vec_float4_t & Vector3::operator []( int idx )
-{
-    return *(&mX + idx);
-}
-
-inline vec_float4 Vector3::operator []( int idx ) const
-{
-    return *(&mX + idx);
-}
-
-inline const Vector3 Vector3::operator +( const Vector3 & vec ) const
-{
-    return Vector3(
-        spu_add( mX, vec.mX ),
-        spu_add( mY, vec.mY ),
-        spu_add( mZ, vec.mZ )
-    );
-}
-
-inline const Vector3 Vector3::operator -( const Vector3 & vec ) const
-{
-    return Vector3(
-        spu_sub( mX, vec.mX ),
-        spu_sub( mY, vec.mY ),
-        spu_sub( mZ, vec.mZ )
-    );
-}
-
-inline const Point3 Vector3::operator +( const Point3 & pnt ) const
-{
-    return Point3(
-        spu_add( mX, pnt.getX() ),
-        spu_add( mY, pnt.getY() ),
-        spu_add( mZ, pnt.getZ() )
-    );
-}
-
-inline const Vector3 Vector3::operator *( vec_float4 scalar ) const
-{
-    return Vector3(
-        spu_mul( mX, scalar ),
-        spu_mul( mY, scalar ),
-        spu_mul( mZ, scalar )
-    );
-}
-
-inline Vector3 & Vector3::operator +=( const Vector3 & vec )
-{
-    *this = *this + vec;
-    return *this;
-}
-
-inline Vector3 & Vector3::operator -=( const Vector3 & vec )
-{
-    *this = *this - vec;
-    return *this;
-}
-
-inline Vector3 & Vector3::operator *=( vec_float4 scalar )
-{
-    *this = *this * scalar;
-    return *this;
-}
-
-inline const Vector3 Vector3::operator /( vec_float4 scalar ) const
-{
-    return Vector3(
-        divf4( mX, scalar ),
-        divf4( mY, scalar ),
-        divf4( mZ, scalar )
-    );
-}
-
-inline Vector3 & Vector3::operator /=( vec_float4 scalar )
-{
-    *this = *this / scalar;
-    return *this;
-}
-
-inline const Vector3 Vector3::operator -( ) const
-{
-    return Vector3(
-        negatef4( mX ),
-        negatef4( mY ),
-        negatef4( mZ )
-    );
-}
-
-inline const Vector3 operator *( vec_float4 scalar, const Vector3 & vec )
-{
-    return vec * scalar;
-}
-
-inline const Vector3 mulPerElem( const Vector3 & vec0, const Vector3 & vec1 )
-{
-    return Vector3(
-        spu_mul( vec0.getX(), vec1.getX() ),
-        spu_mul( vec0.getY(), vec1.getY() ),
-        spu_mul( vec0.getZ(), vec1.getZ() )
-    );
-}
-
-inline const Vector3 divPerElem( const Vector3 & vec0, const Vector3 & vec1 )
-{
-    return Vector3(
-        divf4( vec0.getX(), vec1.getX() ),
-        divf4( vec0.getY(), vec1.getY() ),
-        divf4( vec0.getZ(), vec1.getZ() )
-    );
-}
-
-inline const Vector3 recipPerElem( const Vector3 & vec )
-{
-    return Vector3(
-        recipf4( vec.getX() ),
-        recipf4( vec.getY() ),
-        recipf4( vec.getZ() )
-    );
-}
-
-inline const Vector3 sqrtPerElem( const Vector3 & vec )
-{
-    return Vector3(
-        sqrtf4( vec.getX() ),
-        sqrtf4( vec.getY() ),
-        sqrtf4( vec.getZ() )
-    );
-}
-
-inline const Vector3 rsqrtPerElem( const Vector3 & vec )
-{
-    return Vector3(
-        rsqrtf4( vec.getX() ),
-        rsqrtf4( vec.getY() ),
-        rsqrtf4( vec.getZ() )
-    );
-}
-
-inline const Vector3 absPerElem( const Vector3 & vec )
-{
-    return Vector3(
-        fabsf4( vec.getX() ),
-        fabsf4( vec.getY() ),
-        fabsf4( vec.getZ() )
-    );
-}
-
-inline const Vector3 copySignPerElem( const Vector3 & vec0, const Vector3 & vec1 )
-{
-    return Vector3(
-        copysignf4( vec0.getX(), vec1.getX() ),
-        copysignf4( vec0.getY(), vec1.getY() ),
-        copysignf4( vec0.getZ(), vec1.getZ() )
-    );
-}
-
-inline const Vector3 maxPerElem( const Vector3 & vec0, const Vector3 & vec1 )
-{
-    return Vector3(
-        fmaxf4( vec0.getX(), vec1.getX() ),
-        fmaxf4( vec0.getY(), vec1.getY() ),
-        fmaxf4( vec0.getZ(), vec1.getZ() )
-    );
-}
-
-inline vec_float4 maxElem( const Vector3 & vec )
-{
-    vec_float4 result;
-    result = fmaxf4( vec.getX(), vec.getY() );
-    result = fmaxf4( vec.getZ(), result );
-    return result;
-}
-
-inline const Vector3 minPerElem( const Vector3 & vec0, const Vector3 & vec1 )
-{
-    return Vector3(
-        fminf4( vec0.getX(), vec1.getX() ),
-        fminf4( vec0.getY(), vec1.getY() ),
-        fminf4( vec0.getZ(), vec1.getZ() )
-    );
-}
-
-inline vec_float4 minElem( const Vector3 & vec )
-{
-    vec_float4 result;
-    result = fminf4( vec.getX(), vec.getY() );
-    result = fminf4( vec.getZ(), result );
-    return result;
-}
-
-inline vec_float4 sum( const Vector3 & vec )
-{
-    vec_float4 result;
-    result = spu_add( vec.getX(), vec.getY() );
-    result = spu_add( result, vec.getZ() );
-    return result;
-}
-
-inline vec_float4 dot( const Vector3 & vec0, const Vector3 & vec1 )
-{
-    vec_float4 result;
-    result = spu_mul( vec0.getX(), vec1.getX() );
-    result = spu_add( result, spu_mul( vec0.getY(), vec1.getY() ) );
-    result = spu_add( result, spu_mul( vec0.getZ(), vec1.getZ() ) );
-    return result;
-}
-
-inline vec_float4 lengthSqr( const Vector3 & vec )
-{
-    vec_float4 result;
-    result = spu_mul( vec.getX(), vec.getX() );
-    result = spu_add( result, spu_mul( vec.getY(), vec.getY() ) );
-    result = spu_add( result, spu_mul( vec.getZ(), vec.getZ() ) );
-    return result;
-}
-
-inline vec_float4 length( const Vector3 & vec )
-{
-    return sqrtf4( lengthSqr( vec ) );
-}
-
-inline const Vector3 normalize( const Vector3 & vec )
-{
-    vec_float4 lenSqr, lenInv;
-    lenSqr = lengthSqr( vec );
-    lenInv = rsqrtf4( lenSqr );
-    return Vector3(
-        spu_mul( vec.getX(), lenInv ),
-        spu_mul( vec.getY(), lenInv ),
-        spu_mul( vec.getZ(), lenInv )
-    );
-}
-
-inline const Vector3 cross( const Vector3 & vec0, const Vector3 & vec1 )
-{
-    return Vector3(
-        spu_sub( spu_mul( vec0.getY(), vec1.getZ() ), spu_mul( vec0.getZ(), vec1.getY() ) ),
-        spu_sub( spu_mul( vec0.getZ(), vec1.getX() ), spu_mul( vec0.getX(), vec1.getZ() ) ),
-        spu_sub( spu_mul( vec0.getX(), vec1.getY() ), spu_mul( vec0.getY(), vec1.getX() ) )
-    );
-}
-
-inline const Vector3 select( const Vector3 & vec0, const Vector3 & vec1, vec_uint4 select1 )
-{
-    return Vector3(
-        spu_sel( vec0.getX(), vec1.getX(), select1 ),
-        spu_sel( vec0.getY(), vec1.getY(), select1 ),
-        spu_sel( vec0.getZ(), vec1.getZ(), select1 )
-    );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( const Vector3 & vec )
-{
-    Aos::Vector3 vec0, vec1, vec2, vec3;
-    vec.get4Aos( vec0, vec1, vec2, vec3 );
-    printf("slot 0:\n");
-    print( vec0 );
-    printf("slot 1:\n");
-    print( vec1 );
-    printf("slot 2:\n");
-    print( vec2 );
-    printf("slot 3:\n");
-    print( vec3 );
-}
-
-inline void print( const Vector3 & vec, const char * name )
-{
-    Aos::Vector3 vec0, vec1, vec2, vec3;
-    printf( "%s:\n", name );
-    vec.get4Aos( vec0, vec1, vec2, vec3 );
-    printf("slot 0:\n");
-    print( vec0 );
-    printf("slot 1:\n");
-    print( vec1 );
-    printf("slot 2:\n");
-    print( vec2 );
-    printf("slot 3:\n");
-    print( vec3 );
-}
-
-#endif
-
-inline Vector4::Vector4( const Vector4 & vec )
-{
-    mX = vec.mX;
-    mY = vec.mY;
-    mZ = vec.mZ;
-    mW = vec.mW;
-}
-
-inline Vector4::Vector4( vec_float4 _x, vec_float4 _y, vec_float4 _z, vec_float4 _w )
-{
-    mX = _x;
-    mY = _y;
-    mZ = _z;
-    mW = _w;
-}
-
-inline Vector4::Vector4( const Vector3 & xyz, vec_float4 _w )
-{
-    this->setXYZ( xyz );
-    this->setW( _w );
-}
-
-inline Vector4::Vector4( const Vector3 & vec )
-{
-    mX = vec.getX();
-    mY = vec.getY();
-    mZ = vec.getZ();
-    mW = spu_splats(0.0f);
-}
-
-inline Vector4::Vector4( const Point3 & pnt )
-{
-    mX = pnt.getX();
-    mY = pnt.getY();
-    mZ = pnt.getZ();
-    mW = spu_splats(1.0f);
-}
-
-inline Vector4::Vector4( const Quat & quat )
-{
-    mX = quat.getX();
-    mY = quat.getY();
-    mZ = quat.getZ();
-    mW = quat.getW();
-}
-
-inline Vector4::Vector4( vec_float4 scalar )
-{
-    mX = scalar;
-    mY = scalar;
-    mZ = scalar;
-    mW = scalar;
-}
-
-inline Vector4::Vector4( Aos::Vector4 vec )
-{
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
-    vec_uchar16 shuffle_wwww = (vec_uchar16)spu_splats((int)0x0c0d0e0f);
-    vec_float4 vec128 = vec.get128();
-    mX = spu_shuffle( vec128, vec128, shuffle_xxxx );
-    mY = spu_shuffle( vec128, vec128, shuffle_yyyy );
-    mZ = spu_shuffle( vec128, vec128, shuffle_zzzz );
-    mW = spu_shuffle( vec128, vec128, shuffle_wwww );
-}
-
-inline Vector4::Vector4( Aos::Vector4 vec0, Aos::Vector4 vec1, Aos::Vector4 vec2, Aos::Vector4 vec3 )
-{
-    vec_float4 tmp0, tmp1, tmp2, tmp3;
-    tmp0 = spu_shuffle( vec0.get128(), vec2.get128(), _VECTORMATH_SHUF_XAYB );
-    tmp1 = spu_shuffle( vec1.get128(), vec3.get128(), _VECTORMATH_SHUF_XAYB );
-    tmp2 = spu_shuffle( vec0.get128(), vec2.get128(), _VECTORMATH_SHUF_ZCWD );
-    tmp3 = spu_shuffle( vec1.get128(), vec3.get128(), _VECTORMATH_SHUF_ZCWD );
-    mX = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_XAYB );
-    mY = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_ZCWD );
-    mZ = spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_XAYB );
-    mW = spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_ZCWD );
-}
-
-inline const Vector4 Vector4::xAxis( )
-{
-    return Vector4( spu_splats(1.0f), spu_splats(0.0f), spu_splats(0.0f), spu_splats(0.0f) );
-}
-
-inline const Vector4 Vector4::yAxis( )
-{
-    return Vector4( spu_splats(0.0f), spu_splats(1.0f), spu_splats(0.0f), spu_splats(0.0f) );
-}
-
-inline const Vector4 Vector4::zAxis( )
-{
-    return Vector4( spu_splats(0.0f), spu_splats(0.0f), spu_splats(1.0f), spu_splats(0.0f) );
-}
-
-inline const Vector4 Vector4::wAxis( )
-{
-    return Vector4( spu_splats(0.0f), spu_splats(0.0f), spu_splats(0.0f), spu_splats(1.0f) );
-}
-
-inline const Vector4 lerp( vec_float4 t, const Vector4 & vec0, const Vector4 & vec1 )
-{
-    return ( vec0 + ( ( vec1 - vec0 ) * t ) );
-}
-
-inline const Vector4 slerp( vec_float4 t, const Vector4 & unitVec0, const Vector4 & unitVec1 )
-{
-    vec_float4 recipSinAngle, scale0, scale1, cosAngle, angle;
-    vec_uint4 selectMask;
-    cosAngle = dot( unitVec0, unitVec1 );
-    selectMask = (vec_uint4)spu_cmpgt( spu_splats(_VECTORMATH_SLERP_TOL), cosAngle );
-    angle = acosf4( cosAngle );
-    recipSinAngle = recipf4( sinf4( angle ) );
-    scale0 = spu_sel( spu_sub( spu_splats(1.0f), t ), spu_mul( sinf4( spu_mul( spu_sub( spu_splats(1.0f), t ), angle ) ), recipSinAngle ), selectMask );
-    scale1 = spu_sel( t, spu_mul( sinf4( spu_mul( t, angle ) ), recipSinAngle ), selectMask );
-    return ( ( unitVec0 * scale0 ) + ( unitVec1 * scale1 ) );
-}
-
-inline void Vector4::get4Aos( Aos::Vector4 & result0, Aos::Vector4 & result1, Aos::Vector4 & result2, Aos::Vector4 & result3 ) const
-{
-    vec_float4 tmp0, tmp1, tmp2, tmp3;
-    tmp0 = spu_shuffle( mX, mZ, _VECTORMATH_SHUF_XAYB );
-    tmp1 = spu_shuffle( mY, mW, _VECTORMATH_SHUF_XAYB );
-    tmp2 = spu_shuffle( mX, mZ, _VECTORMATH_SHUF_ZCWD );
-    tmp3 = spu_shuffle( mY, mW, _VECTORMATH_SHUF_ZCWD );
-    result0 = Aos::Vector4( spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_XAYB ) );
-    result1 = Aos::Vector4( spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_ZCWD ) );
-    result2 = Aos::Vector4( spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_XAYB ) );
-    result3 = Aos::Vector4( spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_ZCWD ) );
-}
-
-inline void storeHalfFloats( const Vector4 & vec, vec_ushort8 * twoQuads )
-{
-    Aos::Vector4 v0, v1, v2, v3;
-    vec.get4Aos( v0, v1, v2, v3 );
-    twoQuads[0] = _vmath2VfToHalfFloats(v0.get128(), v1.get128());
-    twoQuads[1] = _vmath2VfToHalfFloats(v2.get128(), v3.get128());
-}
-
-inline Vector4 & Vector4::operator =( const Vector4 & vec )
-{
-    mX = vec.mX;
-    mY = vec.mY;
-    mZ = vec.mZ;
-    mW = vec.mW;
-    return *this;
-}
-
-inline Vector4 & Vector4::setXYZ( const Vector3 & vec )
-{
-    mX = vec.getX();
-    mY = vec.getY();
-    mZ = vec.getZ();
-    return *this;
-}
-
-inline const Vector3 Vector4::getXYZ( ) const
-{
-    return Vector3( mX, mY, mZ );
-}
-
-inline Vector4 & Vector4::setX( vec_float4 _x )
-{
-    mX = _x;
-    return *this;
-}
-
-inline vec_float4 Vector4::getX( ) const
-{
-    return mX;
-}
-
-inline Vector4 & Vector4::setY( vec_float4 _y )
-{
-    mY = _y;
-    return *this;
-}
-
-inline vec_float4 Vector4::getY( ) const
-{
-    return mY;
-}
-
-inline Vector4 & Vector4::setZ( vec_float4 _z )
-{
-    mZ = _z;
-    return *this;
-}
-
-inline vec_float4 Vector4::getZ( ) const
-{
-    return mZ;
-}
-
-inline Vector4 & Vector4::setW( vec_float4 _w )
-{
-    mW = _w;
-    return *this;
-}
-
-inline vec_float4 Vector4::getW( ) const
-{
-    return mW;
-}
-
-inline Vector4 & Vector4::setElem( int idx, vec_float4 value )
-{
-    *(&mX + idx) = value;
-    return *this;
-}
-
-inline vec_float4 Vector4::getElem( int idx ) const
-{
-    return *(&mX + idx);
-}
-
-inline Vector4::vec_float4_t & Vector4::operator []( int idx )
-{
-    return *(&mX + idx);
-}
-
-inline vec_float4 Vector4::operator []( int idx ) const
-{
-    return *(&mX + idx);
-}
-
-inline const Vector4 Vector4::operator +( const Vector4 & vec ) const
-{
-    return Vector4(
-        spu_add( mX, vec.mX ),
-        spu_add( mY, vec.mY ),
-        spu_add( mZ, vec.mZ ),
-        spu_add( mW, vec.mW )
-    );
-}
-
-inline const Vector4 Vector4::operator -( const Vector4 & vec ) const
-{
-    return Vector4(
-        spu_sub( mX, vec.mX ),
-        spu_sub( mY, vec.mY ),
-        spu_sub( mZ, vec.mZ ),
-        spu_sub( mW, vec.mW )
-    );
-}
-
-inline const Vector4 Vector4::operator *( vec_float4 scalar ) const
-{
-    return Vector4(
-        spu_mul( mX, scalar ),
-        spu_mul( mY, scalar ),
-        spu_mul( mZ, scalar ),
-        spu_mul( mW, scalar )
-    );
-}
-
-inline Vector4 & Vector4::operator +=( const Vector4 & vec )
-{
-    *this = *this + vec;
-    return *this;
-}
-
-inline Vector4 & Vector4::operator -=( const Vector4 & vec )
-{
-    *this = *this - vec;
-    return *this;
-}
-
-inline Vector4 & Vector4::operator *=( vec_float4 scalar )
-{
-    *this = *this * scalar;
-    return *this;
-}
-
-inline const Vector4 Vector4::operator /( vec_float4 scalar ) const
-{
-    return Vector4(
-        divf4( mX, scalar ),
-        divf4( mY, scalar ),
-        divf4( mZ, scalar ),
-        divf4( mW, scalar )
-    );
-}
-
-inline Vector4 & Vector4::operator /=( vec_float4 scalar )
-{
-    *this = *this / scalar;
-    return *this;
-}
-
-inline const Vector4 Vector4::operator -( ) const
-{
-    return Vector4(
-        negatef4( mX ),
-        negatef4( mY ),
-        negatef4( mZ ),
-        negatef4( mW )
-    );
-}
-
-inline const Vector4 operator *( vec_float4 scalar, const Vector4 & vec )
-{
-    return vec * scalar;
-}
-
-inline const Vector4 mulPerElem( const Vector4 & vec0, const Vector4 & vec1 )
-{
-    return Vector4(
-        spu_mul( vec0.getX(), vec1.getX() ),
-        spu_mul( vec0.getY(), vec1.getY() ),
-        spu_mul( vec0.getZ(), vec1.getZ() ),
-        spu_mul( vec0.getW(), vec1.getW() )
-    );
-}
-
-inline const Vector4 divPerElem( const Vector4 & vec0, const Vector4 & vec1 )
-{
-    return Vector4(
-        divf4( vec0.getX(), vec1.getX() ),
-        divf4( vec0.getY(), vec1.getY() ),
-        divf4( vec0.getZ(), vec1.getZ() ),
-        divf4( vec0.getW(), vec1.getW() )
-    );
-}
-
-inline const Vector4 recipPerElem( const Vector4 & vec )
-{
-    return Vector4(
-        recipf4( vec.getX() ),
-        recipf4( vec.getY() ),
-        recipf4( vec.getZ() ),
-        recipf4( vec.getW() )
-    );
-}
-
-inline const Vector4 sqrtPerElem( const Vector4 & vec )
-{
-    return Vector4(
-        sqrtf4( vec.getX() ),
-        sqrtf4( vec.getY() ),
-        sqrtf4( vec.getZ() ),
-        sqrtf4( vec.getW() )
-    );
-}
-
-inline const Vector4 rsqrtPerElem( const Vector4 & vec )
-{
-    return Vector4(
-        rsqrtf4( vec.getX() ),
-        rsqrtf4( vec.getY() ),
-        rsqrtf4( vec.getZ() ),
-        rsqrtf4( vec.getW() )
-    );
-}
-
-inline const Vector4 absPerElem( const Vector4 & vec )
-{
-    return Vector4(
-        fabsf4( vec.getX() ),
-        fabsf4( vec.getY() ),
-        fabsf4( vec.getZ() ),
-        fabsf4( vec.getW() )
-    );
-}
-
-inline const Vector4 copySignPerElem( const Vector4 & vec0, const Vector4 & vec1 )
-{
-    return Vector4(
-        copysignf4( vec0.getX(), vec1.getX() ),
-        copysignf4( vec0.getY(), vec1.getY() ),
-        copysignf4( vec0.getZ(), vec1.getZ() ),
-        copysignf4( vec0.getW(), vec1.getW() )
-    );
-}
-
-inline const Vector4 maxPerElem( const Vector4 & vec0, const Vector4 & vec1 )
-{
-    return Vector4(
-        fmaxf4( vec0.getX(), vec1.getX() ),
-        fmaxf4( vec0.getY(), vec1.getY() ),
-        fmaxf4( vec0.getZ(), vec1.getZ() ),
-        fmaxf4( vec0.getW(), vec1.getW() )
-    );
-}
-
-inline vec_float4 maxElem( const Vector4 & vec )
-{
-    vec_float4 result;
-    result = fmaxf4( vec.getX(), vec.getY() );
-    result = fmaxf4( vec.getZ(), result );
-    result = fmaxf4( vec.getW(), result );
-    return result;
-}
-
-inline const Vector4 minPerElem( const Vector4 & vec0, const Vector4 & vec1 )
-{
-    return Vector4(
-        fminf4( vec0.getX(), vec1.getX() ),
-        fminf4( vec0.getY(), vec1.getY() ),
-        fminf4( vec0.getZ(), vec1.getZ() ),
-        fminf4( vec0.getW(), vec1.getW() )
-    );
-}
-
-inline vec_float4 minElem( const Vector4 & vec )
-{
-    vec_float4 result;
-    result = fminf4( vec.getX(), vec.getY() );
-    result = fminf4( vec.getZ(), result );
-    result = fminf4( vec.getW(), result );
-    return result;
-}
-
-inline vec_float4 sum( const Vector4 & vec )
-{
-    vec_float4 result;
-    result = spu_add( vec.getX(), vec.getY() );
-    result = spu_add( result, vec.getZ() );
-    result = spu_add( result, vec.getW() );
-    return result;
-}
-
-inline vec_float4 dot( const Vector4 & vec0, const Vector4 & vec1 )
-{
-    vec_float4 result;
-    result = spu_mul( vec0.getX(), vec1.getX() );
-    result = spu_add( result, spu_mul( vec0.getY(), vec1.getY() ) );
-    result = spu_add( result, spu_mul( vec0.getZ(), vec1.getZ() ) );
-    result = spu_add( result, spu_mul( vec0.getW(), vec1.getW() ) );
-    return result;
-}
-
-inline vec_float4 lengthSqr( const Vector4 & vec )
-{
-    vec_float4 result;
-    result = spu_mul( vec.getX(), vec.getX() );
-    result = spu_add( result, spu_mul( vec.getY(), vec.getY() ) );
-    result = spu_add( result, spu_mul( vec.getZ(), vec.getZ() ) );
-    result = spu_add( result, spu_mul( vec.getW(), vec.getW() ) );
-    return result;
-}
-
-inline vec_float4 length( const Vector4 & vec )
-{
-    return sqrtf4( lengthSqr( vec ) );
-}
-
-inline const Vector4 normalize( const Vector4 & vec )
-{
-    vec_float4 lenSqr, lenInv;
-    lenSqr = lengthSqr( vec );
-    lenInv = rsqrtf4( lenSqr );
-    return Vector4(
-        spu_mul( vec.getX(), lenInv ),
-        spu_mul( vec.getY(), lenInv ),
-        spu_mul( vec.getZ(), lenInv ),
-        spu_mul( vec.getW(), lenInv )
-    );
-}
-
-inline const Vector4 select( const Vector4 & vec0, const Vector4 & vec1, vec_uint4 select1 )
-{
-    return Vector4(
-        spu_sel( vec0.getX(), vec1.getX(), select1 ),
-        spu_sel( vec0.getY(), vec1.getY(), select1 ),
-        spu_sel( vec0.getZ(), vec1.getZ(), select1 ),
-        spu_sel( vec0.getW(), vec1.getW(), select1 )
-    );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( const Vector4 & vec )
-{
-    Aos::Vector4 vec0, vec1, vec2, vec3;
-    vec.get4Aos( vec0, vec1, vec2, vec3 );
-    printf("slot 0:\n");
-    print( vec0 );
-    printf("slot 1:\n");
-    print( vec1 );
-    printf("slot 2:\n");
-    print( vec2 );
-    printf("slot 3:\n");
-    print( vec3 );
-}
-
-inline void print( const Vector4 & vec, const char * name )
-{
-    Aos::Vector4 vec0, vec1, vec2, vec3;
-    printf( "%s:\n", name );
-    vec.get4Aos( vec0, vec1, vec2, vec3 );
-    printf("slot 0:\n");
-    print( vec0 );
-    printf("slot 1:\n");
-    print( vec1 );
-    printf("slot 2:\n");
-    print( vec2 );
-    printf("slot 3:\n");
-    print( vec3 );
-}
-
-#endif
-
-inline Point3::Point3( const Point3 & pnt )
-{
-    mX = pnt.mX;
-    mY = pnt.mY;
-    mZ = pnt.mZ;
-}
-
-inline Point3::Point3( vec_float4 _x, vec_float4 _y, vec_float4 _z )
-{
-    mX = _x;
-    mY = _y;
-    mZ = _z;
-}
-
-inline Point3::Point3( const Vector3 & vec )
-{
-    mX = vec.getX();
-    mY = vec.getY();
-    mZ = vec.getZ();
-}
-
-inline Point3::Point3( vec_float4 scalar )
-{
-    mX = scalar;
-    mY = scalar;
-    mZ = scalar;
-}
-
-inline Point3::Point3( Aos::Point3 pnt )
-{
-    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)0x00010203);
-    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)0x04050607);
-    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)0x08090a0b);
-    vec_float4 vec128 = pnt.get128();
-    mX = spu_shuffle( vec128, vec128, shuffle_xxxx );
-    mY = spu_shuffle( vec128, vec128, shuffle_yyyy );
-    mZ = spu_shuffle( vec128, vec128, shuffle_zzzz );
-}
-
-inline Point3::Point3( Aos::Point3 pnt0, Aos::Point3 pnt1, Aos::Point3 pnt2, Aos::Point3 pnt3 )
-{
-    vec_float4 tmp0, tmp1, tmp2, tmp3;
-    tmp0 = spu_shuffle( pnt0.get128(), pnt2.get128(), _VECTORMATH_SHUF_XAYB );
-    tmp1 = spu_shuffle( pnt1.get128(), pnt3.get128(), _VECTORMATH_SHUF_XAYB );
-    tmp2 = spu_shuffle( pnt0.get128(), pnt2.get128(), _VECTORMATH_SHUF_ZCWD );
-    tmp3 = spu_shuffle( pnt1.get128(), pnt3.get128(), _VECTORMATH_SHUF_ZCWD );
-    mX = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_XAYB );
-    mY = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_ZCWD );
-    mZ = spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_XAYB );
-}
-
-inline const Point3 lerp( vec_float4 t, const Point3 & pnt0, const Point3 & pnt1 )
-{
-    return ( pnt0 + ( ( pnt1 - pnt0 ) * t ) );
-}
-
-inline void Point3::get4Aos( Aos::Point3 & result0, Aos::Point3 & result1, Aos::Point3 & result2, Aos::Point3 & result3 ) const
-{
-    vec_float4 tmp0, tmp1;
-    tmp0 = spu_shuffle( mX, mZ, _VECTORMATH_SHUF_XAYB );
-    tmp1 = spu_shuffle( mX, mZ, _VECTORMATH_SHUF_ZCWD );
-    result0 = Aos::Point3( spu_shuffle( tmp0, mY, _VECTORMATH_SHUF_XAYB ) );
-    result1 = Aos::Point3( spu_shuffle( tmp0, mY, _VECTORMATH_SHUF_ZBW0 ) );
-    result2 = Aos::Point3( spu_shuffle( tmp1, mY, _VECTORMATH_SHUF_XCY0 ) );
-    result3 = Aos::Point3( spu_shuffle( tmp1, mY, _VECTORMATH_SHUF_ZDW0 ) );
-}
-
-inline void loadXYZArray( Point3 & vec, const vec_float4 * threeQuads )
-{
-    vec_float4 xyxy, yzyz, zxzx, xyzx, yzxy, zxyz;
-    xyzx = threeQuads[0];
-    yzxy = threeQuads[1];
-    zxyz = threeQuads[2];
-    xyxy = spu_shuffle( xyzx, yzxy, _VECTORMATH_SHUF_XYCD );
-    zxzx = spu_shuffle( zxyz, xyzx, _VECTORMATH_SHUF_XYCD );
-    yzyz = spu_shuffle( yzxy, zxyz, _VECTORMATH_SHUF_XYCD );
-    vec.setX( spu_shuffle( xyxy, zxzx, _VECTORMATH_SHUF_XDZB ) );
-    vec.setY( spu_shuffle( xyxy, yzyz, _VECTORMATH_SHUF_YAWC ) );
-    vec.setZ( spu_shuffle( zxzx, yzyz, _VECTORMATH_SHUF_ZBXD ) );
-}
-
-inline void storeXYZArray( const Point3 & vec, vec_float4 * threeQuads )
-{
-    vec_float4 xyzx, yzxy, zxyz, xyxy, zxzx, yzyz;
-    xyxy = spu_shuffle( vec.getX(), vec.getY(), _VECTORMATH_SHUF_XAZC );
-    zxzx = spu_shuffle( vec.getZ(), vec.getX(), _VECTORMATH_SHUF_ZDXB );
-    yzyz = spu_shuffle( vec.getY(), vec.getZ(), _VECTORMATH_SHUF_YBWD );
-    xyzx = spu_shuffle( xyxy, zxzx, _VECTORMATH_SHUF_XYCD );
-    yzxy = spu_shuffle( yzyz, xyxy, _VECTORMATH_SHUF_XYCD );
-    zxyz = spu_shuffle( zxzx, yzyz, _VECTORMATH_SHUF_XYCD );
-    threeQuads[0] = xyzx;
-    threeQuads[1] = yzxy;
-    threeQuads[2] = zxyz;
-}
-
-inline void storeHalfFloats( const Point3 & pnt0, const Point3 & pnt1, vec_ushort8 * threeQuads )
-{
-    vec_float4 xyz0[3];
-    vec_float4 xyz1[3];
-    storeXYZArray( pnt0, xyz0 );
-    storeXYZArray( pnt1, xyz1 );
-    threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
-    threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
-    threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
-}
-
-inline Point3 & Point3::operator =( const Point3 & pnt )
-{
-    mX = pnt.mX;
-    mY = pnt.mY;
-    mZ = pnt.mZ;
-    return *this;
-}
-
-inline Point3 & Point3::setX( vec_float4 _x )
-{
-    mX = _x;
-    return *this;
-}
-
-inline vec_float4 Point3::getX( ) const
-{
-    return mX;
-}
-
-inline Point3 & Point3::setY( vec_float4 _y )
-{
-    mY = _y;
-    return *this;
-}
-
-inline vec_float4 Point3::getY( ) const
-{
-    return mY;
-}
-
-inline Point3 & Point3::setZ( vec_float4 _z )
-{
-    mZ = _z;
-    return *this;
-}
-
-inline vec_float4 Point3::getZ( ) const
-{
-    return mZ;
-}
-
-inline Point3 & Point3::setElem( int idx, vec_float4 value )
-{
-    *(&mX + idx) = value;
-    return *this;
-}
-
-inline vec_float4 Point3::getElem( int idx ) const
-{
-    return *(&mX + idx);
-}
-
-inline Point3::vec_float4_t & Point3::operator []( int idx )
-{
-    return *(&mX + idx);
-}
-
-inline vec_float4 Point3::operator []( int idx ) const
-{
-    return *(&mX + idx);
-}
-
-inline const Vector3 Point3::operator -( const Point3 & pnt ) const
-{
-    return Vector3(
-        spu_sub( mX, pnt.mX ),
-        spu_sub( mY, pnt.mY ),
-        spu_sub( mZ, pnt.mZ )
-    );
-}
-
-inline const Point3 Point3::operator +( const Vector3 & vec ) const
-{
-    return Point3(
-        spu_add( mX, vec.getX() ),
-        spu_add( mY, vec.getY() ),
-        spu_add( mZ, vec.getZ() )
-    );
-}
-
-inline const Point3 Point3::operator -( const Vector3 & vec ) const
-{
-    return Point3(
-        spu_sub( mX, vec.getX() ),
-        spu_sub( mY, vec.getY() ),
-        spu_sub( mZ, vec.getZ() )
-    );
-}
-
-inline Point3 & Point3::operator +=( const Vector3 & vec )
-{
-    *this = *this + vec;
-    return *this;
-}
-
-inline Point3 & Point3::operator -=( const Vector3 & vec )
-{
-    *this = *this - vec;
-    return *this;
-}
-
-inline const Point3 mulPerElem( const Point3 & pnt0, const Point3 & pnt1 )
-{
-    return Point3(
-        spu_mul( pnt0.getX(), pnt1.getX() ),
-        spu_mul( pnt0.getY(), pnt1.getY() ),
-        spu_mul( pnt0.getZ(), pnt1.getZ() )
-    );
-}
-
-inline const Point3 divPerElem( const Point3 & pnt0, const Point3 & pnt1 )
-{
-    return Point3(
-        divf4( pnt0.getX(), pnt1.getX() ),
-        divf4( pnt0.getY(), pnt1.getY() ),
-        divf4( pnt0.getZ(), pnt1.getZ() )
-    );
-}
-
-inline const Point3 recipPerElem( const Point3 & pnt )
-{
-    return Point3(
-        recipf4( pnt.getX() ),
-        recipf4( pnt.getY() ),
-        recipf4( pnt.getZ() )
-    );
-}
-
-inline const Point3 sqrtPerElem( const Point3 & pnt )
-{
-    return Point3(
-        sqrtf4( pnt.getX() ),
-        sqrtf4( pnt.getY() ),
-        sqrtf4( pnt.getZ() )
-    );
-}
-
-inline const Point3 rsqrtPerElem( const Point3 & pnt )
-{
-    return Point3(
-        rsqrtf4( pnt.getX() ),
-        rsqrtf4( pnt.getY() ),
-        rsqrtf4( pnt.getZ() )
-    );
-}
-
-inline const Point3 absPerElem( const Point3 & pnt )
-{
-    return Point3(
-        fabsf4( pnt.getX() ),
-        fabsf4( pnt.getY() ),
-        fabsf4( pnt.getZ() )
-    );
-}
-
-inline const Point3 copySignPerElem( const Point3 & pnt0, const Point3 & pnt1 )
-{
-    return Point3(
-        copysignf4( pnt0.getX(), pnt1.getX() ),
-        copysignf4( pnt0.getY(), pnt1.getY() ),
-        copysignf4( pnt0.getZ(), pnt1.getZ() )
-    );
-}
-
-inline const Point3 maxPerElem( const Point3 & pnt0, const Point3 & pnt1 )
-{
-    return Point3(
-        fmaxf4( pnt0.getX(), pnt1.getX() ),
-        fmaxf4( pnt0.getY(), pnt1.getY() ),
-        fmaxf4( pnt0.getZ(), pnt1.getZ() )
-    );
-}
-
-inline vec_float4 maxElem( const Point3 & pnt )
-{
-    vec_float4 result;
-    result = fmaxf4( pnt.getX(), pnt.getY() );
-    result = fmaxf4( pnt.getZ(), result );
-    return result;
-}
-
-inline const Point3 minPerElem( const Point3 & pnt0, const Point3 & pnt1 )
-{
-    return Point3(
-        fminf4( pnt0.getX(), pnt1.getX() ),
-        fminf4( pnt0.getY(), pnt1.getY() ),
-        fminf4( pnt0.getZ(), pnt1.getZ() )
-    );
-}
-
-inline vec_float4 minElem( const Point3 & pnt )
-{
-    vec_float4 result;
-    result = fminf4( pnt.getX(), pnt.getY() );
-    result = fminf4( pnt.getZ(), result );
-    return result;
-}
-
-inline vec_float4 sum( const Point3 & pnt )
-{
-    vec_float4 result;
-    result = spu_add( pnt.getX(), pnt.getY() );
-    result = spu_add( result, pnt.getZ() );
-    return result;
-}
-
-inline const Point3 scale( const Point3 & pnt, vec_float4 scaleVal )
-{
-    return mulPerElem( pnt, Point3( scaleVal ) );
-}
-
-inline const Point3 scale( const Point3 & pnt, const Vector3 & scaleVec )
-{
-    return mulPerElem( pnt, Point3( scaleVec ) );
-}
-
-inline vec_float4 projection( const Point3 & pnt, const Vector3 & unitVec )
-{
-    vec_float4 result;
-    result = spu_mul( pnt.getX(), unitVec.getX() );
-    result = spu_add( result, spu_mul( pnt.getY(), unitVec.getY() ) );
-    result = spu_add( result, spu_mul( pnt.getZ(), unitVec.getZ() ) );
-    return result;
-}
-
-inline vec_float4 distSqrFromOrigin( const Point3 & pnt )
-{
-    return lengthSqr( Vector3( pnt ) );
-}
-
-inline vec_float4 distFromOrigin( const Point3 & pnt )
-{
-    return length( Vector3( pnt ) );
-}
-
-inline vec_float4 distSqr( const Point3 & pnt0, const Point3 & pnt1 )
-{
-    return lengthSqr( ( pnt1 - pnt0 ) );
-}
-
-inline vec_float4 dist( const Point3 & pnt0, const Point3 & pnt1 )
-{
-    return length( ( pnt1 - pnt0 ) );
-}
-
-inline const Point3 select( const Point3 & pnt0, const Point3 & pnt1, vec_uint4 select1 )
-{
-    return Point3(
-        spu_sel( pnt0.getX(), pnt1.getX(), select1 ),
-        spu_sel( pnt0.getY(), pnt1.getY(), select1 ),
-        spu_sel( pnt0.getZ(), pnt1.getZ(), select1 )
-    );
-}
-
-#ifdef _VECTORMATH_DEBUG
-
-inline void print( const Point3 & pnt )
-{
-    Aos::Point3 vec0, vec1, vec2, vec3;
-    pnt.get4Aos( vec0, vec1, vec2, vec3 );
-    printf("slot 0:\n");
-    print( vec0 );
-    printf("slot 1:\n");
-    print( vec1 );
-    printf("slot 2:\n");
-    print( vec2 );
-    printf("slot 3:\n");
-    print( vec3 );
-}
-
-inline void print( const Point3 & pnt, const char * name )
-{
-    Aos::Point3 vec0, vec1, vec2, vec3;
-    printf( "%s:\n", name );
-    pnt.get4Aos( vec0, vec1, vec2, vec3 );
-    printf("slot 0:\n");
-    print( vec0 );
-    printf("slot 1:\n");
-    print( vec1 );
-    printf("slot 2:\n");
-    print( vec2 );
-    printf("slot 3:\n");
-    print( vec3 );
-}
-
-#endif
-
-} // namespace Soa
-} // namespace Vectormath
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_VEC_SOA_CPP_H
+#define _VECTORMATH_VEC_SOA_CPP_H
+//-----------------------------------------------------------------------------
+// Constants
+// Shuffle word selectors: [x,y,z,w] pick words 0-3 of the first operand, [a,b,c,d] pick words 0-3 of the second; 0 yields a zero word.
+
+#define _VECTORMATH_SHUF_X 0x00010203
+#define _VECTORMATH_SHUF_Y 0x04050607
+#define _VECTORMATH_SHUF_Z 0x08090a0b
+#define _VECTORMATH_SHUF_W 0x0c0d0e0f
+#define _VECTORMATH_SHUF_A 0x10111213
+#define _VECTORMATH_SHUF_B 0x14151617
+#define _VECTORMATH_SHUF_C 0x18191a1b
+#define _VECTORMATH_SHUF_D 0x1c1d1e1f
+#define _VECTORMATH_SHUF_0 0x80808080
+#define _VECTORMATH_SHUF_XAYB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_B })
+#define _VECTORMATH_SHUF_ZCWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_D })
+#define _VECTORMATH_SHUF_ZBW0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_0 })
+#define _VECTORMATH_SHUF_XCY0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_0 })
+#define _VECTORMATH_SHUF_ZDW0 ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_D, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_0 })
+#define _VECTORMATH_SHUF_XAZC ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_C })
+#define _VECTORMATH_SHUF_ZDXB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_D, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_B })
+#define _VECTORMATH_SHUF_YBWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_D })
+#define _VECTORMATH_SHUF_XDZB ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_D, _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_B })
+#define _VECTORMATH_SHUF_YAWC ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_A, _VECTORMATH_SHUF_W, _VECTORMATH_SHUF_C })
+#define _VECTORMATH_SHUF_ZBXD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_Z, _VECTORMATH_SHUF_B, _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_D })
+#define _VECTORMATH_SHUF_XYCD ((vec_uchar16)(vec_uint4){ _VECTORMATH_SHUF_X, _VECTORMATH_SHUF_Y, _VECTORMATH_SHUF_C, _VECTORMATH_SHUF_D })
+#define _VECTORMATH_SLERP_TOL 0.999f
+
+//-----------------------------------------------------------------------------
+// Definitions
+
+#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
+#define _VECTORMATH_INTERNAL_FUNCTIONS
+
+#endif
+
+namespace Vectormath {
+namespace Soa {
+
+inline Vector3::Vector3( const Vector3 & vec )
+{   // Copy constructor: takes mX, mY and mZ from vec.
+    mX = vec.mX;
+    mY = vec.mY;
+    mZ = vec.mZ;
+}
+
+inline Vector3::Vector3( vec_float4 _x, vec_float4 _y, vec_float4 _z )
+{   // Builds the vector from three quadwords, stored as mX, mY and mZ.
+    mX = _x;
+    mY = _y;
+    mZ = _z;
+}
+
+inline Vector3::Vector3( const Point3 & pnt )
+{   // Converts a point: its x/y/z quadwords are copied unchanged.
+    mX = pnt.getX();
+    mY = pnt.getY();
+    mZ = pnt.getZ();
+}
+
+inline Vector3::Vector3( vec_float4 scalar )
+{   // Stores the same quadword in all three components.
+    mX = scalar;
+    mY = scalar;
+    mZ = scalar;
+}
+
+inline Vector3::Vector3( Aos::Vector3 vec )
+{   // Broadcast one AoS vector: all four lanes of mX/mY/mZ receive word 0/1/2 of vec.get128().
+    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)_VECTORMATH_SHUF_X); // named selector instead of a repeated literal
+    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)_VECTORMATH_SHUF_Y);
+    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)_VECTORMATH_SHUF_Z);
+    vec_float4 vec128 = vec.get128();
+    mX = spu_shuffle( vec128, vec128, shuffle_xxxx );
+    mY = spu_shuffle( vec128, vec128, shuffle_yyyy );
+    mZ = spu_shuffle( vec128, vec128, shuffle_zzzz );
+}
+
+inline Vector3::Vector3( Aos::Vector3 vec0, Aos::Vector3 vec1, Aos::Vector3 vec2, Aos::Vector3 vec3 )
+{   // Gathers four AoS vectors into SoA form: lane i of mX/mY/mZ holds word 0/1/2 of vec<i>.
+    vec_float4 tmp0, tmp1, tmp2, tmp3;
+    tmp0 = spu_shuffle( vec0.get128(), vec2.get128(), _VECTORMATH_SHUF_XAYB ); // x0 x2 y0 y2
+    tmp1 = spu_shuffle( vec1.get128(), vec3.get128(), _VECTORMATH_SHUF_XAYB ); // x1 x3 y1 y3
+    tmp2 = spu_shuffle( vec0.get128(), vec2.get128(), _VECTORMATH_SHUF_ZCWD ); // z0 z2 w0 w2
+    tmp3 = spu_shuffle( vec1.get128(), vec3.get128(), _VECTORMATH_SHUF_ZCWD ); // z1 z3 w1 w3
+    mX = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_XAYB );
+    mY = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_ZCWD );
+    mZ = spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_XAYB ); // the w words in tmp2/tmp3 are dropped
+}
+
+inline const Vector3 Vector3::xAxis( )
+{   // (1,0,0) replicated across every lane.
+    return Vector3( spu_splats(1.0f), spu_splats(0.0f), spu_splats(0.0f) );
+}
+
+inline const Vector3 Vector3::yAxis( )
+{   // (0,1,0) replicated across every lane.
+    return Vector3( spu_splats(0.0f), spu_splats(1.0f), spu_splats(0.0f) );
+}
+
+inline const Vector3 Vector3::zAxis( )
+{   // (0,0,1) replicated across every lane.
+    return Vector3( spu_splats(0.0f), spu_splats(0.0f), spu_splats(1.0f) );
+}
+
+inline const Vector3 lerp( vec_float4 t, const Vector3 & vec0, const Vector3 & vec1 )
+{   // Linear interpolation vec0 + (vec1 - vec0) * t; t is a quadword, so each lane has its own parameter.
+    return ( vec0 + ( ( vec1 - vec0 ) * t ) );
+}
+
+inline const Vector3 slerp( vec_float4 t, const Vector3 & unitVec0, const Vector3 & unitVec1 )
+{
+    const vec_float4 cosAngle = dot( unitVec0, unitVec1 );
+    // Mask is set where cosAngle < tolerance: those lanes take the sine-ratio weights, the rest keep (1-t, t).
+    const vec_uint4 useSlerp = (vec_uint4)spu_cmpgt( spu_splats(_VECTORMATH_SLERP_TOL), cosAngle );
+    const vec_float4 angle = acosf4( cosAngle );
+    const vec_float4 recipSinAngle = recipf4( sinf4( angle ) );
+    const vec_float4 oneMinusT = spu_sub( spu_splats(1.0f), t );
+    const vec_float4 scale0 = spu_sel( oneMinusT, spu_mul( sinf4( spu_mul( oneMinusT, angle ) ), recipSinAngle ), useSlerp );
+    const vec_float4 scale1 = spu_sel( t, spu_mul( sinf4( spu_mul( t, angle ) ), recipSinAngle ), useSlerp );
+    return ( ( unitVec0 * scale0 ) + ( unitVec1 * scale1 ) );
+}
+
+inline void Vector3::get4Aos( Aos::Vector3 & result0, Aos::Vector3 & result1, Aos::Vector3 & result2, Aos::Vector3 & result3 ) const
+{   // Scatters the four lanes back to AoS: result<i> is built from lane i of (mX, mY, mZ).
+    vec_float4 tmp0, tmp1;
+    tmp0 = spu_shuffle( mX, mZ, _VECTORMATH_SHUF_XAYB ); // x0 z0 x1 z1
+    tmp1 = spu_shuffle( mX, mZ, _VECTORMATH_SHUF_ZCWD ); // x2 z2 x3 z3
+    result0 = Aos::Vector3( spu_shuffle( tmp0, mY, _VECTORMATH_SHUF_XAYB ) ); // x0 y0 z0 y1 (4th word is a leftover, not zero)
+    result1 = Aos::Vector3( spu_shuffle( tmp0, mY, _VECTORMATH_SHUF_ZBW0 ) ); // x1 y1 z1 0
+    result2 = Aos::Vector3( spu_shuffle( tmp1, mY, _VECTORMATH_SHUF_XCY0 ) ); // x2 y2 z2 0
+    result3 = Aos::Vector3( spu_shuffle( tmp1, mY, _VECTORMATH_SHUF_ZDW0 ) ); // x3 y3 z3 0
+}
+
+inline void loadXYZArray( Vector3 & vec, const vec_float4 * threeQuads )
+{   // Reads four packed xyz triples from threeQuads[0..2] and de-interleaves them into vec's x/y/z quadwords.
+    vec_float4 xyxy, yzyz, zxzx, xyzx, yzxy, zxyz;
+    xyzx = threeQuads[0]; // x0 y0 z0 x1
+    yzxy = threeQuads[1]; // y1 z1 x2 y2
+    zxyz = threeQuads[2]; // z2 x3 y3 z3
+    xyxy = spu_shuffle( xyzx, yzxy, _VECTORMATH_SHUF_XYCD ); // x0 y0 x2 y2
+    zxzx = spu_shuffle( zxyz, xyzx, _VECTORMATH_SHUF_XYCD ); // z2 x3 z0 x1
+    yzyz = spu_shuffle( yzxy, zxyz, _VECTORMATH_SHUF_XYCD ); // y1 z1 y3 z3
+    vec.setX( spu_shuffle( xyxy, zxzx, _VECTORMATH_SHUF_XDZB ) ); // x0 x1 x2 x3
+    vec.setY( spu_shuffle( xyxy, yzyz, _VECTORMATH_SHUF_YAWC ) ); // y0 y1 y2 y3
+    vec.setZ( spu_shuffle( zxzx, yzyz, _VECTORMATH_SHUF_ZBXD ) ); // z0 z1 z2 z3
+}
+
+inline void storeXYZArray( const Vector3 & vec, vec_float4 * threeQuads )
+{   // Interleaves vec's x/y/z quadwords into four packed xyz triples and writes them to threeQuads[0..2].
+    vec_float4 xyzx, yzxy, zxyz, xyxy, zxzx, yzyz;
+    xyxy = spu_shuffle( vec.getX(), vec.getY(), _VECTORMATH_SHUF_XAZC ); // x0 y0 x2 y2
+    zxzx = spu_shuffle( vec.getZ(), vec.getX(), _VECTORMATH_SHUF_ZDXB ); // z2 x3 z0 x1
+    yzyz = spu_shuffle( vec.getY(), vec.getZ(), _VECTORMATH_SHUF_YBWD ); // y1 z1 y3 z3
+    xyzx = spu_shuffle( xyxy, zxzx, _VECTORMATH_SHUF_XYCD ); // x0 y0 z0 x1
+    yzxy = spu_shuffle( yzyz, xyxy, _VECTORMATH_SHUF_XYCD ); // y1 z1 x2 y2
+    zxyz = spu_shuffle( zxzx, yzyz, _VECTORMATH_SHUF_XYCD ); // z2 x3 y3 z3
+    threeQuads[0] = xyzx;
+    threeQuads[1] = yzxy;
+    threeQuads[2] = zxyz;
+}
+
+inline void storeHalfFloats( const Vector3 & vec0, const Vector3 & vec1, vec_ushort8 * threeQuads )
+{   // Packs vec0 then vec1 as xyz arrays and converts the six float quadwords, two at a time, into threeQuads[0..2].
+    vec_float4 xyz0[3];
+    vec_float4 xyz1[3];
+    storeXYZArray( vec0, xyz0 );
+    storeXYZArray( vec1, xyz1 );
+    threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
+    threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]); // this output straddles the vec0/vec1 boundary
+    threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
+}
+
+inline Vector3 & Vector3::operator =( const Vector3 & vec )
+{   // Copies all three component quadwords and returns *this.
+    mX = vec.mX;
+    mY = vec.mY;
+    mZ = vec.mZ;
+    return *this;
+}
+
+inline Vector3 & Vector3::setX( vec_float4 _x )
+{   // Replaces the x quadword; returns *this so calls can be chained.
+    mX = _x;
+    return *this;
+}
+
+inline vec_float4 Vector3::getX( ) const
+{   // Returns the x quadword (one x value per lane).
+    return mX;
+}
+
+inline Vector3 & Vector3::setY( vec_float4 _y )
+{   // Replaces the y quadword; returns *this so calls can be chained.
+    mY = _y;
+    return *this;
+}
+
+inline vec_float4 Vector3::getY( ) const
+{   // Returns the y quadword (one y value per lane).
+    return mY;
+}
+
+inline Vector3 & Vector3::setZ( vec_float4 _z )
+{   // Replaces the z quadword; returns *this so calls can be chained.
+    mZ = _z;
+    return *this;
+}
+
+inline vec_float4 Vector3::getZ( ) const
+{   // Returns the z quadword (one z value per lane).
+    return mZ;
+}
+
+inline Vector3 & Vector3::setElem( int idx, vec_float4 value )
+{   // Writes component idx by offsetting from &mX; relies on mX, mY, mZ being contiguous. idx is not range-checked.
+    *(&mX + idx) = value;
+    return *this;
+}
+
+inline vec_float4 Vector3::getElem( int idx ) const
+{   // Reads component idx by offsetting from &mX; idx is not range-checked.
+    return *(&mX + idx);
+}
+
+inline Vector3::vec_float4_t & Vector3::operator []( int idx )
+{   // Mutable reference to component idx (same address arithmetic as setElem).
+    return *(&mX + idx);
+}
+
+inline vec_float4 Vector3::operator []( int idx ) const
+{   // Copy of component idx (same address arithmetic as getElem).
+    return *(&mX + idx);
+}
+
+inline const Vector3 Vector3::operator +( const Vector3 & vec ) const
+{   // Component-wise sum, computed independently in each lane.
+    return Vector3(
+        spu_add( mX, vec.mX ),
+        spu_add( mY, vec.mY ),
+        spu_add( mZ, vec.mZ )
+    );
+}
+
+inline const Vector3 Vector3::operator -( const Vector3 & vec ) const
+{   // Component-wise difference (this - vec).
+    return Vector3(
+        spu_sub( mX, vec.mX ),
+        spu_sub( mY, vec.mY ),
+        spu_sub( mZ, vec.mZ )
+    );
+}
+
+inline const Point3 Vector3::operator +( const Point3 & pnt ) const
+{   // Vector + point yields a Point3 whose components are the per-lane sums.
+    return Point3(
+        spu_add( mX, pnt.getX() ),
+        spu_add( mY, pnt.getY() ),
+        spu_add( mZ, pnt.getZ() )
+    );
+}
+
+inline const Vector3 Vector3::operator *( vec_float4 scalar ) const
+{   // Multiplies every component by scalar; scalar is a quadword, so each lane has its own factor.
+    return Vector3(
+        spu_mul( mX, scalar ),
+        spu_mul( mY, scalar ),
+        spu_mul( mZ, scalar )
+    );
+}
+
+inline Vector3 & Vector3::operator +=( const Vector3 & vec )
+{
+    // In-place add: operator+ builds the sum, operator= stores it and hands back *this.
+    return ( *this = *this + vec );
+}
+
+inline Vector3 & Vector3::operator -=( const Vector3 & vec )
+{
+    // In-place subtract: operator- builds the difference, operator= stores it and hands back *this.
+    return ( *this = *this - vec );
+}
+
+inline Vector3 & Vector3::operator *=( vec_float4 scalar )
+{
+    // In-place scale: operator* builds the product, operator= stores it and hands back *this.
+    return ( *this = *this * scalar );
+}
+
+inline const Vector3 Vector3::operator /( vec_float4 scalar ) const
+{   // Divides every component by scalar with divf4; a zero divisor lane is not special-cased here.
+    return Vector3(
+        divf4( mX, scalar ),
+        divf4( mY, scalar ),
+        divf4( mZ, scalar )
+    );
+}
+
+inline Vector3 & Vector3::operator /=( vec_float4 scalar )
+{
+    // In-place divide: operator/ builds the quotient, operator= stores it and hands back *this.
+    return ( *this = *this / scalar );
+}
+
+inline const Vector3 Vector3::operator -( ) const
+{   // Unary minus: negatef4 applied to each component.
+    return Vector3(
+        negatef4( mX ),
+        negatef4( mY ),
+        negatef4( mZ )
+    );
+}
+
+inline const Vector3 operator *( vec_float4 scalar, const Vector3 & vec )
+{   // scalar * vec, forwarded to the member vec * scalar.
+    return vec * scalar;
+}
+
+inline const Vector3 mulPerElem( const Vector3 & vec0, const Vector3 & vec1 )
+{   // Component-wise product: x*x, y*y, z*z.
+    return Vector3(
+        spu_mul( vec0.getX(), vec1.getX() ),
+        spu_mul( vec0.getY(), vec1.getY() ),
+        spu_mul( vec0.getZ(), vec1.getZ() )
+    );
+}
+
+inline const Vector3 divPerElem( const Vector3 & vec0, const Vector3 & vec1 )
+{   // Component-wise quotient vec0 / vec1 via divf4.
+    return Vector3(
+        divf4( vec0.getX(), vec1.getX() ),
+        divf4( vec0.getY(), vec1.getY() ),
+        divf4( vec0.getZ(), vec1.getZ() )
+    );
+}
+
+inline const Vector3 recipPerElem( const Vector3 & vec )
+{   // recipf4 applied to each component.
+    return Vector3(
+        recipf4( vec.getX() ),
+        recipf4( vec.getY() ),
+        recipf4( vec.getZ() )
+    );
+}
+
+inline const Vector3 sqrtPerElem( const Vector3 & vec )
+{   // sqrtf4 applied to each component.
+    return Vector3(
+        sqrtf4( vec.getX() ),
+        sqrtf4( vec.getY() ),
+        sqrtf4( vec.getZ() )
+    );
+}
+
+inline const Vector3 rsqrtPerElem( const Vector3 & vec )
+{   // rsqrtf4 applied to each component.
+    return Vector3(
+        rsqrtf4( vec.getX() ),
+        rsqrtf4( vec.getY() ),
+        rsqrtf4( vec.getZ() )
+    );
+}
+
+inline const Vector3 absPerElem( const Vector3 & vec )
+{   // fabsf4 applied to each component.
+    return Vector3(
+        fabsf4( vec.getX() ),
+        fabsf4( vec.getY() ),
+        fabsf4( vec.getZ() )
+    );
+}
+
+inline const Vector3 copySignPerElem( const Vector3 & vec0, const Vector3 & vec1 )
+{   // copysignf4( vec0, vec1 ) per component; presumably vec0's magnitude with vec1's sign - confirm against simdmath.
+    return Vector3(
+        copysignf4( vec0.getX(), vec1.getX() ),
+        copysignf4( vec0.getY(), vec1.getY() ),
+        copysignf4( vec0.getZ(), vec1.getZ() )
+    );
+}
+
+inline const Vector3 maxPerElem( const Vector3 & vec0, const Vector3 & vec1 )
+{   // Component-wise fmaxf4 of the two vectors.
+    return Vector3(
+        fmaxf4( vec0.getX(), vec1.getX() ),
+        fmaxf4( vec0.getY(), vec1.getY() ),
+        fmaxf4( vec0.getZ(), vec1.getZ() )
+    );
+}
+
+inline vec_float4 maxElem( const Vector3 & vec )
+{
+    // Largest of x, y, z in each lane.
+    // z is compared against max(x, y), keeping the fmaxf4 operand order.
+    const vec_float4 maxXY = fmaxf4( vec.getX(), vec.getY() );
+    return fmaxf4( vec.getZ(), maxXY );
+}
+
+inline const Vector3 minPerElem( const Vector3 & vec0, const Vector3 & vec1 )
+{   // Component-wise fminf4 of the two vectors.
+    return Vector3(
+        fminf4( vec0.getX(), vec1.getX() ),
+        fminf4( vec0.getY(), vec1.getY() ),
+        fminf4( vec0.getZ(), vec1.getZ() )
+    );
+}
+
+inline vec_float4 minElem( const Vector3 & vec )
+{
+    // Smallest of x, y, z in each lane.
+    // z is compared against min(x, y), keeping the fminf4 operand order.
+    const vec_float4 minXY = fminf4( vec.getX(), vec.getY() );
+    return fminf4( vec.getZ(), minXY );
+}
+
+inline vec_float4 sum( const Vector3 & vec )
+{
+    // x + y + z in each lane.
+    // Added as (x + y) + z so the rounding order is unchanged.
+    const vec_float4 sumXY = spu_add( vec.getX(), vec.getY() );
+    return spu_add( sumXY, vec.getZ() );
+}
+
+inline vec_float4 dot( const Vector3 & vec0, const Vector3 & vec1 )
+{
+    // Dot product in each lane; the three products are accumulated in x, y, z order.
+    const vec_float4 prodX = spu_mul( vec0.getX(), vec1.getX() );
+    const vec_float4 prodY = spu_mul( vec0.getY(), vec1.getY() );
+    const vec_float4 prodZ = spu_mul( vec0.getZ(), vec1.getZ() );
+    return spu_add( spu_add( prodX, prodY ), prodZ );
+}
+
+inline vec_float4 lengthSqr( const Vector3 & vec )
+{
+    // Squared length in each lane; the three squares are accumulated in x, y, z order.
+    const vec_float4 sqrX = spu_mul( vec.getX(), vec.getX() );
+    const vec_float4 sqrY = spu_mul( vec.getY(), vec.getY() );
+    const vec_float4 sqrZ = spu_mul( vec.getZ(), vec.getZ() );
+    return spu_add( spu_add( sqrX, sqrY ), sqrZ );
+}
+
+inline vec_float4 length( const Vector3 & vec )
+{   // Length in each lane: sqrtf4 of lengthSqr( vec ).
+    return sqrtf4( lengthSqr( vec ) );
+}
+
+inline const Vector3 normalize( const Vector3 & vec )
+{
+    // Scales each component by the reciprocal square root of the squared length.
+    // A zero-length lane is not special-cased here.
+    const vec_float4 invLen = rsqrtf4( lengthSqr( vec ) );
+    return Vector3(
+        spu_mul( vec.getX(), invLen ),
+        spu_mul( vec.getY(), invLen ),
+        spu_mul( vec.getZ(), invLen )
+    );
+}
+
+inline const Vector3 cross( const Vector3 & vec0, const Vector3 & vec1 )
+{   // Cross product vec0 x vec1 in each lane: (y0*z1 - z0*y1, z0*x1 - x0*z1, x0*y1 - y0*x1).
+    return Vector3(
+        spu_sub( spu_mul( vec0.getY(), vec1.getZ() ), spu_mul( vec0.getZ(), vec1.getY() ) ),
+        spu_sub( spu_mul( vec0.getZ(), vec1.getX() ), spu_mul( vec0.getX(), vec1.getZ() ) ),
+        spu_sub( spu_mul( vec0.getX(), vec1.getY() ), spu_mul( vec0.getY(), vec1.getX() ) )
+    );
+}
+
+inline const Vector3 select( const Vector3 & vec0, const Vector3 & vec1, vec_uint4 select1 )
+{   // spu_sel on each component with the same mask: set bits of select1 take vec1, clear bits take vec0.
+    return Vector3(
+        spu_sel( vec0.getX(), vec1.getX(), select1 ),
+        spu_sel( vec0.getY(), vec1.getY(), select1 ),
+        spu_sel( vec0.getZ(), vec1.getZ(), select1 )
+    );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+inline void print( const Vector3 & vec )
+{   // Debug dump: converts to four AoS vectors and prints each one under a "slot N:" heading.
+    Aos::Vector3 vec0, vec1, vec2, vec3;
+    vec.get4Aos( vec0, vec1, vec2, vec3 );
+    printf("slot 0:\n");
+    print( vec0 );
+    printf("slot 1:\n");
+    print( vec1 );
+    printf("slot 2:\n");
+    print( vec2 );
+    printf("slot 3:\n");
+    print( vec3 );
+}
+
+inline void print( const Vector3 & vec, const char * name )
+{   // Same dump as print( vec ), preceded by a "<name>:" line.
+    Aos::Vector3 vec0, vec1, vec2, vec3;
+    printf( "%s:\n", name );
+    vec.get4Aos( vec0, vec1, vec2, vec3 );
+    printf("slot 0:\n");
+    print( vec0 );
+    printf("slot 1:\n");
+    print( vec1 );
+    printf("slot 2:\n");
+    print( vec2 );
+    printf("slot 3:\n");
+    print( vec3 );
+}
+
+#endif
+
+inline Vector4::Vector4( const Vector4 & vec )
+{   // Copy constructor: takes mX, mY, mZ and mW from vec.
+    mX = vec.mX;
+    mY = vec.mY;
+    mZ = vec.mZ;
+    mW = vec.mW;
+}
+
+inline Vector4::Vector4( vec_float4 _x, vec_float4 _y, vec_float4 _z, vec_float4 _w )
+{   // Builds the vector from four quadwords, stored as mX, mY, mZ and mW.
+    mX = _x;
+    mY = _y;
+    mZ = _z;
+    mW = _w;
+}
+
+inline Vector4::Vector4( const Vector3 & xyz, vec_float4 _w )
+{   // x, y, z come from xyz (via setXYZ); w is supplied separately.
+    this->setXYZ( xyz );
+    this->setW( _w );
+}
+
+inline Vector4::Vector4( const Vector3 & vec )
+{   // Extends a 3-D vector with w = 0 in every lane.
+    mX = vec.getX();
+    mY = vec.getY();
+    mZ = vec.getZ();
+    mW = spu_splats(0.0f);
+}
+
+inline Vector4::Vector4( const Point3 & pnt )
+{   // Extends a 3-D point with w = 1 in every lane.
+    mX = pnt.getX();
+    mY = pnt.getY();
+    mZ = pnt.getZ();
+    mW = spu_splats(1.0f);
+}
+
+inline Vector4::Vector4( const Quat & quat )
+{   // Copies the quaternion's x, y, z, w quadwords unchanged.
+    mX = quat.getX();
+    mY = quat.getY();
+    mZ = quat.getZ();
+    mW = quat.getW();
+}
+
+inline Vector4::Vector4( vec_float4 scalar )
+{   // Stores the same quadword in all four components.
+    mX = scalar;
+    mY = scalar;
+    mZ = scalar;
+    mW = scalar;
+}
+
+inline Vector4::Vector4( Aos::Vector4 vec )
+{   // Broadcast one AoS vector: all four lanes of mX/mY/mZ/mW receive word 0/1/2/3 of vec.get128().
+    vec_uchar16 shuffle_xxxx = (vec_uchar16)spu_splats((int)_VECTORMATH_SHUF_X); // named selector instead of a repeated literal
+    vec_uchar16 shuffle_yyyy = (vec_uchar16)spu_splats((int)_VECTORMATH_SHUF_Y);
+    vec_uchar16 shuffle_zzzz = (vec_uchar16)spu_splats((int)_VECTORMATH_SHUF_Z);
+    vec_uchar16 shuffle_wwww = (vec_uchar16)spu_splats((int)_VECTORMATH_SHUF_W);
+    vec_float4 vec128 = vec.get128();
+    mX = spu_shuffle( vec128, vec128, shuffle_xxxx );
+    mY = spu_shuffle( vec128, vec128, shuffle_yyyy );
+    mZ = spu_shuffle( vec128, vec128, shuffle_zzzz );
+    mW = spu_shuffle( vec128, vec128, shuffle_wwww );
+}
+
+inline Vector4::Vector4( Aos::Vector4 vec0, Aos::Vector4 vec1, Aos::Vector4 vec2, Aos::Vector4 vec3 )
+{   // 4x4 word transpose: lane i of mX/mY/mZ/mW holds word 0/1/2/3 of vec<i>.
+    vec_float4 tmp0, tmp1, tmp2, tmp3;
+    tmp0 = spu_shuffle( vec0.get128(), vec2.get128(), _VECTORMATH_SHUF_XAYB ); // x0 x2 y0 y2
+    tmp1 = spu_shuffle( vec1.get128(), vec3.get128(), _VECTORMATH_SHUF_XAYB ); // x1 x3 y1 y3
+    tmp2 = spu_shuffle( vec0.get128(), vec2.get128(), _VECTORMATH_SHUF_ZCWD ); // z0 z2 w0 w2
+    tmp3 = spu_shuffle( vec1.get128(), vec3.get128(), _VECTORMATH_SHUF_ZCWD ); // z1 z3 w1 w3
+    mX = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_XAYB );
+    mY = spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_ZCWD );
+    mZ = spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_XAYB );
+    mW = spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_ZCWD );
+}
+
+inline const Vector4 Vector4::xAxis( )
+{   // (1,0,0,0) replicated across every lane.
+    return Vector4( spu_splats(1.0f), spu_splats(0.0f), spu_splats(0.0f), spu_splats(0.0f) );
+}
+
+inline const Vector4 Vector4::yAxis( )
+{   // (0,1,0,0) replicated across every lane.
+    return Vector4( spu_splats(0.0f), spu_splats(1.0f), spu_splats(0.0f), spu_splats(0.0f) );
+}
+
+inline const Vector4 Vector4::zAxis( )
+{   // (0,0,1,0) replicated across every lane.
+    return Vector4( spu_splats(0.0f), spu_splats(0.0f), spu_splats(1.0f), spu_splats(0.0f) );
+}
+
+inline const Vector4 Vector4::wAxis( )
+{   // (0,0,0,1) replicated across every lane.
+    return Vector4( spu_splats(0.0f), spu_splats(0.0f), spu_splats(0.0f), spu_splats(1.0f) );
+}
+
+inline const Vector4 lerp( vec_float4 t, const Vector4 & vec0, const Vector4 & vec1 )
+{   // Linear interpolation vec0 + (vec1 - vec0) * t; t is a quadword, so each lane has its own parameter.
+    return ( vec0 + ( ( vec1 - vec0 ) * t ) );
+}
+
+inline const Vector4 slerp( vec_float4 t, const Vector4 & unitVec0, const Vector4 & unitVec1 )
+{
+    const vec_float4 cosAngle = dot( unitVec0, unitVec1 );
+    // Mask is set where cosAngle < tolerance: those lanes take the sine-ratio weights, the rest keep (1-t, t).
+    const vec_uint4 useSlerp = (vec_uint4)spu_cmpgt( spu_splats(_VECTORMATH_SLERP_TOL), cosAngle );
+    const vec_float4 angle = acosf4( cosAngle );
+    const vec_float4 recipSinAngle = recipf4( sinf4( angle ) );
+    const vec_float4 oneMinusT = spu_sub( spu_splats(1.0f), t );
+    const vec_float4 scale0 = spu_sel( oneMinusT, spu_mul( sinf4( spu_mul( oneMinusT, angle ) ), recipSinAngle ), useSlerp );
+    const vec_float4 scale1 = spu_sel( t, spu_mul( sinf4( spu_mul( t, angle ) ), recipSinAngle ), useSlerp );
+    return ( ( unitVec0 * scale0 ) + ( unitVec1 * scale1 ) );
+}
+
+inline void Vector4::get4Aos( Aos::Vector4 & result0, Aos::Vector4 & result1, Aos::Vector4 & result2, Aos::Vector4 & result3 ) const
+{   // 4x4 word transpose back to AoS: result<i> is (x, y, z, w) of lane i.
+    vec_float4 tmp0, tmp1, tmp2, tmp3;
+    tmp0 = spu_shuffle( mX, mZ, _VECTORMATH_SHUF_XAYB ); // x0 z0 x1 z1
+    tmp1 = spu_shuffle( mY, mW, _VECTORMATH_SHUF_XAYB ); // y0 w0 y1 w1
+    tmp2 = spu_shuffle( mX, mZ, _VECTORMATH_SHUF_ZCWD ); // x2 z2 x3 z3
+    tmp3 = spu_shuffle( mY, mW, _VECTORMATH_SHUF_ZCWD ); // y2 w2 y3 w3
+    result0 = Aos::Vector4( spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_XAYB ) );
+    result1 = Aos::Vector4( spu_shuffle( tmp0, tmp1, _VECTORMATH_SHUF_ZCWD ) );
+    result2 = Aos::Vector4( spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_XAYB ) );
+    result3 = Aos::Vector4( spu_shuffle( tmp2, tmp3, _VECTORMATH_SHUF_ZCWD ) );
+}
+
+inline void storeHalfFloats( const Vector4 & vec, vec_ushort8 * twoQuads )
+{   // Converts the four lanes to AoS and packs them pairwise into twoQuads[0] (lanes 0,1) and twoQuads[1] (lanes 2,3).
+    Aos::Vector4 v0, v1, v2, v3;
+    vec.get4Aos( v0, v1, v2, v3 );
+    twoQuads[0] = _vmath2VfToHalfFloats(v0.get128(), v1.get128());
+    twoQuads[1] = _vmath2VfToHalfFloats(v2.get128(), v3.get128());
+}
+
+inline Vector4 & Vector4::operator =( const Vector4 & vec )
+{
+    mX = vec.mX;
+    mY = vec.mY;
+    mZ = vec.mZ;
+    mW = vec.mW;
+    return *this;
+}
+
+inline Vector4 & Vector4::setXYZ( const Vector3 & vec )
+{
+    mX = vec.getX();
+    mY = vec.getY();
+    mZ = vec.getZ();
+    return *this;
+}
+
+inline const Vector3 Vector4::getXYZ( ) const
+{
+    return Vector3( mX, mY, mZ );
+}
+
+inline Vector4 & Vector4::setX( vec_float4 _x )
+{
+    mX = _x;
+    return *this;
+}
+
+inline vec_float4 Vector4::getX( ) const
+{
+    return mX;
+}
+
+inline Vector4 & Vector4::setY( vec_float4 _y )
+{
+    mY = _y;
+    return *this;
+}
+
+inline vec_float4 Vector4::getY( ) const
+{
+    return mY;
+}
+
+inline Vector4 & Vector4::setZ( vec_float4 _z )
+{
+    mZ = _z;
+    return *this;
+}
+
+inline vec_float4 Vector4::getZ( ) const
+{
+    return mZ;
+}
+
+inline Vector4 & Vector4::setW( vec_float4 _w )  // replaces the w quadword (one w value per slot)
+{
+    mW = _w;
+    return *this;  // returned by reference so setters can be chained
+}
+
+inline vec_float4 Vector4::getW( ) const  // returns the w quadword (one w value per slot)
+{
+    return mW;
+}
+
+inline Vector4 & Vector4::setElem( int idx, vec_float4 value )  // sets the component quadword selected by idx
+{
+    *(&mX + idx) = value;  // treats mX.. as an array: relies on contiguous member layout; idx is not range-checked
+    return *this;
+}
+
+inline vec_float4 Vector4::getElem( int idx ) const  // returns the component quadword selected by idx
+{
+    return *(&mX + idx);  // treats mX.. as an array: relies on contiguous member layout; idx is not range-checked
+}
+
+inline Vector4::vec_float4_t & Vector4::operator []( int idx )  // writable reference to the component quadword at idx
+{
+    return *(&mX + idx);  // treats mX.. as an array: relies on contiguous member layout; idx is not range-checked
+}
+
+inline vec_float4 Vector4::operator []( int idx ) const  // by-value read of the component quadword at idx
+{
+    return *(&mX + idx);  // treats mX.. as an array: relies on contiguous member layout; idx is not range-checked
+}
+
+// Adds two SoA 4-D vectors component by component.
+inline const Vector4 Vector4::operator +( const Vector4 & vec ) const
+{
+    const vec_float4 sumX = spu_add( mX, vec.mX );
+    const vec_float4 sumY = spu_add( mY, vec.mY );
+    const vec_float4 sumZ = spu_add( mZ, vec.mZ );
+    const vec_float4 sumW = spu_add( mW, vec.mW );
+    return Vector4( sumX, sumY, sumZ, sumW );
+}
+
+// Subtracts vec from this SoA 4-D vector component by component.
+inline const Vector4 Vector4::operator -( const Vector4 & vec ) const
+{
+    const vec_float4 diffX = spu_sub( mX, vec.mX );
+    const vec_float4 diffY = spu_sub( mY, vec.mY );
+    const vec_float4 diffZ = spu_sub( mZ, vec.mZ );
+    const vec_float4 diffW = spu_sub( mW, vec.mW );
+    return Vector4( diffX, diffY, diffZ, diffW );
+}
+
+// Multiplies every component quadword by scalar (scalar holds one factor per slot).
+inline const Vector4 Vector4::operator *( vec_float4 scalar ) const
+{
+    const vec_float4 scaledX = spu_mul( mX, scalar );
+    const vec_float4 scaledY = spu_mul( mY, scalar );
+    const vec_float4 scaledZ = spu_mul( mZ, scalar );
+    const vec_float4 scaledW = spu_mul( mW, scalar );
+    return Vector4( scaledX, scaledY, scaledZ, scaledW );
+}
+
+// Compound addition, expressed through operator + and copy assignment.
+inline Vector4 & Vector4::operator +=( const Vector4 & vec )
+{
+    return ( *this = *this + vec );
+}
+
+// Compound subtraction, expressed through operator - and copy assignment.
+inline Vector4 & Vector4::operator -=( const Vector4 & vec )
+{
+    return ( *this = *this - vec );
+}
+
+// Compound scalar multiplication, expressed through operator * and copy assignment.
+inline Vector4 & Vector4::operator *=( vec_float4 scalar )
+{
+    return ( *this = *this * scalar );
+}
+
+// Divides every component quadword by scalar using divf4.
+inline const Vector4 Vector4::operator /( vec_float4 scalar ) const
+{
+    const vec_float4 quotX = divf4( mX, scalar );
+    const vec_float4 quotY = divf4( mY, scalar );
+    const vec_float4 quotZ = divf4( mZ, scalar );
+    const vec_float4 quotW = divf4( mW, scalar );
+    return Vector4( quotX, quotY, quotZ, quotW );
+}
+
+// Compound scalar division, expressed through operator / and copy assignment.
+inline Vector4 & Vector4::operator /=( vec_float4 scalar )
+{
+    return ( *this = *this / scalar );
+}
+
+// Negates every component quadword using negatef4.
+inline const Vector4 Vector4::operator -( ) const
+{
+    const vec_float4 negX = negatef4( mX );
+    const vec_float4 negY = negatef4( mY );
+    const vec_float4 negZ = negatef4( mZ );
+    const vec_float4 negW = negatef4( mW );
+    return Vector4( negX, negY, negZ, negW );
+}
+
+inline const Vector4 operator *( vec_float4 scalar, const Vector4 & vec )  // scalar-on-the-left form of Vector4 * scalar
+{
+    return vec * scalar;  // forwards to the member operator *
+}
+
+// Multiplies two SoA 4-D vectors component by component.
+inline const Vector4 mulPerElem( const Vector4 & vec0, const Vector4 & vec1 )
+{
+    const vec_float4 prodX = spu_mul( vec0.getX(), vec1.getX() );
+    const vec_float4 prodY = spu_mul( vec0.getY(), vec1.getY() );
+    const vec_float4 prodZ = spu_mul( vec0.getZ(), vec1.getZ() );
+    const vec_float4 prodW = spu_mul( vec0.getW(), vec1.getW() );
+    return Vector4( prodX, prodY, prodZ, prodW );
+}
+
+// Divides vec0 by vec1 component by component using divf4.
+inline const Vector4 divPerElem( const Vector4 & vec0, const Vector4 & vec1 )
+{
+    const vec_float4 quotX = divf4( vec0.getX(), vec1.getX() );
+    const vec_float4 quotY = divf4( vec0.getY(), vec1.getY() );
+    const vec_float4 quotZ = divf4( vec0.getZ(), vec1.getZ() );
+    const vec_float4 quotW = divf4( vec0.getW(), vec1.getW() );
+    return Vector4( quotX, quotY, quotZ, quotW );
+}
+
+// Takes the reciprocal of every component using recipf4.
+inline const Vector4 recipPerElem( const Vector4 & vec )
+{
+    const vec_float4 recipX = recipf4( vec.getX() );
+    const vec_float4 recipY = recipf4( vec.getY() );
+    const vec_float4 recipZ = recipf4( vec.getZ() );
+    const vec_float4 recipW = recipf4( vec.getW() );
+    return Vector4( recipX, recipY, recipZ, recipW );
+}
+
+// Takes the square root of every component using sqrtf4.
+inline const Vector4 sqrtPerElem( const Vector4 & vec )
+{
+    const vec_float4 rootX = sqrtf4( vec.getX() );
+    const vec_float4 rootY = sqrtf4( vec.getY() );
+    const vec_float4 rootZ = sqrtf4( vec.getZ() );
+    const vec_float4 rootW = sqrtf4( vec.getW() );
+    return Vector4( rootX, rootY, rootZ, rootW );
+}
+
+// Takes the reciprocal square root of every component using rsqrtf4.
+inline const Vector4 rsqrtPerElem( const Vector4 & vec )
+{
+    const vec_float4 invRootX = rsqrtf4( vec.getX() );
+    const vec_float4 invRootY = rsqrtf4( vec.getY() );
+    const vec_float4 invRootZ = rsqrtf4( vec.getZ() );
+    const vec_float4 invRootW = rsqrtf4( vec.getW() );
+    return Vector4( invRootX, invRootY, invRootZ, invRootW );
+}
+
+// Takes the absolute value of every component using fabsf4.
+inline const Vector4 absPerElem( const Vector4 & vec )
+{
+    const vec_float4 absX = fabsf4( vec.getX() );
+    const vec_float4 absY = fabsf4( vec.getY() );
+    const vec_float4 absZ = fabsf4( vec.getZ() );
+    const vec_float4 absW = fabsf4( vec.getW() );
+    return Vector4( absX, absY, absZ, absW );
+}
+
+// Applies copysignf4( vec0 component, vec1 component ) to every component.
+inline const Vector4 copySignPerElem( const Vector4 & vec0, const Vector4 & vec1 )
+{
+    const vec_float4 signedX = copysignf4( vec0.getX(), vec1.getX() );
+    const vec_float4 signedY = copysignf4( vec0.getY(), vec1.getY() );
+    const vec_float4 signedZ = copysignf4( vec0.getZ(), vec1.getZ() );
+    const vec_float4 signedW = copysignf4( vec0.getW(), vec1.getW() );
+    return Vector4( signedX, signedY, signedZ, signedW );
+}
+
+// Component-wise maximum of two SoA 4-D vectors using fmaxf4.
+inline const Vector4 maxPerElem( const Vector4 & vec0, const Vector4 & vec1 )
+{
+    const vec_float4 maxX = fmaxf4( vec0.getX(), vec1.getX() );
+    const vec_float4 maxY = fmaxf4( vec0.getY(), vec1.getY() );
+    const vec_float4 maxZ = fmaxf4( vec0.getZ(), vec1.getZ() );
+    const vec_float4 maxW = fmaxf4( vec0.getW(), vec1.getW() );
+    return Vector4( maxX, maxY, maxZ, maxW );
+}
+
+// Largest of the x, y, z and w components, computed independently per slot.
+// Operand order of each fmaxf4 call matches the original reduction.
+inline vec_float4 maxElem( const Vector4 & vec )
+{
+    const vec_float4 maxXY  = fmaxf4( vec.getX(), vec.getY() );
+    const vec_float4 maxXYZ = fmaxf4( vec.getZ(), maxXY );
+    return fmaxf4( vec.getW(), maxXYZ );
+}
+
+// Component-wise minimum of two SoA 4-D vectors using fminf4.
+inline const Vector4 minPerElem( const Vector4 & vec0, const Vector4 & vec1 )
+{
+    const vec_float4 minX = fminf4( vec0.getX(), vec1.getX() );
+    const vec_float4 minY = fminf4( vec0.getY(), vec1.getY() );
+    const vec_float4 minZ = fminf4( vec0.getZ(), vec1.getZ() );
+    const vec_float4 minW = fminf4( vec0.getW(), vec1.getW() );
+    return Vector4( minX, minY, minZ, minW );
+}
+
+// Smallest of the x, y, z and w components, computed independently per slot.
+// Operand order of each fminf4 call matches the original reduction.
+inline vec_float4 minElem( const Vector4 & vec )
+{
+    const vec_float4 minXY  = fminf4( vec.getX(), vec.getY() );
+    const vec_float4 minXYZ = fminf4( vec.getZ(), minXY );
+    return fminf4( vec.getW(), minXYZ );
+}
+
+// Sum of the x, y, z and w components, computed independently per slot.
+// Accumulates left to right as ((x + y) + z) + w.
+inline vec_float4 sum( const Vector4 & vec )
+{
+    const vec_float4 sumXY  = spu_add( vec.getX(), vec.getY() );
+    const vec_float4 sumXYZ = spu_add( sumXY, vec.getZ() );
+    return spu_add( sumXYZ, vec.getW() );
+}
+
+// 4-D dot product per slot; separate multiply and add steps, accumulated in x, y, z, w order.
+inline vec_float4 dot( const Vector4 & vec0, const Vector4 & vec1 )
+{
+    vec_float4 acc = spu_mul( vec0.getX(), vec1.getX() );
+    acc = spu_add( acc, spu_mul( vec0.getY(), vec1.getY() ) );
+    acc = spu_add( acc, spu_mul( vec0.getZ(), vec1.getZ() ) );
+    acc = spu_add( acc, spu_mul( vec0.getW(), vec1.getW() ) );
+    return acc;
+}
+
+// Squared 4-D length per slot; separate multiply and add steps, accumulated in x, y, z, w order.
+inline vec_float4 lengthSqr( const Vector4 & vec )
+{
+    vec_float4 acc = spu_mul( vec.getX(), vec.getX() );
+    acc = spu_add( acc, spu_mul( vec.getY(), vec.getY() ) );
+    acc = spu_add( acc, spu_mul( vec.getZ(), vec.getZ() ) );
+    acc = spu_add( acc, spu_mul( vec.getW(), vec.getW() ) );
+    return acc;
+}
+
+inline vec_float4 length( const Vector4 & vec )  // 4-D length per slot
+{
+    return sqrtf4( lengthSqr( vec ) );  // square root of the squared length
+}
+
+// Scales every component by rsqrtf4 of the squared length, per slot.
+// There is no guard for a zero squared length: the result is whatever rsqrtf4 yields there.
+inline const Vector4 normalize( const Vector4 & vec )
+{
+    const vec_float4 invLen = rsqrtf4( lengthSqr( vec ) );
+    return Vector4(
+        spu_mul( vec.getX(), invLen ),
+        spu_mul( vec.getY(), invLen ),
+        spu_mul( vec.getZ(), invLen ),
+        spu_mul( vec.getW(), invLen )
+    );
+}
+
+// Applies spu_sel with the same select1 mask to each pair of component quadwords of vec0 and vec1.
+inline const Vector4 select( const Vector4 & vec0, const Vector4 & vec1, vec_uint4 select1 )
+{
+    const vec_float4 pickedX = spu_sel( vec0.getX(), vec1.getX(), select1 );
+    const vec_float4 pickedY = spu_sel( vec0.getY(), vec1.getY(), select1 );
+    const vec_float4 pickedZ = spu_sel( vec0.getZ(), vec1.getZ(), select1 );
+    const vec_float4 pickedW = spu_sel( vec0.getW(), vec1.getW(), select1 );
+    return Vector4( pickedX, pickedY, pickedZ, pickedW );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+inline void print( const Vector4 & vec )  // debug dump: one AoS print per slot
+{
+    Aos::Vector4 slot0, slot1, slot2, slot3;
+    vec.get4Aos( slot0, slot1, slot2, slot3 );  // split into the four per-slot vectors
+    printf("slot 0:\n");
+    print( slot0 );
+    printf("slot 1:\n");
+    print( slot1 );
+    printf("slot 2:\n");
+    print( slot2 );
+    printf("slot 3:\n");
+    print( slot3 );
+}
+
+inline void print( const Vector4 & vec, const char * name )  // debug dump preceded by a "name:" line
+{
+    printf( "%s:\n", name );
+    Aos::Vector4 slot0, slot1, slot2, slot3;
+    vec.get4Aos( slot0, slot1, slot2, slot3 );  // split into the four per-slot vectors
+    printf("slot 0:\n");
+    print( slot0 );
+    printf("slot 1:\n");
+    print( slot1 );
+    printf("slot 2:\n");
+    print( slot2 );
+    printf("slot 3:\n");
+    print( slot3 );
+}
+
+#endif
+
+// Copy constructor: takes the x, y and z quadwords of pnt.
+// Written as a member-initializer list instead of assignments in the body.
+inline Point3::Point3( const Point3 & pnt )
+    : mX( pnt.mX ), mY( pnt.mY ), mZ( pnt.mZ )
+{
+}
+
+// Builds an SoA point from separate x, y and z quadwords.
+// Written as a member-initializer list instead of assignments in the body.
+inline Point3::Point3( vec_float4 _x, vec_float4 _y, vec_float4 _z )
+    : mX( _x ), mY( _y ), mZ( _z )
+{
+}
+
+// Builds an SoA point from the x, y and z quadwords of an SoA 3-D vector.
+// Written as a member-initializer list instead of assignments in the body.
+inline Point3::Point3( const Vector3 & vec )
+    : mX( vec.getX() ), mY( vec.getY() ), mZ( vec.getZ() )
+{
+}
+
+// Sets x, y and z to the same quadword.
+// Written as a member-initializer list instead of assignments in the body.
+inline Point3::Point3( vec_float4 scalar )
+    : mX( scalar ), mY( scalar ), mZ( scalar )
+{
+}
+
+// Replicates one AoS point into every slot.
+// Each shuffle pattern repeats the byte indices of one source word in all four
+// words: bytes 0-3 (word 0) -> mX, bytes 4-7 (word 1) -> mY, bytes 8-11 (word 2) -> mZ.
+inline Point3::Point3( Aos::Point3 pnt )
+{
+    const vec_float4 src = pnt.get128();
+    mX = spu_shuffle( src, src, (vec_uchar16)spu_splats((int)0x00010203) );
+    mY = spu_shuffle( src, src, (vec_uchar16)spu_splats((int)0x04050607) );
+    mZ = spu_shuffle( src, src, (vec_uchar16)spu_splats((int)0x08090a0b) );
+}
+
+// Gathers four AoS points (pnt0..pnt3 -> slots 0..3) into SoA form with two rounds of interleaves.
+inline Point3::Point3( Aos::Point3 pnt0, Aos::Point3 pnt1, Aos::Point3 pnt2, Aos::Point3 pnt3 )
+{
+    const vec_float4 evenFirst  = spu_shuffle( pnt0.get128(), pnt2.get128(), _VECTORMATH_SHUF_XAYB );
+    const vec_float4 oddFirst   = spu_shuffle( pnt1.get128(), pnt3.get128(), _VECTORMATH_SHUF_XAYB );
+    const vec_float4 evenSecond = spu_shuffle( pnt0.get128(), pnt2.get128(), _VECTORMATH_SHUF_ZCWD );
+    const vec_float4 oddSecond  = spu_shuffle( pnt1.get128(), pnt3.get128(), _VECTORMATH_SHUF_ZCWD );
+    mX = spu_shuffle( evenFirst, oddFirst, _VECTORMATH_SHUF_XAYB );
+    mY = spu_shuffle( evenFirst, oddFirst, _VECTORMATH_SHUF_ZCWD );
+    mZ = spu_shuffle( evenSecond, oddSecond, _VECTORMATH_SHUF_XAYB );
+}
+
+inline const Point3 lerp( vec_float4 t, const Point3 & pnt0, const Point3 & pnt1 )  // linear interpolation; t is not clamped here
+{
+    return ( pnt0 + ( ( pnt1 - pnt0 ) * t ) );  // pnt0 plus t times the offset from pnt0 to pnt1
+}
+
+// Splits the SoA data into four AoS points, one per slot; x/z are interleaved first, then y is merged in.
+inline void Point3::get4Aos( Aos::Point3 & result0, Aos::Point3 & result1, Aos::Point3 & result2, Aos::Point3 & result3 ) const
+{
+    const vec_float4 xzFirst  = spu_shuffle( mX, mZ, _VECTORMATH_SHUF_XAYB );
+    const vec_float4 xzSecond = spu_shuffle( mX, mZ, _VECTORMATH_SHUF_ZCWD );
+    result0 = Aos::Point3( spu_shuffle( xzFirst, mY, _VECTORMATH_SHUF_XAYB ) );
+    result1 = Aos::Point3( spu_shuffle( xzFirst, mY, _VECTORMATH_SHUF_ZBW0 ) );
+    result2 = Aos::Point3( spu_shuffle( xzSecond, mY, _VECTORMATH_SHUF_XCY0 ) );
+    result3 = Aos::Point3( spu_shuffle( xzSecond, mY, _VECTORMATH_SHUF_ZDW0 ) );
+}
+
+// Loads three quadwords of packed x,y,z floats into vec (local names spell the lane contents, assuming the masks select as named).
+inline void loadXYZArray( Point3 & vec, const vec_float4 * threeQuads )
+{
+    const vec_float4 xyzx = threeQuads[0];
+    const vec_float4 yzxy = threeQuads[1];
+    const vec_float4 zxyz = threeQuads[2];
+    const vec_float4 xyxy = spu_shuffle( xyzx, yzxy, _VECTORMATH_SHUF_XYCD );
+    const vec_float4 zxzx = spu_shuffle( zxyz, xyzx, _VECTORMATH_SHUF_XYCD );
+    const vec_float4 yzyz = spu_shuffle( yzxy, zxyz, _VECTORMATH_SHUF_XYCD );
+    vec.setX( spu_shuffle( xyxy, zxzx, _VECTORMATH_SHUF_XDZB ) );
+    vec.setY( spu_shuffle( xyxy, yzyz, _VECTORMATH_SHUF_YAWC ) );
+    vec.setZ( spu_shuffle( zxzx, yzyz, _VECTORMATH_SHUF_ZBXD ) );
+}
+
+// Stores vec as three quadwords of packed x,y,z floats (the inverse of loadXYZArray).
+// Local names spell the lane contents, assuming the shuffle masks select as named.
+// All reads of vec complete before the first store to threeQuads.
+inline void storeXYZArray( const Point3 & vec, vec_float4 * threeQuads )
+{
+    const vec_float4 xyxy = spu_shuffle( vec.getX(), vec.getY(), _VECTORMATH_SHUF_XAZC );
+    const vec_float4 zxzx = spu_shuffle( vec.getZ(), vec.getX(), _VECTORMATH_SHUF_ZDXB );
+    const vec_float4 yzyz = spu_shuffle( vec.getY(), vec.getZ(), _VECTORMATH_SHUF_YBWD );
+    const vec_float4 xyzx = spu_shuffle( xyxy, zxzx, _VECTORMATH_SHUF_XYCD );
+    threeQuads[0] = xyzx;
+    threeQuads[1] = spu_shuffle( yzyz, xyxy, _VECTORMATH_SHUF_XYCD );
+    threeQuads[2] = spu_shuffle( zxzx, yzyz, _VECTORMATH_SHUF_XYCD );
+}
+
+// Packs two SoA points (eight xyz triples) into three quadwords via _vmath2VfToHalfFloats.
+inline void storeHalfFloats( const Point3 & pnt0, const Point3 & pnt1, vec_ushort8 * threeQuads )
+{
+    vec_float4 packed0[3], packed1[3];
+    storeXYZArray( pnt0, packed0 );
+    storeXYZArray( pnt1, packed1 );
+    threeQuads[0] = _vmath2VfToHalfFloats(packed0[0], packed0[1]);
+    threeQuads[1] = _vmath2VfToHalfFloats(packed0[2], packed1[0]);
+    threeQuads[2] = _vmath2VfToHalfFloats(packed1[1], packed1[2]);
+}
+
+inline Point3 & Point3::operator =( const Point3 & pnt )  // copy-assigns all three component quadwords
+{
+    mX = pnt.mX;
+    mY = pnt.mY;
+    mZ = pnt.mZ;
+    return *this;  // returned by reference so assignments can be chained
+}
+
+inline Point3 & Point3::setX( vec_float4 _x )  // replaces the x quadword (one x value per slot)
+{
+    mX = _x;
+    return *this;  // returned by reference so setters can be chained
+}
+
+inline vec_float4 Point3::getX( ) const  // returns the x quadword (one x value per slot)
+{
+    return mX;
+}
+
+inline Point3 & Point3::setY( vec_float4 _y )  // replaces the y quadword (one y value per slot)
+{
+    mY = _y;
+    return *this;  // returned by reference so setters can be chained
+}
+
+inline vec_float4 Point3::getY( ) const  // returns the y quadword (one y value per slot)
+{
+    return mY;
+}
+
+inline Point3 & Point3::setZ( vec_float4 _z )  // replaces the z quadword (one z value per slot)
+{
+    mZ = _z;
+    return *this;  // returned by reference so setters can be chained
+}
+
+inline vec_float4 Point3::getZ( ) const  // returns the z quadword (one z value per slot)
+{
+    return mZ;
+}
+
+inline Point3 & Point3::setElem( int idx, vec_float4 value )  // sets the component quadword selected by idx
+{
+    *(&mX + idx) = value;  // treats mX.. as an array: relies on contiguous member layout; idx is not range-checked
+    return *this;
+}
+
+inline vec_float4 Point3::getElem( int idx ) const  // returns the component quadword selected by idx
+{
+    return *(&mX + idx);  // treats mX.. as an array: relies on contiguous member layout; idx is not range-checked
+}
+
+inline Point3::vec_float4_t & Point3::operator []( int idx )  // writable reference to the component quadword at idx
+{
+    return *(&mX + idx);  // treats mX.. as an array: relies on contiguous member layout; idx is not range-checked
+}
+
+inline vec_float4 Point3::operator []( int idx ) const  // by-value read of the component quadword at idx
+{
+    return *(&mX + idx);  // treats mX.. as an array: relies on contiguous member layout; idx is not range-checked
+}
+
+// Difference of two points, returned as a 3-D vector (this minus pnt).
+inline const Vector3 Point3::operator -( const Point3 & pnt ) const
+{
+    const vec_float4 diffX = spu_sub( mX, pnt.mX );
+    const vec_float4 diffY = spu_sub( mY, pnt.mY );
+    const vec_float4 diffZ = spu_sub( mZ, pnt.mZ );
+    return Vector3( diffX, diffY, diffZ );
+}
+
+// Offsets this point by a 3-D vector, component by component.
+inline const Point3 Point3::operator +( const Vector3 & vec ) const
+{
+    const vec_float4 movedX = spu_add( mX, vec.getX() );
+    const vec_float4 movedY = spu_add( mY, vec.getY() );
+    const vec_float4 movedZ = spu_add( mZ, vec.getZ() );
+    return Point3( movedX, movedY, movedZ );
+}
+
+// Offsets this point by the negative of a 3-D vector, component by component.
+inline const Point3 Point3::operator -( const Vector3 & vec ) const
+{
+    const vec_float4 movedX = spu_sub( mX, vec.getX() );
+    const vec_float4 movedY = spu_sub( mY, vec.getY() );
+    const vec_float4 movedZ = spu_sub( mZ, vec.getZ() );
+    return Point3( movedX, movedY, movedZ );
+}
+
+// Compound addition of a 3-D vector, expressed through operator + and copy assignment.
+inline Point3 & Point3::operator +=( const Vector3 & vec )
+{
+    return ( *this = *this + vec );
+}
+
+// Compound subtraction of a 3-D vector, expressed through operator - and copy assignment.
+inline Point3 & Point3::operator -=( const Vector3 & vec )
+{
+    return ( *this = *this - vec );
+}
+
+// Multiplies two SoA points component by component.
+inline const Point3 mulPerElem( const Point3 & pnt0, const Point3 & pnt1 )
+{
+    const vec_float4 prodX = spu_mul( pnt0.getX(), pnt1.getX() );
+    const vec_float4 prodY = spu_mul( pnt0.getY(), pnt1.getY() );
+    const vec_float4 prodZ = spu_mul( pnt0.getZ(), pnt1.getZ() );
+    return Point3( prodX, prodY, prodZ );
+}
+
+// Divides pnt0 by pnt1 component by component using divf4.
+inline const Point3 divPerElem( const Point3 & pnt0, const Point3 & pnt1 )
+{
+    const vec_float4 quotX = divf4( pnt0.getX(), pnt1.getX() );
+    const vec_float4 quotY = divf4( pnt0.getY(), pnt1.getY() );
+    const vec_float4 quotZ = divf4( pnt0.getZ(), pnt1.getZ() );
+    return Point3( quotX, quotY, quotZ );
+}
+
+// Takes the reciprocal of every component using recipf4.
+inline const Point3 recipPerElem( const Point3 & pnt )
+{
+    const vec_float4 recipX = recipf4( pnt.getX() );
+    const vec_float4 recipY = recipf4( pnt.getY() );
+    const vec_float4 recipZ = recipf4( pnt.getZ() );
+    return Point3( recipX, recipY, recipZ );
+}
+
+// Takes the square root of every component using sqrtf4.
+inline const Point3 sqrtPerElem( const Point3 & pnt )
+{
+    const vec_float4 rootX = sqrtf4( pnt.getX() );
+    const vec_float4 rootY = sqrtf4( pnt.getY() );
+    const vec_float4 rootZ = sqrtf4( pnt.getZ() );
+    return Point3( rootX, rootY, rootZ );
+}
+
+// Takes the reciprocal square root of every component using rsqrtf4.
+inline const Point3 rsqrtPerElem( const Point3 & pnt )
+{
+    const vec_float4 invRootX = rsqrtf4( pnt.getX() );
+    const vec_float4 invRootY = rsqrtf4( pnt.getY() );
+    const vec_float4 invRootZ = rsqrtf4( pnt.getZ() );
+    return Point3( invRootX, invRootY, invRootZ );
+}
+
+// Takes the absolute value of every component using fabsf4.
+inline const Point3 absPerElem( const Point3 & pnt )
+{
+    const vec_float4 absX = fabsf4( pnt.getX() );
+    const vec_float4 absY = fabsf4( pnt.getY() );
+    const vec_float4 absZ = fabsf4( pnt.getZ() );
+    return Point3( absX, absY, absZ );
+}
+
+// Applies copysignf4( pnt0 component, pnt1 component ) to every component.
+inline const Point3 copySignPerElem( const Point3 & pnt0, const Point3 & pnt1 )
+{
+    const vec_float4 signedX = copysignf4( pnt0.getX(), pnt1.getX() );
+    const vec_float4 signedY = copysignf4( pnt0.getY(), pnt1.getY() );
+    const vec_float4 signedZ = copysignf4( pnt0.getZ(), pnt1.getZ() );
+    return Point3( signedX, signedY, signedZ );
+}
+
+// Component-wise maximum of two SoA points using fmaxf4.
+inline const Point3 maxPerElem( const Point3 & pnt0, const Point3 & pnt1 )
+{
+    const vec_float4 maxX = fmaxf4( pnt0.getX(), pnt1.getX() );
+    const vec_float4 maxY = fmaxf4( pnt0.getY(), pnt1.getY() );
+    const vec_float4 maxZ = fmaxf4( pnt0.getZ(), pnt1.getZ() );
+    return Point3( maxX, maxY, maxZ );
+}
+
+// Largest of the x, y and z components, computed independently per slot.
+// Operand order of each fmaxf4 call matches the original reduction.
+inline vec_float4 maxElem( const Point3 & pnt )
+{
+    const vec_float4 maxXY = fmaxf4( pnt.getX(), pnt.getY() );
+    return fmaxf4( pnt.getZ(), maxXY );
+}
+
+// Component-wise minimum of two SoA points using fminf4.
+inline const Point3 minPerElem( const Point3 & pnt0, const Point3 & pnt1 )
+{
+    const vec_float4 minX = fminf4( pnt0.getX(), pnt1.getX() );
+    const vec_float4 minY = fminf4( pnt0.getY(), pnt1.getY() );
+    const vec_float4 minZ = fminf4( pnt0.getZ(), pnt1.getZ() );
+    return Point3( minX, minY, minZ );
+}
+
+// Smallest of the x, y and z components, computed independently per slot.
+// Operand order of each fminf4 call matches the original reduction.
+inline vec_float4 minElem( const Point3 & pnt )
+{
+    const vec_float4 minXY = fminf4( pnt.getX(), pnt.getY() );
+    return fminf4( pnt.getZ(), minXY );
+}
+
+// Sum of the x, y and z components, computed independently per slot.
+// Accumulates left to right as (x + y) + z.
+inline vec_float4 sum( const Point3 & pnt )
+{
+    const vec_float4 sumXY = spu_add( pnt.getX(), pnt.getY() );
+    return spu_add( sumXY, pnt.getZ() );
+}
+
+inline const Point3 scale( const Point3 & pnt, vec_float4 scaleVal )  // uniform scale: same factor applied to x, y and z
+{
+    return mulPerElem( pnt, Point3( scaleVal ) );  // Point3( scaleVal ) carries the factor in all three components
+}
+
+inline const Point3 scale( const Point3 & pnt, const Vector3 & scaleVec )  // non-uniform scale: one factor per component
+{
+    return mulPerElem( pnt, Point3( scaleVec ) );  // scaleVec is converted to a Point3 so mulPerElem can be reused
+}
+
+// Dot product of pnt's coordinates with unitVec, per slot; accumulated in x, y, z order.
+inline vec_float4 projection( const Point3 & pnt, const Vector3 & unitVec )
+{
+    vec_float4 acc = spu_mul( pnt.getX(), unitVec.getX() );
+    acc = spu_add( acc, spu_mul( pnt.getY(), unitVec.getY() ) );
+    acc = spu_add( acc, spu_mul( pnt.getZ(), unitVec.getZ() ) );
+    return acc;
+}
+
+inline vec_float4 distSqrFromOrigin( const Point3 & pnt )  // squared distance from the origin, per slot
+{
+    return lengthSqr( Vector3( pnt ) );  // the point's coordinates reinterpreted as a vector
+}
+
+inline vec_float4 distFromOrigin( const Point3 & pnt )  // distance from the origin, per slot
+{
+    return length( Vector3( pnt ) );  // the point's coordinates reinterpreted as a vector
+}
+
+inline vec_float4 distSqr( const Point3 & pnt0, const Point3 & pnt1 )  // squared distance between two points, per slot
+{
+    return lengthSqr( ( pnt1 - pnt0 ) );  // squared length of the vector from pnt0 to pnt1
+}
+
+inline vec_float4 dist( const Point3 & pnt0, const Point3 & pnt1 )  // distance between two points, per slot
+{
+    return length( ( pnt1 - pnt0 ) );  // length of the vector from pnt0 to pnt1
+}
+
+// Applies spu_sel with the same select1 mask to each pair of component quadwords of pnt0 and pnt1.
+inline const Point3 select( const Point3 & pnt0, const Point3 & pnt1, vec_uint4 select1 )
+{
+    const vec_float4 pickedX = spu_sel( pnt0.getX(), pnt1.getX(), select1 );
+    const vec_float4 pickedY = spu_sel( pnt0.getY(), pnt1.getY(), select1 );
+    const vec_float4 pickedZ = spu_sel( pnt0.getZ(), pnt1.getZ(), select1 );
+    return Point3( pickedX, pickedY, pickedZ );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+inline void print( const Point3 & pnt )  // debug dump: one AoS print per slot
+{
+    Aos::Point3 slot0, slot1, slot2, slot3;
+    pnt.get4Aos( slot0, slot1, slot2, slot3 );  // split into the four per-slot points
+    printf("slot 0:\n");
+    print( slot0 );
+    printf("slot 1:\n");
+    print( slot1 );
+    printf("slot 2:\n");
+    print( slot2 );
+    printf("slot 3:\n");
+    print( slot3 );
+}
+
+inline void print( const Point3 & pnt, const char * name )  // debug dump preceded by a "name:" line
+{
+    printf( "%s:\n", name );
+    Aos::Point3 slot0, slot1, slot2, slot3;
+    pnt.get4Aos( slot0, slot1, slot2, slot3 );  // split into the four per-slot points
+    printf("slot 0:\n");
+    print( slot0 );
+    printf("slot 1:\n");
+    print( slot1 );
+    printf("slot 2:\n");
+    print( slot2 );
+    printf("slot 3:\n");
+    print( slot3 );
+}
+
+#endif
+
+} // namespace Soa
+} // namespace Vectormath
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/spu/cpp/vecidx_aos.h b/Extras/vectormathlibrary/include/vectormath/spu/cpp/vecidx_aos.h
index f5309153d..e46578ad0 100644
--- a/Extras/vectormathlibrary/include/vectormath/spu/cpp/vecidx_aos.h
+++ b/Extras/vectormathlibrary/include/vectormath/spu/cpp/vecidx_aos.h
@@ -1,64 +1,64 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_VECIDX_AOS_H
-#define _VECTORMATH_VECIDX_AOS_H
-
-#include <spu_intrinsics.h>
-
-namespace Vectormath {
-namespace Aos {
-
-//-----------------------------------------------------------------------------
-// VecIdx 
-// Used in setting elements of Vector3, Vector4, Point3, or Quat with the 
-// subscripting operator.
-//
-
-class VecIdx
-{
-private:
-    typedef vec_float4 vec_float4_t;
-    vec_float4_t &ref __attribute__ ((aligned(16)));
-    int i __attribute__ ((aligned(16)));
-public:
-    inline VecIdx( vec_float4& vec, int idx ): ref(vec) { i = idx; }
-    inline operator float() const;
-    inline float operator =( float scalar );
-    inline float operator =( const VecIdx& scalar );
-    inline float operator *=( float scalar );
-    inline float operator /=( float scalar );
-    inline float operator +=( float scalar );
-    inline float operator -=( float scalar );
-};
-
-} // namespace Aos
-} // namespace Vectormath
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_VECIDX_AOS_H
+#define _VECTORMATH_VECIDX_AOS_H
+
+#include <spu_intrinsics.h>
+
+namespace Vectormath {
+namespace Aos {
+
+//-----------------------------------------------------------------------------
+// VecIdx 
+// Used in setting elements of Vector3, Vector4, Point3, or Quat with the 
+// subscripting operator.
+//
+
+class VecIdx
+{
+private:
+    typedef vec_float4 vec_float4_t;
+    vec_float4_t &ref __attribute__ ((aligned(16)));  // the quadword this proxy is bound to
+    int i __attribute__ ((aligned(16)));  // element index supplied at construction
+public:
+    inline VecIdx( vec_float4& vec, int idx ): ref(vec) { i = idx; }  // binds to vec by reference; vec must outlive the proxy
+    inline operator float() const;  // defined elsewhere; NOTE(review): presumably reads element i of ref - confirm
+    inline float operator =( float scalar );  // defined elsewhere; NOTE(review): presumably writes element i of ref - confirm
+    inline float operator =( const VecIdx& scalar );  // assignment from another proxy; returns float, not a reference
+    inline float operator *=( float scalar );  // compound operators are declared only here; all return float
+    inline float operator /=( float scalar );
+    inline float operator +=( float scalar );
+    inline float operator -=( float scalar );
+};
+
+} // namespace Aos
+} // namespace Vectormath
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/spu/cpp/vectormath_aos.h b/Extras/vectormathlibrary/include/vectormath/spu/cpp/vectormath_aos.h
index dd583790b..5fcd55c2e 100644
--- a/Extras/vectormathlibrary/include/vectormath/spu/cpp/vectormath_aos.h
+++ b/Extras/vectormathlibrary/include/vectormath/spu/cpp/vectormath_aos.h
@@ -1,1851 +1,1851 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_AOS_CPP_H
-#define _VECTORMATH_AOS_CPP_H
-
-#include <math.h>
-#include <spu_intrinsics.h>
-#include "floatInVec.h"
-#include "boolInVec.h"
-#include "stdio.h"
-#include "vecidx_aos.h"
-
-#ifdef _VECTORMATH_DEBUG
-#endif
-
-namespace Vectormath {
-
-namespace Aos {
-
-//-----------------------------------------------------------------------------
-// Forward Declarations
-//
-
-class Vector3;
-class Vector4;
-class Point3;
-class Quat;
-class Matrix3;
-class Matrix4;
-class Transform3;
-
-// A 3-D vector in array-of-structures format
-//
-class Vector3
-{
-    vec_float4 mVec128;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Vector3( ) { };
-
-    // Construct a 3-D vector from x, y, and z elements
-    // 
-    inline Vector3( float x, float y, float z );
-
-    // Copy elements from a 3-D point into a 3-D vector
-    // 
-    explicit inline Vector3( Point3 pnt );
-
-    // Set all elements of a 3-D vector to the same scalar value
-    // 
-    explicit inline Vector3( float scalar );
-
-    // Set vector float data in a 3-D vector
-    // 
-    explicit inline Vector3( vec_float4 vf4 );
-
-    // Get vector float data from a 3-D vector
-    // 
-    inline vec_float4 get128( ) const;
-
-    // Assign one 3-D vector to another
-    // 
-    inline Vector3 & operator =( Vector3 vec );
-
-    // Set the x element of a 3-D vector
-    // 
-    inline Vector3 & setX( float x );
-
-    // Set the y element of a 3-D vector
-    // 
-    inline Vector3 & setY( float y );
-
-    // Set the z element of a 3-D vector
-    // 
-    inline Vector3 & setZ( float z );
-
-    // Get the x element of a 3-D vector
-    // 
-    inline float getX( ) const;
-
-    // Get the y element of a 3-D vector
-    // 
-    inline float getY( ) const;
-
-    // Get the z element of a 3-D vector
-    // 
-    inline float getZ( ) const;
-
-    // Set an x, y, or z element of a 3-D vector by index
-    // 
-    inline Vector3 & setElem( int idx, float value );
-
-    // Get an x, y, or z element of a 3-D vector by index
-    // 
-    inline float getElem( int idx ) const;
-
-    // Subscripting operator to set or get an element
-    // 
-    inline VecIdx operator []( int idx );
-
-    // Subscripting operator to get an element
-    // 
-    inline float operator []( int idx ) const;
-
-    // Add two 3-D vectors
-    // 
-    inline const Vector3 operator +( Vector3 vec ) const;
-
-    // Subtract a 3-D vector from another 3-D vector
-    // 
-    inline const Vector3 operator -( Vector3 vec ) const;
-
-    // Add a 3-D vector to a 3-D point
-    // 
-    inline const Point3 operator +( Point3 pnt ) const;
-
-    // Multiply a 3-D vector by a scalar
-    // 
-    inline const Vector3 operator *( float scalar ) const;
-
-    // Divide a 3-D vector by a scalar
-    // 
-    inline const Vector3 operator /( float scalar ) const;
-
-    // Perform compound assignment and addition with a 3-D vector
-    // 
-    inline Vector3 & operator +=( Vector3 vec );
-
-    // Perform compound assignment and subtraction by a 3-D vector
-    // 
-    inline Vector3 & operator -=( Vector3 vec );
-
-    // Perform compound assignment and multiplication by a scalar
-    // 
-    inline Vector3 & operator *=( float scalar );
-
-    // Perform compound assignment and division by a scalar
-    // 
-    inline Vector3 & operator /=( float scalar );
-
-    // Negate all elements of a 3-D vector
-    // 
-    inline const Vector3 operator -( ) const;
-
-    // Construct x axis
-    // 
-    static inline const Vector3 xAxis( );
-
-    // Construct y axis
-    // 
-    static inline const Vector3 yAxis( );
-
-    // Construct z axis
-    // 
-    static inline const Vector3 zAxis( );
-
-};
-
-// Multiply a 3-D vector by a scalar
-// 
-inline const Vector3 operator *( float scalar, Vector3 vec );
-
-// Multiply two 3-D vectors per element
-// 
-inline const Vector3 mulPerElem( Vector3 vec0, Vector3 vec1 );
-
-// Divide two 3-D vectors per element
-// NOTE: 
-// Floating-point behavior matches standard library function divf4.
-// 
-inline const Vector3 divPerElem( Vector3 vec0, Vector3 vec1 );
-
-// Compute the reciprocal of a 3-D vector per element
-// NOTE: 
-// Floating-point behavior matches standard library function recipf4.
-// 
-inline const Vector3 recipPerElem( Vector3 vec );
-
-// Compute the square root of a 3-D vector per element
-// NOTE: 
-// Floating-point behavior matches standard library function sqrtf4.
-// 
-inline const Vector3 sqrtPerElem( Vector3 vec );
-
-// Compute the reciprocal square root of a 3-D vector per element
-// NOTE: 
-// Floating-point behavior matches standard library function rsqrtf4.
-// 
-inline const Vector3 rsqrtPerElem( Vector3 vec );
-
-// Compute the absolute value of a 3-D vector per element
-// 
-inline const Vector3 absPerElem( Vector3 vec );
-
-// Copy sign from one 3-D vector to another, per element
-// 
-inline const Vector3 copySignPerElem( Vector3 vec0, Vector3 vec1 );
-
-// Maximum of two 3-D vectors per element
-// 
-inline const Vector3 maxPerElem( Vector3 vec0, Vector3 vec1 );
-
-// Minimum of two 3-D vectors per element
-// 
-inline const Vector3 minPerElem( Vector3 vec0, Vector3 vec1 );
-
-// Maximum element of a 3-D vector
-// 
-inline float maxElem( Vector3 vec );
-
-// Minimum element of a 3-D vector
-// 
-inline float minElem( Vector3 vec );
-
-// Compute the sum of all elements of a 3-D vector
-// 
-inline float sum( Vector3 vec );
-
-// Compute the dot product of two 3-D vectors
-// 
-inline float dot( Vector3 vec0, Vector3 vec1 );
-
-// Compute the square of the length of a 3-D vector
-// 
-inline float lengthSqr( Vector3 vec );
-
-// Compute the length of a 3-D vector
-// 
-inline float length( Vector3 vec );
-
-// Normalize a 3-D vector
-// NOTE: 
-// The result is unpredictable when all elements of vec are at or near zero.
-// 
-inline const Vector3 normalize( Vector3 vec );
-
-// Compute cross product of two 3-D vectors
-// 
-inline const Vector3 cross( Vector3 vec0, Vector3 vec1 );
-
-// Outer product of two 3-D vectors
-// 
-inline const Matrix3 outer( Vector3 vec0, Vector3 vec1 );
-
-// Pre-multiply a row vector by a 3x3 matrix
-// NOTE: 
-// Slower than column post-multiply.
-// 
-inline const Vector3 rowMul( Vector3 vec, const Matrix3 & mat );
-
-// Cross-product matrix of a 3-D vector
-// 
-inline const Matrix3 crossMatrix( Vector3 vec );
-
-// Create cross-product matrix and multiply
-// NOTE: 
-// Faster than separately creating a cross-product matrix and multiplying.
-// 
-inline const Matrix3 crossMatrixMul( Vector3 vec, const Matrix3 & mat );
-
-// Linear interpolation between two 3-D vectors
-// NOTE: 
-// Does not clamp t between 0 and 1.
-// 
-inline const Vector3 lerp( float t, Vector3 vec0, Vector3 vec1 );
-
-// Spherical linear interpolation between two 3-D vectors
-// NOTE: 
-// The result is unpredictable if the vectors point in opposite directions.
-// Does not clamp t between 0 and 1.
-// 
-inline const Vector3 slerp( float t, Vector3 unitVec0, Vector3 unitVec1 );
-
-// Conditionally select between two 3-D vectors
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// 
-inline const Vector3 select( Vector3 vec0, Vector3 vec1, bool select1 );
-
-// Store x, y, and z elements of a 3-D vector in the first three words of a quadword.
-// The value of the fourth word (the word with the highest address) remains unchanged
-// 
-inline void storeXYZ( Vector3 vec, vec_float4 * quad );
-
-// Load four three-float 3-D vectors, stored in three quadwords
-// 
-inline void loadXYZArray( Vector3 & vec0, Vector3 & vec1, Vector3 & vec2, Vector3 & vec3, const vec_float4 * threeQuads );
-
-// Store four 3-D vectors in three quadwords
-// 
-inline void storeXYZArray( Vector3 vec0, Vector3 vec1, Vector3 vec2, Vector3 vec3, vec_float4 * threeQuads );
-
-// Store eight 3-D vectors as half-floats
-// 
-inline void storeHalfFloats( Vector3 vec0, Vector3 vec1, Vector3 vec2, Vector3 vec3, Vector3 vec4, Vector3 vec5, Vector3 vec6, Vector3 vec7, vec_ushort8 * threeQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a 3-D vector
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( Vector3 vec );
-
-// Print a 3-D vector and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( Vector3 vec, const char * name );
-
-#endif
-
-// A 4-D vector in array-of-structures format
-//
-class Vector4
-{
-    vec_float4 mVec128;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Vector4( ) { };
-
-    // Construct a 4-D vector from x, y, z, and w elements
-    // 
-    inline Vector4( float x, float y, float z, float w );
-
-    // Construct a 4-D vector from a 3-D vector and a scalar
-    // 
-    inline Vector4( Vector3 xyz, float w );
-
-    // Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0
-    // 
-    explicit inline Vector4( Vector3 vec );
-
-    // Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1
-    // 
-    explicit inline Vector4( Point3 pnt );
-
-    // Copy elements from a quaternion into a 4-D vector
-    // 
-    explicit inline Vector4( Quat quat );
-
-    // Set all elements of a 4-D vector to the same scalar value
-    // 
-    explicit inline Vector4( float scalar );
-
-    // Set vector float data in a 4-D vector
-    // 
-    explicit inline Vector4( vec_float4 vf4 );
-
-    // Get vector float data from a 4-D vector
-    // 
-    inline vec_float4 get128( ) const;
-
-    // Assign one 4-D vector to another
-    // 
-    inline Vector4 & operator =( Vector4 vec );
-
-    // Set the x, y, and z elements of a 4-D vector
-    // NOTE: 
-    // This function does not change the w element.
-    // 
-    inline Vector4 & setXYZ( Vector3 vec );
-
-    // Get the x, y, and z elements of a 4-D vector
-    // 
-    inline const Vector3 getXYZ( ) const;
-
-    // Set the x element of a 4-D vector
-    // 
-    inline Vector4 & setX( float x );
-
-    // Set the y element of a 4-D vector
-    // 
-    inline Vector4 & setY( float y );
-
-    // Set the z element of a 4-D vector
-    // 
-    inline Vector4 & setZ( float z );
-
-    // Set the w element of a 4-D vector
-    // 
-    inline Vector4 & setW( float w );
-
-    // Get the x element of a 4-D vector
-    // 
-    inline float getX( ) const;
-
-    // Get the y element of a 4-D vector
-    // 
-    inline float getY( ) const;
-
-    // Get the z element of a 4-D vector
-    // 
-    inline float getZ( ) const;
-
-    // Get the w element of a 4-D vector
-    // 
-    inline float getW( ) const;
-
-    // Set an x, y, z, or w element of a 4-D vector by index
-    // 
-    inline Vector4 & setElem( int idx, float value );
-
-    // Get an x, y, z, or w element of a 4-D vector by index
-    // 
-    inline float getElem( int idx ) const;
-
-    // Subscripting operator to set or get an element
-    // 
-    inline VecIdx operator []( int idx );
-
-    // Subscripting operator to get an element
-    // 
-    inline float operator []( int idx ) const;
-
-    // Add two 4-D vectors
-    // 
-    inline const Vector4 operator +( Vector4 vec ) const;
-
-    // Subtract a 4-D vector from another 4-D vector
-    // 
-    inline const Vector4 operator -( Vector4 vec ) const;
-
-    // Multiply a 4-D vector by a scalar
-    // 
-    inline const Vector4 operator *( float scalar ) const;
-
-    // Divide a 4-D vector by a scalar
-    // 
-    inline const Vector4 operator /( float scalar ) const;
-
-    // Perform compound assignment and addition with a 4-D vector
-    // 
-    inline Vector4 & operator +=( Vector4 vec );
-
-    // Perform compound assignment and subtraction by a 4-D vector
-    // 
-    inline Vector4 & operator -=( Vector4 vec );
-
-    // Perform compound assignment and multiplication by a scalar
-    // 
-    inline Vector4 & operator *=( float scalar );
-
-    // Perform compound assignment and division by a scalar
-    // 
-    inline Vector4 & operator /=( float scalar );
-
-    // Negate all elements of a 4-D vector
-    // 
-    inline const Vector4 operator -( ) const;
-
-    // Construct x axis
-    // 
-    static inline const Vector4 xAxis( );
-
-    // Construct y axis
-    // 
-    static inline const Vector4 yAxis( );
-
-    // Construct z axis
-    // 
-    static inline const Vector4 zAxis( );
-
-    // Construct w axis
-    // 
-    static inline const Vector4 wAxis( );
-
-};
-
-// Multiply a 4-D vector by a scalar
-// 
-inline const Vector4 operator *( float scalar, Vector4 vec );
-
-// Multiply two 4-D vectors per element
-// 
-inline const Vector4 mulPerElem( Vector4 vec0, Vector4 vec1 );
-
-// Divide two 4-D vectors per element
-// NOTE: 
-// Floating-point behavior matches standard library function divf4.
-// 
-inline const Vector4 divPerElem( Vector4 vec0, Vector4 vec1 );
-
-// Compute the reciprocal of a 4-D vector per element
-// NOTE: 
-// Floating-point behavior matches standard library function recipf4.
-// 
-inline const Vector4 recipPerElem( Vector4 vec );
-
-// Compute the square root of a 4-D vector per element
-// NOTE: 
-// Floating-point behavior matches standard library function sqrtf4.
-// 
-inline const Vector4 sqrtPerElem( Vector4 vec );
-
-// Compute the reciprocal square root of a 4-D vector per element
-// NOTE: 
-// Floating-point behavior matches standard library function rsqrtf4.
-// 
-inline const Vector4 rsqrtPerElem( Vector4 vec );
-
-// Compute the absolute value of a 4-D vector per element
-// 
-inline const Vector4 absPerElem( Vector4 vec );
-
-// Copy sign from one 4-D vector to another, per element
-// 
-inline const Vector4 copySignPerElem( Vector4 vec0, Vector4 vec1 );
-
-// Maximum of two 4-D vectors per element
-// 
-inline const Vector4 maxPerElem( Vector4 vec0, Vector4 vec1 );
-
-// Minimum of two 4-D vectors per element
-// 
-inline const Vector4 minPerElem( Vector4 vec0, Vector4 vec1 );
-
-// Maximum element of a 4-D vector
-// 
-inline float maxElem( Vector4 vec );
-
-// Minimum element of a 4-D vector
-// 
-inline float minElem( Vector4 vec );
-
-// Compute the sum of all elements of a 4-D vector
-// 
-inline float sum( Vector4 vec );
-
-// Compute the dot product of two 4-D vectors
-// 
-inline float dot( Vector4 vec0, Vector4 vec1 );
-
-// Compute the square of the length of a 4-D vector
-// 
-inline float lengthSqr( Vector4 vec );
-
-// Compute the length of a 4-D vector
-// 
-inline float length( Vector4 vec );
-
-// Normalize a 4-D vector
-// NOTE: 
-// The result is unpredictable when all elements of vec are at or near zero.
-// 
-inline const Vector4 normalize( Vector4 vec );
-
-// Outer product of two 4-D vectors
-// 
-inline const Matrix4 outer( Vector4 vec0, Vector4 vec1 );
-
-// Linear interpolation between two 4-D vectors
-// NOTE: 
-// Does not clamp t between 0 and 1.
-// 
-inline const Vector4 lerp( float t, Vector4 vec0, Vector4 vec1 );
-
-// Spherical linear interpolation between two 4-D vectors
-// NOTE: 
-// The result is unpredictable if the vectors point in opposite directions.
-// Does not clamp t between 0 and 1.
-// 
-inline const Vector4 slerp( float t, Vector4 unitVec0, Vector4 unitVec1 );
-
-// Conditionally select between two 4-D vectors
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// 
-inline const Vector4 select( Vector4 vec0, Vector4 vec1, bool select1 );
-
-// Store four 4-D vectors as half-floats
-// 
-inline void storeHalfFloats( Vector4 vec0, Vector4 vec1, Vector4 vec2, Vector4 vec3, vec_ushort8 * twoQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a 4-D vector
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( Vector4 vec );
-
-// Print a 4-D vector and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( Vector4 vec, const char * name );
-
-#endif
-
-// A 3-D point in array-of-structures format
-//
-class Point3
-{
-    vec_float4 mVec128;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Point3( ) { };
-
-    // Construct a 3-D point from x, y, and z elements
-    // 
-    inline Point3( float x, float y, float z );
-
-    // Copy elements from a 3-D vector into a 3-D point
-    // 
-    explicit inline Point3( Vector3 vec );
-
-    // Set all elements of a 3-D point to the same scalar value
-    // 
-    explicit inline Point3( float scalar );
-
-    // Set vector float data in a 3-D point
-    // 
-    explicit inline Point3( vec_float4 vf4 );
-
-    // Get vector float data from a 3-D point
-    // 
-    inline vec_float4 get128( ) const;
-
-    // Assign one 3-D point to another
-    // 
-    inline Point3 & operator =( Point3 pnt );
-
-    // Set the x element of a 3-D point
-    // 
-    inline Point3 & setX( float x );
-
-    // Set the y element of a 3-D point
-    // 
-    inline Point3 & setY( float y );
-
-    // Set the z element of a 3-D point
-    // 
-    inline Point3 & setZ( float z );
-
-    // Get the x element of a 3-D point
-    // 
-    inline float getX( ) const;
-
-    // Get the y element of a 3-D point
-    // 
-    inline float getY( ) const;
-
-    // Get the z element of a 3-D point
-    // 
-    inline float getZ( ) const;
-
-    // Set an x, y, or z element of a 3-D point by index
-    // 
-    inline Point3 & setElem( int idx, float value );
-
-    // Get an x, y, or z element of a 3-D point by index
-    // 
-    inline float getElem( int idx ) const;
-
-    // Subscripting operator to set or get an element
-    // 
-    inline VecIdx operator []( int idx );
-
-    // Subscripting operator to get an element
-    // 
-    inline float operator []( int idx ) const;
-
-    // Subtract a 3-D point from another 3-D point
-    // 
-    inline const Vector3 operator -( Point3 pnt ) const;
-
-    // Add a 3-D point to a 3-D vector
-    // 
-    inline const Point3 operator +( Vector3 vec ) const;
-
-    // Subtract a 3-D vector from a 3-D point
-    // 
-    inline const Point3 operator -( Vector3 vec ) const;
-
-    // Perform compound assignment and addition with a 3-D vector
-    // 
-    inline Point3 & operator +=( Vector3 vec );
-
-    // Perform compound assignment and subtraction by a 3-D vector
-    // 
-    inline Point3 & operator -=( Vector3 vec );
-
-};
-
-// Multiply two 3-D points per element
-// 
-inline const Point3 mulPerElem( Point3 pnt0, Point3 pnt1 );
-
-// Divide two 3-D points per element
-// NOTE: 
-// Floating-point behavior matches standard library function divf4.
-// 
-inline const Point3 divPerElem( Point3 pnt0, Point3 pnt1 );
-
-// Compute the reciprocal of a 3-D point per element
-// NOTE: 
-// Floating-point behavior matches standard library function recipf4.
-// 
-inline const Point3 recipPerElem( Point3 pnt );
-
-// Compute the square root of a 3-D point per element
-// NOTE: 
-// Floating-point behavior matches standard library function sqrtf4.
-// 
-inline const Point3 sqrtPerElem( Point3 pnt );
-
-// Compute the reciprocal square root of a 3-D point per element
-// NOTE: 
-// Floating-point behavior matches standard library function rsqrtf4.
-// 
-inline const Point3 rsqrtPerElem( Point3 pnt );
-
-// Compute the absolute value of a 3-D point per element
-// 
-inline const Point3 absPerElem( Point3 pnt );
-
-// Copy sign from one 3-D point to another, per element
-// 
-inline const Point3 copySignPerElem( Point3 pnt0, Point3 pnt1 );
-
-// Maximum of two 3-D points per element
-// 
-inline const Point3 maxPerElem( Point3 pnt0, Point3 pnt1 );
-
-// Minimum of two 3-D points per element
-// 
-inline const Point3 minPerElem( Point3 pnt0, Point3 pnt1 );
-
-// Maximum element of a 3-D point
-// 
-inline float maxElem( Point3 pnt );
-
-// Minimum element of a 3-D point
-// 
-inline float minElem( Point3 pnt );
-
-// Compute the sum of all elements of a 3-D point
-// 
-inline float sum( Point3 pnt );
-
-// Apply uniform scale to a 3-D point
-// 
-inline const Point3 scale( Point3 pnt, float scaleVal );
-
-// Apply non-uniform scale to a 3-D point
-// 
-inline const Point3 scale( Point3 pnt, Vector3 scaleVec );
-
-// Scalar projection of a 3-D point on a unit-length 3-D vector
-// 
-inline float projection( Point3 pnt, Vector3 unitVec );
-
-// Compute the square of the distance of a 3-D point from the coordinate-system origin
-// 
-inline float distSqrFromOrigin( Point3 pnt );
-
-// Compute the distance of a 3-D point from the coordinate-system origin
-// 
-inline float distFromOrigin( Point3 pnt );
-
-// Compute the square of the distance between two 3-D points
-// 
-inline float distSqr( Point3 pnt0, Point3 pnt1 );
-
-// Compute the distance between two 3-D points
-// 
-inline float dist( Point3 pnt0, Point3 pnt1 );
-
-// Linear interpolation between two 3-D points
-// NOTE: 
-// Does not clamp t between 0 and 1.
-// 
-inline const Point3 lerp( float t, Point3 pnt0, Point3 pnt1 );
-
-// Conditionally select between two 3-D points
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// 
-inline const Point3 select( Point3 pnt0, Point3 pnt1, bool select1 );
-
-// Store x, y, and z elements of a 3-D point in the first three words of a quadword.
-// The value of the fourth word (the word with the highest address) remains unchanged
-// 
-inline void storeXYZ( Point3 pnt, vec_float4 * quad );
-
-// Load four three-float 3-D points, stored in three quadwords
-// 
-inline void loadXYZArray( Point3 & pnt0, Point3 & pnt1, Point3 & pnt2, Point3 & pnt3, const vec_float4 * threeQuads );
-
-// Store four 3-D points in three quadwords
-// 
-inline void storeXYZArray( Point3 pnt0, Point3 pnt1, Point3 pnt2, Point3 pnt3, vec_float4 * threeQuads );
-
-// Store eight 3-D points as half-floats
-// 
-inline void storeHalfFloats( Point3 pnt0, Point3 pnt1, Point3 pnt2, Point3 pnt3, Point3 pnt4, Point3 pnt5, Point3 pnt6, Point3 pnt7, vec_ushort8 * threeQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a 3-D point
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( Point3 pnt );
-
-// Print a 3-D point and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( Point3 pnt, const char * name );
-
-#endif
-
-// A quaternion in array-of-structures format
-//
-class Quat
-{
-    vec_float4 mVec128;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Quat( ) { };
-
-    // Construct a quaternion from x, y, z, and w elements
-    // 
-    inline Quat( float x, float y, float z, float w );
-
-    // Construct a quaternion from a 3-D vector and a scalar
-    // 
-    inline Quat( Vector3 xyz, float w );
-
-    // Copy elements from a 4-D vector into a quaternion
-    // 
-    explicit inline Quat( Vector4 vec );
-
-    // Convert a rotation matrix to a unit-length quaternion
-    // 
-    explicit inline Quat( const Matrix3 & rotMat );
-
-    // Set all elements of a quaternion to the same scalar value
-    // 
-    explicit inline Quat( float scalar );
-
-    // Set vector float data in a quaternion
-    // 
-    explicit inline Quat( vec_float4 vf4 );
-
-    // Get vector float data from a quaternion
-    // 
-    inline vec_float4 get128( ) const;
-
-    // Assign one quaternion to another
-    // 
-    inline Quat & operator =( Quat quat );
-
-    // Set the x, y, and z elements of a quaternion
-    // NOTE: 
-    // This function does not change the w element.
-    // 
-    inline Quat & setXYZ( Vector3 vec );
-
-    // Get the x, y, and z elements of a quaternion
-    // 
-    inline const Vector3 getXYZ( ) const;
-
-    // Set the x element of a quaternion
-    // 
-    inline Quat & setX( float x );
-
-    // Set the y element of a quaternion
-    // 
-    inline Quat & setY( float y );
-
-    // Set the z element of a quaternion
-    // 
-    inline Quat & setZ( float z );
-
-    // Set the w element of a quaternion
-    // 
-    inline Quat & setW( float w );
-
-    // Get the x element of a quaternion
-    // 
-    inline float getX( ) const;
-
-    // Get the y element of a quaternion
-    // 
-    inline float getY( ) const;
-
-    // Get the z element of a quaternion
-    // 
-    inline float getZ( ) const;
-
-    // Get the w element of a quaternion
-    // 
-    inline float getW( ) const;
-
-    // Set an x, y, z, or w element of a quaternion by index
-    // 
-    inline Quat & setElem( int idx, float value );
-
-    // Get an x, y, z, or w element of a quaternion by index
-    // 
-    inline float getElem( int idx ) const;
-
-    // Subscripting operator to set or get an element
-    // 
-    inline VecIdx operator []( int idx );
-
-    // Subscripting operator to get an element
-    // 
-    inline float operator []( int idx ) const;
-
-    // Add two quaternions
-    // 
-    inline const Quat operator +( Quat quat ) const;
-
-    // Subtract a quaternion from another quaternion
-    // 
-    inline const Quat operator -( Quat quat ) const;
-
-    // Multiply two quaternions
-    // 
-    inline const Quat operator *( Quat quat ) const;
-
-    // Multiply a quaternion by a scalar
-    // 
-    inline const Quat operator *( float scalar ) const;
-
-    // Divide a quaternion by a scalar
-    // 
-    inline const Quat operator /( float scalar ) const;
-
-    // Perform compound assignment and addition with a quaternion
-    // 
-    inline Quat & operator +=( Quat quat );
-
-    // Perform compound assignment and subtraction by a quaternion
-    // 
-    inline Quat & operator -=( Quat quat );
-
-    // Perform compound assignment and multiplication by a quaternion
-    // 
-    inline Quat & operator *=( Quat quat );
-
-    // Perform compound assignment and multiplication by a scalar
-    // 
-    inline Quat & operator *=( float scalar );
-
-    // Perform compound assignment and division by a scalar
-    // 
-    inline Quat & operator /=( float scalar );
-
-    // Negate all elements of a quaternion
-    // 
-    inline const Quat operator -( ) const;
-
-    // Construct an identity quaternion
-    // 
-    static inline const Quat identity( );
-
-    // Construct a quaternion to rotate between two unit-length 3-D vectors
-    // NOTE: 
-    // The result is unpredictable if unitVec0 and unitVec1 point in opposite directions.
-    // 
-    static inline const Quat rotation( Vector3 unitVec0, Vector3 unitVec1 );
-
-    // Construct a quaternion to rotate around a unit-length 3-D vector
-    // 
-    static inline const Quat rotation( float radians, Vector3 unitVec );
-
-    // Construct a quaternion to rotate around the x axis
-    // 
-    static inline const Quat rotationX( float radians );
-
-    // Construct a quaternion to rotate around the y axis
-    // 
-    static inline const Quat rotationY( float radians );
-
-    // Construct a quaternion to rotate around the z axis
-    // 
-    static inline const Quat rotationZ( float radians );
-
-};
-
-// Multiply a quaternion by a scalar
-// 
-inline const Quat operator *( float scalar, Quat quat );
-
-// Compute the conjugate of a quaternion
-// 
-inline const Quat conj( Quat quat );
-
-// Use a unit-length quaternion to rotate a 3-D vector
-// 
-inline const Vector3 rotate( Quat unitQuat, Vector3 vec );
-
-// Compute the dot product of two quaternions
-// 
-inline float dot( Quat quat0, Quat quat1 );
-
-// Compute the norm of a quaternion
-// 
-inline float norm( Quat quat );
-
-// Compute the length of a quaternion
-// 
-inline float length( Quat quat );
-
-// Normalize a quaternion
-// NOTE: 
-// The result is unpredictable when all elements of quat are at or near zero.
-// 
-inline const Quat normalize( Quat quat );
-
-// Linear interpolation between two quaternions
-// NOTE: 
-// Does not clamp t between 0 and 1.
-// 
-inline const Quat lerp( float t, Quat quat0, Quat quat1 );
-
-// Spherical linear interpolation between two quaternions
-// NOTE: 
-// Interpolates along the shortest path between orientations.
-// Does not clamp t between 0 and 1.
-// 
-inline const Quat slerp( float t, Quat unitQuat0, Quat unitQuat1 );
-
-// Spherical quadrangle interpolation
-// 
-inline const Quat squad( float t, Quat unitQuat0, Quat unitQuat1, Quat unitQuat2, Quat unitQuat3 );
-
-// Conditionally select between two quaternions
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// 
-inline const Quat select( Quat quat0, Quat quat1, bool select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a quaternion
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( Quat quat );
-
-// Print a quaternion and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( Quat quat, const char * name );
-
-#endif
-
-// A 3x3 matrix in array-of-structures format
-//
-class Matrix3
-{
-    Vector3 mCol0;
-    Vector3 mCol1;
-    Vector3 mCol2;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Matrix3( ) { };
-
-    // Copy a 3x3 matrix
-    // 
-    inline Matrix3( const Matrix3 & mat );
-
-    // Construct a 3x3 matrix containing the specified columns
-    // 
-    inline Matrix3( Vector3 col0, Vector3 col1, Vector3 col2 );
-
-    // Construct a 3x3 rotation matrix from a unit-length quaternion
-    // 
-    explicit inline Matrix3( Quat unitQuat );
-
-    // Set all elements of a 3x3 matrix to the same scalar value
-    // 
-    explicit inline Matrix3( float scalar );
-
-    // Assign one 3x3 matrix to another
-    // 
-    inline Matrix3 & operator =( const Matrix3 & mat );
-
-    // Set column 0 of a 3x3 matrix
-    // 
-    inline Matrix3 & setCol0( Vector3 col0 );
-
-    // Set column 1 of a 3x3 matrix
-    // 
-    inline Matrix3 & setCol1( Vector3 col1 );
-
-    // Set column 2 of a 3x3 matrix
-    // 
-    inline Matrix3 & setCol2( Vector3 col2 );
-
-    // Get column 0 of a 3x3 matrix
-    // 
-    inline const Vector3 getCol0( ) const;
-
-    // Get column 1 of a 3x3 matrix
-    // 
-    inline const Vector3 getCol1( ) const;
-
-    // Get column 2 of a 3x3 matrix
-    // 
-    inline const Vector3 getCol2( ) const;
-
-    // Set the column of a 3x3 matrix referred to by the specified index
-    // 
-    inline Matrix3 & setCol( int col, Vector3 vec );
-
-    // Set the row of a 3x3 matrix referred to by the specified index
-    // 
-    inline Matrix3 & setRow( int row, Vector3 vec );
-
-    // Get the column of a 3x3 matrix referred to by the specified index
-    // 
-    inline const Vector3 getCol( int col ) const;
-
-    // Get the row of a 3x3 matrix referred to by the specified index
-    // 
-    inline const Vector3 getRow( int row ) const;
-
-    // Subscripting operator to set or get a column
-    // 
-    inline Vector3 & operator []( int col );
-
-    // Subscripting operator to get a column
-    // 
-    inline const Vector3 operator []( int col ) const;
-
-    // Set the element of a 3x3 matrix referred to by column and row indices
-    // 
-    inline Matrix3 & setElem( int col, int row, float val );
-
-    // Get the element of a 3x3 matrix referred to by column and row indices
-    // 
-    inline float getElem( int col, int row ) const;
-
-    // Add two 3x3 matrices
-    // 
-    inline const Matrix3 operator +( const Matrix3 & mat ) const;
-
-    // Subtract a 3x3 matrix from another 3x3 matrix
-    // 
-    inline const Matrix3 operator -( const Matrix3 & mat ) const;
-
-    // Negate all elements of a 3x3 matrix
-    // 
-    inline const Matrix3 operator -( ) const;
-
-    // Multiply a 3x3 matrix by a scalar
-    // 
-    inline const Matrix3 operator *( float scalar ) const;
-
-    // Multiply a 3x3 matrix by a 3-D vector
-    // 
-    inline const Vector3 operator *( Vector3 vec ) const;
-
-    // Multiply two 3x3 matrices
-    // 
-    inline const Matrix3 operator *( const Matrix3 & mat ) const;
-
-    // Perform compound assignment and addition with a 3x3 matrix
-    // 
-    inline Matrix3 & operator +=( const Matrix3 & mat );
-
-    // Perform compound assignment and subtraction by a 3x3 matrix
-    // 
-    inline Matrix3 & operator -=( const Matrix3 & mat );
-
-    // Perform compound assignment and multiplication by a scalar
-    // 
-    inline Matrix3 & operator *=( float scalar );
-
-    // Perform compound assignment and multiplication by a 3x3 matrix
-    // 
-    inline Matrix3 & operator *=( const Matrix3 & mat );
-
-    // Construct an identity 3x3 matrix
-    // 
-    static inline const Matrix3 identity( );
-
-    // Construct a 3x3 matrix to rotate around the x axis
-    // 
-    static inline const Matrix3 rotationX( float radians );
-
-    // Construct a 3x3 matrix to rotate around the y axis
-    // 
-    static inline const Matrix3 rotationY( float radians );
-
-    // Construct a 3x3 matrix to rotate around the z axis
-    // 
-    static inline const Matrix3 rotationZ( float radians );
-
-    // Construct a 3x3 matrix to rotate around the x, y, and z axes
-    // 
-    static inline const Matrix3 rotationZYX( Vector3 radiansXYZ );
-
-    // Construct a 3x3 matrix to rotate around a unit-length 3-D vector
-    // 
-    static inline const Matrix3 rotation( float radians, Vector3 unitVec );
-
-    // Construct a rotation matrix from a unit-length quaternion
-    // 
-    static inline const Matrix3 rotation( Quat unitQuat );
-
-    // Construct a 3x3 matrix to perform scaling
-    // 
-    static inline const Matrix3 scale( Vector3 scaleVec );
-
-};
-// Multiply a 3x3 matrix by a scalar
-// 
-inline const Matrix3 operator *( float scalar, const Matrix3 & mat );
-
-// Append (post-multiply) a scale transformation to a 3x3 matrix
-// NOTE: 
-// Faster than creating and multiplying a scale transformation matrix.
-// 
-inline const Matrix3 appendScale( const Matrix3 & mat, Vector3 scaleVec );
-
-// Prepend (pre-multiply) a scale transformation to a 3x3 matrix
-// NOTE: 
-// Faster than creating and multiplying a scale transformation matrix.
-// 
-inline const Matrix3 prependScale( Vector3 scaleVec, const Matrix3 & mat );
-
-// Multiply two 3x3 matrices per element
-// 
-inline const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 );
-
-// Compute the absolute value of a 3x3 matrix per element
-// 
-inline const Matrix3 absPerElem( const Matrix3 & mat );
-
-// Transpose of a 3x3 matrix
-// 
-inline const Matrix3 transpose( const Matrix3 & mat );
-
-// Compute the inverse of a 3x3 matrix
-// NOTE: 
-// Result is unpredictable when the determinant of mat is equal to or near 0.
-// 
-inline const Matrix3 inverse( const Matrix3 & mat );
-
-// Determinant of a 3x3 matrix
-// 
-inline float determinant( const Matrix3 & mat );
-
-// Conditionally select between two 3x3 matrices
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// 
-inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, bool select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a 3x3 matrix
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Matrix3 & mat );
-
-// Print a 3x3 matrix and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Matrix3 & mat, const char * name );
-
-#endif
-
-// A 4x4 matrix in array-of-structures format
-//
-class Matrix4
-{
-    Vector4 mCol0;
-    Vector4 mCol1;
-    Vector4 mCol2;
-    Vector4 mCol3;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Matrix4( ) { };
-
-    // Copy a 4x4 matrix
-    // 
-    inline Matrix4( const Matrix4 & mat );
-
-    // Construct a 4x4 matrix containing the specified columns
-    // 
-    inline Matrix4( Vector4 col0, Vector4 col1, Vector4 col2, Vector4 col3 );
-
-    // Construct a 4x4 matrix from a 3x4 transformation matrix
-    // 
-    explicit inline Matrix4( const Transform3 & mat );
-
-    // Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector
-    // 
-    inline Matrix4( const Matrix3 & mat, Vector3 translateVec );
-
-    // Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector
-    // 
-    inline Matrix4( Quat unitQuat, Vector3 translateVec );
-
-    // Set all elements of a 4x4 matrix to the same scalar value
-    // 
-    explicit inline Matrix4( float scalar );
-
-    // Assign one 4x4 matrix to another
-    // 
-    inline Matrix4 & operator =( const Matrix4 & mat );
-
-    // Set the upper-left 3x3 submatrix
-    // NOTE: 
-    // This function does not change the bottom row elements.
-    // 
-    inline Matrix4 & setUpper3x3( const Matrix3 & mat3 );
-
-    // Get the upper-left 3x3 submatrix of a 4x4 matrix
-    // 
-    inline const Matrix3 getUpper3x3( ) const;
-
-    // Set translation component
-    // NOTE: 
-    // This function does not change the bottom row elements.
-    // 
-    inline Matrix4 & setTranslation( Vector3 translateVec );
-
-    // Get the translation component of a 4x4 matrix
-    // 
-    inline const Vector3 getTranslation( ) const;
-
-    // Set column 0 of a 4x4 matrix
-    // 
-    inline Matrix4 & setCol0( Vector4 col0 );
-
-    // Set column 1 of a 4x4 matrix
-    // 
-    inline Matrix4 & setCol1( Vector4 col1 );
-
-    // Set column 2 of a 4x4 matrix
-    // 
-    inline Matrix4 & setCol2( Vector4 col2 );
-
-    // Set column 3 of a 4x4 matrix
-    // 
-    inline Matrix4 & setCol3( Vector4 col3 );
-
-    // Get column 0 of a 4x4 matrix
-    // 
-    inline const Vector4 getCol0( ) const;
-
-    // Get column 1 of a 4x4 matrix
-    // 
-    inline const Vector4 getCol1( ) const;
-
-    // Get column 2 of a 4x4 matrix
-    // 
-    inline const Vector4 getCol2( ) const;
-
-    // Get column 3 of a 4x4 matrix
-    // 
-    inline const Vector4 getCol3( ) const;
-
-    // Set the column of a 4x4 matrix referred to by the specified index
-    // 
-    inline Matrix4 & setCol( int col, Vector4 vec );
-
-    // Set the row of a 4x4 matrix referred to by the specified index
-    // 
-    inline Matrix4 & setRow( int row, Vector4 vec );
-
-    // Get the column of a 4x4 matrix referred to by the specified index
-    // 
-    inline const Vector4 getCol( int col ) const;
-
-    // Get the row of a 4x4 matrix referred to by the specified index
-    // 
-    inline const Vector4 getRow( int row ) const;
-
-    // Subscripting operator to set or get a column
-    // 
-    inline Vector4 & operator []( int col );
-
-    // Subscripting operator to get a column
-    // 
-    inline const Vector4 operator []( int col ) const;
-
-    // Set the element of a 4x4 matrix referred to by column and row indices
-    // 
-    inline Matrix4 & setElem( int col, int row, float val );
-
-    // Get the element of a 4x4 matrix referred to by column and row indices
-    // 
-    inline float getElem( int col, int row ) const;
-
-    // Add two 4x4 matrices
-    // 
-    inline const Matrix4 operator +( const Matrix4 & mat ) const;
-
-    // Subtract a 4x4 matrix from another 4x4 matrix
-    // 
-    inline const Matrix4 operator -( const Matrix4 & mat ) const;
-
-    // Negate all elements of a 4x4 matrix
-    // 
-    inline const Matrix4 operator -( ) const;
-
-    // Multiply a 4x4 matrix by a scalar
-    // 
-    inline const Matrix4 operator *( float scalar ) const;
-
-    // Multiply a 4x4 matrix by a 4-D vector
-    // 
-    inline const Vector4 operator *( Vector4 vec ) const;
-
-    // Multiply a 4x4 matrix by a 3-D vector
-    // 
-    inline const Vector4 operator *( Vector3 vec ) const;
-
-    // Multiply a 4x4 matrix by a 3-D point
-    // 
-    inline const Vector4 operator *( Point3 pnt ) const;
-
-    // Multiply two 4x4 matrices
-    // 
-    inline const Matrix4 operator *( const Matrix4 & mat ) const;
-
-    // Multiply a 4x4 matrix by a 3x4 transformation matrix
-    // 
-    inline const Matrix4 operator *( const Transform3 & tfrm ) const;
-
-    // Perform compound assignment and addition with a 4x4 matrix
-    // 
-    inline Matrix4 & operator +=( const Matrix4 & mat );
-
-    // Perform compound assignment and subtraction by a 4x4 matrix
-    // 
-    inline Matrix4 & operator -=( const Matrix4 & mat );
-
-    // Perform compound assignment and multiplication by a scalar
-    // 
-    inline Matrix4 & operator *=( float scalar );
-
-    // Perform compound assignment and multiplication by a 4x4 matrix
-    // 
-    inline Matrix4 & operator *=( const Matrix4 & mat );
-
-    // Perform compound assignment and multiplication by a 3x4 transformation matrix
-    // 
-    inline Matrix4 & operator *=( const Transform3 & tfrm );
-
-    // Construct an identity 4x4 matrix
-    // 
-    static inline const Matrix4 identity( );
-
-    // Construct a 4x4 matrix to rotate around the x axis
-    // 
-    static inline const Matrix4 rotationX( float radians );
-
-    // Construct a 4x4 matrix to rotate around the y axis
-    // 
-    static inline const Matrix4 rotationY( float radians );
-
-    // Construct a 4x4 matrix to rotate around the z axis
-    // 
-    static inline const Matrix4 rotationZ( float radians );
-
-    // Construct a 4x4 matrix to rotate around the x, y, and z axes
-    // 
-    static inline const Matrix4 rotationZYX( Vector3 radiansXYZ );
-
-    // Construct a 4x4 matrix to rotate around a unit-length 3-D vector
-    // 
-    static inline const Matrix4 rotation( float radians, Vector3 unitVec );
-
-    // Construct a rotation matrix from a unit-length quaternion
-    // 
-    static inline const Matrix4 rotation( Quat unitQuat );
-
-    // Construct a 4x4 matrix to perform scaling
-    // 
-    static inline const Matrix4 scale( Vector3 scaleVec );
-
-    // Construct a 4x4 matrix to perform translation
-    // 
-    static inline const Matrix4 translation( Vector3 translateVec );
-
-    // Construct viewing matrix based on eye position, position looked at, and up direction
-    // 
-    static inline const Matrix4 lookAt( Point3 eyePos, Point3 lookAtPos, Vector3 upVec );
-
-    // Construct a perspective projection matrix
-    // 
-    static inline const Matrix4 perspective( float fovyRadians, float aspect, float zNear, float zFar );
-
-    // Construct a perspective projection matrix based on frustum
-    // 
-    static inline const Matrix4 frustum( float left, float right, float bottom, float top, float zNear, float zFar );
-
-    // Construct an orthographic projection matrix
-    // 
-    static inline const Matrix4 orthographic( float left, float right, float bottom, float top, float zNear, float zFar );
-
-};
-// Multiply a 4x4 matrix by a scalar
-// 
-inline const Matrix4 operator *( float scalar, const Matrix4 & mat );
-
-// Append (post-multiply) a scale transformation to a 4x4 matrix
-// NOTE: 
-// Faster than creating and multiplying a scale transformation matrix.
-// 
-inline const Matrix4 appendScale( const Matrix4 & mat, Vector3 scaleVec );
-
-// Prepend (pre-multiply) a scale transformation to a 4x4 matrix
-// NOTE: 
-// Faster than creating and multiplying a scale transformation matrix.
-// 
-inline const Matrix4 prependScale( Vector3 scaleVec, const Matrix4 & mat );
-
-// Multiply two 4x4 matrices per element
-// 
-inline const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 );
-
-// Compute the absolute value of a 4x4 matrix per element
-// 
-inline const Matrix4 absPerElem( const Matrix4 & mat );
-
-// Transpose of a 4x4 matrix
-// 
-inline const Matrix4 transpose( const Matrix4 & mat );
-
-// Compute the inverse of a 4x4 matrix
-// NOTE: 
-// Result is unpredictable when the determinant of mat is equal to or near 0.
-// 
-inline const Matrix4 inverse( const Matrix4 & mat );
-
-// Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix
-// NOTE: 
-// This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.  The result is unpredictable when the determinant of mat is equal to or near 0.
-// 
-inline const Matrix4 affineInverse( const Matrix4 & mat );
-
-// Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix
-// NOTE: 
-// This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.
-// 
-inline const Matrix4 orthoInverse( const Matrix4 & mat );
-
-// Determinant of a 4x4 matrix
-// 
-inline float determinant( const Matrix4 & mat );
-
-// Conditionally select between two 4x4 matrices
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// 
-inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, bool select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a 4x4 matrix
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Matrix4 & mat );
-
-// Print a 4x4 matrix and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Matrix4 & mat, const char * name );
-
-#endif
-
-// A 3x4 transformation matrix in array-of-structures format
-//
-class Transform3
-{
-    Vector3 mCol0;
-    Vector3 mCol1;
-    Vector3 mCol2;
-    Vector3 mCol3;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Transform3( ) { };
-
-    // Copy a 3x4 transformation matrix
-    // 
-    inline Transform3( const Transform3 & tfrm );
-
-    // Construct a 3x4 transformation matrix containing the specified columns
-    // 
-    inline Transform3( Vector3 col0, Vector3 col1, Vector3 col2, Vector3 col3 );
-
-    // Construct a 3x4 transformation matrix from a 3x3 matrix and a 3-D vector
-    // 
-    inline Transform3( const Matrix3 & tfrm, Vector3 translateVec );
-
-    // Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector
-    // 
-    inline Transform3( Quat unitQuat, Vector3 translateVec );
-
-    // Set all elements of a 3x4 transformation matrix to the same scalar value
-    // 
-    explicit inline Transform3( float scalar );
-
-    // Assign one 3x4 transformation matrix to another
-    // 
-    inline Transform3 & operator =( const Transform3 & tfrm );
-
-    // Set the upper-left 3x3 submatrix
-    // 
-    inline Transform3 & setUpper3x3( const Matrix3 & mat3 );
-
-    // Get the upper-left 3x3 submatrix of a 3x4 transformation matrix
-    // 
-    inline const Matrix3 getUpper3x3( ) const;
-
-    // Set translation component
-    // 
-    inline Transform3 & setTranslation( Vector3 translateVec );
-
-    // Get the translation component of a 3x4 transformation matrix
-    // 
-    inline const Vector3 getTranslation( ) const;
-
-    // Set column 0 of a 3x4 transformation matrix
-    // 
-    inline Transform3 & setCol0( Vector3 col0 );
-
-    // Set column 1 of a 3x4 transformation matrix
-    // 
-    inline Transform3 & setCol1( Vector3 col1 );
-
-    // Set column 2 of a 3x4 transformation matrix
-    // 
-    inline Transform3 & setCol2( Vector3 col2 );
-
-    // Set column 3 of a 3x4 transformation matrix
-    // 
-    inline Transform3 & setCol3( Vector3 col3 );
-
-    // Get column 0 of a 3x4 transformation matrix
-    // 
-    inline const Vector3 getCol0( ) const;
-
-    // Get column 1 of a 3x4 transformation matrix
-    // 
-    inline const Vector3 getCol1( ) const;
-
-    // Get column 2 of a 3x4 transformation matrix
-    // 
-    inline const Vector3 getCol2( ) const;
-
-    // Get column 3 of a 3x4 transformation matrix
-    // 
-    inline const Vector3 getCol3( ) const;
-
-    // Set the column of a 3x4 transformation matrix referred to by the specified index
-    // 
-    inline Transform3 & setCol( int col, Vector3 vec );
-
-    // Set the row of a 3x4 transformation matrix referred to by the specified index
-    // 
-    inline Transform3 & setRow( int row, Vector4 vec );
-
-    // Get the column of a 3x4 transformation matrix referred to by the specified index
-    // 
-    inline const Vector3 getCol( int col ) const;
-
-    // Get the row of a 3x4 transformation matrix referred to by the specified index
-    // 
-    inline const Vector4 getRow( int row ) const;
-
-    // Subscripting operator to set or get a column
-    // 
-    inline Vector3 & operator []( int col );
-
-    // Subscripting operator to get a column
-    // 
-    inline const Vector3 operator []( int col ) const;
-
-    // Set the element of a 3x4 transformation matrix referred to by column and row indices
-    // 
-    inline Transform3 & setElem( int col, int row, float val );
-
-    // Get the element of a 3x4 transformation matrix referred to by column and row indices
-    // 
-    inline float getElem( int col, int row ) const;
-
-    // Multiply a 3x4 transformation matrix by a 3-D vector
-    // 
-    inline const Vector3 operator *( Vector3 vec ) const;
-
-    // Multiply a 3x4 transformation matrix by a 3-D point
-    // 
-    inline const Point3 operator *( Point3 pnt ) const;
-
-    // Multiply two 3x4 transformation matrices
-    // 
-    inline const Transform3 operator *( const Transform3 & tfrm ) const;
-
-    // Perform compound assignment and multiplication by a 3x4 transformation matrix
-    // 
-    inline Transform3 & operator *=( const Transform3 & tfrm );
-
-    // Construct an identity 3x4 transformation matrix
-    // 
-    static inline const Transform3 identity( );
-
-    // Construct a 3x4 transformation matrix to rotate around the x axis
-    // 
-    static inline const Transform3 rotationX( float radians );
-
-    // Construct a 3x4 transformation matrix to rotate around the y axis
-    // 
-    static inline const Transform3 rotationY( float radians );
-
-    // Construct a 3x4 transformation matrix to rotate around the z axis
-    // 
-    static inline const Transform3 rotationZ( float radians );
-
-    // Construct a 3x4 transformation matrix to rotate around the x, y, and z axes
-    // 
-    static inline const Transform3 rotationZYX( Vector3 radiansXYZ );
-
-    // Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector
-    // 
-    static inline const Transform3 rotation( float radians, Vector3 unitVec );
-
-    // Construct a rotation matrix from a unit-length quaternion
-    // 
-    static inline const Transform3 rotation( Quat unitQuat );
-
-    // Construct a 3x4 transformation matrix to perform scaling
-    // 
-    static inline const Transform3 scale( Vector3 scaleVec );
-
-    // Construct a 3x4 transformation matrix to perform translation
-    // 
-    static inline const Transform3 translation( Vector3 translateVec );
-
-};
-// Append (post-multiply) a scale transformation to a 3x4 transformation matrix
-// NOTE: 
-// Faster than creating and multiplying a scale transformation matrix.
-// 
-inline const Transform3 appendScale( const Transform3 & tfrm, Vector3 scaleVec );
-
-// Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix
-// NOTE: 
-// Faster than creating and multiplying a scale transformation matrix.
-// 
-inline const Transform3 prependScale( Vector3 scaleVec, const Transform3 & tfrm );
-
-// Multiply two 3x4 transformation matrices per element
-// 
-inline const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 );
-
-// Compute the absolute value of a 3x4 transformation matrix per element
-// 
-inline const Transform3 absPerElem( const Transform3 & tfrm );
-
-// Inverse of a 3x4 transformation matrix
-// NOTE: 
-// Result is unpredictable when the determinant of the left 3x3 submatrix is equal to or near 0.
-// 
-inline const Transform3 inverse( const Transform3 & tfrm );
-
-// Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix
-// NOTE: 
-// This can be used to achieve better performance than a general inverse when the specified 3x4 transformation matrix meets the given restrictions.
-// 
-inline const Transform3 orthoInverse( const Transform3 & tfrm );
-
-// Conditionally select between two 3x4 transformation matrices
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// 
-inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, bool select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a 3x4 transformation matrix
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Transform3 & tfrm );
-
-// Print a 3x4 transformation matrix and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Transform3 & tfrm, const char * name );
-
-#endif
-
-} // namespace Aos
-} // namespace Vectormath
-
-#include "vec_aos.h"
-#include "quat_aos.h"
-#include "mat_aos.h"
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_AOS_CPP_SPU_H
+#define _VECTORMATH_AOS_CPP_SPU_H
+
+#include <math.h>
+#include <spu_intrinsics.h>
+#include "floatInVec.h"
+#include "boolInVec.h"
+#include "vecidx_aos.h"
+#include <stdio.h>
+
+#ifdef _VECTORMATH_DEBUG
+#endif
+
+namespace Vectormath {
+
+namespace Aos {
+
+//-----------------------------------------------------------------------------
+// Forward Declarations
+// Lets the declarations below name these types before their full definitions (e.g. Vector3 has a constructor taking a Point3, and outer() returns a Matrix3).
+
+class Vector3;
+class Vector4;
+class Point3;
+class Quat;
+class Matrix3;
+class Matrix4;
+class Transform3;
+
+// A 3-D vector in array-of-structures format
+// The only data member is one vec_float4 (mVec128); get128() presumably exposes it — confirm in the implementation.
+class Vector3
+{
+    vec_float4 mVec128;
+
+public:
+    // Default constructor; does no initialization
+    // NOTE: the body is empty, so mVec128 is not given a value here.
+    inline Vector3( ) { };
+
+    // Construct a 3-D vector from x, y, and z elements
+    // 
+    inline Vector3( float x, float y, float z );
+
+    // Copy elements from a 3-D point into a 3-D vector
+    // Declared explicit, so this constructor is never used for an implicit Point3-to-Vector3 conversion.
+    explicit inline Vector3( Point3 pnt );
+
+    // Set all elements of a 3-D vector to the same scalar value
+    // Declared explicit, so this constructor is never used for an implicit float-to-Vector3 conversion.
+    explicit inline Vector3( float scalar );
+
+    // Set vector float data in a 3-D vector
+    // Declared explicit, so this constructor is never used for an implicit vec_float4-to-Vector3 conversion.
+    explicit inline Vector3( vec_float4 vf4 );
+
+    // Get vector float data from a 3-D vector
+    // Const member; returns a vec_float4 by value.
+    inline vec_float4 get128( ) const;
+
+    // Assign one 3-D vector to another
+    // Takes vec by value and returns a Vector3 reference (presumably *this — confirm in the implementation).
+    inline Vector3 & operator =( Vector3 vec );
+
+    // Set the x element of a 3-D vector
+    // Returns a Vector3 reference (presumably *this, allowing chained setters — confirm).
+    inline Vector3 & setX( float x );
+
+    // Set the y element of a 3-D vector
+    // Returns a Vector3 reference, like setX.
+    inline Vector3 & setY( float y );
+
+    // Set the z element of a 3-D vector
+    // Returns a Vector3 reference, like setX.
+    inline Vector3 & setZ( float z );
+
+    // Get the x element of a 3-D vector
+    // 
+    inline float getX( ) const;
+
+    // Get the y element of a 3-D vector
+    // 
+    inline float getY( ) const;
+
+    // Get the z element of a 3-D vector
+    // 
+    inline float getZ( ) const;
+
+    // Set an x, y, or z element of a 3-D vector by index
+    // NOTE(review): no range check on idx is visible in this declaration; presumably 0, 1, 2 select x, y, z — confirm.
+    inline Vector3 & setElem( int idx, float value );
+
+    // Get an x, y, or z element of a 3-D vector by index
+    // NOTE(review): no range check on idx is visible in this declaration; presumably 0, 1, 2 select x, y, z — confirm.
+    inline float getElem( int idx ) const;
+
+    // Subscripting operator to set or get an element
+    // Returns a VecIdx object by value rather than a float reference (VecIdx presumably comes from vecidx_aos.h — confirm).
+    inline VecIdx operator []( int idx );
+
+    // Subscripting operator to get an element
+    // Const overload: returns the element as a float by value.
+    inline float operator []( int idx ) const;
+
+    // Add two 3-D vectors
+    // Const member: returns a new Vector3 by value.
+    inline const Vector3 operator +( Vector3 vec ) const;
+
+    // Subtract a 3-D vector from another 3-D vector
+    // Const member: returns a new Vector3 by value.
+    inline const Vector3 operator -( Vector3 vec ) const;
+
+    // Add a 3-D vector to a 3-D point
+    // The result type is Point3, not Vector3.
+    inline const Point3 operator +( Point3 pnt ) const;
+
+    // Multiply a 3-D vector by a scalar
+    // Member form with the scalar as the right-hand operand (vec * scalar).
+    inline const Vector3 operator *( float scalar ) const;
+
+    // Divide a 3-D vector by a scalar
+    // NOTE(review): behavior when scalar is 0 is not stated in this header.
+    inline const Vector3 operator /( float scalar ) const;
+
+    // Perform compound assignment and addition with a 3-D vector
+    // Returns a Vector3 reference (presumably *this — confirm).
+    inline Vector3 & operator +=( Vector3 vec );
+
+    // Perform compound assignment and subtraction by a 3-D vector
+    // Returns a Vector3 reference (presumably *this — confirm).
+    inline Vector3 & operator -=( Vector3 vec );
+
+    // Perform compound assignment and multiplication by a scalar
+    // Returns a Vector3 reference (presumably *this — confirm).
+    inline Vector3 & operator *=( float scalar );
+
+    // Perform compound assignment and division by a scalar
+    // Returns a Vector3 reference (presumably *this — confirm); behavior for scalar == 0 is not stated in this header.
+    inline Vector3 & operator /=( float scalar );
+
+    // Negate all elements of a 3-D vector
+    // Unary minus; const member returning a new Vector3 by value.
+    inline const Vector3 operator -( ) const;
+
+    // Construct x axis
+    // Static member, callable without a Vector3 instance. NOTE(review): presumably (1, 0, 0) — confirm.
+    static inline const Vector3 xAxis( );
+
+    // Construct y axis
+    // Static member, callable without a Vector3 instance. NOTE(review): presumably (0, 1, 0) — confirm.
+    static inline const Vector3 yAxis( );
+
+    // Construct z axis
+    // Static member, callable without a Vector3 instance. NOTE(review): presumably (0, 0, 1) — confirm.
+    static inline const Vector3 zAxis( );
+
+};
+
+// Multiply a 3-D vector by a scalar
+// Non-member overload so the scalar can be the left-hand operand (scalar * vec).
+inline const Vector3 operator *( float scalar, Vector3 vec );
+
+// Multiply two 3-D vectors per element
+// 
+inline const Vector3 mulPerElem( Vector3 vec0, Vector3 vec1 );
+
+// Divide two 3-D vectors per element
+// NOTE: 
+// Floating-point behavior matches standard library function divf4.
+// NOTE(review): presumably vec0 is the dividend and vec1 the divisor — confirm in the implementation.
+inline const Vector3 divPerElem( Vector3 vec0, Vector3 vec1 );
+
+// Compute the reciprocal of a 3-D vector per element
+// NOTE: 
+// Floating-point behavior matches standard library function recipf4.
+// 
+inline const Vector3 recipPerElem( Vector3 vec );
+
+// Compute the square root of a 3-D vector per element
+// NOTE: 
+// Floating-point behavior matches standard library function sqrtf4.
+// 
+inline const Vector3 sqrtPerElem( Vector3 vec );
+
+// Compute the reciprocal square root of a 3-D vector per element
+// NOTE: 
+// Floating-point behavior matches standard library function rsqrtf4.
+// 
+inline const Vector3 rsqrtPerElem( Vector3 vec );
+
+// Compute the absolute value of a 3-D vector per element
+// 
+inline const Vector3 absPerElem( Vector3 vec );
+
+// Copy sign from one 3-D vector to another, per element
+// NOTE(review): which argument supplies the sign and which the magnitude is not stated here — confirm in the implementation.
+inline const Vector3 copySignPerElem( Vector3 vec0, Vector3 vec1 );
+
+// Maximum of two 3-D vectors per element
+// 
+inline const Vector3 maxPerElem( Vector3 vec0, Vector3 vec1 );
+
+// Minimum of two 3-D vectors per element
+// 
+inline const Vector3 minPerElem( Vector3 vec0, Vector3 vec1 );
+
+// Maximum element of a 3-D vector
+// Returns a single float. NOTE(review): presumably only x, y, z are compared — confirm.
+inline float maxElem( Vector3 vec );
+
+// Minimum element of a 3-D vector
+// Returns a single float. NOTE(review): presumably only x, y, z are compared — confirm.
+inline float minElem( Vector3 vec );
+
+// Compute the sum of all elements of a 3-D vector
+// Returns a single float.
+inline float sum( Vector3 vec );
+
+// Compute the dot product of two 3-D vectors
+// Returns a single float.
+inline float dot( Vector3 vec0, Vector3 vec1 );
+
+// Compute the square of the length of a 3-D vector
+// Returns a single float.
+inline float lengthSqr( Vector3 vec );
+
+// Compute the length of a 3-D vector
+// Returns a single float.
+inline float length( Vector3 vec );
+
+// Normalize a 3-D vector
+// NOTE: 
+// The result is unpredictable when all elements of vec are at or near zero.
+// 
+inline const Vector3 normalize( Vector3 vec );
+
+// Compute cross product of two 3-D vectors
+// NOTE(review): operand order is presumably vec0 x vec1 — confirm in the implementation.
+inline const Vector3 cross( Vector3 vec0, Vector3 vec1 );
+
+// Outer product of two 3-D vectors
+// Returns a Matrix3, which has only been forward-declared at this point in the header.
+inline const Matrix3 outer( Vector3 vec0, Vector3 vec1 );
+
+// Pre-multiply a row vector by a 3x3 matrix
+// NOTE: 
+// Slower than column post-multiply.
+// mat is taken by const reference; vec is passed by value.
+inline const Vector3 rowMul( Vector3 vec, const Matrix3 & mat );
+
+// Cross-product matrix of a 3-D vector
+// NOTE(review): presumably the matrix M with M * v == cross( vec, v ) — confirm in the implementation.
+inline const Matrix3 crossMatrix( Vector3 vec );
+
+// Create cross-product matrix and multiply
+// NOTE: 
+// Faster than separately creating a cross-product matrix and multiplying.
+// NOTE(review): presumably equivalent to crossMatrix( vec ) * mat — confirm in the implementation.
+inline const Matrix3 crossMatrixMul( Vector3 vec, const Matrix3 & mat );
+
+// Linear interpolation between two 3-D vectors
+// NOTE: 
+// Does not clamp t between 0 and 1.
+// NOTE(review): presumably t == 0 yields vec0 and t == 1 yields vec1 — confirm in the implementation.
+inline const Vector3 lerp( float t, Vector3 vec0, Vector3 vec1 );
+
+// Spherical linear interpolation between two 3-D vectors
+// NOTE: 
+// The result is unpredictable if the vectors point in opposite directions.
+// Does not clamp t between 0 and 1.
+// The parameter names (unitVec0, unitVec1) indicate unit-length inputs are expected; no check is visible in this declaration.
+inline const Vector3 slerp( float t, Vector3 unitVec0, Vector3 unitVec1 );
+
+// Conditionally select between two 3-D vectors
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// NOTE(review): presumably returns vec1 when select1 is true and vec0 otherwise — confirm in the implementation.
+inline const Vector3 select( Vector3 vec0, Vector3 vec1, bool select1 );
+
+// Store x, y, and z elements of a 3-D vector in the first three words of a quadword.
+// The value of the fourth word (the word with the highest address) remains unchanged
+// quad is the destination, written through a non-const pointer.
+inline void storeXYZ( Vector3 vec, vec_float4 * quad );
+
+// Load four three-float 3-D vectors, stored in three quadwords
+// vec0..vec3 are outputs (non-const references); threeQuads is read-only input (pointer to const).
+inline void loadXYZArray( Vector3 & vec0, Vector3 & vec1, Vector3 & vec2, Vector3 & vec3, const vec_float4 * threeQuads );
+
+// Store four 3-D vectors in three quadwords
+// threeQuads is the destination (non-const pointer); the four vectors are passed by value.
+inline void storeXYZArray( Vector3 vec0, Vector3 vec1, Vector3 vec2, Vector3 vec3, vec_float4 * threeQuads );
+
+// Store eight 3-D vectors as half-floats
+// threeQuads is the destination (pointer to vec_ushort8). NOTE(review): 8 vectors * 3 elements at 16 bits each would fill three 16-byte quadwords — confirm.
+inline void storeHalfFloats( Vector3 vec0, Vector3 vec1, Vector3 vec2, Vector3 vec3, Vector3 vec4, Vector3 vec5, Vector3 vec6, Vector3 vec7, vec_ushort8 * threeQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 3-D vector
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( Vector3 vec );
+
+// Print a 3-D vector and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( Vector3 vec, const char * name );
+
+#endif
+
+// A 4-D vector in array-of-structures format
+// The only data member is one vec_float4 (mVec128); get128() presumably exposes it — confirm in the implementation.
+class Vector4
+{
+    vec_float4 mVec128;
+
+public:
+    // Default constructor; does no initialization
+    // NOTE: the body is empty, so mVec128 is not given a value here.
+    inline Vector4( ) { };
+
+    // Construct a 4-D vector from x, y, z, and w elements
+    // 
+    inline Vector4( float x, float y, float z, float w );
+
+    // Construct a 4-D vector from a 3-D vector and a scalar
+    // 
+    inline Vector4( Vector3 xyz, float w );
+
+    // Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0
+    // Declared explicit, so this constructor is never used for an implicit Vector3-to-Vector4 conversion.
+    explicit inline Vector4( Vector3 vec );
+
+    // Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1
+    // Declared explicit, so this constructor is never used for an implicit Point3-to-Vector4 conversion.
+    explicit inline Vector4( Point3 pnt );
+
+    // Copy elements from a quaternion into a 4-D vector
+    // Declared explicit, so this constructor is never used for an implicit Quat-to-Vector4 conversion.
+    explicit inline Vector4( Quat quat );
+
+    // Set all elements of a 4-D vector to the same scalar value
+    // Declared explicit, so this constructor is never used for an implicit float-to-Vector4 conversion.
+    explicit inline Vector4( float scalar );
+
+    // Set vector float data in a 4-D vector
+    // Declared explicit, so this constructor is never used for an implicit vec_float4-to-Vector4 conversion.
+    explicit inline Vector4( vec_float4 vf4 );
+
+    // Get vector float data from a 4-D vector
+    // Const member; returns a vec_float4 by value.
+    inline vec_float4 get128( ) const;
+
+    // Assign one 4-D vector to another
+    // Takes vec by value and returns a Vector4 reference (presumably *this — confirm in the implementation).
+    inline Vector4 & operator =( Vector4 vec );
+
+    // Set the x, y, and z elements of a 4-D vector
+    // NOTE: 
+    // This function does not change the w element.
+    // Returns a Vector4 reference (presumably *this — confirm).
+    inline Vector4 & setXYZ( Vector3 vec );
+
+    // Get the x, y, and z elements of a 4-D vector
+    // Const member; returns a Vector3 by value.
+    inline const Vector3 getXYZ( ) const;
+
+    // Set the x element of a 4-D vector
+    // Returns a Vector4 reference (presumably *this, allowing chained setters — confirm).
+    inline Vector4 & setX( float x );
+
+    // Set the y element of a 4-D vector
+    // Returns a Vector4 reference, like setX.
+    inline Vector4 & setY( float y );
+
+    // Set the z element of a 4-D vector
+    // Returns a Vector4 reference, like setX.
+    inline Vector4 & setZ( float z );
+
+    // Set the w element of a 4-D vector
+    // Returns a Vector4 reference, like setX.
+    inline Vector4 & setW( float w );
+
+    // Get the x element of a 4-D vector
+    // 
+    inline float getX( ) const;
+
+    // Get the y element of a 4-D vector
+    // 
+    inline float getY( ) const;
+
+    // Get the z element of a 4-D vector
+    // 
+    inline float getZ( ) const;
+
+    // Get the w element of a 4-D vector
+    // 
+    inline float getW( ) const;
+
+    // Set an x, y, z, or w element of a 4-D vector by index
+    // NOTE(review): no range check on idx is visible in this declaration; presumably 0..3 select x, y, z, w — confirm.
+    inline Vector4 & setElem( int idx, float value );
+
+    // Get an x, y, z, or w element of a 4-D vector by index
+    // NOTE(review): no range check on idx is visible in this declaration; presumably 0..3 select x, y, z, w — confirm.
+    inline float getElem( int idx ) const;
+
+    // Subscripting operator to set or get an element
+    // Returns a VecIdx object by value rather than a float reference (VecIdx presumably comes from vecidx_aos.h — confirm).
+    inline VecIdx operator []( int idx );
+
+    // Subscripting operator to get an element
+    // Const overload: returns the element as a float by value.
+    inline float operator []( int idx ) const;
+
+    // Add two 4-D vectors
+    // Const member: returns a new Vector4 by value.
+    inline const Vector4 operator +( Vector4 vec ) const;
+
+    // Subtract a 4-D vector from another 4-D vector
+    // Const member: returns a new Vector4 by value.
+    inline const Vector4 operator -( Vector4 vec ) const;
+
+    // Multiply a 4-D vector by a scalar
+    // Member form with the scalar as the right-hand operand (vec * scalar).
+    inline const Vector4 operator *( float scalar ) const;
+
+    // Divide a 4-D vector by a scalar
+    // NOTE(review): behavior when scalar is 0 is not stated in this header.
+    inline const Vector4 operator /( float scalar ) const;
+
+    // Perform compound assignment and addition with a 4-D vector
+    // Returns a Vector4 reference (presumably *this — confirm).
+    inline Vector4 & operator +=( Vector4 vec );
+
+    // Perform compound assignment and subtraction by a 4-D vector
+    // Returns a Vector4 reference (presumably *this — confirm).
+    inline Vector4 & operator -=( Vector4 vec );
+
+    // Perform compound assignment and multiplication by a scalar
+    // Returns a Vector4 reference (presumably *this — confirm).
+    inline Vector4 & operator *=( float scalar );
+
+    // Perform compound assignment and division by a scalar
+    // Returns a Vector4 reference (presumably *this — confirm); behavior for scalar == 0 is not stated in this header.
+    inline Vector4 & operator /=( float scalar );
+
+    // Negate all elements of a 4-D vector
+    // Unary minus; const member returning a new Vector4 by value.
+    inline const Vector4 operator -( ) const;
+
+    // Construct x axis
+    // Static member, callable without a Vector4 instance. NOTE(review): presumably (1, 0, 0, 0) — confirm.
+    static inline const Vector4 xAxis( );
+
+    // Construct y axis
+    // Static member, callable without a Vector4 instance. NOTE(review): presumably (0, 1, 0, 0) — confirm.
+    static inline const Vector4 yAxis( );
+
+    // Construct z axis
+    // Static member, callable without a Vector4 instance. NOTE(review): presumably (0, 0, 1, 0) — confirm.
+    static inline const Vector4 zAxis( );
+
+    // Construct w axis
+    // Static member, callable without a Vector4 instance. NOTE(review): presumably (0, 0, 0, 1) — confirm.
+    static inline const Vector4 wAxis( );
+
+};
+
+// Multiply a 4-D vector by a scalar
+// Non-member overload so the scalar can be the left-hand operand (scalar * vec).
+inline const Vector4 operator *( float scalar, Vector4 vec );
+
+// Multiply two 4-D vectors per element
+// 
+inline const Vector4 mulPerElem( Vector4 vec0, Vector4 vec1 );
+
+// Divide two 4-D vectors per element
+// NOTE: 
+// Floating-point behavior matches standard library function divf4.
+// NOTE(review): presumably vec0 is the dividend and vec1 the divisor — confirm in the implementation.
+inline const Vector4 divPerElem( Vector4 vec0, Vector4 vec1 );
+
+// Compute the reciprocal of a 4-D vector per element
+// NOTE: 
+// Floating-point behavior matches standard library function recipf4.
+// 
+inline const Vector4 recipPerElem( Vector4 vec );
+
+// Compute the square root of a 4-D vector per element
+// NOTE: 
+// Floating-point behavior matches standard library function sqrtf4.
+// 
+inline const Vector4 sqrtPerElem( Vector4 vec );
+
+// Compute the reciprocal square root of a 4-D vector per element
+// NOTE: 
+// Floating-point behavior matches standard library function rsqrtf4.
+// 
+inline const Vector4 rsqrtPerElem( Vector4 vec );
+
+// Compute the absolute value of a 4-D vector per element
+// 
+inline const Vector4 absPerElem( Vector4 vec );
+
+// Copy sign from one 4-D vector to another, per element
+// NOTE(review): which argument supplies the sign and which the magnitude is not stated here — confirm in the implementation.
+inline const Vector4 copySignPerElem( Vector4 vec0, Vector4 vec1 );
+
+// Maximum of two 4-D vectors per element
+// 
+inline const Vector4 maxPerElem( Vector4 vec0, Vector4 vec1 );
+
+// Minimum of two 4-D vectors per element
+// 
+inline const Vector4 minPerElem( Vector4 vec0, Vector4 vec1 );
+
+// Maximum element of a 4-D vector
+// Returns a single float.
+inline float maxElem( Vector4 vec );
+
+// Minimum element of a 4-D vector
+// Returns a single float.
+inline float minElem( Vector4 vec );
+
+// Compute the sum of all elements of a 4-D vector
+// Returns a single float.
+inline float sum( Vector4 vec );
+
+// Compute the dot product of two 4-D vectors
+// Returns a single float.
+inline float dot( Vector4 vec0, Vector4 vec1 );
+
+// Compute the square of the length of a 4-D vector
+// Returns a single float.
+inline float lengthSqr( Vector4 vec );
+
+// Compute the length of a 4-D vector
+// Returns a single float.
+inline float length( Vector4 vec );
+
+// Normalize a 4-D vector
+// NOTE: 
+// The result is unpredictable when all elements of vec are at or near zero.
+// 
+inline const Vector4 normalize( Vector4 vec );
+
+// Outer product of two 4-D vectors
+// Returns a Matrix4, which has only been forward-declared at this point in the header.
+inline const Matrix4 outer( Vector4 vec0, Vector4 vec1 );
+
+// Linear interpolation between two 4-D vectors
+// NOTE: 
+// Does not clamp t between 0 and 1.
+// NOTE(review): presumably t == 0 yields vec0 and t == 1 yields vec1 — confirm in the implementation.
+inline const Vector4 lerp( float t, Vector4 vec0, Vector4 vec1 );
+
+// Spherical linear interpolation between two 4-D vectors
+// NOTE: 
+// The result is unpredictable if the vectors point in opposite directions.
+// Does not clamp t between 0 and 1.
+// The parameter names (unitVec0, unitVec1) indicate unit-length inputs are expected; no check is visible in this declaration.
+inline const Vector4 slerp( float t, Vector4 unitVec0, Vector4 unitVec1 );
+
+// Conditionally select between two 4-D vectors
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// NOTE(review): presumably returns vec1 when select1 is true and vec0 otherwise — confirm in the implementation.
+inline const Vector4 select( Vector4 vec0, Vector4 vec1, bool select1 );
+
+// Store four 4-D vectors as half-floats
+// twoQuads is the destination (pointer to vec_ushort8). NOTE(review): 4 vectors * 4 elements at 16 bits each would fill two 16-byte quadwords — confirm.
+inline void storeHalfFloats( Vector4 vec0, Vector4 vec1, Vector4 vec2, Vector4 vec3, vec_ushort8 * twoQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 4-D vector
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( Vector4 vec );
+
+// Print a 4-D vector and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( Vector4 vec, const char * name );
+
+#endif
+
+// A 3-D point in array-of-structures format
+// The only data member is a single vec_float4 (mVec128).
+class Point3
+{
+    vec_float4 mVec128;
+
+public:
+    // Default constructor; does no initialization
+    // (the body is empty, so mVec128 is not assigned here)
+    inline Point3( ) { };
+
+    // Construct a 3-D point from x, y, and z elements
+    // 
+    inline Point3( float x, float y, float z );
+
+    // Copy elements from a 3-D vector into a 3-D point
+    // Declared explicit, so a Vector3 is never converted to a Point3 implicitly.
+    explicit inline Point3( Vector3 vec );
+
+    // Set all elements of a 3-D point to the same scalar value
+    // Declared explicit, so a float is never converted to a Point3 implicitly.
+    explicit inline Point3( float scalar );
+
+    // Set vector float data in a 3-D point
+    // Declared explicit, so a vec_float4 is never converted to a Point3 implicitly.
+    explicit inline Point3( vec_float4 vf4 );
+
+    // Get vector float data from a 3-D point
+    // Returned by value from a const member, so the point is not modified.
+    inline vec_float4 get128( ) const;
+
+    // Assign one 3-D point to another
+    // The source point is passed by value; a Point3 reference is returned.
+    inline Point3 & operator =( Point3 pnt );
+
+    // Set the x element of a 3-D point
+    // Returns a Point3 reference (presumably *this; confirm in the implementation).
+    inline Point3 & setX( float x );
+
+    // Set the y element of a 3-D point
+    // Returns a Point3 reference (presumably *this; confirm in the implementation).
+    inline Point3 & setY( float y );
+
+    // Set the z element of a 3-D point
+    // Returns a Point3 reference (presumably *this; confirm in the implementation).
+    inline Point3 & setZ( float z );
+
+    // Get the x element of a 3-D point
+    // 
+    inline float getX( ) const;
+
+    // Get the y element of a 3-D point
+    // 
+    inline float getY( ) const;
+
+    // Get the z element of a 3-D point
+    // 
+    inline float getZ( ) const;
+
+    // Set an x, y, or z element of a 3-D point by index
+    // NOTE(review): idx is presumably 0, 1, or 2 for x, y, or z; confirm in the implementation.
+    inline Point3 & setElem( int idx, float value );
+
+    // Get an x, y, or z element of a 3-D point by index
+    // NOTE(review): idx is presumably 0, 1, or 2 for x, y, or z; confirm in the implementation.
+    inline float getElem( int idx ) const;
+
+    // Subscripting operator to set or get an element
+    // Returns a VecIdx object (type declared elsewhere) rather than a float reference.
+    inline VecIdx operator []( int idx );
+
+    // Subscripting operator to get an element
+    // Const overload: the element is returned by value.
+    inline float operator []( int idx ) const;
+
+    // Subtract a 3-D point from another 3-D point
+    // The result is a Vector3, not a Point3.
+    inline const Vector3 operator -( Point3 pnt ) const;
+
+    // Add a 3-D point to a 3-D vector
+    // The result is a Point3.
+    inline const Point3 operator +( Vector3 vec ) const;
+
+    // Subtract a 3-D vector from a 3-D point
+    // The result is a Point3.
+    inline const Point3 operator -( Vector3 vec ) const;
+
+    // Perform compound assignment and addition with a 3-D vector
+    // Returns a Point3 reference.
+    inline Point3 & operator +=( Vector3 vec );
+
+    // Perform compound assignment and subtraction by a 3-D vector
+    // Returns a Point3 reference.
+    inline Point3 & operator -=( Vector3 vec );
+
+};
+
+// Multiply two 3-D points per element
+// 
+inline const Point3 mulPerElem( Point3 pnt0, Point3 pnt1 );
+
+// Divide two 3-D points per element
+// NOTE: 
+// Floating-point behavior matches standard library function divf4.
+// 
+inline const Point3 divPerElem( Point3 pnt0, Point3 pnt1 );
+
+// Compute the reciprocal of a 3-D point per element
+// NOTE: 
+// Floating-point behavior matches standard library function recipf4.
+// 
+inline const Point3 recipPerElem( Point3 pnt );
+
+// Compute the square root of a 3-D point per element
+// NOTE: 
+// Floating-point behavior matches standard library function sqrtf4.
+// 
+inline const Point3 sqrtPerElem( Point3 pnt );
+
+// Compute the reciprocal square root of a 3-D point per element
+// NOTE: 
+// Floating-point behavior matches standard library function rsqrtf4.
+// 
+inline const Point3 rsqrtPerElem( Point3 pnt );
+
+// Compute the absolute value of a 3-D point per element
+// 
+inline const Point3 absPerElem( Point3 pnt );
+
+// Copy sign from one 3-D point to another, per element
+// 
+inline const Point3 copySignPerElem( Point3 pnt0, Point3 pnt1 );
+
+// Maximum of two 3-D points per element
+// 
+inline const Point3 maxPerElem( Point3 pnt0, Point3 pnt1 );
+
+// Minimum of two 3-D points per element
+// 
+inline const Point3 minPerElem( Point3 pnt0, Point3 pnt1 );
+
+// Maximum element of a 3-D point
+// 
+inline float maxElem( Point3 pnt );
+
+// Minimum element of a 3-D point
+// 
+inline float minElem( Point3 pnt );
+
+// Compute the sum of all elements of a 3-D point
+// 
+inline float sum( Point3 pnt );
+
+// Apply uniform scale to a 3-D point
+// 
+inline const Point3 scale( Point3 pnt, float scaleVal );
+
+// Apply non-uniform scale to a 3-D point
+// 
+inline const Point3 scale( Point3 pnt, Vector3 scaleVec );
+
+// Scalar projection of a 3-D point on a unit-length 3-D vector
+// 
+inline float projection( Point3 pnt, Vector3 unitVec );
+
+// Compute the square of the distance of a 3-D point from the coordinate-system origin
+// 
+inline float distSqrFromOrigin( Point3 pnt );
+
+// Compute the distance of a 3-D point from the coordinate-system origin
+// 
+inline float distFromOrigin( Point3 pnt );
+
+// Compute the square of the distance between two 3-D points
+// 
+inline float distSqr( Point3 pnt0, Point3 pnt1 );
+
+// Compute the distance between two 3-D points
+// 
+inline float dist( Point3 pnt0, Point3 pnt1 );
+
+// Linear interpolation between two 3-D points
+// NOTE: 
+// Does not clamp t between 0 and 1.
+// 
+inline const Point3 lerp( float t, Point3 pnt0, Point3 pnt1 );
+
+// Conditionally select between two 3-D points
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// 
+inline const Point3 select( Point3 pnt0, Point3 pnt1, bool select1 );
+
+// Store x, y, and z elements of a 3-D point in the first three words of a quadword.
+// The value of the fourth word (the word with the highest address) remains unchanged.
+// 
+inline void storeXYZ( Point3 pnt, vec_float4 * quad );
+
+// Load four three-float 3-D points, stored in three quadwords
+// 
+inline void loadXYZArray( Point3 & pnt0, Point3 & pnt1, Point3 & pnt2, Point3 & pnt3, const vec_float4 * threeQuads );
+
+// Store four 3-D points in three quadwords
+// 
+inline void storeXYZArray( Point3 pnt0, Point3 pnt1, Point3 pnt2, Point3 pnt3, vec_float4 * threeQuads );
+
+// Store eight 3-D points as half-floats
+// 
+inline void storeHalfFloats( Point3 pnt0, Point3 pnt1, Point3 pnt2, Point3 pnt3, Point3 pnt4, Point3 pnt5, Point3 pnt6, Point3 pnt7, vec_ushort8 * threeQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 3-D point
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( Point3 pnt );
+
+// Print a 3-D point and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( Point3 pnt, const char * name );
+
+#endif
+
+// A quaternion in array-of-structures format
+// The only data member is a single vec_float4 (mVec128).
+class Quat
+{
+    vec_float4 mVec128;
+
+public:
+    // Default constructor; does no initialization
+    // (the body is empty, so mVec128 is not assigned here)
+    inline Quat( ) { };
+
+    // Construct a quaternion from x, y, z, and w elements
+    // 
+    inline Quat( float x, float y, float z, float w );
+
+    // Construct a quaternion from a 3-D vector and a scalar
+    // The vector supplies xyz and the scalar supplies w, per the parameter names.
+    inline Quat( Vector3 xyz, float w );
+
+    // Copy elements from a 4-D vector into a quaternion
+    // Declared explicit, so a Vector4 is never converted to a Quat implicitly.
+    explicit inline Quat( Vector4 vec );
+
+    // Convert a rotation matrix to a unit-length quaternion
+    // Declared explicit, so a Matrix3 is never converted to a Quat implicitly.
+    explicit inline Quat( const Matrix3 & rotMat );
+
+    // Set all elements of a quaternion to the same scalar value
+    // Declared explicit, so a float is never converted to a Quat implicitly.
+    explicit inline Quat( float scalar );
+
+    // Set vector float data in a quaternion
+    // Declared explicit, so a vec_float4 is never converted to a Quat implicitly.
+    explicit inline Quat( vec_float4 vf4 );
+
+    // Get vector float data from a quaternion
+    // Returned by value from a const member, so the quaternion is not modified.
+    inline vec_float4 get128( ) const;
+
+    // Assign one quaternion to another
+    // The source quaternion is passed by value; a Quat reference is returned.
+    inline Quat & operator =( Quat quat );
+
+    // Set the x, y, and z elements of a quaternion
+    // NOTE: 
+    // This function does not change the w element.
+    // 
+    inline Quat & setXYZ( Vector3 vec );
+
+    // Get the x, y, and z elements of a quaternion
+    // Returned as a Vector3 by value.
+    inline const Vector3 getXYZ( ) const;
+
+    // Set the x element of a quaternion
+    // Returns a Quat reference (presumably *this; confirm in the implementation).
+    inline Quat & setX( float x );
+
+    // Set the y element of a quaternion
+    // Returns a Quat reference (presumably *this; confirm in the implementation).
+    inline Quat & setY( float y );
+
+    // Set the z element of a quaternion
+    // Returns a Quat reference (presumably *this; confirm in the implementation).
+    inline Quat & setZ( float z );
+
+    // Set the w element of a quaternion
+    // Returns a Quat reference (presumably *this; confirm in the implementation).
+    inline Quat & setW( float w );
+
+    // Get the x element of a quaternion
+    // 
+    inline float getX( ) const;
+
+    // Get the y element of a quaternion
+    // 
+    inline float getY( ) const;
+
+    // Get the z element of a quaternion
+    // 
+    inline float getZ( ) const;
+
+    // Get the w element of a quaternion
+    // 
+    inline float getW( ) const;
+
+    // Set an x, y, z, or w element of a quaternion by index
+    // NOTE(review): idx is presumably 0 to 3 for x, y, z, w; confirm in the implementation.
+    inline Quat & setElem( int idx, float value );
+
+    // Get an x, y, z, or w element of a quaternion by index
+    // NOTE(review): idx is presumably 0 to 3 for x, y, z, w; confirm in the implementation.
+    inline float getElem( int idx ) const;
+
+    // Subscripting operator to set or get an element
+    // Returns a VecIdx object (type declared elsewhere) rather than a float reference.
+    inline VecIdx operator []( int idx );
+
+    // Subscripting operator to get an element
+    // Const overload: the element is returned by value.
+    inline float operator []( int idx ) const;
+
+    // Add two quaternions
+    // 
+    inline const Quat operator +( Quat quat ) const;
+
+    // Subtract a quaternion from another quaternion
+    // Member operator, so *this is the left-hand operand.
+    inline const Quat operator -( Quat quat ) const;
+
+    // Multiply two quaternions
+    // Member operator, so *this is the left-hand operand of the product.
+    inline const Quat operator *( Quat quat ) const;
+
+    // Multiply a quaternion by a scalar
+    // 
+    inline const Quat operator *( float scalar ) const;
+
+    // Divide a quaternion by a scalar
+    // 
+    inline const Quat operator /( float scalar ) const;
+
+    // Perform compound assignment and addition with a quaternion
+    // Returns a Quat reference.
+    inline Quat & operator +=( Quat quat );
+
+    // Perform compound assignment and subtraction by a quaternion
+    // Returns a Quat reference.
+    inline Quat & operator -=( Quat quat );
+
+    // Perform compound assignment and multiplication by a quaternion
+    // NOTE(review): presumably *this = *this * quat; confirm the operand order.
+    inline Quat & operator *=( Quat quat );
+
+    // Perform compound assignment and multiplication by a scalar
+    // Returns a Quat reference.
+    inline Quat & operator *=( float scalar );
+
+    // Perform compound assignment and division by a scalar
+    // Returns a Quat reference.
+    inline Quat & operator /=( float scalar );
+
+    // Negate all elements of a quaternion
+    // Unary minus; returns a new Quat and leaves *this unchanged (const member).
+    inline const Quat operator -( ) const;
+
+    // Construct an identity quaternion
+    // Static member; the result is returned by value.
+    static inline const Quat identity( );
+
+    // Construct a quaternion to rotate between two unit-length 3-D vectors
+    // NOTE: 
+    // The result is unpredictable if unitVec0 and unitVec1 point in opposite directions.
+    // 
+    static inline const Quat rotation( Vector3 unitVec0, Vector3 unitVec1 );
+
+    // Construct a quaternion to rotate around a unit-length 3-D vector
+    // Overload of rotation( ) taking an angle (parameter radians) and an axis.
+    static inline const Quat rotation( float radians, Vector3 unitVec );
+
+    // Construct a quaternion to rotate around the x axis
+    // 
+    static inline const Quat rotationX( float radians );
+
+    // Construct a quaternion to rotate around the y axis
+    // 
+    static inline const Quat rotationY( float radians );
+
+    // Construct a quaternion to rotate around the z axis
+    // 
+    static inline const Quat rotationZ( float radians );
+
+};
+
+// Multiply a quaternion by a scalar
+// 
+inline const Quat operator *( float scalar, Quat quat );
+
+// Compute the conjugate of a quaternion
+// 
+inline const Quat conj( Quat quat );
+
+// Use a unit-length quaternion to rotate a 3-D vector
+// 
+inline const Vector3 rotate( Quat unitQuat, Vector3 vec );
+
+// Compute the dot product of two quaternions
+// 
+inline float dot( Quat quat0, Quat quat1 );
+
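+// Compute the norm of a quaternion
+// NOTE(review): distinct from length( ) below; presumably the squared length - confirm in the implementation.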
+inline float norm( Quat quat );
+
+// Compute the length of a quaternion
+// 
+inline float length( Quat quat );
+
+// Normalize a quaternion
+// NOTE: 
+// The result is unpredictable when all elements of quat are at or near zero.
+// 
+inline const Quat normalize( Quat quat );
+
+// Linear interpolation between two quaternions
+// NOTE: 
+// Does not clamp t between 0 and 1.
+// 
+inline const Quat lerp( float t, Quat quat0, Quat quat1 );
+
+// Spherical linear interpolation between two quaternions
+// NOTE: 
+// Interpolates along the shortest path between orientations.
+// Does not clamp t between 0 and 1.
+// 
+inline const Quat slerp( float t, Quat unitQuat0, Quat unitQuat1 );
+
+// Spherical quadrangle interpolation
+// 
+inline const Quat squad( float t, Quat unitQuat0, Quat unitQuat1, Quat unitQuat2, Quat unitQuat3 );
+
+// Conditionally select between two quaternions
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// 
+inline const Quat select( Quat quat0, Quat quat1, bool select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a quaternion
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( Quat quat );
+
+// Print a quaternion and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( Quat quat, const char * name );
+
+#endif
+
+// A 3x3 matrix in array-of-structures format
+// Stored as three Vector3 members, one per column (mCol0, mCol1, mCol2).
+class Matrix3
+{
+    Vector3 mCol0;
+    Vector3 mCol1;
+    Vector3 mCol2;
+
+public:
+    // Default constructor; does no initialization
+    // (the body is empty and there is no member initializer list)
+    inline Matrix3( ) { };
+
+    // Copy a 3x3 matrix
+    // 
+    inline Matrix3( const Matrix3 & mat );
+
+    // Construct a 3x3 matrix containing the specified columns
+    // 
+    inline Matrix3( Vector3 col0, Vector3 col1, Vector3 col2 );
+
+    // Construct a 3x3 rotation matrix from a unit-length quaternion
+    // Declared explicit, so a Quat is never converted to a Matrix3 implicitly.
+    explicit inline Matrix3( Quat unitQuat );
+
+    // Set all elements of a 3x3 matrix to the same scalar value
+    // Declared explicit, so a float is never converted to a Matrix3 implicitly.
+    explicit inline Matrix3( float scalar );
+
+    // Assign one 3x3 matrix to another
+    // The source is taken by const reference; a Matrix3 reference is returned.
+    inline Matrix3 & operator =( const Matrix3 & mat );
+
+    // Set column 0 of a 3x3 matrix
+    // Returns a Matrix3 reference (presumably *this; confirm in the implementation).
+    inline Matrix3 & setCol0( Vector3 col0 );
+
+    // Set column 1 of a 3x3 matrix
+    // Returns a Matrix3 reference (presumably *this; confirm in the implementation).
+    inline Matrix3 & setCol1( Vector3 col1 );
+
+    // Set column 2 of a 3x3 matrix
+    // Returns a Matrix3 reference (presumably *this; confirm in the implementation).
+    inline Matrix3 & setCol2( Vector3 col2 );
+
+    // Get column 0 of a 3x3 matrix
+    // 
+    inline const Vector3 getCol0( ) const;
+
+    // Get column 1 of a 3x3 matrix
+    // 
+    inline const Vector3 getCol1( ) const;
+
+    // Get column 2 of a 3x3 matrix
+    // 
+    inline const Vector3 getCol2( ) const;
+
+    // Set the column of a 3x3 matrix referred to by the specified index
+    // NOTE(review): col is presumably 0, 1, or 2; confirm in the implementation.
+    inline Matrix3 & setCol( int col, Vector3 vec );
+
+    // Set the row of a 3x3 matrix referred to by the specified index
+    // NOTE(review): row is presumably 0, 1, or 2; confirm in the implementation.
+    inline Matrix3 & setRow( int row, Vector3 vec );
+
+    // Get the column of a 3x3 matrix referred to by the specified index
+    // The column is returned by value.
+    inline const Vector3 getCol( int col ) const;
+
+    // Get the row of a 3x3 matrix referred to by the specified index
+    // The row is returned by value.
+    inline const Vector3 getRow( int row ) const;
+
+    // Subscripting operator to set or get a column
+    // Non-const overload: returns a Vector3 reference.
+    inline Vector3 & operator []( int col );
+
+    // Subscripting operator to get a column
+    // Const overload: the column is returned by value.
+    inline const Vector3 operator []( int col ) const;
+
+    // Set the element of a 3x3 matrix referred to by column and row indices
+    // Note the argument order: column index first, then row index.
+    inline Matrix3 & setElem( int col, int row, float val );
+
+    // Get the element of a 3x3 matrix referred to by column and row indices
+    // Note the argument order: column index first, then row index.
+    inline float getElem( int col, int row ) const;
+
+    // Add two 3x3 matrices
+    // 
+    inline const Matrix3 operator +( const Matrix3 & mat ) const;
+
+    // Subtract a 3x3 matrix from another 3x3 matrix
+    // Member operator, so *this is the left-hand operand.
+    inline const Matrix3 operator -( const Matrix3 & mat ) const;
+
+    // Negate all elements of a 3x3 matrix
+    // Unary minus; returns a new Matrix3 and leaves *this unchanged (const member).
+    inline const Matrix3 operator -( ) const;
+
+    // Multiply a 3x3 matrix by a scalar
+    // 
+    inline const Matrix3 operator *( float scalar ) const;
+
+    // Multiply a 3x3 matrix by a 3-D vector
+    // Member operator, so the matrix is the left-hand operand; the result is a Vector3.
+    inline const Vector3 operator *( Vector3 vec ) const;
+
+    // Multiply two 3x3 matrices
+    // Member operator, so *this is the left-hand operand of the product.
+    inline const Matrix3 operator *( const Matrix3 & mat ) const;
+
+    // Perform compound assignment and addition with a 3x3 matrix
+    // Returns a Matrix3 reference.
+    inline Matrix3 & operator +=( const Matrix3 & mat );
+
+    // Perform compound assignment and subtraction by a 3x3 matrix
+    // Returns a Matrix3 reference.
+    inline Matrix3 & operator -=( const Matrix3 & mat );
+
+    // Perform compound assignment and multiplication by a scalar
+    // Returns a Matrix3 reference.
+    inline Matrix3 & operator *=( float scalar );
+
+    // Perform compound assignment and multiplication by a 3x3 matrix
+    // NOTE(review): presumably *this = *this * mat; confirm the operand order.
+    inline Matrix3 & operator *=( const Matrix3 & mat );
+
+    // Construct an identity 3x3 matrix
+    // Static member; the result is returned by value.
+    static inline const Matrix3 identity( );
+
+    // Construct a 3x3 matrix to rotate around the x axis
+    // 
+    static inline const Matrix3 rotationX( float radians );
+
+    // Construct a 3x3 matrix to rotate around the y axis
+    // 
+    static inline const Matrix3 rotationY( float radians );
+
+    // Construct a 3x3 matrix to rotate around the z axis
+    // 
+    static inline const Matrix3 rotationZ( float radians );
+
+    // Construct a 3x3 matrix to rotate around the x, y, and z axes
+    // NOTE(review): the name suggests a Z * Y * X composition order; confirm in the implementation.
+    static inline const Matrix3 rotationZYX( Vector3 radiansXYZ );
+
+    // Construct a 3x3 matrix to rotate around a unit-length 3-D vector
+    // Overload of rotation( ) taking an angle (parameter radians) and an axis.
+    static inline const Matrix3 rotation( float radians, Vector3 unitVec );
+
+    // Construct a rotation matrix from a unit-length quaternion
+    // Overload of rotation( ) taking a Quat.
+    static inline const Matrix3 rotation( Quat unitQuat );
+
+    // Construct a 3x3 matrix to perform scaling
+    // The scale factors are passed together as one Vector3 (scaleVec).
+    static inline const Matrix3 scale( Vector3 scaleVec );
+
+};
+// Multiply a 3x3 matrix by a scalar
+// 
+inline const Matrix3 operator *( float scalar, const Matrix3 & mat );
+
+// Append (post-multiply) a scale transformation to a 3x3 matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// 
+inline const Matrix3 appendScale( const Matrix3 & mat, Vector3 scaleVec );
+
+// Prepend (pre-multiply) a scale transformation to a 3x3 matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// 
+inline const Matrix3 prependScale( Vector3 scaleVec, const Matrix3 & mat );
+
+// Multiply two 3x3 matrices per element
+// 
+inline const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 );
+
+// Compute the absolute value of a 3x3 matrix per element
+// 
+inline const Matrix3 absPerElem( const Matrix3 & mat );
+
+// Transpose of a 3x3 matrix
+// 
+inline const Matrix3 transpose( const Matrix3 & mat );
+
+// Compute the inverse of a 3x3 matrix
+// NOTE: 
+// Result is unpredictable when the determinant of mat is equal to or near 0.
+// 
+inline const Matrix3 inverse( const Matrix3 & mat );
+
+// Determinant of a 3x3 matrix
+// 
+inline float determinant( const Matrix3 & mat );
+
+// Conditionally select between two 3x3 matrices
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// 
+inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, bool select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 3x3 matrix
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Matrix3 & mat );
+
+// Print a 3x3 matrix and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Matrix3 & mat, const char * name );
+
+#endif
+
+// A 4x4 matrix in array-of-structures format
+// Stored as four Vector4 members, one per column (mCol0, mCol1, mCol2, mCol3).
+class Matrix4
+{
+    Vector4 mCol0;
+    Vector4 mCol1;
+    Vector4 mCol2;
+    Vector4 mCol3;
+
+public:
+    // Default constructor; does no initialization
+    // (the body is empty and there is no member initializer list)
+    inline Matrix4( ) { };
+
+    // Copy a 4x4 matrix
+    // 
+    inline Matrix4( const Matrix4 & mat );
+
+    // Construct a 4x4 matrix containing the specified columns
+    // 
+    inline Matrix4( Vector4 col0, Vector4 col1, Vector4 col2, Vector4 col3 );
+
+    // Construct a 4x4 matrix from a 3x4 transformation matrix
+    // Declared explicit, so a Transform3 is never converted to a Matrix4 implicitly.
+    explicit inline Matrix4( const Transform3 & mat );
+
+    // Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector
+    // NOTE(review): the bottom row is presumably set to (0, 0, 0, 1); confirm in the implementation.
+    inline Matrix4( const Matrix3 & mat, Vector3 translateVec );
+
+    // Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector
+    // NOTE(review): the bottom row is presumably set to (0, 0, 0, 1); confirm in the implementation.
+    inline Matrix4( Quat unitQuat, Vector3 translateVec );
+
+    // Set all elements of a 4x4 matrix to the same scalar value
+    // Declared explicit, so a float is never converted to a Matrix4 implicitly.
+    explicit inline Matrix4( float scalar );
+
+    // Assign one 4x4 matrix to another
+    // The source is taken by const reference; a Matrix4 reference is returned.
+    inline Matrix4 & operator =( const Matrix4 & mat );
+
+    // Set the upper-left 3x3 submatrix
+    // NOTE: 
+    // This function does not change the bottom row elements.
+    // 
+    inline Matrix4 & setUpper3x3( const Matrix3 & mat3 );
+
+    // Get the upper-left 3x3 submatrix of a 4x4 matrix
+    // Returned as a Matrix3 by value.
+    inline const Matrix3 getUpper3x3( ) const;
+
+    // Set translation component
+    // NOTE: 
+    // This function does not change the bottom row elements.
+    // 
+    inline Matrix4 & setTranslation( Vector3 translateVec );
+
+    // Get the translation component of a 4x4 matrix
+    // Returned as a Vector3 by value.
+    inline const Vector3 getTranslation( ) const;
+
+    // Set column 0 of a 4x4 matrix
+    // Returns a Matrix4 reference (presumably *this; confirm in the implementation).
+    inline Matrix4 & setCol0( Vector4 col0 );
+
+    // Set column 1 of a 4x4 matrix
+    // Returns a Matrix4 reference (presumably *this; confirm in the implementation).
+    inline Matrix4 & setCol1( Vector4 col1 );
+
+    // Set column 2 of a 4x4 matrix
+    // Returns a Matrix4 reference (presumably *this; confirm in the implementation).
+    inline Matrix4 & setCol2( Vector4 col2 );
+
+    // Set column 3 of a 4x4 matrix
+    // Returns a Matrix4 reference (presumably *this; confirm in the implementation).
+    inline Matrix4 & setCol3( Vector4 col3 );
+
+    // Get column 0 of a 4x4 matrix
+    // 
+    inline const Vector4 getCol0( ) const;
+
+    // Get column 1 of a 4x4 matrix
+    // 
+    inline const Vector4 getCol1( ) const;
+
+    // Get column 2 of a 4x4 matrix
+    // 
+    inline const Vector4 getCol2( ) const;
+
+    // Get column 3 of a 4x4 matrix
+    // 
+    inline const Vector4 getCol3( ) const;
+
+    // Set the column of a 4x4 matrix referred to by the specified index
+    // NOTE(review): col is presumably 0 to 3; confirm in the implementation.
+    inline Matrix4 & setCol( int col, Vector4 vec );
+
+    // Set the row of a 4x4 matrix referred to by the specified index
+    // NOTE(review): row is presumably 0 to 3; confirm in the implementation.
+    inline Matrix4 & setRow( int row, Vector4 vec );
+
+    // Get the column of a 4x4 matrix referred to by the specified index
+    // The column is returned by value.
+    inline const Vector4 getCol( int col ) const;
+
+    // Get the row of a 4x4 matrix referred to by the specified index
+    // The row is returned by value.
+    inline const Vector4 getRow( int row ) const;
+
+    // Subscripting operator to set or get a column
+    // Non-const overload: returns a Vector4 reference.
+    inline Vector4 & operator []( int col );
+
+    // Subscripting operator to get a column
+    // Const overload: the column is returned by value.
+    inline const Vector4 operator []( int col ) const;
+
+    // Set the element of a 4x4 matrix referred to by column and row indices
+    // Note the argument order: column index first, then row index.
+    inline Matrix4 & setElem( int col, int row, float val );
+
+    // Get the element of a 4x4 matrix referred to by column and row indices
+    // Note the argument order: column index first, then row index.
+    inline float getElem( int col, int row ) const;
+
+    // Add two 4x4 matrices
+    // 
+    inline const Matrix4 operator +( const Matrix4 & mat ) const;
+
+    // Subtract a 4x4 matrix from another 4x4 matrix
+    // Member operator, so *this is the left-hand operand.
+    inline const Matrix4 operator -( const Matrix4 & mat ) const;
+
+    // Negate all elements of a 4x4 matrix
+    // Unary minus; returns a new Matrix4 and leaves *this unchanged (const member).
+    inline const Matrix4 operator -( ) const;
+
+    // Multiply a 4x4 matrix by a scalar
+    // 
+    inline const Matrix4 operator *( float scalar ) const;
+
+    // Multiply a 4x4 matrix by a 4-D vector
+    // Member operator, so the matrix is the left-hand operand; the result is a Vector4.
+    inline const Vector4 operator *( Vector4 vec ) const;
+
+    // Multiply a 4x4 matrix by a 3-D vector
+    // Returns a Vector4. NOTE(review): vec is presumably treated as having w = 0; confirm.
+    inline const Vector4 operator *( Vector3 vec ) const;
+
+    // Multiply a 4x4 matrix by a 3-D point
+    // Returns a Vector4. NOTE(review): pnt is presumably treated as having w = 1; confirm.
+    inline const Vector4 operator *( Point3 pnt ) const;
+
+    // Multiply two 4x4 matrices
+    // Member operator, so *this is the left-hand operand of the product.
+    inline const Matrix4 operator *( const Matrix4 & mat ) const;
+
+    // Multiply a 4x4 matrix by a 3x4 transformation matrix
+    // Member operator, so the 4x4 matrix is the left-hand operand; the result is a Matrix4.
+    inline const Matrix4 operator *( const Transform3 & tfrm ) const;
+
+    // Perform compound assignment and addition with a 4x4 matrix
+    // Returns a Matrix4 reference.
+    inline Matrix4 & operator +=( const Matrix4 & mat );
+
+    // Perform compound assignment and subtraction by a 4x4 matrix
+    // Returns a Matrix4 reference.
+    inline Matrix4 & operator -=( const Matrix4 & mat );
+
+    // Perform compound assignment and multiplication by a scalar
+    // Returns a Matrix4 reference.
+    inline Matrix4 & operator *=( float scalar );
+
+    // Perform compound assignment and multiplication by a 4x4 matrix
+    // NOTE(review): presumably *this = *this * mat; confirm the operand order.
+    inline Matrix4 & operator *=( const Matrix4 & mat );
+
+    // Perform compound assignment and multiplication by a 3x4 transformation matrix
+    // NOTE(review): presumably *this = *this * tfrm; confirm the operand order.
+    inline Matrix4 & operator *=( const Transform3 & tfrm );
+
+    // Construct an identity 4x4 matrix
+    // Static member; the result is returned by value.
+    static inline const Matrix4 identity( );
+
+    // Construct a 4x4 matrix to rotate around the x axis
+    // 
+    static inline const Matrix4 rotationX( float radians );
+
+    // Construct a 4x4 matrix to rotate around the y axis
+    // 
+    static inline const Matrix4 rotationY( float radians );
+
+    // Construct a 4x4 matrix to rotate around the z axis
+    // 
+    static inline const Matrix4 rotationZ( float radians );
+
+    // Construct a 4x4 matrix to rotate around the x, y, and z axes
+    // NOTE(review): the name suggests a Z * Y * X composition order; confirm in the implementation.
+    static inline const Matrix4 rotationZYX( Vector3 radiansXYZ );
+
+    // Construct a 4x4 matrix to rotate around a unit-length 3-D vector
+    // Overload of rotation( ) taking an angle (parameter radians) and an axis.
+    static inline const Matrix4 rotation( float radians, Vector3 unitVec );
+
+    // Construct a rotation matrix from a unit-length quaternion
+    // Overload of rotation( ) taking a Quat.
+    static inline const Matrix4 rotation( Quat unitQuat );
+
+    // Construct a 4x4 matrix to perform scaling
+    // The scale factors are passed together as one Vector3 (scaleVec).
+    static inline const Matrix4 scale( Vector3 scaleVec );
+
+    // Construct a 4x4 matrix to perform translation
+    // The offset is passed as one Vector3 (translateVec).
+    static inline const Matrix4 translation( Vector3 translateVec );
+
+    // Construct viewing matrix based on eye position, position looked at, and up direction
+    // The two positions are Point3 values; the up direction is a Vector3.
+    static inline const Matrix4 lookAt( Point3 eyePos, Point3 lookAtPos, Vector3 upVec );
+
+    // Construct a perspective projection matrix
+    // NOTE(review): fovyRadians is presumably the vertical field of view; confirm in the implementation.
+    static inline const Matrix4 perspective( float fovyRadians, float aspect, float zNear, float zFar );
+
+    // Construct a perspective projection matrix based on frustum
+    // Takes the six frustum bounds: left, right, bottom, top, zNear, zFar.
+    static inline const Matrix4 frustum( float left, float right, float bottom, float top, float zNear, float zFar );
+
+    // Construct an orthographic projection matrix
+    // Takes the six volume bounds: left, right, bottom, top, zNear, zFar.
+    static inline const Matrix4 orthographic( float left, float right, float bottom, float top, float zNear, float zFar );
+
+};
+// Multiply a 4x4 matrix by a scalar
+// 
+inline const Matrix4 operator *( float scalar, const Matrix4 & mat );
+
+// Append (post-multiply) a scale transformation to a 4x4 matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// 
+inline const Matrix4 appendScale( const Matrix4 & mat, Vector3 scaleVec );
+
+// Prepend (pre-multiply) a scale transformation to a 4x4 matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// 
+inline const Matrix4 prependScale( Vector3 scaleVec, const Matrix4 & mat );
+
+// Multiply two 4x4 matrices per element
+// 
+inline const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 );
+
+// Compute the absolute value of a 4x4 matrix per element
+// 
+inline const Matrix4 absPerElem( const Matrix4 & mat );
+
+// Transpose of a 4x4 matrix
+// 
+inline const Matrix4 transpose( const Matrix4 & mat );
+
+// Compute the inverse of a 4x4 matrix
+// NOTE: 
+// Result is unpredictable when the determinant of mat is equal to or near 0.
+// 
+inline const Matrix4 inverse( const Matrix4 & mat );
+
+// Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix
+// NOTE: 
+// Can outperform the general inverse( ) when mat really is affine, as this function expects.
+// The result is unpredictable when the determinant of mat is equal to or near 0.
+inline const Matrix4 affineInverse( const Matrix4 & mat );
+
+// Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix
+// NOTE: 
+// Can outperform the general inverse( ) when mat really is affine with an
+// orthogonal upper-left 3x3 submatrix, as this function expects.
+inline const Matrix4 orthoInverse( const Matrix4 & mat );
+
+// Determinant of a 4x4 matrix
+// 
+inline float determinant( const Matrix4 & mat );
+
+// Conditionally select between two 4x4 matrices
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// 
+inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, bool select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 4x4 matrix
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Matrix4 & mat );
+
+// Print a 4x4 matrix and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Matrix4 & mat, const char * name );
+
+#endif
+
+// A 3x4 transformation matrix in array-of-structures format
+// Stored as four Vector3 columns, mCol0 through mCol3.
+class Transform3
+{
+    Vector3 mCol0;
+    Vector3 mCol1;
+    Vector3 mCol2;
+    Vector3 mCol3;
+
+public:
+    // Default constructor; does no initialization
+    // 
+    inline Transform3( ) { };
+
+    // Copy a 3x4 transformation matrix
+    // 
+    inline Transform3( const Transform3 & tfrm );
+
+    // Construct a 3x4 transformation matrix containing the specified columns
+    // 
+    inline Transform3( Vector3 col0, Vector3 col1, Vector3 col2, Vector3 col3 );
+
+    // Construct a 3x4 transformation matrix from a 3x3 matrix and a 3-D vector
+    // NOTE(review): translateVec presumably becomes column 3 -- confirm against the implementation.
+    inline Transform3( const Matrix3 & tfrm, Vector3 translateVec );
+
+    // Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector
+    // 
+    inline Transform3( Quat unitQuat, Vector3 translateVec );
+
+    // Set all elements of a 3x4 transformation matrix to the same scalar value
+    // Declared explicit, so a float is never implicitly converted to a Transform3.
+    explicit inline Transform3( float scalar );
+
+    // Assign one 3x4 transformation matrix to another
+    // 
+    inline Transform3 & operator =( const Transform3 & tfrm );
+
+    // Set the upper-left 3x3 submatrix
+    // 
+    inline Transform3 & setUpper3x3( const Matrix3 & mat3 );
+
+    // Get the upper-left 3x3 submatrix of a 3x4 transformation matrix
+    // 
+    inline const Matrix3 getUpper3x3( ) const;
+
+    // Set translation component
+    // 
+    inline Transform3 & setTranslation( Vector3 translateVec );
+
+    // Get the translation component of a 3x4 transformation matrix
+    // 
+    inline const Vector3 getTranslation( ) const;
+
+    // Set column 0 of a 3x4 transformation matrix
+    // 
+    inline Transform3 & setCol0( Vector3 col0 );
+
+    // Set column 1 of a 3x4 transformation matrix
+    // 
+    inline Transform3 & setCol1( Vector3 col1 );
+
+    // Set column 2 of a 3x4 transformation matrix
+    // 
+    inline Transform3 & setCol2( Vector3 col2 );
+
+    // Set column 3 of a 3x4 transformation matrix
+    // 
+    inline Transform3 & setCol3( Vector3 col3 );
+
+    // Get column 0 of a 3x4 transformation matrix
+    // 
+    inline const Vector3 getCol0( ) const;
+
+    // Get column 1 of a 3x4 transformation matrix
+    // 
+    inline const Vector3 getCol1( ) const;
+
+    // Get column 2 of a 3x4 transformation matrix
+    // 
+    inline const Vector3 getCol2( ) const;
+
+    // Get column 3 of a 3x4 transformation matrix
+    // 
+    inline const Vector3 getCol3( ) const;
+
+    // Set the column of a 3x4 transformation matrix referred to by the specified index
+    // NOTE(review): there are four columns, so col is presumably expected in 0..3; no bounds check is visible here.
+    inline Transform3 & setCol( int col, Vector3 vec );
+
+    // Set the row of a 3x4 transformation matrix referred to by the specified index
+    // NOTE(review): the Vector4 presumably supplies one element per column, with row expected in 0..2 -- confirm.
+    inline Transform3 & setRow( int row, Vector4 vec );
+
+    // Get the column of a 3x4 transformation matrix referred to by the specified index
+    // 
+    inline const Vector3 getCol( int col ) const;
+
+    // Get the row of a 3x4 transformation matrix referred to by the specified index
+    // 
+    inline const Vector4 getRow( int row ) const;
+
+    // Subscripting operator to set or get a column
+    // Returns a Vector3 reference, so the column can be modified through the result.
+    inline Vector3 & operator []( int col );
+
+    // Subscripting operator to get a column
+    // Const overload; returns the column by value.
+    inline const Vector3 operator []( int col ) const;
+
+    // Set the element of a 3x4 transformation matrix referred to by column and row indices
+    // 
+    inline Transform3 & setElem( int col, int row, float val );
+
+    // Get the element of a 3x4 transformation matrix referred to by column and row indices
+    // 
+    inline float getElem( int col, int row ) const;
+
+    // Multiply a 3x4 transformation matrix by a 3-D vector
+    // NOTE(review): presumably applies only the upper-left 3x3 part (no translation) -- confirm.
+    inline const Vector3 operator *( Vector3 vec ) const;
+
+    // Multiply a 3x4 transformation matrix by a 3-D point
+    // NOTE(review): presumably applies the translation as well -- confirm.
+    inline const Point3 operator *( Point3 pnt ) const;
+
+    // Multiply two 3x4 transformation matrices
+    // 
+    inline const Transform3 operator *( const Transform3 & tfrm ) const;
+
+    // Perform compound assignment and multiplication by a 3x4 transformation matrix
+    // 
+    inline Transform3 & operator *=( const Transform3 & tfrm );
+
+    // Construct an identity 3x4 transformation matrix
+    // 
+    static inline const Transform3 identity( );
+
+    // Construct a 3x4 transformation matrix to rotate around the x axis
+    // 
+    static inline const Transform3 rotationX( float radians );
+
+    // Construct a 3x4 transformation matrix to rotate around the y axis
+    // 
+    static inline const Transform3 rotationY( float radians );
+
+    // Construct a 3x4 transformation matrix to rotate around the z axis
+    // 
+    static inline const Transform3 rotationZ( float radians );
+
+    // Construct a 3x4 transformation matrix to rotate around the x, y, and z axes
+    // NOTE(review): radiansXYZ presumably holds the x, y, and z angles in its x, y, and z elements -- confirm.
+    static inline const Transform3 rotationZYX( Vector3 radiansXYZ );
+
+    // Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector
+    // 
+    static inline const Transform3 rotation( float radians, Vector3 unitVec );
+
+    // Construct a 3x4 transformation matrix to rotate by a unit-length quaternion
+    // 
+    static inline const Transform3 rotation( Quat unitQuat );
+
+    // Construct a 3x4 transformation matrix to perform scaling
+    // 
+    static inline const Transform3 scale( Vector3 scaleVec );
+
+    // Construct a 3x4 transformation matrix to perform translation
+    // 
+    static inline const Transform3 translation( Vector3 translateVec );
+
+};
+// Append (post-multiply) a scale transformation to a 3x4 transformation matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// 
+inline const Transform3 appendScale( const Transform3 & tfrm, Vector3 scaleVec );
+
+// Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// 
+inline const Transform3 prependScale( Vector3 scaleVec, const Transform3 & tfrm );
+
+// Multiply two 3x4 transformation matrices per element
+// 
+inline const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 );
+
+// Compute the absolute value of a 3x4 transformation matrix per element
+// 
+inline const Transform3 absPerElem( const Transform3 & tfrm );
+
+// Inverse of a 3x4 transformation matrix
+// NOTE: 
+// Result is unpredictable when the determinant of the upper-left 3x3 submatrix is equal to or near 0.
+// 
+inline const Transform3 inverse( const Transform3 & tfrm );
+
+// Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix
+// NOTE: 
+// This can be used to achieve better performance than a general inverse when the specified 3x4 transformation matrix meets the given restrictions.
+// 
+inline const Transform3 orthoInverse( const Transform3 & tfrm );
+
+// Conditionally select between two 3x4 transformation matrices
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// NOTE(review): presumably returns tfrm1 when select1 is true and tfrm0 otherwise -- confirm against the implementation.
+inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, bool select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 3x4 transformation matrix
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Transform3 & tfrm );
+
+// Print a 3x4 transformation matrix and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// name is the string identifier associated with the matrix.
+inline void print( const Transform3 & tfrm, const char * name );
+
+#endif
+
+} // namespace Aos
+} // namespace Vectormath
+
+#include "vec_aos.h"
+#include "quat_aos.h"
+#include "mat_aos.h"
+
+#endif
diff --git a/Extras/vectormathlibrary/include/vectormath/spu/cpp/vectormath_soa.h b/Extras/vectormathlibrary/include/vectormath/spu/cpp/vectormath_soa.h
index 31b8f8304..71843ef14 100644
--- a/Extras/vectormathlibrary/include/vectormath/spu/cpp/vectormath_soa.h
+++ b/Extras/vectormathlibrary/include/vectormath/spu/cpp/vectormath_soa.h
@@ -1,1921 +1,1921 @@
-/*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
-   All rights reserved.
-
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef _VECTORMATH_SOA_CPP_H
-#define _VECTORMATH_SOA_CPP_H
-
-#include <math.h>
-#include <spu_intrinsics.h>
-#include "floatInVec.h"
-#include "boolInVec.h"
-#include "stdio.h"
-#include "vectormath_aos.h"
-
-#ifdef _VECTORMATH_DEBUG
-#endif
-
-namespace Vectormath {
-
-namespace Soa {
-
-//-----------------------------------------------------------------------------
-// Forward Declarations
-//
-
-class Vector3;
-class Vector4;
-class Point3;
-class Quat;
-class Matrix3;
-class Matrix4;
-class Transform3;
-
-// A set of four 3-D vectors in structure-of-arrays format
-//
-class Vector3
-{
-    typedef vec_float4 vec_float4_t;
-    vec_float4 mX;
-    vec_float4 mY;
-    vec_float4 mZ;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Vector3( ) { };
-
-    // Copy a 3-D vector
-    // 
-    inline Vector3( const Vector3 & vec );
-
-    // Construct a 3-D vector from x, y, and z elements
-    // 
-    inline Vector3( vec_float4 x, vec_float4 y, vec_float4 z );
-
-    // Copy elements from a 3-D point into a 3-D vector
-    // 
-    explicit inline Vector3( const Point3 & pnt );
-
-    // Set all elements of a 3-D vector to the same scalar value
-    // 
-    explicit inline Vector3( vec_float4 scalar );
-
-    // Replicate an AoS 3-D vector
-    // 
-    inline Vector3( Aos::Vector3 vec );
-
-    // Insert four AoS 3-D vectors
-    // 
-    inline Vector3( Aos::Vector3 vec0, Aos::Vector3 vec1, Aos::Vector3 vec2, Aos::Vector3 vec3 );
-
-    // Extract four AoS 3-D vectors
-    // 
-    inline void get4Aos( Aos::Vector3 & result0, Aos::Vector3 & result1, Aos::Vector3 & result2, Aos::Vector3 & result3 ) const;
-
-    // Assign one 3-D vector to another
-    // 
-    inline Vector3 & operator =( const Vector3 & vec );
-
-    // Set the x element of a 3-D vector
-    // 
-    inline Vector3 & setX( vec_float4 x );
-
-    // Set the y element of a 3-D vector
-    // 
-    inline Vector3 & setY( vec_float4 y );
-
-    // Set the z element of a 3-D vector
-    // 
-    inline Vector3 & setZ( vec_float4 z );
-
-    // Get the x element of a 3-D vector
-    // 
-    inline vec_float4 getX( ) const;
-
-    // Get the y element of a 3-D vector
-    // 
-    inline vec_float4 getY( ) const;
-
-    // Get the z element of a 3-D vector
-    // 
-    inline vec_float4 getZ( ) const;
-
-    // Set an x, y, or z element of a 3-D vector by index
-    // 
-    inline Vector3 & setElem( int idx, vec_float4 value );
-
-    // Get an x, y, or z element of a 3-D vector by index
-    // 
-    inline vec_float4 getElem( int idx ) const;
-
-    // Subscripting operator to set or get an element
-    // 
-    inline vec_float4_t & operator []( int idx );
-
-    // Subscripting operator to get an element
-    // 
-    inline vec_float4 operator []( int idx ) const;
-
-    // Add two 3-D vectors
-    // 
-    inline const Vector3 operator +( const Vector3 & vec ) const;
-
-    // Subtract a 3-D vector from another 3-D vector
-    // 
-    inline const Vector3 operator -( const Vector3 & vec ) const;
-
-    // Add a 3-D vector to a 3-D point
-    // 
-    inline const Point3 operator +( const Point3 & pnt ) const;
-
-    // Multiply a 3-D vector by a scalar
-    // 
-    inline const Vector3 operator *( vec_float4 scalar ) const;
-
-    // Divide a 3-D vector by a scalar
-    // 
-    inline const Vector3 operator /( vec_float4 scalar ) const;
-
-    // Perform compound assignment and addition with a 3-D vector
-    // 
-    inline Vector3 & operator +=( const Vector3 & vec );
-
-    // Perform compound assignment and subtraction by a 3-D vector
-    // 
-    inline Vector3 & operator -=( const Vector3 & vec );
-
-    // Perform compound assignment and multiplication by a scalar
-    // 
-    inline Vector3 & operator *=( vec_float4 scalar );
-
-    // Perform compound assignment and division by a scalar
-    // 
-    inline Vector3 & operator /=( vec_float4 scalar );
-
-    // Negate all elements of a 3-D vector
-    // 
-    inline const Vector3 operator -( ) const;
-
-    // Construct x axis
-    // 
-    static inline const Vector3 xAxis( );
-
-    // Construct y axis
-    // 
-    static inline const Vector3 yAxis( );
-
-    // Construct z axis
-    // 
-    static inline const Vector3 zAxis( );
-
-};
-
-// Multiply a 3-D vector by a scalar
-// 
-inline const Vector3 operator *( vec_float4 scalar, const Vector3 & vec );
-
-// Multiply two 3-D vectors per element
-// 
-inline const Vector3 mulPerElem( const Vector3 & vec0, const Vector3 & vec1 );
-
-// Divide two 3-D vectors per element
-// NOTE: 
-// Floating-point behavior matches standard library function divf4.
-// 
-inline const Vector3 divPerElem( const Vector3 & vec0, const Vector3 & vec1 );
-
-// Compute the reciprocal of a 3-D vector per element
-// NOTE: 
-// Floating-point behavior matches standard library function recipf4.
-// 
-inline const Vector3 recipPerElem( const Vector3 & vec );
-
-// Compute the square root of a 3-D vector per element
-// NOTE: 
-// Floating-point behavior matches standard library function sqrtf4.
-// 
-inline const Vector3 sqrtPerElem( const Vector3 & vec );
-
-// Compute the reciprocal square root of a 3-D vector per element
-// NOTE: 
-// Floating-point behavior matches standard library function rsqrtf4.
-// 
-inline const Vector3 rsqrtPerElem( const Vector3 & vec );
-
-// Compute the absolute value of a 3-D vector per element
-// 
-inline const Vector3 absPerElem( const Vector3 & vec );
-
-// Copy sign from one 3-D vector to another, per element
-// 
-inline const Vector3 copySignPerElem( const Vector3 & vec0, const Vector3 & vec1 );
-
-// Maximum of two 3-D vectors per element
-// 
-inline const Vector3 maxPerElem( const Vector3 & vec0, const Vector3 & vec1 );
-
-// Minimum of two 3-D vectors per element
-// 
-inline const Vector3 minPerElem( const Vector3 & vec0, const Vector3 & vec1 );
-
-// Maximum element of a 3-D vector
-// 
-inline vec_float4 maxElem( const Vector3 & vec );
-
-// Minimum element of a 3-D vector
-// 
-inline vec_float4 minElem( const Vector3 & vec );
-
-// Compute the sum of all elements of a 3-D vector
-// 
-inline vec_float4 sum( const Vector3 & vec );
-
-// Compute the dot product of two 3-D vectors
-// 
-inline vec_float4 dot( const Vector3 & vec0, const Vector3 & vec1 );
-
-// Compute the square of the length of a 3-D vector
-// 
-inline vec_float4 lengthSqr( const Vector3 & vec );
-
-// Compute the length of a 3-D vector
-// 
-inline vec_float4 length( const Vector3 & vec );
-
-// Normalize a 3-D vector
-// NOTE: 
-// The result is unpredictable when all elements of vec are at or near zero.
-// 
-inline const Vector3 normalize( const Vector3 & vec );
-
-// Compute cross product of two 3-D vectors
-// 
-inline const Vector3 cross( const Vector3 & vec0, const Vector3 & vec1 );
-
-// Outer product of two 3-D vectors
-// 
-inline const Matrix3 outer( const Vector3 & vec0, const Vector3 & vec1 );
-
-// Pre-multiply a row vector by a 3x3 matrix
-// 
-inline const Vector3 rowMul( const Vector3 & vec, const Matrix3 & mat );
-
-// Cross-product matrix of a 3-D vector
-// 
-inline const Matrix3 crossMatrix( const Vector3 & vec );
-
-// Create cross-product matrix and multiply
-// NOTE: 
-// Faster than separately creating a cross-product matrix and multiplying.
-// 
-inline const Matrix3 crossMatrixMul( const Vector3 & vec, const Matrix3 & mat );
-
-// Linear interpolation between two 3-D vectors
-// NOTE: 
-// Does not clamp t between 0 and 1.
-// 
-inline const Vector3 lerp( vec_float4 t, const Vector3 & vec0, const Vector3 & vec1 );
-
-// Spherical linear interpolation between two 3-D vectors
-// NOTE: 
-// The result is unpredictable if the vectors point in opposite directions.
-// Does not clamp t between 0 and 1.
-// 
-inline const Vector3 slerp( vec_float4 t, const Vector3 & unitVec0, const Vector3 & unitVec1 );
-
-// Conditionally select between two 3-D vectors
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// 
-inline const Vector3 select( const Vector3 & vec0, const Vector3 & vec1, vec_uint4 select1 );
-
-// Load four three-float 3-D vectors, stored in three quadwords
-// 
-inline void loadXYZArray( Vector3 & vec, const vec_float4 * threeQuads );
-
-// Store four slots of an SoA 3-D vector in three quadwords
-// 
-inline void storeXYZArray( const Vector3 & vec, vec_float4 * threeQuads );
-
-// Store eight slots of two SoA 3-D vectors as half-floats
-// 
-inline void storeHalfFloats( const Vector3 & vec0, const Vector3 & vec1, vec_ushort8 * threeQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a 3-D vector
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Vector3 & vec );
-
-// Print a 3-D vector and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Vector3 & vec, const char * name );
-
-#endif
-
-// A set of four 4-D vectors in structure-of-arrays format
-//
-class Vector4
-{
-    typedef vec_float4 vec_float4_t;
-    vec_float4 mX;
-    vec_float4 mY;
-    vec_float4 mZ;
-    vec_float4 mW;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Vector4( ) { };
-
-    // Copy a 4-D vector
-    // 
-    inline Vector4( const Vector4 & vec );
-
-    // Construct a 4-D vector from x, y, z, and w elements
-    // 
-    inline Vector4( vec_float4 x, vec_float4 y, vec_float4 z, vec_float4 w );
-
-    // Construct a 4-D vector from a 3-D vector and a scalar
-    // 
-    inline Vector4( const Vector3 & xyz, vec_float4 w );
-
-    // Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0
-    // 
-    explicit inline Vector4( const Vector3 & vec );
-
-    // Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1
-    // 
-    explicit inline Vector4( const Point3 & pnt );
-
-    // Copy elements from a quaternion into a 4-D vector
-    // 
-    explicit inline Vector4( const Quat & quat );
-
-    // Set all elements of a 4-D vector to the same scalar value
-    // 
-    explicit inline Vector4( vec_float4 scalar );
-
-    // Replicate an AoS 4-D vector
-    // 
-    inline Vector4( Aos::Vector4 vec );
-
-    // Insert four AoS 4-D vectors
-    // 
-    inline Vector4( Aos::Vector4 vec0, Aos::Vector4 vec1, Aos::Vector4 vec2, Aos::Vector4 vec3 );
-
-    // Extract four AoS 4-D vectors
-    // 
-    inline void get4Aos( Aos::Vector4 & result0, Aos::Vector4 & result1, Aos::Vector4 & result2, Aos::Vector4 & result3 ) const;
-
-    // Assign one 4-D vector to another
-    // 
-    inline Vector4 & operator =( const Vector4 & vec );
-
-    // Set the x, y, and z elements of a 4-D vector
-    // NOTE: 
-    // This function does not change the w element.
-    // 
-    inline Vector4 & setXYZ( const Vector3 & vec );
-
-    // Get the x, y, and z elements of a 4-D vector
-    // 
-    inline const Vector3 getXYZ( ) const;
-
-    // Set the x element of a 4-D vector
-    // 
-    inline Vector4 & setX( vec_float4 x );
-
-    // Set the y element of a 4-D vector
-    // 
-    inline Vector4 & setY( vec_float4 y );
-
-    // Set the z element of a 4-D vector
-    // 
-    inline Vector4 & setZ( vec_float4 z );
-
-    // Set the w element of a 4-D vector
-    // 
-    inline Vector4 & setW( vec_float4 w );
-
-    // Get the x element of a 4-D vector
-    // 
-    inline vec_float4 getX( ) const;
-
-    // Get the y element of a 4-D vector
-    // 
-    inline vec_float4 getY( ) const;
-
-    // Get the z element of a 4-D vector
-    // 
-    inline vec_float4 getZ( ) const;
-
-    // Get the w element of a 4-D vector
-    // 
-    inline vec_float4 getW( ) const;
-
-    // Set an x, y, z, or w element of a 4-D vector by index
-    // 
-    inline Vector4 & setElem( int idx, vec_float4 value );
-
-    // Get an x, y, z, or w element of a 4-D vector by index
-    // 
-    inline vec_float4 getElem( int idx ) const;
-
-    // Subscripting operator to set or get an element
-    // 
-    inline vec_float4_t & operator []( int idx );
-
-    // Subscripting operator to get an element
-    // 
-    inline vec_float4 operator []( int idx ) const;
-
-    // Add two 4-D vectors
-    // 
-    inline const Vector4 operator +( const Vector4 & vec ) const;
-
-    // Subtract a 4-D vector from another 4-D vector
-    // 
-    inline const Vector4 operator -( const Vector4 & vec ) const;
-
-    // Multiply a 4-D vector by a scalar
-    // 
-    inline const Vector4 operator *( vec_float4 scalar ) const;
-
-    // Divide a 4-D vector by a scalar
-    // 
-    inline const Vector4 operator /( vec_float4 scalar ) const;
-
-    // Perform compound assignment and addition with a 4-D vector
-    // 
-    inline Vector4 & operator +=( const Vector4 & vec );
-
-    // Perform compound assignment and subtraction by a 4-D vector
-    // 
-    inline Vector4 & operator -=( const Vector4 & vec );
-
-    // Perform compound assignment and multiplication by a scalar
-    // 
-    inline Vector4 & operator *=( vec_float4 scalar );
-
-    // Perform compound assignment and division by a scalar
-    // 
-    inline Vector4 & operator /=( vec_float4 scalar );
-
-    // Negate all elements of a 4-D vector
-    // 
-    inline const Vector4 operator -( ) const;
-
-    // Construct x axis
-    // 
-    static inline const Vector4 xAxis( );
-
-    // Construct y axis
-    // 
-    static inline const Vector4 yAxis( );
-
-    // Construct z axis
-    // 
-    static inline const Vector4 zAxis( );
-
-    // Construct w axis
-    // 
-    static inline const Vector4 wAxis( );
-
-};
-
-// Multiply a 4-D vector by a scalar
-// 
-inline const Vector4 operator *( vec_float4 scalar, const Vector4 & vec );
-
-// Multiply two 4-D vectors per element
-// 
-inline const Vector4 mulPerElem( const Vector4 & vec0, const Vector4 & vec1 );
-
-// Divide two 4-D vectors per element
-// NOTE: 
-// Floating-point behavior matches standard library function divf4.
-// 
-inline const Vector4 divPerElem( const Vector4 & vec0, const Vector4 & vec1 );
-
-// Compute the reciprocal of a 4-D vector per element
-// NOTE: 
-// Floating-point behavior matches standard library function recipf4.
-// 
-inline const Vector4 recipPerElem( const Vector4 & vec );
-
-// Compute the square root of a 4-D vector per element
-// NOTE: 
-// Floating-point behavior matches standard library function sqrtf4.
-// 
-inline const Vector4 sqrtPerElem( const Vector4 & vec );
-
-// Compute the reciprocal square root of a 4-D vector per element
-// NOTE: 
-// Floating-point behavior matches standard library function rsqrtf4.
-// 
-inline const Vector4 rsqrtPerElem( const Vector4 & vec );
-
-// Compute the absolute value of a 4-D vector per element
-// 
-inline const Vector4 absPerElem( const Vector4 & vec );
-
-// Copy sign from one 4-D vector to another, per element
-// 
-inline const Vector4 copySignPerElem( const Vector4 & vec0, const Vector4 & vec1 );
-
-// Maximum of two 4-D vectors per element
-// 
-inline const Vector4 maxPerElem( const Vector4 & vec0, const Vector4 & vec1 );
-
-// Minimum of two 4-D vectors per element
-// 
-inline const Vector4 minPerElem( const Vector4 & vec0, const Vector4 & vec1 );
-
-// Maximum element of a 4-D vector
-// 
-inline vec_float4 maxElem( const Vector4 & vec );
-
-// Minimum element of a 4-D vector
-// 
-inline vec_float4 minElem( const Vector4 & vec );
-
-// Compute the sum of all elements of a 4-D vector
-// 
-inline vec_float4 sum( const Vector4 & vec );
-
-// Compute the dot product of two 4-D vectors
-// 
-inline vec_float4 dot( const Vector4 & vec0, const Vector4 & vec1 );
-
-// Compute the square of the length of a 4-D vector
-// 
-inline vec_float4 lengthSqr( const Vector4 & vec );
-
-// Compute the length of a 4-D vector
-// 
-inline vec_float4 length( const Vector4 & vec );
-
-// Normalize a 4-D vector
-// NOTE: 
-// The result is unpredictable when all elements of vec are at or near zero.
-// 
-inline const Vector4 normalize( const Vector4 & vec );
-
-// Outer product of two 4-D vectors
-// 
-inline const Matrix4 outer( const Vector4 & vec0, const Vector4 & vec1 );
-
-// Linear interpolation between two 4-D vectors
-// NOTE: 
-// Does not clamp t between 0 and 1.
-// 
-inline const Vector4 lerp( vec_float4 t, const Vector4 & vec0, const Vector4 & vec1 );
-
-// Spherical linear interpolation between two 4-D vectors
-// NOTE: 
-// The result is unpredictable if the vectors point in opposite directions.
-// Does not clamp t between 0 and 1.
-// 
-inline const Vector4 slerp( vec_float4 t, const Vector4 & unitVec0, const Vector4 & unitVec1 );
-
-// Conditionally select between two 4-D vectors
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// 
-inline const Vector4 select( const Vector4 & vec0, const Vector4 & vec1, vec_uint4 select1 );
-
-// Store four slots of an SoA 4-D vector as half-floats
-// 
-inline void storeHalfFloats( const Vector4 & vec, vec_ushort8 * twoQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a 4-D vector
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Vector4 & vec );
-
-// Print a 4-D vector and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Vector4 & vec, const char * name );
-
-#endif
-
-// A set of four 3-D points in structure-of-arrays format
-//
-class Point3
-{
-    typedef vec_float4 vec_float4_t;
-    vec_float4 mX;
-    vec_float4 mY;
-    vec_float4 mZ;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Point3( ) { };
-
-    // Copy a 3-D point
-    // 
-    inline Point3( const Point3 & pnt );
-
-    // Construct a 3-D point from x, y, and z elements
-    // 
-    inline Point3( vec_float4 x, vec_float4 y, vec_float4 z );
-
-    // Copy elements from a 3-D vector into a 3-D point
-    // 
-    explicit inline Point3( const Vector3 & vec );
-
-    // Set all elements of a 3-D point to the same scalar value
-    // 
-    explicit inline Point3( vec_float4 scalar );
-
-    // Replicate an AoS 3-D point
-    // 
-    inline Point3( Aos::Point3 pnt );
-
-    // Insert four AoS 3-D points
-    // 
-    inline Point3( Aos::Point3 pnt0, Aos::Point3 pnt1, Aos::Point3 pnt2, Aos::Point3 pnt3 );
-
-    // Extract four AoS 3-D points
-    // 
-    inline void get4Aos( Aos::Point3 & result0, Aos::Point3 & result1, Aos::Point3 & result2, Aos::Point3 & result3 ) const;
-
-    // Assign one 3-D point to another
-    // 
-    inline Point3 & operator =( const Point3 & pnt );
-
-    // Set the x element of a 3-D point
-    // 
-    inline Point3 & setX( vec_float4 x );
-
-    // Set the y element of a 3-D point
-    // 
-    inline Point3 & setY( vec_float4 y );
-
-    // Set the z element of a 3-D point
-    // 
-    inline Point3 & setZ( vec_float4 z );
-
-    // Get the x element of a 3-D point
-    // 
-    inline vec_float4 getX( ) const;
-
-    // Get the y element of a 3-D point
-    // 
-    inline vec_float4 getY( ) const;
-
-    // Get the z element of a 3-D point
-    // 
-    inline vec_float4 getZ( ) const;
-
-    // Set an x, y, or z element of a 3-D point by index
-    // 
-    inline Point3 & setElem( int idx, vec_float4 value );
-
-    // Get an x, y, or z element of a 3-D point by index
-    // 
-    inline vec_float4 getElem( int idx ) const;
-
-    // Subscripting operator to set or get an element
-    // 
-    inline vec_float4_t & operator []( int idx );
-
-    // Subscripting operator to get an element
-    // 
-    inline vec_float4 operator []( int idx ) const;
-
-    // Subtract a 3-D point from another 3-D point
-    // 
-    inline const Vector3 operator -( const Point3 & pnt ) const;
-
-    // Add a 3-D point to a 3-D vector
-    // 
-    inline const Point3 operator +( const Vector3 & vec ) const;
-
-    // Subtract a 3-D vector from a 3-D point
-    // 
-    inline const Point3 operator -( const Vector3 & vec ) const;
-
-    // Perform compound assignment and addition with a 3-D vector
-    // 
-    inline Point3 & operator +=( const Vector3 & vec );
-
-    // Perform compound assignment and subtraction by a 3-D vector
-    // 
-    inline Point3 & operator -=( const Vector3 & vec );
-
-};
-
-// Multiply two 3-D points per element
-// 
-inline const Point3 mulPerElem( const Point3 & pnt0, const Point3 & pnt1 );
-
-// Divide two 3-D points per element
-// NOTE: 
-// Floating-point behavior matches standard library function divf4.
-// 
-inline const Point3 divPerElem( const Point3 & pnt0, const Point3 & pnt1 );
-
-// Compute the reciprocal of a 3-D point per element
-// NOTE: 
-// Floating-point behavior matches standard library function recipf4.
-// 
-inline const Point3 recipPerElem( const Point3 & pnt );
-
-// Compute the square root of a 3-D point per element
-// NOTE: 
-// Floating-point behavior matches standard library function sqrtf4.
-// 
-inline const Point3 sqrtPerElem( const Point3 & pnt );
-
-// Compute the reciprocal square root of a 3-D point per element
-// NOTE: 
-// Floating-point behavior matches standard library function rsqrtf4.
-// 
-inline const Point3 rsqrtPerElem( const Point3 & pnt );
-
-// Compute the absolute value of a 3-D point per element
-// 
-inline const Point3 absPerElem( const Point3 & pnt );
-
-// Copy sign from one 3-D point to another, per element
-// 
-inline const Point3 copySignPerElem( const Point3 & pnt0, const Point3 & pnt1 );
-
-// Maximum of two 3-D points per element
-// 
-inline const Point3 maxPerElem( const Point3 & pnt0, const Point3 & pnt1 );
-
-// Minimum of two 3-D points per element
-// 
-inline const Point3 minPerElem( const Point3 & pnt0, const Point3 & pnt1 );
-
-// Maximum element of a 3-D point
-// 
-inline vec_float4 maxElem( const Point3 & pnt );
-
-// Minimum element of a 3-D point
-// 
-inline vec_float4 minElem( const Point3 & pnt );
-
-// Compute the sum of all elements of a 3-D point
-// 
-inline vec_float4 sum( const Point3 & pnt );
-
-// Apply uniform scale to a 3-D point
-// 
-inline const Point3 scale( const Point3 & pnt, vec_float4 scaleVal );
-
-// Apply non-uniform scale to a 3-D point
-// 
-inline const Point3 scale( const Point3 & pnt, const Vector3 & scaleVec );
-
-// Scalar projection of a 3-D point on a unit-length 3-D vector
-// 
-inline vec_float4 projection( const Point3 & pnt, const Vector3 & unitVec );
-
-// Compute the square of the distance of a 3-D point from the coordinate-system origin
-// 
-inline vec_float4 distSqrFromOrigin( const Point3 & pnt );
-
-// Compute the distance of a 3-D point from the coordinate-system origin
-// 
-inline vec_float4 distFromOrigin( const Point3 & pnt );
-
-// Compute the square of the distance between two 3-D points
-// 
-inline vec_float4 distSqr( const Point3 & pnt0, const Point3 & pnt1 );
-
-// Compute the distance between two 3-D points
-// 
-inline vec_float4 dist( const Point3 & pnt0, const Point3 & pnt1 );
-
-// Linear interpolation between two 3-D points
-// NOTE: 
-// Does not clamp t between 0 and 1.
-// 
-inline const Point3 lerp( vec_float4 t, const Point3 & pnt0, const Point3 & pnt1 );
-
-// Conditionally select between two 3-D points
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// 
-inline const Point3 select( const Point3 & pnt0, const Point3 & pnt1, vec_uint4 select1 );
-
-// Load four three-float 3-D points, stored in three quadwords
-// 
-inline void loadXYZArray( Point3 & pnt, const vec_float4 * threeQuads );
-
-// Store four slots of an SoA 3-D point in three quadwords
-// 
-inline void storeXYZArray( const Point3 & pnt, vec_float4 * threeQuads );
-
-// Store eight slots of two SoA 3-D points as half-floats
-// 
-inline void storeHalfFloats( const Point3 & pnt0, const Point3 & pnt1, vec_ushort8 * threeQuads );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a 3-D point
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Point3 & pnt );
-
-// Print a 3-D point and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Point3 & pnt, const char * name );
-
-#endif
-
-// A set of four quaternions in structure-of-arrays format
-//
-class Quat
-{
-    typedef vec_float4 vec_float4_t;
-    vec_float4 mX;
-    vec_float4 mY;
-    vec_float4 mZ;
-    vec_float4 mW;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Quat( ) { };
-
-    // Copy a quaternion
-    // 
-    inline Quat( const Quat & quat );
-
-    // Construct a quaternion from x, y, z, and w elements
-    // 
-    inline Quat( vec_float4 x, vec_float4 y, vec_float4 z, vec_float4 w );
-
-    // Construct a quaternion from a 3-D vector and a scalar
-    // 
-    inline Quat( const Vector3 & xyz, vec_float4 w );
-
-    // Copy elements from a 4-D vector into a quaternion
-    // 
-    explicit inline Quat( const Vector4 & vec );
-
-    // Convert a rotation matrix to a unit-length quaternion
-    // 
-    explicit inline Quat( const Matrix3 & rotMat );
-
-    // Set all elements of a quaternion to the same scalar value
-    // 
-    explicit inline Quat( vec_float4 scalar );
-
-    // Replicate an AoS quaternion
-    // 
-    inline Quat( Aos::Quat quat );
-
-    // Insert four AoS quaternions
-    // 
-    inline Quat( Aos::Quat quat0, Aos::Quat quat1, Aos::Quat quat2, Aos::Quat quat3 );
-
-    // Extract four AoS quaternions
-    // 
-    inline void get4Aos( Aos::Quat & result0, Aos::Quat & result1, Aos::Quat & result2, Aos::Quat & result3 ) const;
-
-    // Assign one quaternion to another
-    // 
-    inline Quat & operator =( const Quat & quat );
-
-    // Set the x, y, and z elements of a quaternion
-    // NOTE: 
-    // This function does not change the w element.
-    // 
-    inline Quat & setXYZ( const Vector3 & vec );
-
-    // Get the x, y, and z elements of a quaternion
-    // 
-    inline const Vector3 getXYZ( ) const;
-
-    // Set the x element of a quaternion
-    // 
-    inline Quat & setX( vec_float4 x );
-
-    // Set the y element of a quaternion
-    // 
-    inline Quat & setY( vec_float4 y );
-
-    // Set the z element of a quaternion
-    // 
-    inline Quat & setZ( vec_float4 z );
-
-    // Set the w element of a quaternion
-    // 
-    inline Quat & setW( vec_float4 w );
-
-    // Get the x element of a quaternion
-    // 
-    inline vec_float4 getX( ) const;
-
-    // Get the y element of a quaternion
-    // 
-    inline vec_float4 getY( ) const;
-
-    // Get the z element of a quaternion
-    // 
-    inline vec_float4 getZ( ) const;
-
-    // Get the w element of a quaternion
-    // 
-    inline vec_float4 getW( ) const;
-
-    // Set an x, y, z, or w element of a quaternion by index
-    // 
-    inline Quat & setElem( int idx, vec_float4 value );
-
-    // Get an x, y, z, or w element of a quaternion by index
-    // 
-    inline vec_float4 getElem( int idx ) const;
-
-    // Subscripting operator to set or get an element
-    // 
-    inline vec_float4_t & operator []( int idx );
-
-    // Subscripting operator to get an element
-    // 
-    inline vec_float4 operator []( int idx ) const;
-
-    // Add two quaternions
-    // 
-    inline const Quat operator +( const Quat & quat ) const;
-
-    // Subtract a quaternion from another quaternion
-    // 
-    inline const Quat operator -( const Quat & quat ) const;
-
-    // Multiply two quaternions
-    // 
-    inline const Quat operator *( const Quat & quat ) const;
-
-    // Multiply a quaternion by a scalar
-    // 
-    inline const Quat operator *( vec_float4 scalar ) const;
-
-    // Divide a quaternion by a scalar
-    // 
-    inline const Quat operator /( vec_float4 scalar ) const;
-
-    // Perform compound assignment and addition with a quaternion
-    // 
-    inline Quat & operator +=( const Quat & quat );
-
-    // Perform compound assignment and subtraction by a quaternion
-    // 
-    inline Quat & operator -=( const Quat & quat );
-
-    // Perform compound assignment and multiplication by a quaternion
-    // 
-    inline Quat & operator *=( const Quat & quat );
-
-    // Perform compound assignment and multiplication by a scalar
-    // 
-    inline Quat & operator *=( vec_float4 scalar );
-
-    // Perform compound assignment and division by a scalar
-    // 
-    inline Quat & operator /=( vec_float4 scalar );
-
-    // Negate all elements of a quaternion
-    // 
-    inline const Quat operator -( ) const;
-
-    // Construct an identity quaternion
-    // 
-    static inline const Quat identity( );
-
-    // Construct a quaternion to rotate between two unit-length 3-D vectors
-    // NOTE: 
-    // The result is unpredictable if unitVec0 and unitVec1 point in opposite directions.
-    // 
-    static inline const Quat rotation( const Vector3 & unitVec0, const Vector3 & unitVec1 );
-
-    // Construct a quaternion to rotate around a unit-length 3-D vector
-    // 
-    static inline const Quat rotation( vec_float4 radians, const Vector3 & unitVec );
-
-    // Construct a quaternion to rotate around the x axis
-    // 
-    static inline const Quat rotationX( vec_float4 radians );
-
-    // Construct a quaternion to rotate around the y axis
-    // 
-    static inline const Quat rotationY( vec_float4 radians );
-
-    // Construct a quaternion to rotate around the z axis
-    // 
-    static inline const Quat rotationZ( vec_float4 radians );
-
-};
-
-// Multiply a quaternion by a scalar
-// 
-inline const Quat operator *( vec_float4 scalar, const Quat & quat );
-
-// Compute the conjugate of a quaternion
-// 
-inline const Quat conj( const Quat & quat );
-
-// Use a unit-length quaternion to rotate a 3-D vector
-// 
-inline const Vector3 rotate( const Quat & unitQuat, const Vector3 & vec );
-
-// Compute the dot product of two quaternions
-// 
-inline vec_float4 dot( const Quat & quat0, const Quat & quat1 );
-
-// Compute the norm of a quaternion
-// 
-inline vec_float4 norm( const Quat & quat );
-
-// Compute the length of a quaternion
-// 
-inline vec_float4 length( const Quat & quat );
-
-// Normalize a quaternion
-// NOTE: 
-// The result is unpredictable when all elements of quat are at or near zero.
-// 
-inline const Quat normalize( const Quat & quat );
-
-// Linear interpolation between two quaternions
-// NOTE: 
-// Does not clamp t between 0 and 1.
-// 
-inline const Quat lerp( vec_float4 t, const Quat & quat0, const Quat & quat1 );
-
-// Spherical linear interpolation between two quaternions
-// NOTE: 
-// Interpolates along the shortest path between orientations.
-// Does not clamp t between 0 and 1.
-// 
-inline const Quat slerp( vec_float4 t, const Quat & unitQuat0, const Quat & unitQuat1 );
-
-// Spherical quadrangle interpolation
-// 
-inline const Quat squad( vec_float4 t, const Quat & unitQuat0, const Quat & unitQuat1, const Quat & unitQuat2, const Quat & unitQuat3 );
-
-// Conditionally select between two quaternions
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// 
-inline const Quat select( const Quat & quat0, const Quat & quat1, vec_uint4 select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a quaternion
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Quat & quat );
-
-// Print a quaternion and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Quat & quat, const char * name );
-
-#endif
-
-// A set of four 3x3 matrices in structure-of-arrays format
-//
-class Matrix3
-{
-    Vector3 mCol0;
-    Vector3 mCol1;
-    Vector3 mCol2;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Matrix3( ) { };
-
-    // Copy a 3x3 matrix
-    // 
-    inline Matrix3( const Matrix3 & mat );
-
-    // Construct a 3x3 matrix containing the specified columns
-    // 
-    inline Matrix3( const Vector3 & col0, const Vector3 & col1, const Vector3 & col2 );
-
-    // Construct a 3x3 rotation matrix from a unit-length quaternion
-    // 
-    explicit inline Matrix3( const Quat & unitQuat );
-
-    // Set all elements of a 3x3 matrix to the same scalar value
-    // 
-    explicit inline Matrix3( vec_float4 scalar );
-
-    // Replicate an AoS 3x3 matrix
-    // 
-    inline Matrix3( const Aos::Matrix3 & mat );
-
-    // Insert four AoS 3x3 matrices
-    // 
-    inline Matrix3( const Aos::Matrix3 & mat0, const Aos::Matrix3 & mat1, const Aos::Matrix3 & mat2, const Aos::Matrix3 & mat3 );
-
-    // Extract four AoS 3x3 matrices
-    // 
-    inline void get4Aos( Aos::Matrix3 & result0, Aos::Matrix3 & result1, Aos::Matrix3 & result2, Aos::Matrix3 & result3 ) const;
-
-    // Assign one 3x3 matrix to another
-    // 
-    inline Matrix3 & operator =( const Matrix3 & mat );
-
-    // Set column 0 of a 3x3 matrix
-    // 
-    inline Matrix3 & setCol0( const Vector3 & col0 );
-
-    // Set column 1 of a 3x3 matrix
-    // 
-    inline Matrix3 & setCol1( const Vector3 & col1 );
-
-    // Set column 2 of a 3x3 matrix
-    // 
-    inline Matrix3 & setCol2( const Vector3 & col2 );
-
-    // Get column 0 of a 3x3 matrix
-    // 
-    inline const Vector3 getCol0( ) const;
-
-    // Get column 1 of a 3x3 matrix
-    // 
-    inline const Vector3 getCol1( ) const;
-
-    // Get column 2 of a 3x3 matrix
-    // 
-    inline const Vector3 getCol2( ) const;
-
-    // Set the column of a 3x3 matrix referred to by the specified index
-    // 
-    inline Matrix3 & setCol( int col, const Vector3 & vec );
-
-    // Set the row of a 3x3 matrix referred to by the specified index
-    // 
-    inline Matrix3 & setRow( int row, const Vector3 & vec );
-
-    // Get the column of a 3x3 matrix referred to by the specified index
-    // 
-    inline const Vector3 getCol( int col ) const;
-
-    // Get the row of a 3x3 matrix referred to by the specified index
-    // 
-    inline const Vector3 getRow( int row ) const;
-
-    // Subscripting operator to set or get a column
-    // 
-    inline Vector3 & operator []( int col );
-
-    // Subscripting operator to get a column
-    // 
-    inline const Vector3 operator []( int col ) const;
-
-    // Set the element of a 3x3 matrix referred to by column and row indices
-    // 
-    inline Matrix3 & setElem( int col, int row, vec_float4 val );
-
-    // Get the element of a 3x3 matrix referred to by column and row indices
-    // 
-    inline vec_float4 getElem( int col, int row ) const;
-
-    // Add two 3x3 matrices
-    // 
-    inline const Matrix3 operator +( const Matrix3 & mat ) const;
-
-    // Subtract a 3x3 matrix from another 3x3 matrix
-    // 
-    inline const Matrix3 operator -( const Matrix3 & mat ) const;
-
-    // Negate all elements of a 3x3 matrix
-    // 
-    inline const Matrix3 operator -( ) const;
-
-    // Multiply a 3x3 matrix by a scalar
-    // 
-    inline const Matrix3 operator *( vec_float4 scalar ) const;
-
-    // Multiply a 3x3 matrix by a 3-D vector
-    // 
-    inline const Vector3 operator *( const Vector3 & vec ) const;
-
-    // Multiply two 3x3 matrices
-    // 
-    inline const Matrix3 operator *( const Matrix3 & mat ) const;
-
-    // Perform compound assignment and addition with a 3x3 matrix
-    // 
-    inline Matrix3 & operator +=( const Matrix3 & mat );
-
-    // Perform compound assignment and subtraction by a 3x3 matrix
-    // 
-    inline Matrix3 & operator -=( const Matrix3 & mat );
-
-    // Perform compound assignment and multiplication by a scalar
-    // 
-    inline Matrix3 & operator *=( vec_float4 scalar );
-
-    // Perform compound assignment and multiplication by a 3x3 matrix
-    // 
-    inline Matrix3 & operator *=( const Matrix3 & mat );
-
-    // Construct an identity 3x3 matrix
-    // 
-    static inline const Matrix3 identity( );
-
-    // Construct a 3x3 matrix to rotate around the x axis
-    // 
-    static inline const Matrix3 rotationX( vec_float4 radians );
-
-    // Construct a 3x3 matrix to rotate around the y axis
-    // 
-    static inline const Matrix3 rotationY( vec_float4 radians );
-
-    // Construct a 3x3 matrix to rotate around the z axis
-    // 
-    static inline const Matrix3 rotationZ( vec_float4 radians );
-
-    // Construct a 3x3 matrix to rotate around the x, y, and z axes
-    // 
-    static inline const Matrix3 rotationZYX( const Vector3 & radiansXYZ );
-
-    // Construct a 3x3 matrix to rotate around a unit-length 3-D vector
-    // 
-    static inline const Matrix3 rotation( vec_float4 radians, const Vector3 & unitVec );
-
-    // Construct a rotation matrix from a unit-length quaternion
-    // 
-    static inline const Matrix3 rotation( const Quat & unitQuat );
-
-    // Construct a 3x3 matrix to perform scaling
-    // 
-    static inline const Matrix3 scale( const Vector3 & scaleVec );
-
-};
-// Multiply a 3x3 matrix by a scalar
-// 
-inline const Matrix3 operator *( vec_float4 scalar, const Matrix3 & mat );
-
-// Append (post-multiply) a scale transformation to a 3x3 matrix
-// NOTE: 
-// Faster than creating and multiplying a scale transformation matrix.
-// 
-inline const Matrix3 appendScale( const Matrix3 & mat, const Vector3 & scaleVec );
-
-// Prepend (pre-multiply) a scale transformation to a 3x3 matrix
-// NOTE: 
-// Faster than creating and multiplying a scale transformation matrix.
-// 
-inline const Matrix3 prependScale( const Vector3 & scaleVec, const Matrix3 & mat );
-
-// Multiply two 3x3 matrices per element
-// 
-inline const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 );
-
-// Compute the absolute value of a 3x3 matrix per element
-// 
-inline const Matrix3 absPerElem( const Matrix3 & mat );
-
-// Transpose of a 3x3 matrix
-// 
-inline const Matrix3 transpose( const Matrix3 & mat );
-
-// Compute the inverse of a 3x3 matrix
-// NOTE: 
-// Result is unpredictable when the determinant of mat is equal to or near 0.
-// 
-inline const Matrix3 inverse( const Matrix3 & mat );
-
-// Determinant of a 3x3 matrix
-// 
-inline vec_float4 determinant( const Matrix3 & mat );
-
-// Conditionally select between two 3x3 matrices
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// 
-inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, vec_uint4 select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a 3x3 matrix
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Matrix3 & mat );
-
-// Print a 3x3 matrix and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Matrix3 & mat, const char * name );
-
-#endif
-
-// A set of four 4x4 matrices in structure-of-arrays format
-//
-class Matrix4
-{
-    Vector4 mCol0;
-    Vector4 mCol1;
-    Vector4 mCol2;
-    Vector4 mCol3;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Matrix4( ) { };
-
-    // Copy a 4x4 matrix
-    // 
-    inline Matrix4( const Matrix4 & mat );
-
-    // Construct a 4x4 matrix containing the specified columns
-    // 
-    inline Matrix4( const Vector4 & col0, const Vector4 & col1, const Vector4 & col2, const Vector4 & col3 );
-
-    // Construct a 4x4 matrix from a 3x4 transformation matrix
-    // 
-    explicit inline Matrix4( const Transform3 & mat );
-
-    // Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector
-    // 
-    inline Matrix4( const Matrix3 & mat, const Vector3 & translateVec );
-
-    // Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector
-    // 
-    inline Matrix4( const Quat & unitQuat, const Vector3 & translateVec );
-
-    // Set all elements of a 4x4 matrix to the same scalar value
-    // 
-    explicit inline Matrix4( vec_float4 scalar );
-
-    // Replicate an AoS 4x4 matrix
-    // 
-    inline Matrix4( const Aos::Matrix4 & mat );
-
-    // Insert four AoS 4x4 matrices
-    // 
-    inline Matrix4( const Aos::Matrix4 & mat0, const Aos::Matrix4 & mat1, const Aos::Matrix4 & mat2, const Aos::Matrix4 & mat3 );
-
-    // Extract four AoS 4x4 matrices
-    // 
-    inline void get4Aos( Aos::Matrix4 & result0, Aos::Matrix4 & result1, Aos::Matrix4 & result2, Aos::Matrix4 & result3 ) const;
-
-    // Assign one 4x4 matrix to another
-    // 
-    inline Matrix4 & operator =( const Matrix4 & mat );
-
-    // Set the upper-left 3x3 submatrix
-    // NOTE: 
-    // This function does not change the bottom row elements.
-    // 
-    inline Matrix4 & setUpper3x3( const Matrix3 & mat3 );
-
-    // Get the upper-left 3x3 submatrix of a 4x4 matrix
-    // 
-    inline const Matrix3 getUpper3x3( ) const;
-
-    // Set translation component
-    // NOTE: 
-    // This function does not change the bottom row elements.
-    // 
-    inline Matrix4 & setTranslation( const Vector3 & translateVec );
-
-    // Get the translation component of a 4x4 matrix
-    // 
-    inline const Vector3 getTranslation( ) const;
-
-    // Set column 0 of a 4x4 matrix
-    // 
-    inline Matrix4 & setCol0( const Vector4 & col0 );
-
-    // Set column 1 of a 4x4 matrix
-    // 
-    inline Matrix4 & setCol1( const Vector4 & col1 );
-
-    // Set column 2 of a 4x4 matrix
-    // 
-    inline Matrix4 & setCol2( const Vector4 & col2 );
-
-    // Set column 3 of a 4x4 matrix
-    // 
-    inline Matrix4 & setCol3( const Vector4 & col3 );
-
-    // Get column 0 of a 4x4 matrix
-    // 
-    inline const Vector4 getCol0( ) const;
-
-    // Get column 1 of a 4x4 matrix
-    // 
-    inline const Vector4 getCol1( ) const;
-
-    // Get column 2 of a 4x4 matrix
-    // 
-    inline const Vector4 getCol2( ) const;
-
-    // Get column 3 of a 4x4 matrix
-    // 
-    inline const Vector4 getCol3( ) const;
-
-    // Set the column of a 4x4 matrix referred to by the specified index
-    // 
-    inline Matrix4 & setCol( int col, const Vector4 & vec );
-
-    // Set the row of a 4x4 matrix referred to by the specified index
-    // 
-    inline Matrix4 & setRow( int row, const Vector4 & vec );
-
-    // Get the column of a 4x4 matrix referred to by the specified index
-    // 
-    inline const Vector4 getCol( int col ) const;
-
-    // Get the row of a 4x4 matrix referred to by the specified index
-    // 
-    inline const Vector4 getRow( int row ) const;
-
-    // Subscripting operator to set or get a column
-    // 
-    inline Vector4 & operator []( int col );
-
-    // Subscripting operator to get a column
-    // 
-    inline const Vector4 operator []( int col ) const;
-
-    // Set the element of a 4x4 matrix referred to by column and row indices
-    // 
-    inline Matrix4 & setElem( int col, int row, vec_float4 val );
-
-    // Get the element of a 4x4 matrix referred to by column and row indices
-    // 
-    inline vec_float4 getElem( int col, int row ) const;
-
-    // Add two 4x4 matrices
-    // 
-    inline const Matrix4 operator +( const Matrix4 & mat ) const;
-
-    // Subtract a 4x4 matrix from another 4x4 matrix
-    // 
-    inline const Matrix4 operator -( const Matrix4 & mat ) const;
-
-    // Negate all elements of a 4x4 matrix
-    // 
-    inline const Matrix4 operator -( ) const;
-
-    // Multiply a 4x4 matrix by a scalar
-    // 
-    inline const Matrix4 operator *( vec_float4 scalar ) const;
-
-    // Multiply a 4x4 matrix by a 4-D vector
-    // 
-    inline const Vector4 operator *( const Vector4 & vec ) const;
-
-    // Multiply a 4x4 matrix by a 3-D vector
-    // 
-    inline const Vector4 operator *( const Vector3 & vec ) const;
-
-    // Multiply a 4x4 matrix by a 3-D point
-    // 
-    inline const Vector4 operator *( const Point3 & pnt ) const;
-
-    // Multiply two 4x4 matrices
-    // 
-    inline const Matrix4 operator *( const Matrix4 & mat ) const;
-
-    // Multiply a 4x4 matrix by a 3x4 transformation matrix
-    // 
-    inline const Matrix4 operator *( const Transform3 & tfrm ) const;
-
-    // Perform compound assignment and addition with a 4x4 matrix
-    // 
-    inline Matrix4 & operator +=( const Matrix4 & mat );
-
-    // Perform compound assignment and subtraction by a 4x4 matrix
-    // 
-    inline Matrix4 & operator -=( const Matrix4 & mat );
-
-    // Perform compound assignment and multiplication by a scalar
-    // 
-    inline Matrix4 & operator *=( vec_float4 scalar );
-
-    // Perform compound assignment and multiplication by a 4x4 matrix
-    // 
-    inline Matrix4 & operator *=( const Matrix4 & mat );
-
-    // Perform compound assignment and multiplication by a 3x4 transformation matrix
-    // 
-    inline Matrix4 & operator *=( const Transform3 & tfrm );
-
-    // Construct an identity 4x4 matrix
-    // 
-    static inline const Matrix4 identity( );
-
-    // Construct a 4x4 matrix to rotate around the x axis
-    // 
-    static inline const Matrix4 rotationX( vec_float4 radians );
-
-    // Construct a 4x4 matrix to rotate around the y axis
-    // 
-    static inline const Matrix4 rotationY( vec_float4 radians );
-
-    // Construct a 4x4 matrix to rotate around the z axis
-    // 
-    static inline const Matrix4 rotationZ( vec_float4 radians );
-
-    // Construct a 4x4 matrix to rotate around the x, y, and z axes
-    // 
-    static inline const Matrix4 rotationZYX( const Vector3 & radiansXYZ );
-
-    // Construct a 4x4 matrix to rotate around a unit-length 3-D vector
-    // 
-    static inline const Matrix4 rotation( vec_float4 radians, const Vector3 & unitVec );
-
-    // Construct a rotation matrix from a unit-length quaternion
-    // 
-    static inline const Matrix4 rotation( const Quat & unitQuat );
-
-    // Construct a 4x4 matrix to perform scaling
-    // 
-    static inline const Matrix4 scale( const Vector3 & scaleVec );
-
-    // Construct a 4x4 matrix to perform translation
-    // 
-    static inline const Matrix4 translation( const Vector3 & translateVec );
-
-    // Construct viewing matrix based on eye position, position looked at, and up direction
-    // 
-    static inline const Matrix4 lookAt( const Point3 & eyePos, const Point3 & lookAtPos, const Vector3 & upVec );
-
-    // Construct a perspective projection matrix
-    // 
-    static inline const Matrix4 perspective( vec_float4 fovyRadians, vec_float4 aspect, vec_float4 zNear, vec_float4 zFar );
-
-    // Construct a perspective projection matrix based on frustum
-    // 
-    static inline const Matrix4 frustum( vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar );
-
-    // Construct an orthographic projection matrix
-    // 
-    static inline const Matrix4 orthographic( vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar );
-
-};
-// Multiply a 4x4 matrix by a scalar
-// 
-inline const Matrix4 operator *( vec_float4 scalar, const Matrix4 & mat );
-
-// Append (post-multiply) a scale transformation to a 4x4 matrix
-// NOTE: 
-// Faster than creating and multiplying a scale transformation matrix.
-// 
-inline const Matrix4 appendScale( const Matrix4 & mat, const Vector3 & scaleVec );
-
-// Prepend (pre-multiply) a scale transformation to a 4x4 matrix
-// NOTE: 
-// Faster than creating and multiplying a scale transformation matrix.
-// 
-inline const Matrix4 prependScale( const Vector3 & scaleVec, const Matrix4 & mat );
-
-// Multiply two 4x4 matrices per element
-// 
-inline const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 );
-
-// Compute the absolute value of a 4x4 matrix per element
-// 
-inline const Matrix4 absPerElem( const Matrix4 & mat );
-
-// Transpose of a 4x4 matrix
-// 
-inline const Matrix4 transpose( const Matrix4 & mat );
-
-// Compute the inverse of a 4x4 matrix
-// NOTE: 
-// Result is unpredictable when the determinant of mat is equal to or near 0.
-// 
-inline const Matrix4 inverse( const Matrix4 & mat );
-
-// Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix
-// NOTE: 
-// This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.  The result is unpredictable when the determinant of mat is equal to or near 0.
-// 
-inline const Matrix4 affineInverse( const Matrix4 & mat );
-
-// Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix
-// NOTE: 
-// This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.
-// 
-inline const Matrix4 orthoInverse( const Matrix4 & mat );
-
-// Determinant of a 4x4 matrix
-// 
-inline vec_float4 determinant( const Matrix4 & mat );
-
-// Conditionally select between two 4x4 matrices
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// 
-inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, vec_uint4 select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a 4x4 matrix
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Matrix4 & mat );
-
-// Print a 4x4 matrix and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Matrix4 & mat, const char * name );
-
-#endif
-
-// A set of four 3x4 transformation matrices in structure-of-arrays format
-//
-class Transform3
-{
-    Vector3 mCol0;
-    Vector3 mCol1;
-    Vector3 mCol2;
-    Vector3 mCol3;
-
-public:
-    // Default constructor; does no initialization
-    // 
-    inline Transform3( ) { };
-
-    // Copy a 3x4 transformation matrix
-    // 
-    inline Transform3( const Transform3 & tfrm );
-
-    // Construct a 3x4 transformation matrix containing the specified columns
-    // 
-    inline Transform3( const Vector3 & col0, const Vector3 & col1, const Vector3 & col2, const Vector3 & col3 );
-
-    // Construct a 3x4 transformation matrix from a 3x3 matrix and a 3-D vector
-    // 
-    inline Transform3( const Matrix3 & tfrm, const Vector3 & translateVec );
-
-    // Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector
-    // 
-    inline Transform3( const Quat & unitQuat, const Vector3 & translateVec );
-
-    // Set all elements of a 3x4 transformation matrix to the same scalar value
-    // 
-    explicit inline Transform3( vec_float4 scalar );
-
-    // Replicate an AoS 3x4 transformation matrix
-    // 
-    inline Transform3( const Aos::Transform3 & tfrm );
-
-    // Insert four AoS 3x4 transformation matrices
-    // 
-    inline Transform3( const Aos::Transform3 & tfrm0, const Aos::Transform3 & tfrm1, const Aos::Transform3 & tfrm2, const Aos::Transform3 & tfrm3 );
-
-    // Extract four AoS 3x4 transformation matrices
-    // 
-    inline void get4Aos( Aos::Transform3 & result0, Aos::Transform3 & result1, Aos::Transform3 & result2, Aos::Transform3 & result3 ) const;
-
-    // Assign one 3x4 transformation matrix to another
-    // 
-    inline Transform3 & operator =( const Transform3 & tfrm );
-
-    // Set the upper-left 3x3 submatrix
-    // 
-    inline Transform3 & setUpper3x3( const Matrix3 & mat3 );
-
-    // Get the upper-left 3x3 submatrix of a 3x4 transformation matrix
-    // 
-    inline const Matrix3 getUpper3x3( ) const;
-
-    // Set translation component
-    // 
-    inline Transform3 & setTranslation( const Vector3 & translateVec );
-
-    // Get the translation component of a 3x4 transformation matrix
-    // 
-    inline const Vector3 getTranslation( ) const;
-
-    // Set column 0 of a 3x4 transformation matrix
-    // 
-    inline Transform3 & setCol0( const Vector3 & col0 );
-
-    // Set column 1 of a 3x4 transformation matrix
-    // 
-    inline Transform3 & setCol1( const Vector3 & col1 );
-
-    // Set column 2 of a 3x4 transformation matrix
-    // 
-    inline Transform3 & setCol2( const Vector3 & col2 );
-
-    // Set column 3 of a 3x4 transformation matrix
-    // 
-    inline Transform3 & setCol3( const Vector3 & col3 );
-
-    // Get column 0 of a 3x4 transformation matrix
-    // 
-    inline const Vector3 getCol0( ) const;
-
-    // Get column 1 of a 3x4 transformation matrix
-    // 
-    inline const Vector3 getCol1( ) const;
-
-    // Get column 2 of a 3x4 transformation matrix
-    // 
-    inline const Vector3 getCol2( ) const;
-
-    // Get column 3 of a 3x4 transformation matrix
-    // 
-    inline const Vector3 getCol3( ) const;
-
-    // Set the column of a 3x4 transformation matrix referred to by the specified index
-    // 
-    inline Transform3 & setCol( int col, const Vector3 & vec );
-
-    // Set the row of a 3x4 transformation matrix referred to by the specified index
-    // 
-    inline Transform3 & setRow( int row, const Vector4 & vec );
-
-    // Get the column of a 3x4 transformation matrix referred to by the specified index
-    // 
-    inline const Vector3 getCol( int col ) const;
-
-    // Get the row of a 3x4 transformation matrix referred to by the specified index
-    // 
-    inline const Vector4 getRow( int row ) const;
-
-    // Subscripting operator to set or get a column
-    // 
-    inline Vector3 & operator []( int col );
-
-    // Subscripting operator to get a column
-    // 
-    inline const Vector3 operator []( int col ) const;
-
-    // Set the element of a 3x4 transformation matrix referred to by column and row indices
-    // 
-    inline Transform3 & setElem( int col, int row, vec_float4 val );
-
-    // Get the element of a 3x4 transformation matrix referred to by column and row indices
-    // 
-    inline vec_float4 getElem( int col, int row ) const;
-
-    // Multiply a 3x4 transformation matrix by a 3-D vector
-    // 
-    inline const Vector3 operator *( const Vector3 & vec ) const;
-
-    // Multiply a 3x4 transformation matrix by a 3-D point
-    // 
-    inline const Point3 operator *( const Point3 & pnt ) const;
-
-    // Multiply two 3x4 transformation matrices
-    // 
-    inline const Transform3 operator *( const Transform3 & tfrm ) const;
-
-    // Perform compound assignment and multiplication by a 3x4 transformation matrix
-    // 
-    inline Transform3 & operator *=( const Transform3 & tfrm );
-
-    // Construct an identity 3x4 transformation matrix
-    // 
-    static inline const Transform3 identity( );
-
-    // Construct a 3x4 transformation matrix to rotate around the x axis
-    // 
-    static inline const Transform3 rotationX( vec_float4 radians );
-
-    // Construct a 3x4 transformation matrix to rotate around the y axis
-    // 
-    static inline const Transform3 rotationY( vec_float4 radians );
-
-    // Construct a 3x4 transformation matrix to rotate around the z axis
-    // 
-    static inline const Transform3 rotationZ( vec_float4 radians );
-
-    // Construct a 3x4 transformation matrix to rotate around the x, y, and z axes
-    // 
-    static inline const Transform3 rotationZYX( const Vector3 & radiansXYZ );
-
-    // Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector
-    // 
-    static inline const Transform3 rotation( vec_float4 radians, const Vector3 & unitVec );
-
-    // Construct a rotation matrix from a unit-length quaternion
-    // 
-    static inline const Transform3 rotation( const Quat & unitQuat );
-
-    // Construct a 3x4 transformation matrix to perform scaling
-    // 
-    static inline const Transform3 scale( const Vector3 & scaleVec );
-
-    // Construct a 3x4 transformation matrix to perform translation
-    // 
-    static inline const Transform3 translation( const Vector3 & translateVec );
-
-};
-// Append (post-multiply) a scale transformation to a 3x4 transformation matrix
-// NOTE: 
-// Faster than creating and multiplying a scale transformation matrix.
-// 
-inline const Transform3 appendScale( const Transform3 & tfrm, const Vector3 & scaleVec );
-
-// Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix
-// NOTE: 
-// Faster than creating and multiplying a scale transformation matrix.
-// 
-inline const Transform3 prependScale( const Vector3 & scaleVec, const Transform3 & tfrm );
-
-// Multiply two 3x4 transformation matrices per element
-// 
-inline const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 );
-
-// Compute the absolute value of a 3x4 transformation matrix per element
-// 
-inline const Transform3 absPerElem( const Transform3 & tfrm );
-
-// Inverse of a 3x4 transformation matrix
-// NOTE: 
-// Result is unpredictable when the determinant of the left 3x3 submatrix is equal to or near 0.
-// 
-inline const Transform3 inverse( const Transform3 & tfrm );
-
-// Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix
-// NOTE: 
-// This can be used to achieve better performance than a general inverse when the specified 3x4 transformation matrix meets the given restrictions.
-// 
-inline const Transform3 orthoInverse( const Transform3 & tfrm );
-
-// Conditionally select between two 3x4 transformation matrices
-// NOTE: 
-// This function uses a conditional select instruction to avoid a branch.
-// 
-inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, vec_uint4 select1 );
-
-#ifdef _VECTORMATH_DEBUG
-
-// Print a 3x4 transformation matrix
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Transform3 & tfrm );
-
-// Print a 3x4 transformation matrix and an associated string identifier
-// NOTE: 
-// Function is only defined when _VECTORMATH_DEBUG is defined.
-// 
-inline void print( const Transform3 & tfrm, const char * name );
-
-#endif
-
-} // namespace Soa
-} // namespace Vectormath
-
-#include "vec_soa.h"
-#include "quat_soa.h"
-#include "mat_soa.h"
-
-#endif
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_SOA_CPP_SPU_H
+#define _VECTORMATH_SOA_CPP_SPU_H
+
+#include <math.h>
+#include <spu_intrinsics.h>
+#include "floatInVec.h"
+#include "boolInVec.h"
+#include "vectormath_aos.h"
+#include <stdio.h>
+
+#ifdef _VECTORMATH_DEBUG
+#endif
+
+namespace Vectormath {
+
+namespace Soa {
+
+//-----------------------------------------------------------------------------
+// Forward Declarations
+// Needed because the classes below name one another before they are defined (e.g. Vector3 takes a Point3).
+
+class Vector3;
+class Vector4;
+class Point3;
+class Quat;
+class Matrix3;
+class Matrix4;
+class Transform3;
+
+// A set of four 3-D vectors in structure-of-arrays format
+// Holds three vec_float4 members (mX, mY, mZ), one per component.
+class Vector3
+{
+    typedef vec_float4 vec_float4_t;
+    vec_float4 mX;
+    vec_float4 mY;
+    vec_float4 mZ;
+
+public:
+    // Default constructor; does no initialization
+    // The body is empty, so mX, mY, and mZ are not assigned here.
+    inline Vector3( ) { };
+
+    // Copy a 3-D vector
+    // 
+    inline Vector3( const Vector3 & vec );
+
+    // Construct a 3-D vector from x, y, and z elements
+    // Takes one vec_float4 per component.
+    inline Vector3( vec_float4 x, vec_float4 y, vec_float4 z );
+
+    // Copy elements from a 3-D point into a 3-D vector
+    // Declared explicit: a Point3 is never converted to a Vector3 implicitly.
+    explicit inline Vector3( const Point3 & pnt );
+
+    // Set all elements of a 3-D vector to the same scalar value
+    // Declared explicit: a vec_float4 is never converted to a Vector3 implicitly.
+    explicit inline Vector3( vec_float4 scalar );
+
+    // Replicate an AoS 3-D vector
+    // Not declared explicit, so an Aos::Vector3 can convert implicitly; the argument is taken by value.
+    inline Vector3( Aos::Vector3 vec );
+
+    // Insert four AoS 3-D vectors
+    // NOTE(review): presumably vec0..vec3 fill SoA slots 0..3 in order - confirm in the definition.
+    inline Vector3( Aos::Vector3 vec0, Aos::Vector3 vec1, Aos::Vector3 vec2, Aos::Vector3 vec3 );
+
+    // Extract four AoS 3-D vectors
+    // Outputs are the four non-const reference parameters; nothing is returned.
+    inline void get4Aos( Aos::Vector3 & result0, Aos::Vector3 & result1, Aos::Vector3 & result2, Aos::Vector3 & result3 ) const;
+
+    // Assign one 3-D vector to another
+    // Returns a Vector3 reference, presumably *this - confirm in the definition.
+    inline Vector3 & operator =( const Vector3 & vec );
+
+    // Set the x element of a 3-D vector
+    // Returns a Vector3 reference, presumably *this so calls can be chained - confirm in the definition.
+    inline Vector3 & setX( vec_float4 x );
+
+    // Set the y element of a 3-D vector
+    // 
+    inline Vector3 & setY( vec_float4 y );
+
+    // Set the z element of a 3-D vector
+    // 
+    inline Vector3 & setZ( vec_float4 z );
+
+    // Get the x element of a 3-D vector
+    // Const member; returns the element by value.
+    inline vec_float4 getX( ) const;
+
+    // Get the y element of a 3-D vector
+    // 
+    inline vec_float4 getY( ) const;
+
+    // Get the z element of a 3-D vector
+    // 
+    inline vec_float4 getZ( ) const;
+
+    // Set an x, y, or z element of a 3-D vector by index
+    // NOTE(review): valid idx is presumably 0..2; no range check is visible in this declaration.
+    inline Vector3 & setElem( int idx, vec_float4 value );
+
+    // Get an x, y, or z element of a 3-D vector by index
+    // NOTE(review): valid idx is presumably 0..2; no range check is visible in this declaration.
+    inline vec_float4 getElem( int idx ) const;
+
+    // Subscripting operator to set or get an element
+    // Returns vec_float4_t & (a typedef of vec_float4), so the result can be assigned to.
+    inline vec_float4_t & operator []( int idx );
+
+    // Subscripting operator to get an element
+    // Const overload; returns the element by value.
+    inline vec_float4 operator []( int idx ) const;
+
+    // Add two 3-D vectors
+    // Const member taking a const reference; the result is returned by value as a const Vector3.
+    inline const Vector3 operator +( const Vector3 & vec ) const;
+
+    // Subtract a 3-D vector from another 3-D vector
+    // 
+    inline const Vector3 operator -( const Vector3 & vec ) const;
+
+    // Add a 3-D vector to a 3-D point
+    // The result type is Point3, not Vector3.
+    inline const Point3 operator +( const Point3 & pnt ) const;
+
+    // Multiply a 3-D vector by a scalar
+    // The scalar is a vec_float4, not a plain float.
+    inline const Vector3 operator *( vec_float4 scalar ) const;
+
+    // Divide a 3-D vector by a scalar
+    // 
+    inline const Vector3 operator /( vec_float4 scalar ) const;
+
+    // Perform compound assignment and addition with a 3-D vector
+    // Non-const member returning a Vector3 reference.
+    inline Vector3 & operator +=( const Vector3 & vec );
+
+    // Perform compound assignment and subtraction by a 3-D vector
+    // 
+    inline Vector3 & operator -=( const Vector3 & vec );
+
+    // Perform compound assignment and multiplication by a scalar
+    // 
+    inline Vector3 & operator *=( vec_float4 scalar );
+
+    // Perform compound assignment and division by a scalar
+    // 
+    inline Vector3 & operator /=( vec_float4 scalar );
+
+    // Negate all elements of a 3-D vector
+    // Unary minus; const member returning a new const Vector3 by value.
+    inline const Vector3 operator -( ) const;
+
+    // Construct x axis
+    // Static member: callable as Vector3::xAxis( ) without an instance.
+    static inline const Vector3 xAxis( );
+
+    // Construct y axis
+    // 
+    static inline const Vector3 yAxis( );
+
+    // Construct z axis
+    // 
+    static inline const Vector3 zAxis( );
+
+};
+
+// Multiply a 3-D vector by a scalar
+// Non-member overload with the scalar as the left-hand operand.
+inline const Vector3 operator *( vec_float4 scalar, const Vector3 & vec );
+
+// Multiply two 3-D vectors per element
+// 
+inline const Vector3 mulPerElem( const Vector3 & vec0, const Vector3 & vec1 );
+
+// Divide two 3-D vectors per element
+// NOTE: 
+// Floating-point behavior matches standard library function divf4.
+// 
+inline const Vector3 divPerElem( const Vector3 & vec0, const Vector3 & vec1 );
+
+// Compute the reciprocal of a 3-D vector per element
+// NOTE: 
+// Floating-point behavior matches standard library function recipf4.
+// 
+inline const Vector3 recipPerElem( const Vector3 & vec );
+
+// Compute the square root of a 3-D vector per element
+// NOTE: 
+// Floating-point behavior matches standard library function sqrtf4.
+// 
+inline const Vector3 sqrtPerElem( const Vector3 & vec );
+
+// Compute the reciprocal square root of a 3-D vector per element
+// NOTE: 
+// Floating-point behavior matches standard library function rsqrtf4.
+// 
+inline const Vector3 rsqrtPerElem( const Vector3 & vec );
+
+// Compute the absolute value of a 3-D vector per element
+// 
+inline const Vector3 absPerElem( const Vector3 & vec );
+
+// Copy sign from one 3-D vector to another, per element
+// NOTE(review): which argument supplies the sign is not visible here - confirm in the definition.
+inline const Vector3 copySignPerElem( const Vector3 & vec0, const Vector3 & vec1 );
+
+// Maximum of two 3-D vectors per element
+// 
+inline const Vector3 maxPerElem( const Vector3 & vec0, const Vector3 & vec1 );
+
+// Minimum of two 3-D vectors per element
+// 
+inline const Vector3 minPerElem( const Vector3 & vec0, const Vector3 & vec1 );
+
+// Maximum element of a 3-D vector
+// Returns a vec_float4 (presumably one maximum per SoA slot - confirm in the definition).
+inline vec_float4 maxElem( const Vector3 & vec );
+
+// Minimum element of a 3-D vector
+// Returns a vec_float4 (presumably one minimum per SoA slot - confirm in the definition).
+inline vec_float4 minElem( const Vector3 & vec );
+
+// Compute the sum of all elements of a 3-D vector
+// 
+inline vec_float4 sum( const Vector3 & vec );
+
+// Compute the dot product of two 3-D vectors
+// Returns a vec_float4 (presumably one dot product per SoA slot - confirm in the definition).
+inline vec_float4 dot( const Vector3 & vec0, const Vector3 & vec1 );
+
+// Compute the square of the length of a 3-D vector
+// 
+inline vec_float4 lengthSqr( const Vector3 & vec );
+
+// Compute the length of a 3-D vector
+// 
+inline vec_float4 length( const Vector3 & vec );
+
+// Normalize a 3-D vector
+// NOTE: 
+// The result is unpredictable when all elements of vec are at or near zero.
+// 
+inline const Vector3 normalize( const Vector3 & vec );
+
+// Compute cross product of two 3-D vectors
+// 
+inline const Vector3 cross( const Vector3 & vec0, const Vector3 & vec1 );
+
+// Outer product of two 3-D vectors
+// Returns a Matrix3, which is only forward-declared at this point in the header.
+inline const Matrix3 outer( const Vector3 & vec0, const Vector3 & vec1 );
+
+// Pre-multiply a row vector by a 3x3 matrix
+// 
+inline const Vector3 rowMul( const Vector3 & vec, const Matrix3 & mat );
+
+// Cross-product matrix of a 3-D vector
+// 
+inline const Matrix3 crossMatrix( const Vector3 & vec );
+
+// Create cross-product matrix and multiply
+// NOTE: 
+// Faster than separately creating a cross-product matrix and multiplying.
+// 
+inline const Matrix3 crossMatrixMul( const Vector3 & vec, const Matrix3 & mat );
+
+// Linear interpolation between two 3-D vectors
+// NOTE: 
+// Does not clamp t between 0 and 1.
+// 
+inline const Vector3 lerp( vec_float4 t, const Vector3 & vec0, const Vector3 & vec1 );
+
+// Spherical linear interpolation between two 3-D vectors
+// NOTE: 
+// The result is unpredictable if the vectors point in opposite directions.
+// Does not clamp t between 0 and 1.
+// The parameter names (unitVec0, unitVec1) indicate unit-length inputs are expected.
+inline const Vector3 slerp( vec_float4 t, const Vector3 & unitVec0, const Vector3 & unitVec1 );
+
+// Conditionally select between two 3-D vectors
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// NOTE(review): select1 is a vec_uint4 mask; presumably set bits choose vec1 - confirm in the definition.
+inline const Vector3 select( const Vector3 & vec0, const Vector3 & vec1, vec_uint4 select1 );
+
+// Load four three-float 3-D vectors, stored in three quadwords
+// Input is the pointer-to-const threeQuads; the output is the non-const reference vec.
+inline void loadXYZArray( Vector3 & vec, const vec_float4 * threeQuads );
+
+// Store four slots of an SoA 3-D vector in three quadwords
+// Input is the const reference vec; the destination is the non-const pointer threeQuads.
+inline void storeXYZArray( const Vector3 & vec, vec_float4 * threeQuads );
+
+// Store eight slots of two SoA 3-D vectors as half-floats
+// The destination is a vec_ushort8 pointer rather than a vec_float4 pointer.
+inline void storeHalfFloats( const Vector3 & vec0, const Vector3 & vec1, vec_ushort8 * threeQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 3-D vector
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Vector3 & vec );
+
+// Print a 3-D vector and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Vector3 & vec, const char * name );
+
+#endif
+
+// A set of four 4-D vectors in structure-of-arrays format
+// Holds four vec_float4 members (mX, mY, mZ, mW), one per component.
+class Vector4
+{
+    typedef vec_float4 vec_float4_t;
+    vec_float4 mX;
+    vec_float4 mY;
+    vec_float4 mZ;
+    vec_float4 mW;
+
+public:
+    // Default constructor; does no initialization
+    // The body is empty, so mX, mY, mZ, and mW are not assigned here.
+    inline Vector4( ) { };
+
+    // Copy a 4-D vector
+    // 
+    inline Vector4( const Vector4 & vec );
+
+    // Construct a 4-D vector from x, y, z, and w elements
+    // Takes one vec_float4 per component.
+    inline Vector4( vec_float4 x, vec_float4 y, vec_float4 z, vec_float4 w );
+
+    // Construct a 4-D vector from a 3-D vector and a scalar
+    // The scalar w is a vec_float4, not a plain float.
+    inline Vector4( const Vector3 & xyz, vec_float4 w );
+
+    // Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0
+    // Declared explicit: a Vector3 is never converted to a Vector4 implicitly.
+    explicit inline Vector4( const Vector3 & vec );
+
+    // Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1
+    // Declared explicit: a Point3 is never converted to a Vector4 implicitly.
+    explicit inline Vector4( const Point3 & pnt );
+
+    // Copy elements from a quaternion into a 4-D vector
+    // Declared explicit: a Quat is never converted to a Vector4 implicitly.
+    explicit inline Vector4( const Quat & quat );
+
+    // Set all elements of a 4-D vector to the same scalar value
+    // Declared explicit: a vec_float4 is never converted to a Vector4 implicitly.
+    explicit inline Vector4( vec_float4 scalar );
+
+    // Replicate an AoS 4-D vector
+    // Not declared explicit, so an Aos::Vector4 can convert implicitly; the argument is taken by value.
+    inline Vector4( Aos::Vector4 vec );
+
+    // Insert four AoS 4-D vectors
+    // NOTE(review): presumably vec0..vec3 fill SoA slots 0..3 in order - confirm in the definition.
+    inline Vector4( Aos::Vector4 vec0, Aos::Vector4 vec1, Aos::Vector4 vec2, Aos::Vector4 vec3 );
+
+    // Extract four AoS 4-D vectors
+    // Outputs are the four non-const reference parameters; nothing is returned.
+    inline void get4Aos( Aos::Vector4 & result0, Aos::Vector4 & result1, Aos::Vector4 & result2, Aos::Vector4 & result3 ) const;
+
+    // Assign one 4-D vector to another
+    // Returns a Vector4 reference, presumably *this - confirm in the definition.
+    inline Vector4 & operator =( const Vector4 & vec );
+
+    // Set the x, y, and z elements of a 4-D vector
+    // NOTE: 
+    // This function does not change the w element.
+    // Returns a Vector4 reference, presumably *this - confirm in the definition.
+    inline Vector4 & setXYZ( const Vector3 & vec );
+
+    // Get the x, y, and z elements of a 4-D vector
+    // Const member; the result is returned by value as a const Vector3.
+    inline const Vector3 getXYZ( ) const;
+
+    // Set the x element of a 4-D vector
+    // Returns a Vector4 reference, presumably *this so calls can be chained - confirm in the definition.
+    inline Vector4 & setX( vec_float4 x );
+
+    // Set the y element of a 4-D vector
+    // 
+    inline Vector4 & setY( vec_float4 y );
+
+    // Set the z element of a 4-D vector
+    // 
+    inline Vector4 & setZ( vec_float4 z );
+
+    // Set the w element of a 4-D vector
+    // 
+    inline Vector4 & setW( vec_float4 w );
+
+    // Get the x element of a 4-D vector
+    // Const member; returns the element by value.
+    inline vec_float4 getX( ) const;
+
+    // Get the y element of a 4-D vector
+    // 
+    inline vec_float4 getY( ) const;
+
+    // Get the z element of a 4-D vector
+    // 
+    inline vec_float4 getZ( ) const;
+
+    // Get the w element of a 4-D vector
+    // 
+    inline vec_float4 getW( ) const;
+
+    // Set an x, y, z, or w element of a 4-D vector by index
+    // NOTE(review): valid idx is presumably 0..3; no range check is visible in this declaration.
+    inline Vector4 & setElem( int idx, vec_float4 value );
+
+    // Get an x, y, z, or w element of a 4-D vector by index
+    // NOTE(review): valid idx is presumably 0..3; no range check is visible in this declaration.
+    inline vec_float4 getElem( int idx ) const;
+
+    // Subscripting operator to set or get an element
+    // Returns vec_float4_t & (a typedef of vec_float4), so the result can be assigned to.
+    inline vec_float4_t & operator []( int idx );
+
+    // Subscripting operator to get an element
+    // Const overload; returns the element by value.
+    inline vec_float4 operator []( int idx ) const;
+
+    // Add two 4-D vectors
+    // Const member taking a const reference; the result is returned by value as a const Vector4.
+    inline const Vector4 operator +( const Vector4 & vec ) const;
+
+    // Subtract a 4-D vector from another 4-D vector
+    // 
+    inline const Vector4 operator -( const Vector4 & vec ) const;
+
+    // Multiply a 4-D vector by a scalar
+    // The scalar is a vec_float4, not a plain float.
+    inline const Vector4 operator *( vec_float4 scalar ) const;
+
+    // Divide a 4-D vector by a scalar
+    // 
+    inline const Vector4 operator /( vec_float4 scalar ) const;
+
+    // Perform compound assignment and addition with a 4-D vector
+    // Non-const member returning a Vector4 reference.
+    inline Vector4 & operator +=( const Vector4 & vec );
+
+    // Perform compound assignment and subtraction by a 4-D vector
+    // 
+    inline Vector4 & operator -=( const Vector4 & vec );
+
+    // Perform compound assignment and multiplication by a scalar
+    // 
+    inline Vector4 & operator *=( vec_float4 scalar );
+
+    // Perform compound assignment and division by a scalar
+    // 
+    inline Vector4 & operator /=( vec_float4 scalar );
+
+    // Negate all elements of a 4-D vector
+    // Unary minus; const member returning a new const Vector4 by value.
+    inline const Vector4 operator -( ) const;
+
+    // Construct x axis
+    // Static member: callable as Vector4::xAxis( ) without an instance.
+    static inline const Vector4 xAxis( );
+
+    // Construct y axis
+    // 
+    static inline const Vector4 yAxis( );
+
+    // Construct z axis
+    // 
+    static inline const Vector4 zAxis( );
+
+    // Construct w axis
+    // 
+    static inline const Vector4 wAxis( );
+
+};
+
+// Multiply a 4-D vector by a scalar
+// Non-member overload with the scalar as the left-hand operand.
+inline const Vector4 operator *( vec_float4 scalar, const Vector4 & vec );
+
+// Multiply two 4-D vectors per element
+// 
+inline const Vector4 mulPerElem( const Vector4 & vec0, const Vector4 & vec1 );
+
+// Divide two 4-D vectors per element
+// NOTE: 
+// Floating-point behavior matches standard library function divf4.
+// 
+inline const Vector4 divPerElem( const Vector4 & vec0, const Vector4 & vec1 );
+
+// Compute the reciprocal of a 4-D vector per element
+// NOTE: 
+// Floating-point behavior matches standard library function recipf4.
+// 
+inline const Vector4 recipPerElem( const Vector4 & vec );
+
+// Compute the square root of a 4-D vector per element
+// NOTE: 
+// Floating-point behavior matches standard library function sqrtf4.
+// 
+inline const Vector4 sqrtPerElem( const Vector4 & vec );
+
+// Compute the reciprocal square root of a 4-D vector per element
+// NOTE: 
+// Floating-point behavior matches standard library function rsqrtf4.
+// 
+inline const Vector4 rsqrtPerElem( const Vector4 & vec );
+
+// Compute the absolute value of a 4-D vector per element
+// 
+inline const Vector4 absPerElem( const Vector4 & vec );
+
+// Copy sign from one 4-D vector to another, per element
+// NOTE(review): which argument supplies the sign is not visible here - confirm in the definition.
+inline const Vector4 copySignPerElem( const Vector4 & vec0, const Vector4 & vec1 );
+
+// Maximum of two 4-D vectors per element
+// 
+inline const Vector4 maxPerElem( const Vector4 & vec0, const Vector4 & vec1 );
+
+// Minimum of two 4-D vectors per element
+// 
+inline const Vector4 minPerElem( const Vector4 & vec0, const Vector4 & vec1 );
+
+// Maximum element of a 4-D vector
+// Returns a vec_float4 (presumably one maximum per SoA slot - confirm in the definition).
+inline vec_float4 maxElem( const Vector4 & vec );
+
+// Minimum element of a 4-D vector
+// Returns a vec_float4 (presumably one minimum per SoA slot - confirm in the definition).
+inline vec_float4 minElem( const Vector4 & vec );
+
+// Compute the sum of all elements of a 4-D vector
+// 
+inline vec_float4 sum( const Vector4 & vec );
+
+// Compute the dot product of two 4-D vectors
+// Returns a vec_float4 (presumably one dot product per SoA slot - confirm in the definition).
+inline vec_float4 dot( const Vector4 & vec0, const Vector4 & vec1 );
+
+// Compute the square of the length of a 4-D vector
+// 
+inline vec_float4 lengthSqr( const Vector4 & vec );
+
+// Compute the length of a 4-D vector
+// 
+inline vec_float4 length( const Vector4 & vec );
+
+// Normalize a 4-D vector
+// NOTE: 
+// The result is unpredictable when all elements of vec are at or near zero.
+// 
+inline const Vector4 normalize( const Vector4 & vec );
+
+// Outer product of two 4-D vectors
+// Returns a Matrix4, which is only forward-declared at this point in the header.
+inline const Matrix4 outer( const Vector4 & vec0, const Vector4 & vec1 );
+
+// Linear interpolation between two 4-D vectors
+// NOTE: 
+// Does not clamp t between 0 and 1.
+// 
+inline const Vector4 lerp( vec_float4 t, const Vector4 & vec0, const Vector4 & vec1 );
+
+// Spherical linear interpolation between two 4-D vectors
+// NOTE: 
+// The result is unpredictable if the vectors point in opposite directions.
+// Does not clamp t between 0 and 1.
+// The parameter names (unitVec0, unitVec1) indicate unit-length inputs are expected.
+inline const Vector4 slerp( vec_float4 t, const Vector4 & unitVec0, const Vector4 & unitVec1 );
+
+// Conditionally select between two 4-D vectors
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// NOTE(review): select1 is a vec_uint4 mask; presumably set bits choose vec1 - confirm in the definition.
+inline const Vector4 select( const Vector4 & vec0, const Vector4 & vec1, vec_uint4 select1 );
+
+// Store four slots of an SoA 4-D vector as half-floats
+// Input is the const reference vec; the destination is the non-const vec_ushort8 pointer twoQuads.
+inline void storeHalfFloats( const Vector4 & vec, vec_ushort8 * twoQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 4-D vector
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Vector4 & vec );
+
+// Print a 4-D vector and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Vector4 & vec, const char * name );
+
+#endif
+
+// A set of four 3-D points in structure-of-arrays format
+// Holds three vec_float4 members (mX, mY, mZ), one per component.
+class Point3
+{
+    typedef vec_float4 vec_float4_t;
+    vec_float4 mX;
+    vec_float4 mY;
+    vec_float4 mZ;
+
+public:
+    // Default constructor; does no initialization
+    // The body is empty, so mX, mY, and mZ are not assigned here.
+    inline Point3( ) { };
+
+    // Copy a 3-D point
+    // 
+    inline Point3( const Point3 & pnt );
+
+    // Construct a 3-D point from x, y, and z elements
+    // Takes one vec_float4 per component.
+    inline Point3( vec_float4 x, vec_float4 y, vec_float4 z );
+
+    // Copy elements from a 3-D vector into a 3-D point
+    // Declared explicit: a Vector3 is never converted to a Point3 implicitly.
+    explicit inline Point3( const Vector3 & vec );
+
+    // Set all elements of a 3-D point to the same scalar value
+    // Declared explicit: a vec_float4 is never converted to a Point3 implicitly.
+    explicit inline Point3( vec_float4 scalar );
+
+    // Replicate an AoS 3-D point
+    // Not declared explicit, so an Aos::Point3 can convert implicitly; the argument is taken by value.
+    inline Point3( Aos::Point3 pnt );
+
+    // Insert four AoS 3-D points
+    // NOTE(review): presumably pnt0..pnt3 fill SoA slots 0..3 in order - confirm in the definition.
+    inline Point3( Aos::Point3 pnt0, Aos::Point3 pnt1, Aos::Point3 pnt2, Aos::Point3 pnt3 );
+
+    // Extract four AoS 3-D points
+    // Outputs are the four non-const reference parameters; nothing is returned.
+    inline void get4Aos( Aos::Point3 & result0, Aos::Point3 & result1, Aos::Point3 & result2, Aos::Point3 & result3 ) const;
+
+    // Assign one 3-D point to another
+    // Returns a Point3 reference, presumably *this - confirm in the definition.
+    inline Point3 & operator =( const Point3 & pnt );
+
+    // Set the x element of a 3-D point
+    // Returns a Point3 reference, presumably *this so calls can be chained - confirm in the definition.
+    inline Point3 & setX( vec_float4 x );
+
+    // Set the y element of a 3-D point
+    // 
+    inline Point3 & setY( vec_float4 y );
+
+    // Set the z element of a 3-D point
+    // 
+    inline Point3 & setZ( vec_float4 z );
+
+    // Get the x element of a 3-D point
+    // Const member; returns the element by value.
+    inline vec_float4 getX( ) const;
+
+    // Get the y element of a 3-D point
+    // 
+    inline vec_float4 getY( ) const;
+
+    // Get the z element of a 3-D point
+    // 
+    inline vec_float4 getZ( ) const;
+
+    // Set an x, y, or z element of a 3-D point by index
+    // NOTE(review): valid idx is presumably 0..2; no range check is visible in this declaration.
+    inline Point3 & setElem( int idx, vec_float4 value );
+
+    // Get an x, y, or z element of a 3-D point by index
+    // NOTE(review): valid idx is presumably 0..2; no range check is visible in this declaration.
+    inline vec_float4 getElem( int idx ) const;
+
+    // Subscripting operator to set or get an element
+    // Returns vec_float4_t & (a typedef of vec_float4), so the result can be assigned to.
+    inline vec_float4_t & operator []( int idx );
+
+    // Subscripting operator to get an element
+    // Const overload; returns the element by value.
+    inline vec_float4 operator []( int idx ) const;
+
+    // Subtract a 3-D point from another 3-D point
+    // The result type is Vector3, not Point3.
+    inline const Vector3 operator -( const Point3 & pnt ) const;
+
+    // Add a 3-D vector to a 3-D point
+    // The result type is Point3.
+    inline const Point3 operator +( const Vector3 & vec ) const;
+
+    // Subtract a 3-D vector from a 3-D point
+    // The result type is Point3.
+    inline const Point3 operator -( const Vector3 & vec ) const;
+
+    // Perform compound assignment and addition with a 3-D vector
+    // Non-const member returning a Point3 reference.
+    inline Point3 & operator +=( const Vector3 & vec );
+
+    // Perform compound assignment and subtraction by a 3-D vector
+    // 
+    inline Point3 & operator -=( const Vector3 & vec );
+
+};
+
+// Multiply two 3-D points per element
+// 
+inline const Point3 mulPerElem( const Point3 & pnt0, const Point3 & pnt1 );
+
+// Divide two 3-D points per element
+// NOTE: 
+// Floating-point behavior matches standard library function divf4.
+// 
+inline const Point3 divPerElem( const Point3 & pnt0, const Point3 & pnt1 );
+
+// Compute the reciprocal of a 3-D point per element
+// NOTE: 
+// Floating-point behavior matches standard library function recipf4.
+// 
+inline const Point3 recipPerElem( const Point3 & pnt );
+
+// Compute the square root of a 3-D point per element
+// NOTE: 
+// Floating-point behavior matches standard library function sqrtf4.
+// 
+inline const Point3 sqrtPerElem( const Point3 & pnt );
+
+// Compute the reciprocal square root of a 3-D point per element
+// NOTE: 
+// Floating-point behavior matches standard library function rsqrtf4.
+// 
+inline const Point3 rsqrtPerElem( const Point3 & pnt );
+
+// Compute the absolute value of a 3-D point per element
+// 
+inline const Point3 absPerElem( const Point3 & pnt );
+
+// Copy sign from one 3-D point to another, per element
+// NOTE(review): which argument supplies the sign is not visible here - confirm in the definition.
+inline const Point3 copySignPerElem( const Point3 & pnt0, const Point3 & pnt1 );
+
+// Maximum of two 3-D points per element
+// 
+inline const Point3 maxPerElem( const Point3 & pnt0, const Point3 & pnt1 );
+
+// Minimum of two 3-D points per element
+// 
+inline const Point3 minPerElem( const Point3 & pnt0, const Point3 & pnt1 );
+
+// Maximum element of a 3-D point
+// Returns a vec_float4 (presumably one maximum per SoA slot - confirm in the definition).
+inline vec_float4 maxElem( const Point3 & pnt );
+
+// Minimum element of a 3-D point
+// Returns a vec_float4 (presumably one minimum per SoA slot - confirm in the definition).
+inline vec_float4 minElem( const Point3 & pnt );
+
+// Compute the sum of all elements of a 3-D point
+// 
+inline vec_float4 sum( const Point3 & pnt );
+
+// Apply uniform scale to a 3-D point
+// Overload taking a single vec_float4 scale value.
+inline const Point3 scale( const Point3 & pnt, vec_float4 scaleVal );
+
+// Apply non-uniform scale to a 3-D point
+// Overload taking a Vector3 of scale factors.
+inline const Point3 scale( const Point3 & pnt, const Vector3 & scaleVec );
+
+// Scalar projection of a 3-D point on a unit-length 3-D vector
+// Returns a vec_float4; the parameter name unitVec indicates a unit-length input is expected.
+inline vec_float4 projection( const Point3 & pnt, const Vector3 & unitVec );
+
+// Compute the square of the distance of a 3-D point from the coordinate-system origin
+// 
+inline vec_float4 distSqrFromOrigin( const Point3 & pnt );
+
+// Compute the distance of a 3-D point from the coordinate-system origin
+// 
+inline vec_float4 distFromOrigin( const Point3 & pnt );
+
+// Compute the square of the distance between two 3-D points
+// 
+inline vec_float4 distSqr( const Point3 & pnt0, const Point3 & pnt1 );
+
+// Compute the distance between two 3-D points
+// 
+inline vec_float4 dist( const Point3 & pnt0, const Point3 & pnt1 );
+
+// Linear interpolation between two 3-D points
+// NOTE: 
+// Does not clamp t between 0 and 1.
+// 
+inline const Point3 lerp( vec_float4 t, const Point3 & pnt0, const Point3 & pnt1 );
+
+// Conditionally select between two 3-D points
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// NOTE(review): select1 is a vec_uint4 mask; presumably set bits choose pnt1 - confirm in the definition.
+inline const Point3 select( const Point3 & pnt0, const Point3 & pnt1, vec_uint4 select1 );
+
+// Load four three-float 3-D points, stored in three quadwords
+// Input is the pointer-to-const threeQuads; the output is the non-const reference pnt.
+inline void loadXYZArray( Point3 & pnt, const vec_float4 * threeQuads );
+
+// Store four slots of an SoA 3-D point in three quadwords
+// Input is the const reference pnt; the destination is the non-const pointer threeQuads.
+inline void storeXYZArray( const Point3 & pnt, vec_float4 * threeQuads );
+
+// Store eight slots of two SoA 3-D points as half-floats
+// The destination is a vec_ushort8 pointer rather than a vec_float4 pointer.
+inline void storeHalfFloats( const Point3 & pnt0, const Point3 & pnt1, vec_ushort8 * threeQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 3-D point
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Point3 & pnt );
+
+// Print a 3-D point and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Point3 & pnt, const char * name );
+
+#endif
+
+// A set of four quaternions in structure-of-arrays format
+// Each member (mX, mY, mZ, mW) is a vec_float4 holding that element for the whole set.
+class Quat
+{
+    typedef vec_float4 vec_float4_t; // used as the reference type returned by the non-const operator []
+    vec_float4 mX;
+    vec_float4 mY;
+    vec_float4 mZ;
+    vec_float4 mW;
+
+public:
+    // Default constructor; does no initialization
+    // 
+    inline Quat( ) { };
+
+    // Copy a quaternion
+    // 
+    inline Quat( const Quat & quat );
+
+    // Construct a quaternion from x, y, z, and w elements
+    // 
+    inline Quat( vec_float4 x, vec_float4 y, vec_float4 z, vec_float4 w );
+
+    // Construct a quaternion from a 3-D vector and a scalar
+    // xyz supplies the x, y, and z elements; w supplies the w element.
+    inline Quat( const Vector3 & xyz, vec_float4 w );
+
+    // Copy elements from a 4-D vector into a quaternion
+    // Explicit: a Vector4 is never implicitly converted to a Quat.
+    explicit inline Quat( const Vector4 & vec );
+
+    // Convert a rotation matrix to a unit-length quaternion
+    // Explicit: a Matrix3 is never implicitly converted to a Quat.
+    explicit inline Quat( const Matrix3 & rotMat );
+
+    // Set all elements of a quaternion to the same scalar value
+    // Explicit: a bare vec_float4 is never implicitly converted to a Quat.
+    explicit inline Quat( vec_float4 scalar );
+
+    // Replicate an AoS quaternion
+    // Not explicit, so an Aos::Quat converts implicitly to this SoA type.
+    inline Quat( Aos::Quat quat );
+
+    // Insert four AoS quaternions
+    // 
+    inline Quat( Aos::Quat quat0, Aos::Quat quat1, Aos::Quat quat2, Aos::Quat quat3 );
+
+    // Extract four AoS quaternions
+    // The results are returned through the four reference parameters.
+    inline void get4Aos( Aos::Quat & result0, Aos::Quat & result1, Aos::Quat & result2, Aos::Quat & result3 ) const;
+
+    // Assign one quaternion to another
+    // 
+    inline Quat & operator =( const Quat & quat );
+
+    // Set the x, y, and z elements of a quaternion
+    // NOTE: 
+    // This function does not change the w element.
+    // 
+    inline Quat & setXYZ( const Vector3 & vec );
+
+    // Get the x, y, and z elements of a quaternion
+    // 
+    inline const Vector3 getXYZ( ) const;
+
+    // Set the x element of a quaternion
+    // 
+    inline Quat & setX( vec_float4 x );
+
+    // Set the y element of a quaternion
+    // 
+    inline Quat & setY( vec_float4 y );
+
+    // Set the z element of a quaternion
+    // 
+    inline Quat & setZ( vec_float4 z );
+
+    // Set the w element of a quaternion
+    // 
+    inline Quat & setW( vec_float4 w );
+
+    // Get the x element of a quaternion
+    // 
+    inline vec_float4 getX( ) const;
+
+    // Get the y element of a quaternion
+    // 
+    inline vec_float4 getY( ) const;
+
+    // Get the z element of a quaternion
+    // 
+    inline vec_float4 getZ( ) const;
+
+    // Get the w element of a quaternion
+    // 
+    inline vec_float4 getW( ) const;
+
+    // Set an x, y, z, or w element of a quaternion by index
+    // NOTE(review): idx presumably maps 0..3 to x, y, z, w; no range check is visible in this declaration - confirm.
+    inline Quat & setElem( int idx, vec_float4 value );
+
+    // Get an x, y, z, or w element of a quaternion by index
+    // NOTE(review): idx presumably maps 0..3 to x, y, z, w - confirm.
+    inline vec_float4 getElem( int idx ) const;
+
+    // Subscripting operator to set or get an element
+    // Returns a reference (vec_float4_t &), so the element can be assigned through it.
+    inline vec_float4_t & operator []( int idx );
+
+    // Subscripting operator to get an element
+    // Returns the element by value.
+    inline vec_float4 operator []( int idx ) const;
+
+    // Add two quaternions
+    // 
+    inline const Quat operator +( const Quat & quat ) const;
+
+    // Subtract a quaternion from another quaternion
+    // 
+    inline const Quat operator -( const Quat & quat ) const;
+
+    // Multiply two quaternions
+    // 
+    inline const Quat operator *( const Quat & quat ) const;
+
+    // Multiply a quaternion by a scalar
+    // 
+    inline const Quat operator *( vec_float4 scalar ) const;
+
+    // Divide a quaternion by a scalar
+    // 
+    inline const Quat operator /( vec_float4 scalar ) const;
+
+    // Perform compound assignment and addition with a quaternion
+    // 
+    inline Quat & operator +=( const Quat & quat );
+
+    // Perform compound assignment and subtraction by a quaternion
+    // 
+    inline Quat & operator -=( const Quat & quat );
+
+    // Perform compound assignment and multiplication by a quaternion
+    // 
+    inline Quat & operator *=( const Quat & quat );
+
+    // Perform compound assignment and multiplication by a scalar
+    // 
+    inline Quat & operator *=( vec_float4 scalar );
+
+    // Perform compound assignment and division by a scalar
+    // 
+    inline Quat & operator /=( vec_float4 scalar );
+
+    // Negate all elements of a quaternion
+    // 
+    inline const Quat operator -( ) const;
+
+    // Construct an identity quaternion
+    // 
+    static inline const Quat identity( );
+
+    // Construct a quaternion to rotate between two unit-length 3-D vectors
+    // NOTE: 
+    // The result is unpredictable if unitVec0 and unitVec1 point in opposite directions.
+    // 
+    static inline const Quat rotation( const Vector3 & unitVec0, const Vector3 & unitVec1 );
+
+    // Construct a quaternion to rotate around a unit-length 3-D vector
+    // 
+    static inline const Quat rotation( vec_float4 radians, const Vector3 & unitVec );
+
+    // Construct a quaternion to rotate around the x axis
+    // 
+    static inline const Quat rotationX( vec_float4 radians );
+
+    // Construct a quaternion to rotate around the y axis
+    // 
+    static inline const Quat rotationY( vec_float4 radians );
+
+    // Construct a quaternion to rotate around the z axis
+    // 
+    static inline const Quat rotationZ( vec_float4 radians );
+
+};
+
+// Multiply a quaternion by a scalar
+// Non-member form with the scalar on the left-hand side.
+inline const Quat operator *( vec_float4 scalar, const Quat & quat );
+
+// Compute the conjugate of a quaternion
+// 
+inline const Quat conj( const Quat & quat );
+
+// Use a unit-length quaternion to rotate a 3-D vector
+// 
+inline const Vector3 rotate( const Quat & unitQuat, const Vector3 & vec );
+
+// Compute the dot product of two quaternions
+// 
+inline vec_float4 dot( const Quat & quat0, const Quat & quat1 );
+
+// Compute the norm of a quaternion
+// NOTE(review): declared separately from length(); presumably the squared length (dot of quat with itself) - confirm in quat_soa.h.
+inline vec_float4 norm( const Quat & quat );
+
+// Compute the length of a quaternion
+// 
+inline vec_float4 length( const Quat & quat );
+
+// Normalize a quaternion
+// NOTE: 
+// The result is unpredictable when all elements of quat are at or near zero.
+// 
+inline const Quat normalize( const Quat & quat );
+
+// Linear interpolation between two quaternions
+// NOTE: 
+// Does not clamp t between 0 and 1.
+// 
+inline const Quat lerp( vec_float4 t, const Quat & quat0, const Quat & quat1 );
+
+// Spherical linear interpolation between two quaternions
+// NOTE: 
+// Interpolates along the shortest path between orientations.
+// Does not clamp t between 0 and 1.
+// 
+inline const Quat slerp( vec_float4 t, const Quat & unitQuat0, const Quat & unitQuat1 );
+
+// Spherical quadrangle interpolation
+// All four quaternion parameters are named as unit-length inputs.
+inline const Quat squad( vec_float4 t, const Quat & unitQuat0, const Quat & unitQuat1, const Quat & unitQuat2, const Quat & unitQuat3 );
+
+// Conditionally select between two quaternions
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// NOTE(review): select1 is presumably a per-slot mask that picks quat1 where set and quat0 elsewhere - confirm.
+inline const Quat select( const Quat & quat0, const Quat & quat1, vec_uint4 select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a quaternion
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Quat & quat );
+
+// Print a quaternion and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Quat & quat, const char * name );
+
+#endif
+
+// A set of four 3x3 matrices in structure-of-arrays format
+// Stored as three Vector3 columns (mCol0, mCol1, mCol2).
+class Matrix3
+{
+    Vector3 mCol0;
+    Vector3 mCol1;
+    Vector3 mCol2;
+
+public:
+    // Default constructor; does no initialization
+    // 
+    inline Matrix3( ) { };
+
+    // Copy a 3x3 matrix
+    // 
+    inline Matrix3( const Matrix3 & mat );
+
+    // Construct a 3x3 matrix containing the specified columns
+    // 
+    inline Matrix3( const Vector3 & col0, const Vector3 & col1, const Vector3 & col2 );
+
+    // Construct a 3x3 rotation matrix from a unit-length quaternion
+    // Explicit: a Quat is never implicitly converted to a Matrix3.
+    explicit inline Matrix3( const Quat & unitQuat );
+
+    // Set all elements of a 3x3 matrix to the same scalar value
+    // Explicit: a bare vec_float4 is never implicitly converted to a Matrix3.
+    explicit inline Matrix3( vec_float4 scalar );
+
+    // Replicate an AoS 3x3 matrix
+    // Not explicit, so an Aos::Matrix3 converts implicitly to this SoA type.
+    inline Matrix3( const Aos::Matrix3 & mat );
+
+    // Insert four AoS 3x3 matrices
+    // 
+    inline Matrix3( const Aos::Matrix3 & mat0, const Aos::Matrix3 & mat1, const Aos::Matrix3 & mat2, const Aos::Matrix3 & mat3 );
+
+    // Extract four AoS 3x3 matrices
+    // The results are returned through the four reference parameters.
+    inline void get4Aos( Aos::Matrix3 & result0, Aos::Matrix3 & result1, Aos::Matrix3 & result2, Aos::Matrix3 & result3 ) const;
+
+    // Assign one 3x3 matrix to another
+    // 
+    inline Matrix3 & operator =( const Matrix3 & mat );
+
+    // Set column 0 of a 3x3 matrix
+    // 
+    inline Matrix3 & setCol0( const Vector3 & col0 );
+
+    // Set column 1 of a 3x3 matrix
+    // 
+    inline Matrix3 & setCol1( const Vector3 & col1 );
+
+    // Set column 2 of a 3x3 matrix
+    // 
+    inline Matrix3 & setCol2( const Vector3 & col2 );
+
+    // Get column 0 of a 3x3 matrix
+    // 
+    inline const Vector3 getCol0( ) const;
+
+    // Get column 1 of a 3x3 matrix
+    // 
+    inline const Vector3 getCol1( ) const;
+
+    // Get column 2 of a 3x3 matrix
+    // 
+    inline const Vector3 getCol2( ) const;
+
+    // Set the column of a 3x3 matrix referred to by the specified index
+    // 
+    inline Matrix3 & setCol( int col, const Vector3 & vec );
+
+    // Set the row of a 3x3 matrix referred to by the specified index
+    // 
+    inline Matrix3 & setRow( int row, const Vector3 & vec );
+
+    // Get the column of a 3x3 matrix referred to by the specified index
+    // 
+    inline const Vector3 getCol( int col ) const;
+
+    // Get the row of a 3x3 matrix referred to by the specified index
+    // 
+    inline const Vector3 getRow( int row ) const;
+
+    // Subscripting operator to set or get a column
+    // Returns a reference to a Vector3, so the column can be assigned through it.
+    inline Vector3 & operator []( int col );
+
+    // Subscripting operator to get a column
+    // Returns the column by value.
+    inline const Vector3 operator []( int col ) const;
+
+    // Set the element of a 3x3 matrix referred to by column and row indices
+    // Argument order is column index first, then row index.
+    inline Matrix3 & setElem( int col, int row, vec_float4 val );
+
+    // Get the element of a 3x3 matrix referred to by column and row indices
+    // Argument order is column index first, then row index.
+    inline vec_float4 getElem( int col, int row ) const;
+
+    // Add two 3x3 matrices
+    // 
+    inline const Matrix3 operator +( const Matrix3 & mat ) const;
+
+    // Subtract a 3x3 matrix from another 3x3 matrix
+    // 
+    inline const Matrix3 operator -( const Matrix3 & mat ) const;
+
+    // Negate all elements of a 3x3 matrix
+    // 
+    inline const Matrix3 operator -( ) const;
+
+    // Multiply a 3x3 matrix by a scalar
+    // 
+    inline const Matrix3 operator *( vec_float4 scalar ) const;
+
+    // Multiply a 3x3 matrix by a 3-D vector
+    // 
+    inline const Vector3 operator *( const Vector3 & vec ) const;
+
+    // Multiply two 3x3 matrices
+    // 
+    inline const Matrix3 operator *( const Matrix3 & mat ) const;
+
+    // Perform compound assignment and addition with a 3x3 matrix
+    // 
+    inline Matrix3 & operator +=( const Matrix3 & mat );
+
+    // Perform compound assignment and subtraction by a 3x3 matrix
+    // 
+    inline Matrix3 & operator -=( const Matrix3 & mat );
+
+    // Perform compound assignment and multiplication by a scalar
+    // 
+    inline Matrix3 & operator *=( vec_float4 scalar );
+
+    // Perform compound assignment and multiplication by a 3x3 matrix
+    // 
+    inline Matrix3 & operator *=( const Matrix3 & mat );
+
+    // Construct an identity 3x3 matrix
+    // 
+    static inline const Matrix3 identity( );
+
+    // Construct a 3x3 matrix to rotate around the x axis
+    // 
+    static inline const Matrix3 rotationX( vec_float4 radians );
+
+    // Construct a 3x3 matrix to rotate around the y axis
+    // 
+    static inline const Matrix3 rotationY( vec_float4 radians );
+
+    // Construct a 3x3 matrix to rotate around the z axis
+    // 
+    static inline const Matrix3 rotationZ( vec_float4 radians );
+
+    // Construct a 3x3 matrix to rotate around the x, y, and z axes
+    // NOTE(review): the name suggests the rotations are composed in Z * Y * X order - confirm in mat_soa.h.
+    static inline const Matrix3 rotationZYX( const Vector3 & radiansXYZ );
+
+    // Construct a 3x3 matrix to rotate around a unit-length 3-D vector
+    // 
+    static inline const Matrix3 rotation( vec_float4 radians, const Vector3 & unitVec );
+
+    // Construct a rotation matrix from a unit-length quaternion
+    // 
+    static inline const Matrix3 rotation( const Quat & unitQuat );
+
+    // Construct a 3x3 matrix to perform scaling
+    // 
+    static inline const Matrix3 scale( const Vector3 & scaleVec );
+
+};
+// Multiply a 3x3 matrix by a scalar
+// Non-member form with the scalar on the left-hand side.
+inline const Matrix3 operator *( vec_float4 scalar, const Matrix3 & mat );
+
+// Append (post-multiply) a scale transformation to a 3x3 matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// 
+inline const Matrix3 appendScale( const Matrix3 & mat, const Vector3 & scaleVec );
+
+// Prepend (pre-multiply) a scale transformation to a 3x3 matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// 
+inline const Matrix3 prependScale( const Vector3 & scaleVec, const Matrix3 & mat );
+
+// Multiply two 3x3 matrices per element
+// 
+inline const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 );
+
+// Compute the absolute value of a 3x3 matrix per element
+// 
+inline const Matrix3 absPerElem( const Matrix3 & mat );
+
+// Transpose of a 3x3 matrix
+// 
+inline const Matrix3 transpose( const Matrix3 & mat );
+
+// Compute the inverse of a 3x3 matrix
+// NOTE: 
+// Result is unpredictable when the determinant of mat is equal to or near 0.
+// 
+inline const Matrix3 inverse( const Matrix3 & mat );
+
+// Determinant of a 3x3 matrix
+// 
+inline vec_float4 determinant( const Matrix3 & mat );
+
+// Conditionally select between two 3x3 matrices
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// NOTE(review): select1 is presumably a per-slot mask that picks mat1 where set and mat0 elsewhere - confirm.
+inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, vec_uint4 select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 3x3 matrix
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Matrix3 & mat );
+
+// Print a 3x3 matrix and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Matrix3 & mat, const char * name );
+
+#endif
+
+// A set of four 4x4 matrices in structure-of-arrays format
+// Stored as four Vector4 columns (mCol0, mCol1, mCol2, mCol3).
+class Matrix4
+{
+    Vector4 mCol0;
+    Vector4 mCol1;
+    Vector4 mCol2;
+    Vector4 mCol3;
+
+public:
+    // Default constructor; does no initialization
+    // 
+    inline Matrix4( ) { };
+
+    // Copy a 4x4 matrix
+    // 
+    inline Matrix4( const Matrix4 & mat );
+
+    // Construct a 4x4 matrix containing the specified columns
+    // 
+    inline Matrix4( const Vector4 & col0, const Vector4 & col1, const Vector4 & col2, const Vector4 & col3 );
+
+    // Construct a 4x4 matrix from a 3x4 transformation matrix
+    // Explicit: a Transform3 is never implicitly converted to a Matrix4.
+    explicit inline Matrix4( const Transform3 & mat );
+
+    // Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector
+    // 
+    inline Matrix4( const Matrix3 & mat, const Vector3 & translateVec );
+
+    // Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector
+    // 
+    inline Matrix4( const Quat & unitQuat, const Vector3 & translateVec );
+
+    // Set all elements of a 4x4 matrix to the same scalar value
+    // Explicit: a bare vec_float4 is never implicitly converted to a Matrix4.
+    explicit inline Matrix4( vec_float4 scalar );
+
+    // Replicate an AoS 4x4 matrix
+    // Not explicit, so an Aos::Matrix4 converts implicitly to this SoA type.
+    inline Matrix4( const Aos::Matrix4 & mat );
+
+    // Insert four AoS 4x4 matrices
+    // 
+    inline Matrix4( const Aos::Matrix4 & mat0, const Aos::Matrix4 & mat1, const Aos::Matrix4 & mat2, const Aos::Matrix4 & mat3 );
+
+    // Extract four AoS 4x4 matrices
+    // The results are returned through the four reference parameters.
+    inline void get4Aos( Aos::Matrix4 & result0, Aos::Matrix4 & result1, Aos::Matrix4 & result2, Aos::Matrix4 & result3 ) const;
+
+    // Assign one 4x4 matrix to another
+    // 
+    inline Matrix4 & operator =( const Matrix4 & mat );
+
+    // Set the upper-left 3x3 submatrix
+    // NOTE: 
+    // This function does not change the bottom row elements.
+    // 
+    inline Matrix4 & setUpper3x3( const Matrix3 & mat3 );
+
+    // Get the upper-left 3x3 submatrix of a 4x4 matrix
+    // 
+    inline const Matrix3 getUpper3x3( ) const;
+
+    // Set translation component
+    // NOTE: 
+    // This function does not change the bottom row elements.
+    // 
+    inline Matrix4 & setTranslation( const Vector3 & translateVec );
+
+    // Get the translation component of a 4x4 matrix
+    // 
+    inline const Vector3 getTranslation( ) const;
+
+    // Set column 0 of a 4x4 matrix
+    // 
+    inline Matrix4 & setCol0( const Vector4 & col0 );
+
+    // Set column 1 of a 4x4 matrix
+    // 
+    inline Matrix4 & setCol1( const Vector4 & col1 );
+
+    // Set column 2 of a 4x4 matrix
+    // 
+    inline Matrix4 & setCol2( const Vector4 & col2 );
+
+    // Set column 3 of a 4x4 matrix
+    // 
+    inline Matrix4 & setCol3( const Vector4 & col3 );
+
+    // Get column 0 of a 4x4 matrix
+    // 
+    inline const Vector4 getCol0( ) const;
+
+    // Get column 1 of a 4x4 matrix
+    // 
+    inline const Vector4 getCol1( ) const;
+
+    // Get column 2 of a 4x4 matrix
+    // 
+    inline const Vector4 getCol2( ) const;
+
+    // Get column 3 of a 4x4 matrix
+    // 
+    inline const Vector4 getCol3( ) const;
+
+    // Set the column of a 4x4 matrix referred to by the specified index
+    // 
+    inline Matrix4 & setCol( int col, const Vector4 & vec );
+
+    // Set the row of a 4x4 matrix referred to by the specified index
+    // 
+    inline Matrix4 & setRow( int row, const Vector4 & vec );
+
+    // Get the column of a 4x4 matrix referred to by the specified index
+    // 
+    inline const Vector4 getCol( int col ) const;
+
+    // Get the row of a 4x4 matrix referred to by the specified index
+    // 
+    inline const Vector4 getRow( int row ) const;
+
+    // Subscripting operator to set or get a column
+    // Returns a reference to a Vector4, so the column can be assigned through it.
+    inline Vector4 & operator []( int col );
+
+    // Subscripting operator to get a column
+    // Returns the column by value.
+    inline const Vector4 operator []( int col ) const;
+
+    // Set the element of a 4x4 matrix referred to by column and row indices
+    // Argument order is column index first, then row index.
+    inline Matrix4 & setElem( int col, int row, vec_float4 val );
+
+    // Get the element of a 4x4 matrix referred to by column and row indices
+    // Argument order is column index first, then row index.
+    inline vec_float4 getElem( int col, int row ) const;
+
+    // Add two 4x4 matrices
+    // 
+    inline const Matrix4 operator +( const Matrix4 & mat ) const;
+
+    // Subtract a 4x4 matrix from another 4x4 matrix
+    // 
+    inline const Matrix4 operator -( const Matrix4 & mat ) const;
+
+    // Negate all elements of a 4x4 matrix
+    // 
+    inline const Matrix4 operator -( ) const;
+
+    // Multiply a 4x4 matrix by a scalar
+    // 
+    inline const Matrix4 operator *( vec_float4 scalar ) const;
+
+    // Multiply a 4x4 matrix by a 4-D vector
+    // 
+    inline const Vector4 operator *( const Vector4 & vec ) const;
+
+    // Multiply a 4x4 matrix by a 3-D vector
+    // NOTE(review): the result is a Vector4; presumably vec is treated as having w = 0 - confirm.
+    inline const Vector4 operator *( const Vector3 & vec ) const;
+
+    // Multiply a 4x4 matrix by a 3-D point
+    // NOTE(review): the result is a Vector4; presumably pnt is treated as having w = 1 - confirm.
+    inline const Vector4 operator *( const Point3 & pnt ) const;
+
+    // Multiply two 4x4 matrices
+    // 
+    inline const Matrix4 operator *( const Matrix4 & mat ) const;
+
+    // Multiply a 4x4 matrix by a 3x4 transformation matrix
+    // 
+    inline const Matrix4 operator *( const Transform3 & tfrm ) const;
+
+    // Perform compound assignment and addition with a 4x4 matrix
+    // 
+    inline Matrix4 & operator +=( const Matrix4 & mat );
+
+    // Perform compound assignment and subtraction by a 4x4 matrix
+    // 
+    inline Matrix4 & operator -=( const Matrix4 & mat );
+
+    // Perform compound assignment and multiplication by a scalar
+    // 
+    inline Matrix4 & operator *=( vec_float4 scalar );
+
+    // Perform compound assignment and multiplication by a 4x4 matrix
+    // 
+    inline Matrix4 & operator *=( const Matrix4 & mat );
+
+    // Perform compound assignment and multiplication by a 3x4 transformation matrix
+    // 
+    inline Matrix4 & operator *=( const Transform3 & tfrm );
+
+    // Construct an identity 4x4 matrix
+    // 
+    static inline const Matrix4 identity( );
+
+    // Construct a 4x4 matrix to rotate around the x axis
+    // 
+    static inline const Matrix4 rotationX( vec_float4 radians );
+
+    // Construct a 4x4 matrix to rotate around the y axis
+    // 
+    static inline const Matrix4 rotationY( vec_float4 radians );
+
+    // Construct a 4x4 matrix to rotate around the z axis
+    // 
+    static inline const Matrix4 rotationZ( vec_float4 radians );
+
+    // Construct a 4x4 matrix to rotate around the x, y, and z axes
+    // NOTE(review): the name suggests the rotations are composed in Z * Y * X order - confirm in mat_soa.h.
+    static inline const Matrix4 rotationZYX( const Vector3 & radiansXYZ );
+
+    // Construct a 4x4 matrix to rotate around a unit-length 3-D vector
+    // 
+    static inline const Matrix4 rotation( vec_float4 radians, const Vector3 & unitVec );
+
+    // Construct a 4x4 rotation matrix from a unit-length quaternion
+    // 
+    static inline const Matrix4 rotation( const Quat & unitQuat );
+
+    // Construct a 4x4 matrix to perform scaling
+    // 
+    static inline const Matrix4 scale( const Vector3 & scaleVec );
+
+    // Construct a 4x4 matrix to perform translation
+    // 
+    static inline const Matrix4 translation( const Vector3 & translateVec );
+
+    // Construct viewing matrix based on eye position, position looked at, and up direction
+    // 
+    static inline const Matrix4 lookAt( const Point3 & eyePos, const Point3 & lookAtPos, const Vector3 & upVec );
+
+    // Construct a perspective projection matrix
+    // The field-of-view parameter is named fovyRadians, i.e. it is given in radians.
+    static inline const Matrix4 perspective( vec_float4 fovyRadians, vec_float4 aspect, vec_float4 zNear, vec_float4 zFar );
+
+    // Construct a perspective projection matrix based on frustum
+    // 
+    static inline const Matrix4 frustum( vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar );
+
+    // Construct an orthographic projection matrix
+    // 
+    static inline const Matrix4 orthographic( vec_float4 left, vec_float4 right, vec_float4 bottom, vec_float4 top, vec_float4 zNear, vec_float4 zFar );
+
+};
+// Multiply a 4x4 matrix by a scalar
+// Non-member form with the scalar on the left-hand side.
+inline const Matrix4 operator *( vec_float4 scalar, const Matrix4 & mat );
+
+// Append (post-multiply) a scale transformation to a 4x4 matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// 
+inline const Matrix4 appendScale( const Matrix4 & mat, const Vector3 & scaleVec );
+
+// Prepend (pre-multiply) a scale transformation to a 4x4 matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// 
+inline const Matrix4 prependScale( const Vector3 & scaleVec, const Matrix4 & mat );
+
+// Multiply two 4x4 matrices per element
+// 
+inline const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 );
+
+// Compute the absolute value of a 4x4 matrix per element
+// 
+inline const Matrix4 absPerElem( const Matrix4 & mat );
+
+// Transpose of a 4x4 matrix
+// 
+inline const Matrix4 transpose( const Matrix4 & mat );
+
+// Compute the inverse of a 4x4 matrix
+// NOTE: 
+// Result is unpredictable when the determinant of mat is equal to or near 0.
+// 
+inline const Matrix4 inverse( const Matrix4 & mat );
+
+// Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix
+// NOTE: 
+// This can be faster than a general inverse when mat meets the restriction above.  The result is unpredictable when the determinant of mat is equal to or near 0.
+// 
+inline const Matrix4 affineInverse( const Matrix4 & mat );
+
+// Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix
+// NOTE: 
+// This can be faster than a general inverse when mat meets the restrictions above.
+// 
+inline const Matrix4 orthoInverse( const Matrix4 & mat );
+
+// Determinant of a 4x4 matrix
+// 
+inline vec_float4 determinant( const Matrix4 & mat );
+
+// Conditionally select between two 4x4 matrices
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// NOTE(review): select1 is presumably a per-slot mask that picks mat1 where set and mat0 elsewhere - confirm.
+inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, vec_uint4 select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 4x4 matrix
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Matrix4 & mat );
+
+// Print a 4x4 matrix and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Matrix4 & mat, const char * name );
+
+#endif
+
+// A set of four 3x4 transformation matrices in structure-of-arrays format
+// Stored as four Vector3 columns (mCol0, mCol1, mCol2, mCol3).
+class Transform3
+{
+    Vector3 mCol0;
+    Vector3 mCol1;
+    Vector3 mCol2;
+    Vector3 mCol3;
+
+public:
+    // Default constructor; does no initialization
+    // 
+    inline Transform3( ) { };
+
+    // Copy a 3x4 transformation matrix
+    // 
+    inline Transform3( const Transform3 & tfrm );
+
+    // Construct a 3x4 transformation matrix containing the specified columns
+    // 
+    inline Transform3( const Vector3 & col0, const Vector3 & col1, const Vector3 & col2, const Vector3 & col3 );
+
+    // Construct a 3x4 transformation matrix from a 3x3 matrix and a 3-D vector
+    // Here the parameter named tfrm is the 3x3 matrix (a Matrix3), not a Transform3.
+    inline Transform3( const Matrix3 & tfrm, const Vector3 & translateVec );
+
+    // Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector
+    // 
+    inline Transform3( const Quat & unitQuat, const Vector3 & translateVec );
+
+    // Set all elements of a 3x4 transformation matrix to the same scalar value
+    // Explicit: a bare vec_float4 is never implicitly converted to a Transform3.
+    explicit inline Transform3( vec_float4 scalar );
+
+    // Replicate an AoS 3x4 transformation matrix
+    // Not explicit, so an Aos::Transform3 converts implicitly to this SoA type.
+    inline Transform3( const Aos::Transform3 & tfrm );
+
+    // Insert four AoS 3x4 transformation matrices
+    // 
+    inline Transform3( const Aos::Transform3 & tfrm0, const Aos::Transform3 & tfrm1, const Aos::Transform3 & tfrm2, const Aos::Transform3 & tfrm3 );
+
+    // Extract four AoS 3x4 transformation matrices
+    // The results are returned through the four reference parameters.
+    inline void get4Aos( Aos::Transform3 & result0, Aos::Transform3 & result1, Aos::Transform3 & result2, Aos::Transform3 & result3 ) const;
+
+    // Assign one 3x4 transformation matrix to another
+    // 
+    inline Transform3 & operator =( const Transform3 & tfrm );
+
+    // Set the upper-left 3x3 submatrix
+    // 
+    inline Transform3 & setUpper3x3( const Matrix3 & mat3 );
+
+    // Get the upper-left 3x3 submatrix of a 3x4 transformation matrix
+    // 
+    inline const Matrix3 getUpper3x3( ) const;
+
+    // Set translation component
+    // 
+    inline Transform3 & setTranslation( const Vector3 & translateVec );
+
+    // Get the translation component of a 3x4 transformation matrix
+    // 
+    inline const Vector3 getTranslation( ) const;
+
+    // Set column 0 of a 3x4 transformation matrix
+    // 
+    inline Transform3 & setCol0( const Vector3 & col0 );
+
+    // Set column 1 of a 3x4 transformation matrix
+    // 
+    inline Transform3 & setCol1( const Vector3 & col1 );
+
+    // Set column 2 of a 3x4 transformation matrix
+    // 
+    inline Transform3 & setCol2( const Vector3 & col2 );
+
+    // Set column 3 of a 3x4 transformation matrix
+    // 
+    inline Transform3 & setCol3( const Vector3 & col3 );
+
+    // Get column 0 of a 3x4 transformation matrix
+    // 
+    inline const Vector3 getCol0( ) const;
+
+    // Get column 1 of a 3x4 transformation matrix
+    // 
+    inline const Vector3 getCol1( ) const;
+
+    // Get column 2 of a 3x4 transformation matrix
+    // 
+    inline const Vector3 getCol2( ) const;
+
+    // Get column 3 of a 3x4 transformation matrix
+    // 
+    inline const Vector3 getCol3( ) const;
+
+    // Set the column of a 3x4 transformation matrix referred to by the specified index
+    // 
+    inline Transform3 & setCol( int col, const Vector3 & vec );
+
+    // Set the row of a 3x4 transformation matrix referred to by the specified index
+    // A row spans all four columns, so it is passed as a Vector4.
+    inline Transform3 & setRow( int row, const Vector4 & vec );
+
+    // Get the column of a 3x4 transformation matrix referred to by the specified index
+    // 
+    inline const Vector3 getCol( int col ) const;
+
+    // Get the row of a 3x4 transformation matrix referred to by the specified index
+    // A row spans all four columns, so it is returned as a Vector4.
+    inline const Vector4 getRow( int row ) const;
+
+    // Subscripting operator to set or get a column
+    // Returns a reference to a Vector3, so the column can be assigned through it.
+    inline Vector3 & operator []( int col );
+
+    // Subscripting operator to get a column
+    // Returns the column by value.
+    inline const Vector3 operator []( int col ) const;
+
+    // Set the element of a 3x4 transformation matrix referred to by column and row indices
+    // Argument order is column index first, then row index.
+    inline Transform3 & setElem( int col, int row, vec_float4 val );
+
+    // Get the element of a 3x4 transformation matrix referred to by column and row indices
+    // Argument order is column index first, then row index.
+    inline vec_float4 getElem( int col, int row ) const;
+
+    // Multiply a 3x4 transformation matrix by a 3-D vector
+    // NOTE(review): presumably the translation column is not applied to a Vector3 - confirm in mat_soa.h.
+    inline const Vector3 operator *( const Vector3 & vec ) const;
+
+    // Multiply a 3x4 transformation matrix by a 3-D point
+    // NOTE(review): presumably the translation column is applied to a Point3 - confirm in mat_soa.h.
+    inline const Point3 operator *( const Point3 & pnt ) const;
+
+    // Multiply two 3x4 transformation matrices
+    // 
+    inline const Transform3 operator *( const Transform3 & tfrm ) const;
+
+    // Perform compound assignment and multiplication by a 3x4 transformation matrix
+    // 
+    inline Transform3 & operator *=( const Transform3 & tfrm );
+
+    // Construct an identity 3x4 transformation matrix
+    // 
+    static inline const Transform3 identity( );
+
+    // Construct a 3x4 transformation matrix to rotate around the x axis
+    // 
+    static inline const Transform3 rotationX( vec_float4 radians );
+
+    // Construct a 3x4 transformation matrix to rotate around the y axis
+    // 
+    static inline const Transform3 rotationY( vec_float4 radians );
+
+    // Construct a 3x4 transformation matrix to rotate around the z axis
+    // 
+    static inline const Transform3 rotationZ( vec_float4 radians );
+
+    // Construct a 3x4 transformation matrix to rotate around the x, y, and z axes
+    // NOTE(review): the name suggests the rotations are composed in Z * Y * X order - confirm in mat_soa.h.
+    static inline const Transform3 rotationZYX( const Vector3 & radiansXYZ );
+
+    // Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector
+    // 
+    static inline const Transform3 rotation( vec_float4 radians, const Vector3 & unitVec );
+
+    // Construct a 3x4 rotation transformation matrix from a unit-length quaternion
+    // 
+    static inline const Transform3 rotation( const Quat & unitQuat );
+
+    // Construct a 3x4 transformation matrix to perform scaling
+    // 
+    static inline const Transform3 scale( const Vector3 & scaleVec );
+
+    // Construct a 3x4 transformation matrix to perform translation
+    // 
+    static inline const Transform3 translation( const Vector3 & translateVec );
+
+};
+// Append (post-multiply) a scale transformation to a 3x4 transformation matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// 
+inline const Transform3 appendScale( const Transform3 & tfrm, const Vector3 & scaleVec );
+
+// Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// 
+inline const Transform3 prependScale( const Vector3 & scaleVec, const Transform3 & tfrm );
+
+// Multiply two 3x4 transformation matrices per element
+// 
+inline const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 );
+
+// Compute the absolute value of a 3x4 transformation matrix per element
+// 
+inline const Transform3 absPerElem( const Transform3 & tfrm );
+
+// Inverse of a 3x4 transformation matrix
+// NOTE: 
+// Result is unpredictable when the determinant of the left 3x3 submatrix is equal to or near 0.
+// 
+inline const Transform3 inverse( const Transform3 & tfrm );
+
+// Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix
+// NOTE: 
+// This can be faster than a general inverse when tfrm meets the restriction above.
+// 
+inline const Transform3 orthoInverse( const Transform3 & tfrm );
+
+// Conditionally select between two 3x4 transformation matrices
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// NOTE(review): select1 is presumably a per-slot mask that picks tfrm1 where set and tfrm0 elsewhere - confirm.
+inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, vec_uint4 select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 3x4 transformation matrix
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Transform3 & tfrm );
+
+// Print a 3x4 transformation matrix and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Transform3 & tfrm, const char * name );
+
+#endif
+
+} // namespace Soa
+} // namespace Vectormath
+
+#include "vec_soa.h"
+#include "quat_soa.h"
+#include "mat_soa.h"
+
+#endif
diff --git a/Extras/vectormathlibrary/tests/Makefile b/Extras/vectormathlibrary/tests/Makefile
new file mode 100644
index 000000000..be50341e7
--- /dev/null
+++ b/Extras/vectormathlibrary/tests/Makefile
@@ -0,0 +1,135 @@
+# Makefile for vector math library testsuite.
+#
+#   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms,
+#   with or without modification, are permitted provided that the
+#   following conditions are met:
+#    * Redistributions of source code must retain the above copyright
+#      notice, this list of conditions and the following disclaimer.
+#    * Redistributions in binary form must reproduce the above copyright
+#      notice, this list of conditions and the following disclaimer in the
+#      documentation and/or other materials provided with the distribution.
+#    * Neither the name of the Sony Computer Entertainment Inc nor the names
+#      of its contributors may be used to endorse or promote products derived
+#      from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+#   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+#   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+#   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+#   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+#   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+#   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+#   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+#   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+#   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+#   POSSIBILITY OF SUCH DAMAGE.
+
+topdir = ..
+
+# Target architecture; selects the TESTS_<arch> list and the ARCH_*FLAGS_<arch>
+# variables defined below.  One of: scalar (default), SSE, ppu, spu.
+# Override on the command line, e.g. "make ARCH=SSE check".
+ARCH = scalar
+
+# Every test program: tests 1-4, each in an aos and a soa variant, for both
+# the C and the C++ headers.
+TESTS_all = \
+	test1_aos_c.elf \
+	test1_soa_c.elf \
+	test2_aos_c.elf \
+	test2_soa_c.elf \
+	test3_aos_c.elf \
+	test3_soa_c.elf \
+	test4_aos_c.elf \
+	test4_soa_c.elf \
+	test1_aos_cpp.elf \
+	test1_soa_cpp.elf \
+	test2_aos_cpp.elf \
+	test2_soa_cpp.elf \
+	test3_aos_cpp.elf \
+	test3_soa_cpp.elf \
+	test4_aos_cpp.elf \
+	test4_soa_cpp.elf
+
+# ppu: all tests, compiled with AltiVec enabled and linked statically against
+# the ppu build of the SIMD math library.
+TESTS_ppu = $(TESTS_all)
+ARCH_CFLAGS_ppu = -maltivec -mabi=altivec -I$(SIMDMATH_DIR)/common
+ARCH_LDFLAGS_ppu = -L$(SIMDMATH_DIR)/ppu -lsimdmath -static
+
+# spu: all tests, linked against the spu build of the SIMD math library.
+TESTS_spu = $(TESTS_all)
+ARCH_CFLAGS_spu = -I$(SIMDMATH_DIR)/common
+ARCH_LDFLAGS_spu = -L$(SIMDMATH_DIR)/spu -lsimdmath
+
+# SSE: only the C++ aos tests.
+TESTS_SSE = $(filter %_aos_cpp.elf,$(TESTS_all))
+ARCH_CFLAGS_SSE = -msse
+
+# scalar: the aos tests for both C and C++ (kept in TESTS_all order).
+TESTS_scalar = $(filter %_aos_c.elf %_aos_cpp.elf,$(TESTS_all))
+
+# Resolve the settings for the selected ARCH.  A flag variable that is not
+# defined for an architecture simply expands to nothing.
+TESTS = $(TESTS_$(ARCH))
+ARCH_CFLAGS = $(ARCH_CFLAGS_$(ARCH))
+ARCH_LDFLAGS = $(ARCH_LDFLAGS_$(ARCH))
+
+SIMDMATH_DIR = $(topdir)/../simdmathlibrary
+# Per-test output files and comparison reports, e.g. test1_aos_c.scalar.out/.cmp.
+RESULTS = $(TESTS:.elf=.$(ARCH).out)
+DIFFS = $(RESULTS:.out=.cmp)
+# Toolchain.  CROSS is prepended to the tool names (e.g. a cross-compiler prefix).
+CROSS =
+CC = $(CROSS)gcc
+CXX = $(CROSS)g++
+LD = $(CC)
+LDXX = $(CXX)
+# -D_VECTORMATH_DEBUG: the library headers only define print() when it is set.
+CFLAGS = -O2 -W -Wall -D_VECTORMATH_DEBUG $(ARCH_CFLAGS)
+LDFLAGS = -lm $(ARCH_LDFLAGS)
+# Header search paths used by the C and by the C++ compile rule respectively.
+C_INCLUDES = -I$(topdir)/include/vectormath/c
+CXX_INCLUDES = -I$(topdir)/include/vectormath/cpp
+
+# all builds the test programs for $(ARCH); check also runs and compares them.
+.PHONY: all check clean distclean
+all: $(TESTS)
+check: $(DIFFS)
+clean:
+	-rm -f *.elf
+# distclean additionally removes test outputs, reports and editor backups.
+distclean: clean
+	-rm -f *.out *.cmp *~
+# Build a test program from its single C source file.
+%_c.elf: %_c.c
+	$(CC) $(CFLAGS) $(C_INCLUDES) -o $@ $< $(LDFLAGS)
+# Likewise for the C++ tests, using $(CXX) and the C++ header path.
+%_cpp.elf: %_cpp.cpp
+	$(CXX) $(CFLAGS) $(CXX_INCLUDES) -o $@ $< $(LDFLAGS)
+# Run a test program and save its standard output as <test>.$(ARCH).out.
+%.$(ARCH).out: %.elf
+	./$< > $@
+
+# Compare one test's output against its reference data.
+#
+# For a target such as test3_aos_cpp.scalar.cmp the recipe:
+#   1. runs clean.pl, which keeps only the text between the __begin__ and
+#      __end__ markers and, for soa prints, only the first slot;
+#   2. runs compare.pl on the cleaned output and testN_reference.txt,
+#      where testN is the part of the target name before the first "_";
+#   3. removes the temporary cleaned file.
+#
+# compare.pl's report is shown on the terminal and saved in the .cmp file.
+# NOTE: because the report is piped through tee, the exit status seen by
+# make is tee's, not compare.pl's, so a mismatch does not fail "make check".
+#
+# A single pattern rule serves test1 through test4.
+
+%.cmp: %.out
+	./clean.pl < $< > $<.tmp
+	./compare.pl $<.tmp $(firstword $(subst _, ,$*))_reference.txt | tee $@
+	rm $<.tmp
diff --git a/Extras/vectormathlibrary/tests/clean.pl b/Extras/vectormathlibrary/tests/clean.pl
new file mode 100644
index 000000000..ec425a2c4
--- /dev/null
+++ b/Extras/vectormathlibrary/tests/clean.pl
@@ -0,0 +1,109 @@
+#!/usr/bin/perl
+
+#
+#  Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+#  All rights reserved.
+#
+#  Redistribution and use in source and binary forms,
+#  with or without modification, are permitted provided that the
+#  following conditions are met:
+#   * Redistributions of source code must retain the above copyright
+#     notice, this list of conditions and the following disclaimer.
+#   * Redistributions in binary form must reproduce the above copyright
+#     notice, this list of conditions and the following disclaimer in the
+#     documentation and/or other materials provided with the distribution.
+#   * Neither the name of the Sony Computer Entertainment Inc nor the names
+#     of its contributors may be used to endorse or promote products derived
+#     from this software without specific prior written permission.
+#
+#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+#  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+#  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+#  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+#  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+#  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+#  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+#  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+#  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+#  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+#  POSSIBILITY OF SUCH DAMAGE.
+#
+
+$lineno = 0;
+
+# Return the next line from STDIN with any leading "lv2(...): " prefix
+# removed.  Lines that consist of nothing but such a prefix are skipped.
+# Returns undef at end of input.
+sub getLine
+{
+   my $text = <STDIN>;
+
+   # drop lines that hold only the prefix
+   $text = <STDIN> while ( $text =~ m/^lv2\([^\)]*\)\:$/ );
+
+   # strip the prefix from the line that is handed back
+   $text =~ s/^lv2\([^\)]*\)\: //;
+
+   return $text;
+}
+
+# Skip everything up to and including the __begin__ marker.  The defined()
+# test ends the loop at EOF, so input without a marker cannot hang the script.
+1 while( defined($line = <STDIN>) && $line !~ m/__begin__/ );
+
+$countSlotLines = 0;
+# process the input line by line until EOF or the __end__ marker
+while( $line = &getLine )
+{
+   $lineno++;
+   # stop at the __end__ marker without printing it
+   if ( $line =~ m/__end__/ )
+   {
+      exit;
+   }
+   
+   # if soa print, only save first slot: the lines of slots 1-3 are dropped
+
+   if ( $line =~ m/^slot ([1-3])/ )
+   {
+      while ( $line =~ m/^slot [1-3]/ )
+      {
+         # skip all lines for this slot ($slotLines was counted during slot 0)
+
+         for ( $i = 0; $i < $slotLines; $i++ )
+         {
+            $line = &getLine;
+         }
+
+         # get next line: another slot header, or the first line after the print
+
+         $line = &getLine;
+      }
+      # NOTE(review): the line fetched last is not re-checked for __end__
+      # stop counting slot lines
+
+      $countSlotLines = 0;
+   }
+   elsif ( $countSlotLines )
+   {
+      $slotLines++;
+   }
+   # "slot 0:" opens a soa print; its lines are printed and counted
+   if ( $line =~ m/^slot 0\:(.?)/ )
+   {
+      $countSlotLines = 1;
+      # $1 is the character after the colon: ' ' means data follows on this line
+      if ( $1 eq ' ' )
+      {
+         $line =~ s/^slot 0\: //;
+         $slotLines = 0;
+      }
+      else
+      {
+         $line = &getLine;
+         $slotLines = 1;
+      }
+   }
+   # print the line (for slot 0, with the "slot 0:" header removed or skipped)
+   print $line;
+}
diff --git a/Extras/vectormathlibrary/tests/compare.pl b/Extras/vectormathlibrary/tests/compare.pl
new file mode 100644
index 000000000..f9fe1ca50
--- /dev/null
+++ b/Extras/vectormathlibrary/tests/compare.pl
@@ -0,0 +1,95 @@
+#!/usr/bin/perl
+
+#
+#  Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+#  All rights reserved.
+#
+#  Redistribution and use in source and binary forms,
+#  with or without modification, are permitted provided that the
+#  following conditions are met:
+#   * Redistributions of source code must retain the above copyright
+#     notice, this list of conditions and the following disclaimer.
+#   * Redistributions in binary form must reproduce the above copyright
+#     notice, this list of conditions and the following disclaimer in the
+#     documentation and/or other materials provided with the distribution.
+#   * Neither the name of the Sony Computer Entertainment Inc nor the names
+#     of its contributors may be used to endorse or promote products derived
+#     from this software without specific prior written permission.
+#
+#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+#  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+#  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+#  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+#  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+#  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+#  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+#  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+#  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+#  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+#  POSSIBILITY OF SUCH DAMAGE.
+#
+
+# Usage: compare.pl <file1> <file2>
+# Exits with a non-zero status if either file cannot be opened.
+($file1, $file2) = @ARGV;
+if (!open(FILE1, "<", $file1))
+{
+    print "Couldn't open $file1\n";
+    exit 1;
+}
+
+if (!open(FILE2, "<", $file2))
+{
+    print "Couldn't open $file2\n";
+    exit 1;
+}
+
+print "Comparing $file1 $file2\n";
+
+$lineno1 = 0;
+$lineno2 = 0;
+
+# Compare the two files line by line and word by word.  Words that differ as
+# strings are compared as numbers: a mismatch is reported when the difference
+# exceeds 1e-4 of the first value, unless that value and the difference are
+# both below 1e-4 in magnitude.  Any mismatch makes the exit status non-zero.
+$mismatches = 0;
+while(($line1 = <FILE1>) && ($line2 = <FILE2>))
+{
+   $lineno1++;
+   $lineno2++;
+
+   # a line ending in ':' is skipped; the line after it is compared instead
+   if ( $line1 =~ m/\:$/ )
+   {
+      $line1 = <FILE1>;
+      $lineno1++;
+   }
+   if ( $line2 =~ m/\:$/ )
+   {
+      $line2 = <FILE2>;
+      $lineno2++;
+   }
+
+   # drop everything up to the last ": " so that only what follows is compared
+   $line1 =~ s/^.*\: //g;
+   $line2 =~ s/^.*\: //g;
+
+   @words1 = split(/ /,$line1);
+   @words2 = split(/ /,$line2);
+   for ($i = 0; $i < @words1; $i++)
+   {
+      ($word1 = $words1[$i]) =~ s/\s//g;
+      ($word2 = $words2[$i]) =~ s/\s//g;
+      next if ( $word1 eq $word2 );
+
+      $error = abs($word1 - $word2);
+      $limit = abs(1e-4 * $word1);
+      if ( $error > $limit && !( abs($word1) < 1e-4 && $error < 1e-4 ) )
+      {
+         print "$lineno1: $word1 $lineno2: $word2\n";
+         $mismatches++;
+      }
+   }
+}
+exit( $mismatches ? 1 : 0 );
diff --git a/Extras/vectormathlibrary/main_vmtest.cpp b/Extras/vectormathlibrary/tests/main_vmtest.cpp
similarity index 96%
rename from Extras/vectormathlibrary/main_vmtest.cpp
rename to Extras/vectormathlibrary/tests/main_vmtest.cpp
index 7aa66e5aa..2e8262bc5 100644
--- a/Extras/vectormathlibrary/main_vmtest.cpp
+++ b/Extras/vectormathlibrary/tests/main_vmtest.cpp
@@ -1,163 +1,163 @@
-
-///Testfile to test differences between vectormath and Bullet LinearMath
-
-#ifdef __PPU__
-#include "include/vectormath/ppu/cpp/vectormath_aos.h"
-#elif defined __SPU__
-#include "include/vectormath/spu/cpp/vectormath_aos.h"
-#else
-#include "include/vectormath/SSE/cpp/vectormath_aos.h"
-//#include "include/vectormath/scalar/cpp/vectormath_aos.h"
-#endif
-
-#include "../../src/LinearMath/btTransform.h"
-#include <stdio.h>
-
-//Bullet, a btVector can be used for both points and vectors. 
-//it is up to the user/developer to use the right multiplication: btTransform for points, and btQuaternion or btMatrix3x3 for vectors.
-void	BulletTest()
-{
-
-	printf("Bullet Linearmath\n");
-
-	btTransform	tr;
-	tr.setIdentity();
-
-	tr.setOrigin(btVector3(10,0,0));
-	//initialization
-	btVector3	pointA(0,0,0);
-	btVector3	pointB,pointC,pointD,pointE;
-	//assignment
-	pointB = pointA;
-	//in-place initialization
-	pointB.setValue(1,2,3);
-	//transform over tr	
-	pointB = tr * pointA;
-	printf("pointB = tr * pointA = (%f,%f,%f)\n",pointB.getX(),pointB.getY(),pointB.getZ());
-	//transform over tr	
-	pointE = tr(pointA);
-	//inverse transform
-	pointC = tr.inverse() * pointA;
-	printf("pointC = tr.inverse() * pointA = (%f,%f,%f)\n",pointC.getX(),pointC.getY(),pointC.getZ());
-	//inverse transform
-	pointD = tr.invXform( pointA );
-	btScalar	x;
-	//dot product
-	x = pointD.dot(pointE);
-	//square length
-	x = pointD.length2();
-	//length
-	x = pointD.length();
-
-	const btVector3& constPointD = pointD;
-
-	//get a normalized vector from constPointD, without changing constPointD
-	btVector3 norm = constPointD.normalized();
-
-	//in-place normalize pointD
-	pointD.normalize();
-
-	//quaternions & matrices
-	btQuaternion	quat(0,0,0,1);
-	btQuaternion	quat1(btVector3(0,1,0),90.f * SIMD_RADS_PER_DEG);
-	btMatrix3x3		mat0(quat1);
-	btMatrix3x3		mat1 = mat0.inverse();
-	btMatrix3x3		mat2  = mat0.transpose();
-	btTransform tr1(mat2,btVector3(0,10,0));
-	btTransform tr2 =tr1.inverse();
-	btVector3	pt0(1,1,1);
-	btVector3	pt1 = tr2 * pt0;
-
-	printf("btVector3	pt1 = tr2 * pt0 =  (%f,%f,%f)\n",pt1.getX(),pt1.getY(),pt1.getZ());
-
-	
-	btVector3	pt2 = tr2.getBasis() * pt0;
-	btVector3	pt3 = pt0 * tr2.getBasis();
-	btVector3	pt4 =  tr2.getBasis().inverse() * pt0;
-	btTransform tr3 =  tr2.inverseTimes(tr2);
-
-
-
-}
-
-//vectormath makes a difference between point and vector.
-void	VectormathTest()
-{
-
-	printf("Vectormath\n");
-
-	Vectormath::Aos::Transform3 tr;
-	tr = Vectormath::Aos::Transform3::identity();
-
-	tr.setTranslation(Vectormath::Aos::Vector3(10,0,0));
-	//initialization
-	Vectormath::Aos::Point3	pointA(0,0,0);
-	Vectormath::Aos::Point3	pointB,pointC,pointE;
-	Vectormath::Aos::Vector3 pointD;
-	//assignment
-	pointB = pointA;
-	//in-place initialization
-	pointB = Vectormath::Aos::Point3(1,2,3); //or
-	pointB.setElem(0,1); //or
-	pointB.setX(1);
-
-	//transform over tr	
-	pointB = tr * pointA;
-
-	printf("pointB = tr * pointA = (%f,%f,%f)\n",(float)pointB.getX(),(float)pointB.getY(),(float)pointB.getZ());
-	//transform over tr	
-	//pointE = tr(pointA);
-	//inverse transform
-	pointC = Vectormath::Aos::inverse(tr) * pointA;
-	printf("Vectormath::Aos::inverse(tr) * pointA = (%f,%f,%f)\n",(float)pointC.getX(),(float)pointC.getY(),(float)pointC.getZ());
-	
-	
-	
-	btScalar	x;
-	//dot product
-	x = Vectormath::Aos::dot(Vectormath::Aos::Vector3(pointD),Vectormath::Aos::Vector3(pointE));
-	//square length
-	x = Vectormath::Aos::lengthSqr(Vectormath::Aos::Vector3(pointD));
-	//length
-	x = Vectormath::Aos::length(Vectormath::Aos::Vector3(pointD));
-
-	const Vectormath::Aos::Vector3& constPointD = (Vectormath::Aos::Vector3&)pointD;
-
-	//get a normalized vector from constPointD, without changing constPointD
-	Vectormath::Aos::Vector3 norm = Vectormath::Aos::normalize(constPointD);
-
-	//in-place normalize pointD
-	pointD = Vectormath::Aos::normalize(Vectormath::Aos::Vector3(pointD));
-
-	//quaternions & matrices
-	Vectormath::Aos::Quat quat(0,0,0,1);
-	Vectormath::Aos::Quat quat1;
-	quat1 = Vectormath::Aos::Quat::rotationY(90.f * SIMD_RADS_PER_DEG);
-	
-	Vectormath::Aos::Matrix3	mat0(quat1);
-	
-	Vectormath::Aos::Matrix3	mat1 = Vectormath::Aos::inverse(mat0);
-	Vectormath::Aos::Matrix3	mat2  = Vectormath::Aos::transpose(mat0);
-	Vectormath::Aos::Transform3 tr1(mat2,Vectormath::Aos::Vector3(0,10,0));
-	Vectormath::Aos::Transform3	tr2 = Vectormath::Aos::inverse(tr1);
-	Vectormath::Aos::Point3	pt0(1,1,1);
-	Vectormath::Aos::Point3	pt1 = tr2 * pt0;
-	printf("Vectormath::Aos::Vector3	pt1 = tr2 * pt0; =  (%f,%f,%f)\n",(float)pt1.getX(),(float)pt1.getY(),(float)pt1.getZ());
-
-	Vectormath::Aos::Vector3	pt2 = tr2.getUpper3x3() * Vectormath::Aos::Vector3(pt0);
-	//Vectormath::Aos::Vector3	pt3 = pt0 * tr2.getUpper3x3();
-	Vectormath::Aos::Vector3	pt3 = Vectormath::Aos::inverse(tr2.getUpper3x3()) * Vectormath::Aos::Vector3(pt0);
-	Vectormath::Aos::Vector3	pt4 =  Vectormath::Aos::inverse(tr2.getUpper3x3()) * Vectormath::Aos::Vector3(pt0);
-	Vectormath::Aos::Transform3		tr3 =  Vectormath::Aos::inverse(tr2) * tr2;
-
-}
-
-int main()
-{
-
-	BulletTest();
-
-	VectormathTest();
-	
-	return 0;
-}
+
+///Testfile to test differences between vectormath and Bullet LinearMath
+
+#ifdef __PPU__
+#include "include/vectormath/ppu/cpp/vectormath_aos.h"
+#elif defined __SPU__
+#include "include/vectormath/spu/cpp/vectormath_aos.h"
+#else
+#include "include/vectormath/SSE/cpp/vectormath_aos.h"
+//#include "include/vectormath/scalar/cpp/vectormath_aos.h"
+#endif
+
+#include "../../src/LinearMath/btTransform.h"
+#include <stdio.h>
+
+//Bullet LinearMath walkthrough: a btVector3 can be used for both points and vectors.
+//it is up to the user/developer to use the right multiplication: btTransform for points, and btQuaternion or btMatrix3x3 for vectors.
+void	BulletTest()
+{
+	//the statements below are mirrored, in the same order, by VectormathTest()
+	printf("Bullet Linearmath\n");
+
+	btTransform	tr;
+	tr.setIdentity();
+	//identity transform with its origin set to (10,0,0)
+	tr.setOrigin(btVector3(10,0,0));
+	//initialization
+	btVector3	pointA(0,0,0);
+	btVector3	pointB,pointC,pointD,pointE;
+	//assignment
+	pointB = pointA;
+	//in-place initialization
+	pointB.setValue(1,2,3);
+	//transform over tr, using operator*
+	pointB = tr * pointA;
+	printf("pointB = tr * pointA = (%f,%f,%f)\n",pointB.getX(),pointB.getY(),pointB.getZ());
+	//transform over tr, using operator() instead of operator*
+	pointE = tr(pointA);
+	//inverse transform, through an explicit inverse()
+	pointC = tr.inverse() * pointA;
+	printf("pointC = tr.inverse() * pointA = (%f,%f,%f)\n",pointC.getX(),pointC.getY(),pointC.getZ());
+	//inverse transform, through invXform()
+	pointD = tr.invXform( pointA );
+	btScalar	x;
+	//dot product (x is overwritten below and never read; the calls only show the API)
+	x = pointD.dot(pointE);
+	//square length
+	x = pointD.length2();
+	//length
+	x = pointD.length();
+
+	const btVector3& constPointD = pointD;
+
+	//get a normalized vector from constPointD, without changing constPointD
+	btVector3 norm = constPointD.normalized();
+
+	//in-place normalize pointD
+	pointD.normalize();
+
+	//quaternions & matrices
+	btQuaternion	quat(0,0,0,1);
+	btQuaternion	quat1(btVector3(0,1,0),90.f * SIMD_RADS_PER_DEG);
+	btMatrix3x3		mat0(quat1);
+	btMatrix3x3		mat1 = mat0.inverse();
+	btMatrix3x3		mat2  = mat0.transpose();
+	btTransform tr1(mat2,btVector3(0,10,0));
+	btTransform tr2 =tr1.inverse();
+	btVector3	pt0(1,1,1);
+	btVector3	pt1 = tr2 * pt0;
+
+	printf("btVector3	pt1 = tr2 * pt0 =  (%f,%f,%f)\n",pt1.getX(),pt1.getY(),pt1.getZ());
+
+	//products with tr2.getBasis() only; these results are not printed
+	btVector3	pt2 = tr2.getBasis() * pt0;
+	btVector3	pt3 = pt0 * tr2.getBasis();
+	btVector3	pt4 =  tr2.getBasis().inverse() * pt0;
+	btTransform tr3 =  tr2.inverseTimes(tr2);
+
+
+
+}
+
+//Vectormath counterpart of BulletTest(): vectormath makes a difference between point (Point3) and vector (Vector3).
+void	VectormathTest()
+{
+	//the statements below follow BulletTest() in the same order
+	printf("Vectormath\n");
+
+	Vectormath::Aos::Transform3 tr;
+	tr = Vectormath::Aos::Transform3::identity();
+
+	tr.setTranslation(Vectormath::Aos::Vector3(10,0,0));
+	//initialization
+	Vectormath::Aos::Point3	pointA(0,0,0);
+	Vectormath::Aos::Point3	pointB,pointC,pointE;
+	Vectormath::Aos::Vector3 pointD;
+	//assignment
+	pointB = pointA;
+	//in-place initialization
+	pointB = Vectormath::Aos::Point3(1,2,3); //or
+	pointB.setElem(0,1); //or
+	pointB.setX(1);
+
+	//transform over tr	
+	pointB = tr * pointA;
+
+	printf("pointB = tr * pointA = (%f,%f,%f)\n",(float)pointB.getX(),(float)pointB.getY(),(float)pointB.getZ());
+	//transform over tr; assigns pointE so that the dot product below reads a defined value
+	pointE = tr * pointA;
+	//inverse transform
+	pointC = Vectormath::Aos::inverse(tr) * pointA;
+	printf("Vectormath::Aos::inverse(tr) * pointA = (%f,%f,%f)\n",(float)pointC.getX(),(float)pointC.getY(),(float)pointC.getZ());
+	//inverse transform kept as a vector (BulletTest: pointD = tr.invXform( pointA )); assigns pointD before it is read
+	pointD = Vectormath::Aos::Vector3(Vectormath::Aos::inverse(tr) * pointA);
+	
+	btScalar	x;
+	//dot product (a Point3 has to be converted to a Vector3 explicitly)
+	x = Vectormath::Aos::dot(Vectormath::Aos::Vector3(pointD),Vectormath::Aos::Vector3(pointE));
+	//square length
+	x = Vectormath::Aos::lengthSqr(Vectormath::Aos::Vector3(pointD));
+	//length
+	x = Vectormath::Aos::length(Vectormath::Aos::Vector3(pointD));
+
+	const Vectormath::Aos::Vector3& constPointD = pointD;
+
+	//get a normalized vector from constPointD, without changing constPointD
+	Vectormath::Aos::Vector3 norm = Vectormath::Aos::normalize(constPointD);
+
+	//in-place normalize pointD
+	pointD = Vectormath::Aos::normalize(Vectormath::Aos::Vector3(pointD));
+
+	//quaternions & matrices
+	Vectormath::Aos::Quat quat(0,0,0,1);
+	Vectormath::Aos::Quat quat1;
+	quat1 = Vectormath::Aos::Quat::rotationY(90.f * SIMD_RADS_PER_DEG);
+	
+	Vectormath::Aos::Matrix3	mat0(quat1);
+	
+	Vectormath::Aos::Matrix3	mat1 = Vectormath::Aos::inverse(mat0);
+	Vectormath::Aos::Matrix3	mat2  = Vectormath::Aos::transpose(mat0);
+	Vectormath::Aos::Transform3 tr1(mat2,Vectormath::Aos::Vector3(0,10,0));
+	Vectormath::Aos::Transform3	tr2 = Vectormath::Aos::inverse(tr1);
+	Vectormath::Aos::Point3	pt0(1,1,1);
+	Vectormath::Aos::Point3	pt1 = tr2 * pt0;
+	printf("Vectormath::Aos::Vector3	pt1 = tr2 * pt0; =  (%f,%f,%f)\n",(float)pt1.getX(),(float)pt1.getY(),(float)pt1.getZ());
+
+	Vectormath::Aos::Vector3	pt2 = tr2.getUpper3x3() * Vectormath::Aos::Vector3(pt0);
+	//Vectormath::Aos::Vector3	pt3 = pt0 * tr2.getUpper3x3();
+	Vectormath::Aos::Vector3	pt3 = Vectormath::Aos::inverse(tr2.getUpper3x3()) * Vectormath::Aos::Vector3(pt0);
+	Vectormath::Aos::Vector3	pt4 =  Vectormath::Aos::inverse(tr2.getUpper3x3()) * Vectormath::Aos::Vector3(pt0);
+	Vectormath::Aos::Transform3		tr3 =  Vectormath::Aos::inverse(tr2) * tr2;
+
+}
+
+int main()
+{
+	//run the Bullet walkthrough first, then its Vectormath counterpart
+	BulletTest();
+
+	VectormathTest();
+	//the two functions only print their results; always report success
+	return 0;
+}
diff --git a/Extras/vectormathlibrary/tests/test.h b/Extras/vectormathlibrary/tests/test.h
new file mode 100644
index 000000000..bc231ea68
--- /dev/null
+++ b/Extras/vectormathlibrary/tests/test.h
@@ -0,0 +1,346 @@
+/*
+  Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+  All rights reserved.
+
+  Redistribution and use in source and binary forms,
+  with or without modification, are permitted provided that the
+  following conditions are met:
+   * Redistributions of source code must retain the above copyright
+     notice, this list of conditions and the following disclaimer.
+   * Redistributions in binary form must reproduce the above copyright
+     notice, this list of conditions and the following disclaimer in the
+     documentation and/or other materials provided with the distribution.
+   * Neither the name of the Sony Computer Entertainment Inc nor the names
+     of its contributors may be used to endorse or promote products derived
+     from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+  POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_TEST_H
+#define _VECTORMATH_TEST_H
+
+#if defined(__SPU__) /* SPU build: the vec_*_float helpers map onto spu_* intrinsics */
+#  define vec_splats_float(v) spu_splats(v)
+#  define vec_mul_float(a, b) spu_mul(a, b)
+#  define vec_add_float(a, b) spu_add(a, b)
+#  define vec_sub_float(a, b) spu_sub(a, b)
+#elif defined(__ALTIVEC__) /* AltiVec build: the same helpers map onto vec_* intrinsics */
+#  define vec_splats_float(v) ((vec_float4){(float)(v),(float)(v),(float)(v),(float)(v)}) /* note: v is expanded four times */
+#  define vec_mul_float(a, b) vec_madd(a, b, vec_splats_float(0.0f)) /* multiply-add with a zero addend */
+#  define vec_add_float(a, b) vec_add(a, b)
+#  define vec_sub_float(a, b) vec_sub(a, b)
+#elif defined(__SSE__) /* SSE builds are rejected at compile time */
+#  error "Not implemented."
+#else /* none of the above: mark the build as scalar (scalar_float() below tests this macro) */
+#  define _VECTORMATH_SCALAR_TEST
+#endif /* SIMD back-end selection */
+
+float randfloats[1024] = {
+  -0.658343927787421f,0.499803960969928f,-0.807256688752865f,0.740930454054151f,
+  0.154607013590216f,0.571598517679348f,0.38438830691728f,-0.262467460159158f,
+  0.747808153723618f,0.49019019690013f,-0.107908181813777f,-0.292543593813249f,
+  0.465039264005078f,-0.47955599783424f,-0.211412450152245f,0.553579902451233f,
+  0.690070275160572f,0.151576400965553f,0.431077190337326f,-0.833991507852247f,
+  -0.0883497426804851f,-0.780106371638709f,0.0904560476331469f,-0.218626858702649f,
+  0.137170846428894f,0.918132898976751f,0.735438192918274f,-0.673620979495283f,
+  -0.448982146263369f,-0.479277810255866f,0.848189483738331f,-0.12815472579463f,
+  0.578921731104181f,-0.744765966376519f,-0.83558862791913f,0.881284032895692f,
+  -0.948850147493964f,-0.691578137344351f,-0.235635149443688f,-0.690526600666615f,
+  0.0586668362339609f,0.753696982166502f,-0.138777123323202f,-0.472187547315841f,
+  -0.372811001953572f,0.540183371709446f,-0.78521823389179f,0.542084510391291f,
+  0.410391117788897f,-0.562720682584484f,0.523587985711991f,-0.176573908842087f,
+  0.297653645858226f,0.859912509814734f,0.00483713119734119f,0.374881358857735f,
+  -0.127818103705344f,0.21660181589948f,0.153117267270737f,0.265243421428139f,
+  -0.0731487047425148f,0.264487579220173f,-0.723410134701346f,0.921522835850382f,
+  -0.711249997824318f,-0.106633857864438f,-0.350831080309241f,0.905168155184938f,
+  -0.283632179037646f,-0.203583555015513f,-0.797436915535236f,0.910171471305759f,
+  0.96923389534215f,0.151940162902974f,0.731827470770519f,-0.700248217534451f,
+  0.818300860563319f,0.302505017280083f,-0.872278290470156f,0.909998773124912f,
+  0.932525528551317f,0.571086770427939f,0.610329635790002f,0.142507359591505f,
+  -0.43482856009151f,0.925102103935927f,0.158954117892613f,-0.126282746058862f,
+  -0.249127650925452f,0.846815218386041f,-0.942601239873774f,0.537719955431001f,
+  0.446213543435171f,0.181938699625931f,-0.148222922840326f,0.284286118166037f,
+  0.493525458201255f,-0.861962900371793f,-0.893410102755276f,0.548627291142694f,
+  0.407006961478977f,-0.757467096890906f,-0.393126176069536f,-0.850984293029867f,
+  0.375719573110992f,-0.270087780463427f,0.45888819784826f,-0.610827766442796f,
+  -0.690815628408266f,-0.676415221072347f,0.664465776005038f,0.101873923854313f,
+  -0.365713939355025f,0.0554727439164822f,-0.133556089035984f,-0.572643072647438f,
+  0.459209235899415f,-0.997261395030506f,0.172408991295974f,-0.0451240115316551f,
+  0.879715937508259f,0.524317125330867f,-0.744532077249495f,-0.970443523820151f,
+  -1.32784686215359e-05f,0.689543072301063f,0.704297111725616f,-0.817983008667298f,
+  0.71550535643383f,0.577868436783262f,0.156952383805468f,-0.80102179210472f,
+  0.65633547615213f,0.494393128677046f,0.816743155389922f,0.0242848471199721f,
+  0.769131722839866f,0.923894866655701f,0.133021600755988f,-0.0522193159450097f,
+  -0.164886284373118f,0.300690282133047f,0.760403145270168f,0.171869369152077f,
+  -0.554975788488598f,0.998693253807744f,-0.681641007087912f,0.391194738828759f,
+  0.403058705852153f,0.972411306067528f,0.297195187773745f,0.309761312769567f,
+  0.688408077837671f,0.363539666742895f,0.94029653976505f,-0.336683042511069f,
+  0.600164345782652f,-0.681271587780742f,0.726558239385319f,0.205513360229233f,
+  -0.16008221686365f,0.962714155748642f,0.737793770111715f,-0.0719258703692134f,
+  -0.506312816314299f,0.689277082946518f,0.686485424709581f,0.473013144786293f,
+  -0.735610421404026f,-0.0463900680836105f,0.568673968586005f,-0.00481466271939723f,
+  0.137636823654454f,-0.111879120785687f,-0.929542605813147f,-0.336303463382606f,
+  -0.146740182632236f,0.165140351947514f,-0.823874099621072f,0.349776463047661f,
+  0.174872304411146f,-0.528584334304156f,0.489291834762803f,0.916707538240686f,
+  0.728510889338118f,-0.851139787348188f,0.0796199799283031f,-0.234369971523549f,
+  -0.996308342568362f,0.433228819394486f,-0.892684128206504f,-0.95791073791078f,
+  0.517122298113016f,0.257920984044574f,0.862028434359466f,0.0958813977553206f,
+  -0.171932523577503f,-0.214077886807473f,-0.604841274532937f,-0.38383141210609f,
+  -0.58149997525441f,0.222182734948547f,-0.2561203625418f,-0.678699493062183f,
+  -0.0795530448181339f,0.605960090732907f,-0.633147389976791f,0.435875222532317f,
+  -0.0466270522618899f,-0.71649136999298f,0.267317414957141f,-0.514873596167021f,
+  -0.751699524124625f,0.742958874040227f,-0.793179510948171f,0.508814009999512f,
+  -0.238839286150373f,0.113471002014307f,-0.843523253083085f,-0.245249991279181f,
+  0.250368454758338f,0.579243470287253f,-0.157280074848025f,0.648487464157242f,
+  0.103833079240538f,0.456401128469849f,-0.0223720820167514f,-0.475631368950744f,
+  -0.0041782226245104f,-0.0208652308868125f,-0.0169971127675765f,0.699143621917685f,
+  0.83779636548531f,-0.276082033737794f,0.0915817913013015f,0.209063902268738f,
+  0.219316780269516f,-0.118359453100055f,0.413442003735092f,-0.567697560481989f,
+  0.531358299984625f,-0.387225776610819f,0.572489506868486f,-0.820417090039186f,
+  0.797191361229615f,0.867177919412683f,0.934764375062564f,0.237092079542023f,
+  -0.866161864691826f,-0.773938728379676f,0.261310530107004f,-0.851569556583101f,
+  0.114814425111355f,-0.531592190789155f,0.223924683490957f,0.869104561345111f,
+  0.143404566999386f,0.148517529008075f,-0.0711363985626363f,-0.758291614176514f,
+  -0.527633502740933f,0.99721511923596f,0.114439963849399f,0.72755837253878f,
+  -0.425760405626697f,0.459888066153781f,0.642515762634126f,-0.0225335867899474f,
+  0.186094961562027f,-0.775678571017551f,-0.683400976752644f,0.398133764191002f,
+  0.189642093838877f,0.765986315134853f,-0.137794739254879f,-0.579843714684088f,
+  -0.63564699482432f,0.374970154657689f,-0.563749876868947f,-0.471075422601373f,
+  -0.553799500723066f,-0.0146881150006948f,-0.464365244644604f,-0.10788986146774f,
+  -0.527503023060852f,-0.406422760262416f,0.301261161950258f,0.499528573888128f,
+  0.385179609005043f,-0.150218387266079f,0.519111879977139f,-0.203208683924331f,
+  -0.252017508479717f,0.282193567323901f,0.0676372217166588f,0.798376368300907f,
+  0.310782163860559f,0.861334103612307f,-0.980345166653173f,-0.655105604450725f,
+  0.286765236319503f,0.532078410709602f,0.352670966735097f,0.540977184819425f,
+  0.510961465933072f,0.791871139829084f,-0.564378698589145f,0.273199199047909f,
+  0.194378063163676f,0.244636363558442f,-0.2696079922111f,-0.858162214209599f,
+  -0.495023067522474f,-0.277797538673553f,-0.0327403642191157f,0.00741169596342672f,
+  -0.420178428178723f,-0.522576683894926f,0.324971970060567f,0.795389045001329f,
+  0.342900104539247f,-0.913636452280628f,0.675221839440177f,0.144052833646484f,
+  -0.632328982629893f,-0.947119831218089f,-0.0493673719309484f,0.126332763266575f,
+  -0.66420574517786f,0.220879155225703f,0.284218535482147f,-0.387215543918998f,
+  0.913567998448777f,0.531906099678991f,0.271995095904906f,-0.862600551020719f,
+  -0.738693635668703f,0.514248487507359f,-0.0393632803376036f,0.429389595727585f,
+  -0.769468991576751f,0.28133632724311f,-0.203301313955485f,0.412585911285348f,
+  0.567925862321268f,0.410131004328946f,-0.462918277454527f,0.560952548692129f,
+  -0.731715443500342f,-0.446157565377547f,-0.837491324975311f,-0.573480361464263f,
+  -0.607819850918752f,0.23841499693998f,0.213445432027605f,0.0986122683758737f,
+  0.135072190814675f,-0.749273552937012f,-0.855977160741141f,0.765675059673342f,
+  -0.693447453911567f,0.131554184087008f,-0.366756547983336f,-0.330409262236842f,
+  -0.588815619465343f,0.352532978762866f,-0.920522750723883f,-0.915255088789323f,
+  0.631923943060777f,-0.870739292438145f,0.415604498050605f,-0.180973894496887f,
+  0.775697838994837f,0.879356890591083f,-0.993957564335638f,-0.298451942545356f,
+  0.876855036440425f,0.982846031192253f,-0.282564044364371f,0.95346849594155f,
+  -0.947311505090191f,-0.317822974923359f,0.692391664998802f,-0.123566763365389f,
+  0.407041678839057f,0.141204372181107f,-0.793069847914673f,-0.638275471740698f,
+  -0.51955405631346f,0.368499710074474f,0.970213689037045f,0.0272163305900648f,
+  0.801791483976395f,-0.0663635812993562f,-0.0512834823579524f,-0.184935295126749f,
+  0.516982835871183f,-0.769951152485618f,-0.708267044537941f,0.398266880674086f,
+  -0.479614543356881f,0.0604142126476859f,-0.867394563338259f,-0.702364045743693f,
+  -0.18260171523908f,-0.83280747136078f,0.278190893454216f,-0.967562302471109f,
+  -0.52029595778054f,0.160191201466318f,-0.677989785356466f,-0.470750261667419f,
+  -0.846579999787899f,-0.705750757861409f,-0.825367487626302f,-0.712890462610304f,
+  -0.0644873847193068f,0.444064587944183f,-0.0452257881147275f,0.116544259603998f,
+  -0.00728451932408092f,-0.838230133415692f,-0.410766823165496f,-0.40929905742685f,
+  -0.336682948179195f,-0.830699768111344f,-0.801729180453698f,-0.595152571835385f,
+  -0.784671779507129f,-0.653655236426147f,0.670791046451306f,0.653571468806589f,
+  0.850715654625567f,0.0714334825891569f,-0.0577154211012854f,0.401895373758876f,
+  0.0168605144772656f,0.5354384129881f,0.965901293540405f,-0.0726753529456374f,
+  0.341689326326282f,0.781662445971847f,0.707322369548329f,0.505889413040066f,
+  0.43279006682301f,-0.82579284323046f,0.597718837056043f,-0.250904847502525f,
+  -0.085712490334565f,-0.27284668464705f,0.552644217406645f,0.19420250718521f,
+  -0.884426763638054f,-0.756791257372726f,0.121106956043313f,0.312482778322803f,
+  -0.873316960494485f,0.0450805196969668f,0.687443898195021f,-0.684625629977333f,
+  -0.994046624794031f,0.800365484075002f,-0.565640229111075f,-0.625626063021869f,
+  0.932584676112249f,0.636927568286012f,0.420250137262336f,0.175862511828669f,
+  0.362158342804186f,0.547495978208275f,-0.534422794348934f,-0.563030463450744f,
+  -0.254488285756501f,0.647777595706543f,-0.674240168502628f,0.224344628949055f,
+  -0.754563097553635f,-0.12881655237274f,-0.997838330250381f,0.0369162049154141f,
+  -0.077549312424928f,0.376025736059717f,-0.935044655882827f,-0.189804125597547f,
+  0.00764933224392195f,-0.440298102088747f,0.994796322297979f,-0.271484040660724f,
+  -0.2590701870053f,0.902148580301628f,-0.836501451887365f,0.229092669621153f,
+  0.5863932076197f,-0.287825592948792f,0.942655407357726f,-0.634432455852235f,
+  -0.140437555704573f,0.570869095619685f,-0.764965080867434f,0.0675228424859284f,
+  -0.514589062662218f,0.233090988246516f,0.554487773397391f,-0.633529215840056f,
+  -0.0193735702003366f,0.869258510751365f,-0.369818396412519f,-0.280689998188947f,
+  -0.797208739895403f,-0.255233407527697f,0.780605315236578f,0.789803426518219f,
+  0.974261893445153f,-0.785980203848396f,-0.701385987245636f,0.871088183435759f,
+  0.566742533772562f,0.75227294222347f,-0.476301298243307f,-0.747341931420877f,
+  0.0773855838641069f,-0.305599507084935f,-0.229193881336336f,-0.260009071921601f,
+  -0.0515187258224827f,0.459831012841761f,0.861793377659517f,-0.00839510548009059f,
+  -0.0535644390132504f,0.912885769359548f,-0.402379747450233f,-0.10435292389046f,
+  0.644045025641198f,-0.235395897167933f,0.454549452554879f,-0.303775931682779f,
+  0.321575614700528f,0.724025709176772f,0.330082831646187f,0.609903689400269f,
+  0.851604270169993f,-0.356715440425745f,-0.0455002843413084f,0.13772975575408f,
+  -0.148995564903764f,0.892453960056798f,-0.821676934893254f,-0.14847536459807f,
+  -0.381886292618752f,0.398820351836747f,-0.292289360617424f,0.0702858731114446f,
+  -0.772469821167697f,0.474357996692333f,0.857792314833965f,-0.720558506386439f,
+  0.888908862188352f,-0.950919194501893f,-0.196258139903655f,-0.59980821479941f,
+  -0.794413298851502f,-0.927635622930836f,-0.18747758800405f,-0.113038430406718f,
+  0.366119602275866f,-0.483786056117111f,0.622670249344587f,-0.801805699607442f,
+  0.295383348060291f,0.0220054959489744f,-0.000678144987013241f,-0.68933407333013f,
+  0.266537772395942f,-0.785099000882013f,-0.572664686201399f,0.772760435300924f,
+  0.97189582826514f,-0.961362535549199f,0.723449439795125f,-0.758441529489218f,
+  -0.733158222482722f,0.13966678591575f,-0.73814671277507f,0.727223546774141f,
+  0.0899213092919311f,-0.113118130362636f,-0.364554501988017f,-0.137644753297124f,
+  0.933846571857863f,0.615857754190102f,0.442837902751755f,0.691293553853697f,
+  0.862436507424839f,-0.769632706987466f,-0.069558455823973f,-0.216368763497925f,
+  -0.240286864167238f,-0.494147338088084f,-0.732343541285623f,0.247500097354347f,
+  0.238013165932848f,-0.0222504081638135f,0.0354097573205863f,0.562870060908224f,
+  -0.443182852051812f,-0.148476065390099f,-0.785559583638936f,-0.534284390277335f,
+  0.804060941136193f,0.161693072162677f,0.626454348053834f,0.564290128221401f,
+  0.414242183673117f,-0.314079097076757f,0.871460392700492f,-0.586016607169839f,
+  0.446183340840953f,0.670697807509214f,-0.31261251323f,-0.34635111476409f,
+  -0.32237160168031f,0.0615992346122596f,0.108866036772035f,-0.96854607364277f,
+  -0.0900520153269255f,-0.000638517939648864f,-0.497526906200939f,0.0210549572282304f,
+  -0.513626724036349f,0.219974002433304f,-0.406318097379831f,0.829310754424156f,
+  0.139888080843043f,-0.409132737685127f,-0.0296375827238435f,-0.713853913104842f,
+  0.286946651436075f,-0.734473045715816f,0.649341057254212f,0.0281454231254514f,
+  -0.188432361127518f,-0.683807673918693f,0.432885949511977f,-0.0361810926016091f,
+  0.438347837417645f,0.710651677026334f,0.673928786874676f,-0.0546809333762468f,
+  0.826058562813905f,-0.832265197271681f,-0.668493142354613f,0.35502470967181f,
+  0.5354983136667f,0.82137347039f,-0.143300610352533f,0.713426364459551f,
+  -0.297806605505109f,0.340760003596245f,-0.564118270640485f,-0.0240265108522948f,
+  -0.60137093958766f,-0.178866127631323f,-0.229680331692812f,0.741055717732976f,
+  -0.219526898682084f,0.534269946476002f,0.133115059442581f,-0.249583317941799f,
+  0.811487242943805f,0.699282902509054f,-0.43331663912921f,0.71961761681392f,
+  0.943524928147106f,0.523603303061634f,-0.677065472025312f,-0.636492873531878f,
+  0.0384525794274069f,-0.377124785497273f,0.967031372742831f,0.435091298236713f,
+  0.161520957813146f,-0.15972397671726f,-0.166845254078147f,-0.587937091804449f,
+  0.910419348982238f,0.764819474485904f,-0.987496701506423f,-0.838882086564368f,
+  -0.41301635023239f,0.740276015423035f,0.951437768918503f,0.284889876776347f,
+  -0.158434377954073f,0.73838340645338f,-0.370863653408783f,0.989946556977472f,
+  -0.848079748582045f,-0.449771614068709f,0.663499305019705f,-0.93185425837116f,
+  0.523896920061681f,-0.232489928806984f,-0.538281594630583f,-0.899682373052556f,
+  -0.338756398325579f,0.951550867830917f,0.494550167204373f,0.194556783574981f,
+  0.243717136517866f,0.224354070146973f,0.143792613511486f,-0.333553884855363f,
+  0.229952471240104f,-0.204077808365582f,0.469903146410694f,-0.958373658125808f,
+  -0.226054823628623f,0.229899490605959f,0.196501423332386f,0.879845549743308f,
+  0.577560209175438f,0.641089277668982f,0.642316105885037f,0.232802361926147f,
+  -0.381538604113949f,0.640474533642731f,-0.913151229860155f,-0.614188561923768f,
+  0.171776090268345f,-0.645757746280538f,-0.122217277374411f,0.00689941379876302f,
+  0.019433549917359f,-0.0675691088088968f,-0.373650414044171f,-0.67103881780362f,
+  0.504459382741295f,-0.14911057310502f,0.148122926701966f,0.694898716737626f,
+  0.585837578674649f,-0.750790936323284f,0.853926520879604f,0.15730556487317f,
+  -0.113618219173468f,0.320367463625416f,-0.552134285767956f,-0.920422170186882f,
+  -0.338845809007829f,-0.810536065394189f,-0.177695639626343f,0.351905028679525f,
+  0.0633494717407146f,-0.865174843305965f,0.634557634797282f,-0.787750206138512f,
+  -0.379512173859858f,0.791878043518267f,-0.704956748246282f,-0.670757904173982f,
+  0.684751731590218f,-0.953277716787511f,0.985770879077755f,0.122219634876117f,
+  0.1721202012071f,-0.388256876288864f,-0.73825752183587f,0.906019401398325f,
+  -0.803381430261645f,-0.74941181755554f,0.0727255174881734f,-0.221605735994771f,
+  -0.564223723915198f,-0.213656403861052f,-0.608162856680174f,0.921170937654253f,
+  0.0847855661657562f,0.417987807384179f,-0.330418934403873f,0.648809934312297f,
+  0.0796460930869003f,0.550628835897015f,-0.92719924470385f,0.877452232036589f,
+  0.559779671355166f,-0.0896538710039181f,0.849093177103803f,-0.747330460544603f,
+  -0.87952842676733f,0.791614152693995f,-0.483542677352212f,0.795053306558017f,
+  -0.246444636768096f,-0.967515102978155f,0.159471890174437f,0.210810164441753f,
+  -0.874227023772981f,-0.0805964482800334f,0.567497201138046f,-0.137743608785641f,
+  -0.710773819308763f,-0.589571113738941f,0.83391847788473f,0.474349423613731f,
+  -0.495444643831966f,-0.415319533683743f,-0.688818445640273f,0.923238489082379f,
+  0.688452278243922f,-0.639758117768224f,0.353458578584501f,-0.352412753977475f,
+  -0.667399029986676f,0.586965778853283f,0.352610916528867f,0.360697015758419f,
+  -0.502839728688848f,-0.539644214882799f,0.365329906944815f,-0.379932106654209f,
+  -0.269280779163601f,0.694895571222645f,0.727823970732565f,0.51105486079409f,
+  -0.761212733299821f,-0.551940664095241f,0.953323471475834f,-0.915964879494524f,
+  0.199091342904332f,0.0392241713497299f,-0.19261671908783f,-0.6074206562147f,
+  0.345256597791838f,0.031516093623793f,0.563117849295644f,-0.0626584858579093f,
+  -0.491887339392029f,-0.758788812866925f,0.265995340863064f,0.23182766277084f,
+  0.069408408971249f,-0.758115582411847f,-0.495213306068685f,-0.941798830898321f,
+  -0.244099121724069f,-0.496495655783264f,0.217102928532306f,-0.958590442403207f,
+  -0.951376141167081f,-0.454184666450089f,0.886300276933433f,0.487002988091731f,
+  0.0249044829036293f,-0.482416930998085f,0.949278378895364f,0.332136035903019f,
+  0.667897336691517f,-0.903192712329542f,0.0759824890853551f,-0.338269985337703f,
+  -0.29518903317139f,-0.768719116695905f,-0.648362986306786f,0.935201134387952f,
+  0.661626711929564f,0.224599917215244f,0.348525351988449f,-0.972641665331466f,
+  0.996220202383405f,-0.72814219793807f,-0.921619408688976f,-0.134878186992026f,
+  0.496907260713279f,0.544992091878385f,0.238303715804555f,-0.924062793629034f,
+  -0.630686074442814f,-0.865452949495847f,-0.222028734920293f,0.171739204767604f,
+  0.997130539607845f,0.918172604643416f,-0.694626556063078f,0.672880204329672f,
+  -0.175982089302543f,-0.876546457819273f,-0.699672836718442f,0.809662568756764f,
+  0.393205100623341f,-0.108144983913576f,-0.486322989569125f,-0.613674712751319f,
+  -0.317080956716971f,0.471134684264513f,-0.30417867085874f,0.696115419091356f,
+  -0.469268432777419f,0.236211281027806f,-0.772378939124543f,0.460249824926805f,
+  -0.654534877200454f,-0.934852798470303f,0.032756384083477f,-0.66677857258658f,
+  0.314980215822125f,-0.289198088561079f,-0.0476688874127049f,0.0583547444711741f,
+  0.941740852748786f,0.774296795413221f,-0.414622097461695f,0.638345821607103f,
+  -0.358465167682304f,-0.700596445490135f,0.374529194857949f,0.45456008092971f,
+  -0.853620900176985f,0.494280579565469f,-0.217619888424174f,0.956186128386705f,
+  0.0389822381652678f,-0.19316201021536f,0.00512085504598048f,-0.0427176575011998f,
+  0.547932129275615f,0.721865358296469f,0.456248153651011f,0.618884232413038f,
+  0.0410425055490649f,-0.0673903257300594f,-0.218781418704999f,0.528492225462415f,
+  -0.0837068514807342f,0.718697939323846f,-0.603328224259776f,0.875509849594366f,
+  -0.0909494938322766f,-0.280637910576161f,-0.645530682341054f,-0.705120660981081f,
+  -0.529475884017096f,0.140296797228785f,-0.0133440668494202f,0.529843637228154f,
+  -0.25828493683013f,0.619866268930593f,0.47419437241583f,0.447278725151833f,
+  0.821711397258731f,-0.617272470973745f,-0.212155578716597f,-0.794533711360103f,
+  -0.762402399229636f,-0.66554198883216f,0.86799064535122f,-0.61415791629922f,
+  0.650236475570502f,0.838917668255732f,-0.658036726366966f,0.79973808497526f,
+  -0.583232188921549f,-0.958456594727579f,-0.670917275207309f,-0.607735262211108f,
+  0.374114548594356f,0.758519548998692f,-0.831818710697753f,-0.606668498198204f,
+  0.461609482669083f,0.676995424711095f,0.338856030176508f,0.5967074997629f,
+  -0.925180531336558f,0.4696830362291f,-0.0384243216844951f,0.649557593176482f,
+  0.502765260428653f,0.513967467262376f,-0.515358199072217f,-0.444119277197828f,
+  -0.230720891612428f,0.348053063903166f,0.335781438011331f,0.567805000835321f,
+  -0.345275268793607f,0.452852845315761f,-0.823034642564643f,-0.629914051647049f,
+  -0.248509141280827f,0.107760047194603f,0.103293698795021f,0.347988561161273f,
+  -0.103424145197316f,0.151822355394586f,-0.272890315097335f,0.940838684544033f,
+  0.174472591496773f,0.741124360281646f,-0.950217097467039f,-0.922017392413252f,
+  0.496955384825533f,-0.588424819191232f,-0.285012984144181f,-0.292976024259858f,
+  0.30303004349728f,0.748249196092075f,-0.565826698130202f,0.973592191086922f,
+  0.598249548946463f,-0.926358493656323f,-0.241048287909827f,0.996364548389252f,
+  -0.0599127959717052f,0.174629249412504f,-0.96587410625029f,0.980693566420129f,
+  -0.696388116912935f,-0.900221651108609f,-0.706671628163384f,0.990309443320101f,
+  -0.585908086908283f,0.414355789768166f,0.456957525423029f,-0.308246583228438f,
+};
+
+#ifdef _VECTORMATH_SOA_TEST
+static inline float getfloat( vec_float4 val ) /* returns element 0 of val; static so C99 builds always have a definition to link */
+{
+   union { vec_float4 v; float s[4]; } tmp; /* lets the vector be read back as four floats */
+   tmp.v = val;
+   return tmp.s[0];
+}
+
+vec_float4 randfloat() /* next randfloats[] entry, copied into all four vector elements */
+{
+   static int cursor = 0;                    /* index of the entry handed out by this call */
+   const float value = randfloats[cursor];
+   vec_float4 splat = (vec_float4){value,value,value,value};
+   cursor = (cursor + 1) % 1024;             /* wrap to the first entry after the last one */
+   return splat;
+}
+
+#else
+static inline float getfloat( float val ) /* scalar variant: identity; static so C99 builds always have a definition to link */
+{
+  return val;
+}
+
+float randfloat() /* hands out the randfloats[] entries in order, cycling after 1024 calls */
+{
+  static int cursor = 0;             /* index of the entry handed out by this call */
+  const int current = cursor;
+  cursor = (current + 1) % 1024;     /* wrap to the first entry after the last one */
+  return randfloats[current];
+}
+#endif
+
+#ifdef _VECTORMATH_SCALAR_TEST
+#  define scalar_float(v) (v) /* scalar build: the value is passed through unchanged */
+#else
+#  define scalar_float(v) floatInVec(v) /* SIMD build: wrap via floatInVec, which is not defined in this header */
+#endif
+
+#endif
diff --git a/Extras/vectormathlibrary/tests/test1_aos_c.c b/Extras/vectormathlibrary/tests/test1_aos_c.c
new file mode 100644
index 000000000..527434ea2
--- /dev/null
+++ b/Extras/vectormathlibrary/tests/test1_aos_c.c
@@ -0,0 +1,1153 @@
+/*
+  Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+  All rights reserved.
+
+  Redistribution and use in source and binary forms,
+  with or without modification, are permitted provided that the
+  following conditions are met:
+   * Redistributions of source code must retain the above copyright
+     notice, this list of conditions and the following disclaimer.
+   * Redistributions in binary form must reproduce the above copyright
+     notice, this list of conditions and the following disclaimer in the
+     documentation and/or other materials provided with the distribution.
+   * Neither the name of the Sony Computer Entertainment Inc nor the names
+     of its contributors may be used to endorse or promote products derived
+     from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+  POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#define _VECTORMATH_AOS_TEST // defined before test.h is included; NOTE(review): the code that tests this macro is not visible here - confirm where it is consumed
+
+#include "vectormath_aos.h"
+#include "test.h"
+
+int iteration = 0; // NOTE(review): not referenced by the test functions visible nearby; presumably used by a driver elsewhere - confirm
+
+void
+Vector3_methods_test() // Walks through the C AoS Vector3 calls one by one, printing each result under a fixed label.
+{ // Statement order matters: every randfloat() call advances one shared sequence, so reordering changes all later values.
+    VmathVector3 a_Vector3, b_Vector3, c_Vector3, d_Vector3, e_Vector3;
+    VmathVector4 a_Vector4, b_Vector4, c_Vector4, d_Vector4, e_Vector4;
+    VmathPoint3 a_Point3, b_Point3, c_Point3, d_Point3, e_Point3;
+    VmathQuat a_Quat, b_Quat, c_Quat, d_Quat, e_Quat;
+    VmathVector4 tmpV4;
+    VmathVector3 tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3, aos_Vector3_0, aos_Vector3_1, aos_Vector3_2, aos_Vector3_3, tmpV3_4, tmpV3_5, tmpV3_6, tmpV3_7;
+    VmathVector4 aos_Vector4_0, aos_Vector4_1, aos_Vector4_2;
+    VmathVector3 tmpV3_8, tmpV3_9;
+    VmathPoint3 tmpP3_0;
+    VmathVector3 tmpV3_10, tmpV3_11, tmpV3_12, tmpV3_13, tmpV3_14, tmpV3_15, tmpV3_16, tmpV3_17, tmpV3_18, tmpV3_19, tmpV3_20, tmpV3_21, tmpV3_22, tmpV3_23, tmpV3_24, tmpV3_25;
+    float rndflt1, rndflt2, rndflt3, rndflt4, pad;
+    float xyz4[12] __attribute__ ((aligned(16))); // 16-byte aligned; reinterpreted as a vec_float4 array by the Load/StoreXYZArray calls below
+#ifndef _VECTORMATH_SCALAR_TEST
+    vec_float4 quad;
+#endif
+    xyz4[0] = randfloat(); // fill all 12 slots from the random sequence
+    xyz4[1] = randfloat();
+    xyz4[2] = randfloat();
+    xyz4[3] = randfloat();
+    xyz4[4] = randfloat();
+    xyz4[5] = randfloat();
+    xyz4[6] = randfloat();
+    xyz4[7] = randfloat();
+    xyz4[8] = randfloat();
+    xyz4[9] = randfloat();
+    xyz4[10] = randfloat();
+    xyz4[11] = randfloat();
+    // set a pad value to detect invalid use of padding.
+    // this will be nan for scalar/ppu implementations, max. float for spu
+    union { float f; unsigned int u; } tmp;
+    tmp.u = 0x7fffffff;
+    pad = tmp.f;
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathV3MakeFromElems( &a_Vector3, rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathV3MakeFromElems( &b_Vector3, rndflt1, rndflt2, rndflt3 );
+    vmathV4MakeFromV3Scalar( &tmpV4, &a_Vector3, pad ); // round-trip a and b through a Vector4 built with pad as the scalar argument
+    vmathV4GetXYZ( &a_Vector3, &tmpV4 );
+    vmathV4MakeFromV3Scalar( &tmpV4, &b_Vector3, pad );
+    vmathV4GetXYZ( &b_Vector3, &tmpV4 );
+    vmathV3Prints( &a_Vector3, "set Vector3 with floats" );
+    vmathV3Prints( &b_Vector3, "set Vector3 with floats" );
+    vmathV3MakeFromScalar( &c_Vector3, 0.0f );
+    vmathV3MakeFromScalar( &d_Vector3, 0.0f );
+    vmathV3MakeFromScalar( &e_Vector3, 0.0f );
+    vmathV3Prints( &c_Vector3, "set Vector3 elements to zero" );
+    vmathV3Prints( &d_Vector3, "set Vector3 elements to zero" );
+    vmathV3Prints( &e_Vector3, "set Vector3 elements to zero" );
+    rndflt1 = randfloat(); // same setup for the Vector4 locals: a and b random, c..e zero
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathV4MakeFromElems( &a_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathV4MakeFromElems( &b_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    vmathV4Prints( &a_Vector4, "set Vector4 with floats" );
+    vmathV4Prints( &b_Vector4, "set Vector4 with floats" );
+    vmathV4MakeFromScalar( &c_Vector4, 0.0f );
+    vmathV4MakeFromScalar( &d_Vector4, 0.0f );
+    vmathV4MakeFromScalar( &e_Vector4, 0.0f );
+    vmathV4Prints( &c_Vector4, "set Vector4 elements to zero" );
+    vmathV4Prints( &d_Vector4, "set Vector4 elements to zero" );
+    vmathV4Prints( &e_Vector4, "set Vector4 elements to zero" );
+    rndflt1 = randfloat(); // same setup for the Point3 locals
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathP3MakeFromElems( &a_Point3, rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathP3MakeFromElems( &b_Point3, rndflt1, rndflt2, rndflt3 );
+    vmathV3MakeFromP3( &tmpV3_0, &a_Point3 ); // same pad round-trip for the two points, going through Vector3 temporaries
+    vmathV4MakeFromV3Scalar( &tmpV4, &tmpV3_0, pad );
+    vmathV4GetXYZ( &tmpV3_1, &tmpV4 );
+    vmathP3MakeFromV3( &a_Point3, &tmpV3_1 );
+    vmathV3MakeFromP3( &tmpV3_2, &b_Point3 );
+    vmathV4MakeFromV3Scalar( &tmpV4, &tmpV3_2, pad );
+    vmathV4GetXYZ( &tmpV3_3, &tmpV4 );
+    vmathP3MakeFromV3( &b_Point3, &tmpV3_3 );
+    vmathP3Prints( &a_Point3, "set Point3 with floats" );
+    vmathP3Prints( &b_Point3, "set Point3 with floats" );
+    vmathP3MakeFromScalar( &c_Point3, 0.0f );
+    vmathP3MakeFromScalar( &d_Point3, 0.0f );
+    vmathP3MakeFromScalar( &e_Point3, 0.0f );
+    vmathP3Prints( &c_Point3, "set Point3 elements to zero" );
+    vmathP3Prints( &d_Point3, "set Point3 elements to zero" );
+    vmathP3Prints( &e_Point3, "set Point3 elements to zero" );
+    rndflt1 = randfloat(); // same setup for the Quat locals
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathQMakeFromElems( &a_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathQMakeFromElems( &b_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    vmathQPrints( &a_Quat, "set Quat with floats" );
+    vmathQPrints( &b_Quat, "set Quat with floats" );
+    vmathQMakeFromScalar( &c_Quat, 0.0f );
+    vmathQMakeFromScalar( &d_Quat, 0.0f );
+    vmathQMakeFromScalar( &e_Quat, 0.0f );
+    vmathQPrints( &c_Quat, "set Quat elements to zero" );
+    vmathQPrints( &d_Quat, "set Quat elements to zero" );
+    vmathQPrints( &e_Quat, "set Quat elements to zero" );
+    vmathV3MakeFromP3( &a_Vector3, &a_Point3 ); // from here on the Vector3 calls themselves are exercised
+    vmathV3Prints( &a_Vector3, "construct Vector3 with Point3" );
+    vmathV3MakeFromScalar( &a_Vector3, randfloat() );
+    vmathV3Prints( &a_Vector3, "set Vector3 with float" );
+    vmathV3MakeFromScalar( &a_Vector3, randfloat() );
+    vmathV3Prints( &a_Vector3, "set Vector3 with float" );
+    vmathV3MakeFromElems( &aos_Vector3_0, 0.0f, 1.0f, 2.0f ); // four constant vectors, printed below in reverse order of construction
+    vmathV3MakeFromElems( &aos_Vector3_1, 3.0f, 4.0f, 5.0f );
+    vmathV3MakeFromElems( &aos_Vector3_2, 6.0f, 7.0f, 8.0f );
+    vmathV3MakeFromElems( &aos_Vector3_3, 9.0f, 10.0f, 11.0f );
+    vmathV3Prints( &aos_Vector3_3, "aos type 0" );
+    vmathV3Prints( &aos_Vector3_2, "aos type 1" );
+    vmathV3Prints( &aos_Vector3_1, "aos type 2" );
+    vmathV3Prints( &aos_Vector3_0, "aos type 3" );
+    vmathV3Select( &tmpV3_4, &a_Vector3, &b_Vector3, 0 ); // selector values used: 0, 1, 0, all-ones. NOTE(review): "select 2" reuses selector 0 - presumably intentional; confirm
+    vmathV3Prints( &tmpV3_4, "select 0" );
+    vmathV3Select( &tmpV3_5, &a_Vector3, &b_Vector3, 1 );
+    vmathV3Prints( &tmpV3_5, "select 1" );
+    vmathV3Select( &tmpV3_6, &a_Vector3, &b_Vector3, 0 );
+    vmathV3Prints( &tmpV3_6, "select 2" );
+    vmathV3Select( &tmpV3_7, &a_Vector3, &b_Vector3, (unsigned int)-1 );
+    vmathV3Prints( &tmpV3_7, "select 3" );
+    vmathV3MakeFromElems( &a_Vector3, xyz4[0], xyz4[1], xyz4[2] );
+    vmathV3Prints( &a_Vector3, "load XYZ array" );
+    xyz4[0] = -xyz4[0]; // negate every slot in place
+    xyz4[1] = -xyz4[1];
+    xyz4[2] = -xyz4[2];
+    xyz4[3] = -xyz4[3];
+    xyz4[4] = -xyz4[4];
+    xyz4[5] = -xyz4[5];
+    xyz4[6] = -xyz4[6];
+    xyz4[7] = -xyz4[7];
+    xyz4[8] = -xyz4[8];
+    xyz4[9] = -xyz4[9];
+    xyz4[10] = -xyz4[10];
+    xyz4[11] = -xyz4[11];
+    vmathV4MakeFromElems( &aos_Vector4_0, xyz4[0], xyz4[1], xyz4[2], xyz4[3] ); // print the 12 floats as three Vector4s
+    vmathV4MakeFromElems( &aos_Vector4_1, xyz4[4], xyz4[5], xyz4[6], xyz4[7] );
+    vmathV4MakeFromElems( &aos_Vector4_2, xyz4[8], xyz4[9], xyz4[10], xyz4[11] );
+    vmathV4Prints( &aos_Vector4_0, "xyzx" );
+    vmathV4Prints( &aos_Vector4_1, "yzxy" );
+    vmathV4Prints( &aos_Vector4_2, "zxyz" );
+#ifndef _VECTORMATH_SCALAR_TEST // non-scalar builds only: LoadXYZArray from xyz4, overwrite xyz4 with 0..11, then StoreXYZArray into it (presumably restoring the loaded values - confirm)
+    vmathV3LoadXYZArray( &aos_Vector3_0, &aos_Vector3_1, &aos_Vector3_2, &aos_Vector3_3, (const vec_float4 *)xyz4 );
+    xyz4[0] = 0;
+    xyz4[1] = 1;
+    xyz4[2] = 2;
+    xyz4[3] = 3;
+    xyz4[4] = 4;
+    xyz4[5] = 5;
+    xyz4[6] = 6;
+    xyz4[7] = 7;
+    xyz4[8] = 8;
+    xyz4[9] = 9;
+    xyz4[10] = 10;
+    xyz4[11] = 11;
+    vmathV3StoreXYZArray( &aos_Vector3_0, &aos_Vector3_1, &aos_Vector3_2, &aos_Vector3_3, (vec_float4 *)xyz4 );
+#endif
+    vmathV4MakeFromElems( &aos_Vector4_0, xyz4[0], xyz4[1], xyz4[2], xyz4[3] ); // print the array again under the same three labels
+    vmathV4MakeFromElems( &aos_Vector4_1, xyz4[4], xyz4[5], xyz4[6], xyz4[7] );
+    vmathV4MakeFromElems( &aos_Vector4_2, xyz4[8], xyz4[9], xyz4[10], xyz4[11] );
+    vmathV4Prints( &aos_Vector4_0, "xyzx" );
+    vmathV4Prints( &aos_Vector4_1, "yzxy" );
+    vmathV4Prints( &aos_Vector4_2, "zxyz" );
+#ifdef _VECTORMATH_SCALAR_TEST // scalar builds print a fixed line in place of the StoreXYZ check
+    printf("storeXYZ:-1.0 -2.0 -3.0 0.4\n");
+#else
+    quad = (vec_float4){-1.0f, -2.0f, -3.0f, -4.0f};
+    a_Vector3.vec128 = quad;
+    quad = (vec_float4){0.1f, 0.2f, 0.3f, 0.4f};
+    vmathV3StoreXYZ( &a_Vector3, &quad );
+    printf("storeXYZ:%f %f %f %f\n", ((float *)&quad)[0], ((float *)&quad)[1], ((float *)&quad)[2], ((float *)&quad)[3]);
+#endif
+    vmathV3Copy( &a_Vector3, &b_Vector3 );
+    vmathV3Prints( &a_Vector3, "assign to Vector3 from Vector3" );
+    vmathV3MakeFromScalar( &a_Vector3, 0.0f );
+    vmathV3Prints( &a_Vector3, "set Vector3 elements to zero" );
+    vmathV3MakeXAxis( &a_Vector3 );
+    vmathV3Prints( &a_Vector3, "set to x axis" );
+    vmathV3MakeYAxis( &a_Vector3 );
+    vmathV3Prints( &a_Vector3, "set to y axis" );
+    vmathV3MakeZAxis( &a_Vector3 );
+    vmathV3Prints( &a_Vector3, "set to z axis" );
+    vmathV3SetElem( &a_Vector3, 0, randfloat() ); // element 0: set, then read-modify-write with *, /, +, - using fresh random operands, then SetX
+    vmathV3Prints( &a_Vector3, "Vector3::set( 0, float )" );
+    vmathV3SetElem( &a_Vector3, 0, randfloat() );
+    vmathV3SetElem( &a_Vector3, 0, ( vmathV3GetElem( &a_Vector3, 0 ) * randfloat() ) );
+    vmathV3SetElem( &a_Vector3, 0, ( vmathV3GetElem( &a_Vector3, 0 ) / randfloat() ) );
+    vmathV3SetElem( &a_Vector3, 0, ( vmathV3GetElem( &a_Vector3, 0 ) + randfloat() ) );
+    vmathV3SetElem( &a_Vector3, 0, ( vmathV3GetElem( &a_Vector3, 0 ) - randfloat() ) );
+    vmathV3Prints( &a_Vector3, "Vector3::operator [](0)" );
+    vmathV3SetX( &a_Vector3, randfloat() );
+    vmathV3Prints( &a_Vector3, "Vector3::setX()" );
+    vmathV3SetElem( &a_Vector3, 1, randfloat() ); // element 1: same sequence, ending with SetY
+    vmathV3Prints( &a_Vector3, "Vector3::set( 1, float )" );
+    vmathV3SetElem( &a_Vector3, 1, randfloat() );
+    vmathV3SetElem( &a_Vector3, 1, ( vmathV3GetElem( &a_Vector3, 1 ) * randfloat() ) );
+    vmathV3SetElem( &a_Vector3, 1, ( vmathV3GetElem( &a_Vector3, 1 ) / randfloat() ) );
+    vmathV3SetElem( &a_Vector3, 1, ( vmathV3GetElem( &a_Vector3, 1 ) + randfloat() ) );
+    vmathV3SetElem( &a_Vector3, 1, ( vmathV3GetElem( &a_Vector3, 1 ) - randfloat() ) );
+    vmathV3Prints( &a_Vector3, "Vector3::operator [](1)" );
+    vmathV3SetY( &a_Vector3, randfloat() );
+    vmathV3Prints( &a_Vector3, "Vector3::setY()" );
+    vmathV3SetElem( &a_Vector3, 2, randfloat() ); // element 2: same sequence, ending with SetZ
+    vmathV3Prints( &a_Vector3, "Vector3::set( 2, float )" );
+    vmathV3SetElem( &a_Vector3, 2, randfloat() );
+    vmathV3SetElem( &a_Vector3, 2, ( vmathV3GetElem( &a_Vector3, 2 ) * randfloat() ) );
+    vmathV3SetElem( &a_Vector3, 2, ( vmathV3GetElem( &a_Vector3, 2 ) / randfloat() ) );
+    vmathV3SetElem( &a_Vector3, 2, ( vmathV3GetElem( &a_Vector3, 2 ) + randfloat() ) );
+    vmathV3SetElem( &a_Vector3, 2, ( vmathV3GetElem( &a_Vector3, 2 ) - randfloat() ) );
+    vmathV3Prints( &a_Vector3, "Vector3::operator [](2)" );
+    vmathV3SetZ( &a_Vector3, randfloat() );
+    vmathV3Prints( &a_Vector3, "Vector3::setZ()" );
+    printf("Vector3::get( 0 ): %f\n", getfloat(vmathV3GetElem( &a_Vector3, 0 )) ); // read each element back through GetElem and GetX/GetY/GetZ
+    printf("Vector3::operator []( 0 ): %f\n", getfloat(vmathV3GetElem( &a_Vector3, 0 )) );
+    printf("Vector3::getX(): %f\n", getfloat(vmathV3GetX( &a_Vector3 )) );
+    printf("Vector3::get( 1 ): %f\n", getfloat(vmathV3GetElem( &a_Vector3, 1 )) );
+    printf("Vector3::operator []( 1 ): %f\n", getfloat(vmathV3GetElem( &a_Vector3, 1 )) );
+    printf("Vector3::getY(): %f\n", getfloat(vmathV3GetY( &a_Vector3 )) );
+    printf("Vector3::get( 2 ): %f\n", getfloat(vmathV3GetElem( &a_Vector3, 2 )) );
+    printf("Vector3::operator []( 2 ): %f\n", getfloat(vmathV3GetElem( &a_Vector3, 2 )) );
+    printf("Vector3::getZ(): %f\n", getfloat(vmathV3GetZ( &a_Vector3 )) );
+    vmathV3Add( &tmpV3_8, &a_Vector3, &b_Vector3 ); // arithmetic and per-element calls on a_Vector3, with b_Vector3 or b_Point3 as the second operand
+    vmathV3Prints( &tmpV3_8, "Vector3 + Vector3" );
+    vmathV3Sub( &tmpV3_9, &a_Vector3, &b_Vector3 );
+    vmathV3Prints( &tmpV3_9, "Vector3 - Vector3" );
+    vmathV3AddP3( &tmpP3_0, &a_Vector3, &b_Point3 );
+    vmathP3Prints( &tmpP3_0, "Vector3 + Point3" );
+    vmathV3ScalarMul( &tmpV3_10, &a_Vector3, randfloat() );
+    vmathV3Prints( &tmpV3_10, "Vector3 * float" );
+    vmathV3ScalarDiv( &tmpV3_11, &a_Vector3, randfloat() );
+    vmathV3Prints( &tmpV3_11, "Vector3 / float" );
+    vmathV3ScalarMul( &tmpV3_12, &a_Vector3, randfloat() );
+    vmathV3Prints( &tmpV3_12, "float * Vector3" );
+    vmathV3Neg( &tmpV3_13, &a_Vector3 );
+    vmathV3Prints( &tmpV3_13, "Vector3 negate" );
+    vmathV3MulPerElem( &tmpV3_14, &a_Vector3, &b_Vector3 );
+    vmathV3Prints( &tmpV3_14, "mulPerElem( Vector3, Vector3 )" );
+    vmathV3DivPerElem( &tmpV3_15, &a_Vector3, &b_Vector3 );
+    vmathV3Prints( &tmpV3_15, "divPerElem( Vector3, Vector3 )" );
+    vmathV3RecipPerElem( &tmpV3_16, &a_Vector3 );
+    vmathV3Prints( &tmpV3_16, "Vector3 recip" );
+    vmathV3AbsPerElem( &tmpV3_17, &a_Vector3 ); // sqrt and rsqrt are applied to the AbsPerElem result, not to a_Vector3 directly
+    vmathV3SqrtPerElem( &tmpV3_18, &tmpV3_17 );
+    vmathV3Prints( &tmpV3_18, "Vector3 sqrt" );
+    vmathV3AbsPerElem( &tmpV3_19, &a_Vector3 );
+    vmathV3RsqrtPerElem( &tmpV3_20, &tmpV3_19 );
+    vmathV3Prints( &tmpV3_20, "Vector3 rsqrt" );
+    vmathV3AbsPerElem( &tmpV3_21, &a_Vector3 );
+    vmathV3Prints( &tmpV3_21, "Vector3 abs" );
+    vmathV3CopySignPerElem( &tmpV3_22, &a_Vector3, &b_Vector3 );
+    vmathV3Prints( &tmpV3_22, "Vector3 copySign" );
+    vmathV3MaxPerElem( &tmpV3_23, &a_Vector3, &b_Vector3 );
+    vmathV3Prints( &tmpV3_23, "Vector3 maximum Vector3" );
+    vmathV3MinPerElem( &tmpV3_24, &a_Vector3, &b_Vector3 );
+    vmathV3Prints( &tmpV3_24, "Vector3 minimum Vector3" );
+    printf("Vector3 maximum of elements: %f\n", getfloat(vmathV3MaxElem( &a_Vector3 )));
+    printf("Vector3 minimum of elements: %f\n", getfloat(vmathV3MinElem( &a_Vector3 )));
+    printf("Vector3 sum of elements: %f\n", getfloat(vmathV3Sum( &a_Vector3 )));
+    printf("Vector3 dot Vector3: %f\n", getfloat(vmathV3Dot( &a_Vector3, &b_Vector3 )));
+    printf("Vector3 lengthSqr: %f\n", getfloat(vmathV3LengthSqr( &a_Vector3 )));
+    printf("Vector3 length: %f\n", getfloat(vmathV3Length( &a_Vector3 )));
+    vmathV3Normalize( &tmpV3_25, &a_Vector3 );
+    vmathV3Prints( &tmpV3_25, "Vector3 normalized" );
+    rndflt1 = randfloat(); // fresh random b..e, normalized in place; only b and c feed the lerp/slerp below, d and e are not used again in this function
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathV3MakeFromElems( &b_Vector3, rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathV3MakeFromElems( &c_Vector3, rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathV3MakeFromElems( &d_Vector3, rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathV3MakeFromElems( &e_Vector3, rndflt1, rndflt2, rndflt3 );
+    vmathV3Normalize( &b_Vector3, &b_Vector3 );
+    vmathV3Normalize( &c_Vector3, &c_Vector3 );
+    vmathV3Normalize( &d_Vector3, &d_Vector3 );
+    vmathV3Normalize( &e_Vector3, &e_Vector3 );
+    vmathV3Lerp( &a_Vector3, randfloat(), &b_Vector3, &c_Vector3 );
+    vmathV3Prints( &a_Vector3, "Vector3 lerp" );
+    vmathV3Slerp( &a_Vector3, randfloat(), &b_Vector3, &c_Vector3 );
+    vmathV3Prints( &a_Vector3, "Vector3 slerp" );
+}
+
+void
+Vector4_methods_test() // Walks through the C AoS Vector4 calls one by one, printing each result under a fixed label.
+{ // Statement order matters: every randfloat() call advances one shared sequence, so reordering changes all later values.
+    VmathVector3 a_Vector3, b_Vector3, c_Vector3, d_Vector3, e_Vector3;
+    VmathVector4 a_Vector4, b_Vector4, c_Vector4, d_Vector4, e_Vector4;
+    VmathPoint3 a_Point3, b_Point3, c_Point3, d_Point3, e_Point3;
+    VmathQuat a_Quat, b_Quat, c_Quat, d_Quat, e_Quat;
+    VmathVector4 tmpV4;
+    VmathVector3 tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3;
+    VmathVector4 aos_Vector4_0, aos_Vector4_1, aos_Vector4_2, aos_Vector4_3, tmpV4_0, tmpV4_1, tmpV4_2, tmpV4_3;
+    VmathVector3 tmpV3_4;
+    VmathVector4 tmpV4_4, tmpV4_5, tmpV4_6, tmpV4_7, tmpV4_8, tmpV4_9, tmpV4_10, tmpV4_11, tmpV4_12, tmpV4_13, tmpV4_14, tmpV4_15, tmpV4_16, tmpV4_17, tmpV4_18, tmpV4_19, tmpV4_20, tmpV4_21;
+    float rndflt1, rndflt2, rndflt3, rndflt4, pad;
+    // set a pad value to detect invalid use of padding.
+    // this will be nan for scalar/ppu implementations, max. float for spu
+    union { float f; unsigned int u; } tmp;
+    tmp.u = 0x7fffffff;
+    pad = tmp.f;
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathV3MakeFromElems( &a_Vector3, rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathV3MakeFromElems( &b_Vector3, rndflt1, rndflt2, rndflt3 );
+    vmathV4MakeFromV3Scalar( &tmpV4, &a_Vector3, pad ); // round-trip a and b through a Vector4 built with pad as the scalar argument
+    vmathV4GetXYZ( &a_Vector3, &tmpV4 );
+    vmathV4MakeFromV3Scalar( &tmpV4, &b_Vector3, pad );
+    vmathV4GetXYZ( &b_Vector3, &tmpV4 );
+    vmathV3Prints( &a_Vector3, "set Vector3 with floats" );
+    vmathV3Prints( &b_Vector3, "set Vector3 with floats" );
+    vmathV3MakeFromScalar( &c_Vector3, 0.0f );
+    vmathV3MakeFromScalar( &d_Vector3, 0.0f );
+    vmathV3MakeFromScalar( &e_Vector3, 0.0f );
+    vmathV3Prints( &c_Vector3, "set Vector3 elements to zero" );
+    vmathV3Prints( &d_Vector3, "set Vector3 elements to zero" );
+    vmathV3Prints( &e_Vector3, "set Vector3 elements to zero" );
+    rndflt1 = randfloat(); // same setup for the Vector4 locals: a and b random, c..e zero
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathV4MakeFromElems( &a_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathV4MakeFromElems( &b_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    vmathV4Prints( &a_Vector4, "set Vector4 with floats" );
+    vmathV4Prints( &b_Vector4, "set Vector4 with floats" );
+    vmathV4MakeFromScalar( &c_Vector4, 0.0f );
+    vmathV4MakeFromScalar( &d_Vector4, 0.0f );
+    vmathV4MakeFromScalar( &e_Vector4, 0.0f );
+    vmathV4Prints( &c_Vector4, "set Vector4 elements to zero" );
+    vmathV4Prints( &d_Vector4, "set Vector4 elements to zero" );
+    vmathV4Prints( &e_Vector4, "set Vector4 elements to zero" );
+    rndflt1 = randfloat(); // same setup for the Point3 locals
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathP3MakeFromElems( &a_Point3, rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathP3MakeFromElems( &b_Point3, rndflt1, rndflt2, rndflt3 );
+    vmathV3MakeFromP3( &tmpV3_0, &a_Point3 ); // same pad round-trip for the two points, going through Vector3 temporaries
+    vmathV4MakeFromV3Scalar( &tmpV4, &tmpV3_0, pad );
+    vmathV4GetXYZ( &tmpV3_1, &tmpV4 );
+    vmathP3MakeFromV3( &a_Point3, &tmpV3_1 );
+    vmathV3MakeFromP3( &tmpV3_2, &b_Point3 );
+    vmathV4MakeFromV3Scalar( &tmpV4, &tmpV3_2, pad );
+    vmathV4GetXYZ( &tmpV3_3, &tmpV4 );
+    vmathP3MakeFromV3( &b_Point3, &tmpV3_3 );
+    vmathP3Prints( &a_Point3, "set Point3 with floats" );
+    vmathP3Prints( &b_Point3, "set Point3 with floats" );
+    vmathP3MakeFromScalar( &c_Point3, 0.0f );
+    vmathP3MakeFromScalar( &d_Point3, 0.0f );
+    vmathP3MakeFromScalar( &e_Point3, 0.0f );
+    vmathP3Prints( &c_Point3, "set Point3 elements to zero" );
+    vmathP3Prints( &d_Point3, "set Point3 elements to zero" );
+    vmathP3Prints( &e_Point3, "set Point3 elements to zero" );
+    rndflt1 = randfloat(); // same setup for the Quat locals
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathQMakeFromElems( &a_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathQMakeFromElems( &b_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    vmathQPrints( &a_Quat, "set Quat with floats" );
+    vmathQPrints( &b_Quat, "set Quat with floats" );
+    vmathQMakeFromScalar( &c_Quat, 0.0f );
+    vmathQMakeFromScalar( &d_Quat, 0.0f );
+    vmathQMakeFromScalar( &e_Quat, 0.0f );
+    vmathQPrints( &c_Quat, "set Quat elements to zero" );
+    vmathQPrints( &d_Quat, "set Quat elements to zero" );
+    vmathQPrints( &e_Quat, "set Quat elements to zero" );
+    vmathV4MakeFromV3Scalar( &a_Vector4, &a_Vector3, randfloat() ); // from here on the Vector4 calls themselves are exercised, starting with the MakeFrom* variants
+    vmathV4Prints( &a_Vector4, "set Vector4 with Vector3, float" );
+    vmathV4MakeFromV3( &a_Vector4, &a_Vector3 );
+    vmathV4Prints( &a_Vector4, "set Vector4 with Vector3" );
+    vmathV4MakeFromP3( &a_Vector4, &a_Point3 );
+    vmathV4Prints( &a_Vector4, "set Vector4 with Point3" );
+    vmathV4MakeFromQ( &a_Vector4, &a_Quat );
+    vmathV4Prints( &a_Vector4, "construct Vector4 with Quat" );
+    vmathV4MakeFromScalar( &a_Vector4, randfloat() );
+    vmathV4Prints( &a_Vector4, "set Vector4 with float" );
+    vmathV4MakeFromScalar( &a_Vector4, randfloat() );
+    vmathV4Prints( &a_Vector4, "set Vector4 with float" );
+    vmathV4MakeFromElems( &aos_Vector4_0, 0.0f, 1.0f, 2.0f, 3.0f ); // four constant vectors, printed below in reverse order of construction
+    vmathV4MakeFromElems( &aos_Vector4_1, 4.0f, 5.0f, 6.0f, 7.0f );
+    vmathV4MakeFromElems( &aos_Vector4_2, 8.0f, 9.0f, 10.0f, 11.0f );
+    vmathV4MakeFromElems( &aos_Vector4_3, 12.0f, 13.0f, 14.0f, 15.0f );
+    vmathV4Prints( &aos_Vector4_3, "aos type 0" );
+    vmathV4Prints( &aos_Vector4_2, "aos type 1" );
+    vmathV4Prints( &aos_Vector4_1, "aos type 2" );
+    vmathV4Prints( &aos_Vector4_0, "aos type 3" );
+    vmathV4Select( &tmpV4_0, &a_Vector4, &b_Vector4, 0 ); // selector values used: 0, 1, 0, all-ones. NOTE(review): "select 2" reuses selector 0 - presumably intentional; confirm
+    vmathV4Prints( &tmpV4_0, "select 0" );
+    vmathV4Select( &tmpV4_1, &a_Vector4, &b_Vector4, 1 );
+    vmathV4Prints( &tmpV4_1, "select 1" );
+    vmathV4Select( &tmpV4_2, &a_Vector4, &b_Vector4, 0 );
+    vmathV4Prints( &tmpV4_2, "select 2" );
+    vmathV4Select( &tmpV4_3, &a_Vector4, &b_Vector4, (unsigned int)-1 );
+    vmathV4Prints( &tmpV4_3, "select 3" );
+    vmathV4Copy( &a_Vector4, &b_Vector4 );
+    vmathV4Prints( &a_Vector4, "assign to Vector4 from Vector4" );
+    vmathV4SetXYZ( &a_Vector4, &a_Vector3 ); // SetXYZ from a_Vector3, then GetXYZ back out into a temporary
+    vmathV4Prints( &a_Vector4, "set Vector4 xyz" );
+    vmathV4GetXYZ( &tmpV3_4, &a_Vector4 );
+    vmathV3Prints( &tmpV3_4, "get Vector4 xyz" );
+    vmathV4MakeFromScalar( &a_Vector4, 0.0f );
+    vmathV4Prints( &a_Vector4, "set Vector4 elements to zero" );
+    vmathV4MakeXAxis( &a_Vector4 );
+    vmathV4Prints( &a_Vector4, "set to x axis" );
+    vmathV4MakeYAxis( &a_Vector4 );
+    vmathV4Prints( &a_Vector4, "set to y axis" );
+    vmathV4MakeZAxis( &a_Vector4 );
+    vmathV4Prints( &a_Vector4, "set to z axis" );
+    vmathV4MakeWAxis( &a_Vector4 );
+    vmathV4Prints( &a_Vector4, "set to w axis" );
+    vmathV4SetElem( &a_Vector4, 0, randfloat() ); // element 0: set, then read-modify-write with *, /, +, - using fresh random operands, then SetX
+    vmathV4Prints( &a_Vector4, "Vector4::set( 0, float )" );
+    vmathV4SetElem( &a_Vector4, 0, randfloat() );
+    vmathV4SetElem( &a_Vector4, 0, ( vmathV4GetElem( &a_Vector4, 0 ) * randfloat() ) );
+    vmathV4SetElem( &a_Vector4, 0, ( vmathV4GetElem( &a_Vector4, 0 ) / randfloat() ) );
+    vmathV4SetElem( &a_Vector4, 0, ( vmathV4GetElem( &a_Vector4, 0 ) + randfloat() ) );
+    vmathV4SetElem( &a_Vector4, 0, ( vmathV4GetElem( &a_Vector4, 0 ) - randfloat() ) );
+    vmathV4Prints( &a_Vector4, "Vector4::operator [](0)" );
+    vmathV4SetX( &a_Vector4, randfloat() );
+    vmathV4Prints( &a_Vector4, "Vector4::setX()" );
+    vmathV4SetElem( &a_Vector4, 1, randfloat() ); // element 1: same sequence, ending with SetY
+    vmathV4Prints( &a_Vector4, "Vector4::set( 1, float )" );
+    vmathV4SetElem( &a_Vector4, 1, randfloat() );
+    vmathV4SetElem( &a_Vector4, 1, ( vmathV4GetElem( &a_Vector4, 1 ) * randfloat() ) );
+    vmathV4SetElem( &a_Vector4, 1, ( vmathV4GetElem( &a_Vector4, 1 ) / randfloat() ) );
+    vmathV4SetElem( &a_Vector4, 1, ( vmathV4GetElem( &a_Vector4, 1 ) + randfloat() ) );
+    vmathV4SetElem( &a_Vector4, 1, ( vmathV4GetElem( &a_Vector4, 1 ) - randfloat() ) );
+    vmathV4Prints( &a_Vector4, "Vector4::operator [](1)" );
+    vmathV4SetY( &a_Vector4, randfloat() );
+    vmathV4Prints( &a_Vector4, "Vector4::setY()" );
+    vmathV4SetElem( &a_Vector4, 2, randfloat() ); // element 2: same sequence, ending with SetZ
+    vmathV4Prints( &a_Vector4, "Vector4::set( 2, float )" );
+    vmathV4SetElem( &a_Vector4, 2, randfloat() );
+    vmathV4SetElem( &a_Vector4, 2, ( vmathV4GetElem( &a_Vector4, 2 ) * randfloat() ) );
+    vmathV4SetElem( &a_Vector4, 2, ( vmathV4GetElem( &a_Vector4, 2 ) / randfloat() ) );
+    vmathV4SetElem( &a_Vector4, 2, ( vmathV4GetElem( &a_Vector4, 2 ) + randfloat() ) );
+    vmathV4SetElem( &a_Vector4, 2, ( vmathV4GetElem( &a_Vector4, 2 ) - randfloat() ) );
+    vmathV4Prints( &a_Vector4, "Vector4::operator [](2)" );
+    vmathV4SetZ( &a_Vector4, randfloat() );
+    vmathV4Prints( &a_Vector4, "Vector4::setZ()" );
+    vmathV4SetElem( &a_Vector4, 3, randfloat() ); // element 3: same sequence, ending with SetW
+    vmathV4Prints( &a_Vector4, "Vector4::set( 3, float )" );
+    vmathV4SetElem( &a_Vector4, 3, randfloat() );
+    vmathV4SetElem( &a_Vector4, 3, ( vmathV4GetElem( &a_Vector4, 3 ) * randfloat() ) );
+    vmathV4SetElem( &a_Vector4, 3, ( vmathV4GetElem( &a_Vector4, 3 ) / randfloat() ) );
+    vmathV4SetElem( &a_Vector4, 3, ( vmathV4GetElem( &a_Vector4, 3 ) + randfloat() ) );
+    vmathV4SetElem( &a_Vector4, 3, ( vmathV4GetElem( &a_Vector4, 3 ) - randfloat() ) );
+    vmathV4Prints( &a_Vector4, "Vector4::operator [](3)" );
+    vmathV4SetW( &a_Vector4, randfloat() );
+    vmathV4Prints( &a_Vector4, "Vector4::setW()" );
+    printf("Vector4::get( 0 ): %f\n", getfloat(vmathV4GetElem( &a_Vector4, 0 )) ); // read each element back through GetElem and GetX/GetY/GetZ/GetW
+    printf("Vector4::operator []( 0 ): %f\n", getfloat(vmathV4GetElem( &a_Vector4, 0 )) );
+    printf("Vector4::getX(): %f\n", getfloat(vmathV4GetX( &a_Vector4 )) );
+    printf("Vector4::get( 1 ): %f\n", getfloat(vmathV4GetElem( &a_Vector4, 1 )) );
+    printf("Vector4::operator []( 1 ): %f\n", getfloat(vmathV4GetElem( &a_Vector4, 1 )) );
+    printf("Vector4::getY(): %f\n", getfloat(vmathV4GetY( &a_Vector4 )) );
+    printf("Vector4::get( 2 ): %f\n", getfloat(vmathV4GetElem( &a_Vector4, 2 )) );
+    printf("Vector4::operator []( 2 ): %f\n", getfloat(vmathV4GetElem( &a_Vector4, 2 )) );
+    printf("Vector4::getZ(): %f\n", getfloat(vmathV4GetZ( &a_Vector4 )) );
+    printf("Vector4::get( 3 ): %f\n", getfloat(vmathV4GetElem( &a_Vector4, 3 )) );
+    printf("Vector4::operator []( 3 ): %f\n", getfloat(vmathV4GetElem( &a_Vector4, 3 )) );
+    printf("Vector4::getW(): %f\n", getfloat(vmathV4GetW( &a_Vector4 )) );
+    vmathV4Add( &tmpV4_4, &a_Vector4, &b_Vector4 ); // arithmetic and per-element calls on a_Vector4, with b_Vector4 as the second operand
+    vmathV4Prints( &tmpV4_4, "Vector4 + Vector4" );
+    vmathV4Sub( &tmpV4_5, &a_Vector4, &b_Vector4 );
+    vmathV4Prints( &tmpV4_5, "Vector4 - Vector4" );
+    vmathV4ScalarMul( &tmpV4_6, &a_Vector4, randfloat() );
+    vmathV4Prints( &tmpV4_6, "Vector4 * float" );
+    vmathV4ScalarDiv( &tmpV4_7, &a_Vector4, randfloat() );
+    vmathV4Prints( &tmpV4_7, "Vector4 / float" );
+    vmathV4ScalarMul( &tmpV4_8, &a_Vector4, randfloat() );
+    vmathV4Prints( &tmpV4_8, "float * Vector4" );
+    vmathV4Neg( &tmpV4_9, &a_Vector4 );
+    vmathV4Prints( &tmpV4_9, "Vector4 negate" );
+    vmathV4MulPerElem( &tmpV4_10, &a_Vector4, &b_Vector4 );
+    vmathV4Prints( &tmpV4_10, "mulPerElem( Vector4, Vector4 )" );
+    vmathV4DivPerElem( &tmpV4_11, &a_Vector4, &b_Vector4 );
+    vmathV4Prints( &tmpV4_11, "divPerElem( Vector4, Vector4 )" );
+    vmathV4RecipPerElem( &tmpV4_12, &a_Vector4 );
+    vmathV4Prints( &tmpV4_12, "Vector4 recip" );
+    vmathV4AbsPerElem( &tmpV4_13, &a_Vector4 ); // sqrt and rsqrt are applied to the AbsPerElem result, not to a_Vector4 directly
+    vmathV4SqrtPerElem( &tmpV4_14, &tmpV4_13 );
+    vmathV4Prints( &tmpV4_14, "Vector4 sqrt" );
+    vmathV4AbsPerElem( &tmpV4_15, &a_Vector4 );
+    vmathV4RsqrtPerElem( &tmpV4_16, &tmpV4_15 );
+    vmathV4Prints( &tmpV4_16, "Vector4 rsqrt" );
+    vmathV4AbsPerElem( &tmpV4_17, &a_Vector4 );
+    vmathV4Prints( &tmpV4_17, "Vector4 abs" );
+    vmathV4CopySignPerElem( &tmpV4_18, &a_Vector4, &b_Vector4 );
+    vmathV4Prints( &tmpV4_18, "Vector4 copySign" );
+    vmathV4MaxPerElem( &tmpV4_19, &a_Vector4, &b_Vector4 );
+    vmathV4Prints( &tmpV4_19, "Vector4 maximum Vector4" );
+    vmathV4MinPerElem( &tmpV4_20, &a_Vector4, &b_Vector4 );
+    vmathV4Prints( &tmpV4_20, "Vector4 minimum Vector4" );
+    printf("Vector4 maximum of elements: %f\n", getfloat(vmathV4MaxElem( &a_Vector4 )));
+    printf("Vector4 minimum of elements: %f\n", getfloat(vmathV4MinElem( &a_Vector4 )));
+    printf("Vector4 sum of elements: %f\n", getfloat(vmathV4Sum( &a_Vector4 )));
+    printf("Vector4 dot Vector4: %f\n", getfloat(vmathV4Dot( &a_Vector4, &b_Vector4 )));
+    printf("Vector4 lengthSqr: %f\n", getfloat(vmathV4LengthSqr( &a_Vector4 )));
+    printf("Vector4 length: %f\n", getfloat(vmathV4Length( &a_Vector4 )));
+    vmathV4Normalize( &tmpV4_21, &a_Vector4 );
+    vmathV4Prints( &tmpV4_21, "Vector4 normalized" );
+    rndflt1 = randfloat(); // fresh random b..e, normalized in place; only b and c feed the lerp/slerp below, d and e are not used again in this function
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathV4MakeFromElems( &b_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathV4MakeFromElems( &c_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathV4MakeFromElems( &d_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathV4MakeFromElems( &e_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    vmathV4Normalize( &b_Vector4, &b_Vector4 );
+    vmathV4Normalize( &c_Vector4, &c_Vector4 );
+    vmathV4Normalize( &d_Vector4, &d_Vector4 );
+    vmathV4Normalize( &e_Vector4, &e_Vector4 );
+    vmathV4Lerp( &a_Vector4, randfloat(), &b_Vector4, &c_Vector4 );
+    vmathV4Prints( &a_Vector4, "Vector4 lerp" );
+    vmathV4Slerp( &a_Vector4, randfloat(), &b_Vector4, &c_Vector4 );
+    vmathV4Prints( &a_Vector4, "Vector4 slerp" );
+}
+
+void  // Exercises the VmathPoint3 C API (vmathP3* functions) and reports every result.
+Point3_methods_test()  // Takes no arguments and returns nothing; results go through printf and the vmath*Prints helpers.
+{  // NOTE(review): randfloat() is defined elsewhere; if it is a stateful generator, the call order below fixes the printed values - confirm before reordering.
+    VmathVector3 a_Vector3, b_Vector3, c_Vector3, d_Vector3, e_Vector3;
+    VmathVector4 a_Vector4, b_Vector4, c_Vector4, d_Vector4, e_Vector4;
+    VmathPoint3 a_Point3, b_Point3, c_Point3, d_Point3, e_Point3;
+    VmathQuat a_Quat, b_Quat, c_Quat, d_Quat, e_Quat;
+    VmathVector4 tmpV4;
+    VmathVector3 tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3;
+    VmathPoint3 aos_Point3_0, aos_Point3_1, aos_Point3_2, aos_Point3_3, tmpP3_0, tmpP3_1, tmpP3_2, tmpP3_3;
+    VmathVector4 aos_Vector4_0, aos_Vector4_1, aos_Vector4_2;
+    VmathVector3 tmpV3_4;
+    VmathPoint3 tmpP3_4, tmpP3_5, tmpP3_6, tmpP3_7, tmpP3_8, tmpP3_9, tmpP3_10, tmpP3_11, tmpP3_12, tmpP3_13, tmpP3_14, tmpP3_15, tmpP3_16;
+    float rndflt1, rndflt2, rndflt3, rndflt4, pad;  // rndflt1..4 hold random draws; pad is the marker value built below
+    float xyz4[12] __attribute__ ((aligned(16)));  // 12 floats, 16-byte aligned; cast to vec_float4 * for the XYZ-array load/store below
+#ifndef _VECTORMATH_SCALAR_TEST
+    vec_float4 quad;  // only used by the non-scalar storeXYZ check near the middle of this function
+#endif
+    xyz4[0] = randfloat();  // fill all 12 slots with random values
+    xyz4[1] = randfloat();
+    xyz4[2] = randfloat();
+    xyz4[3] = randfloat();
+    xyz4[4] = randfloat();
+    xyz4[5] = randfloat();
+    xyz4[6] = randfloat();
+    xyz4[7] = randfloat();
+    xyz4[8] = randfloat();
+    xyz4[9] = randfloat();
+    xyz4[10] = randfloat();
+    xyz4[11] = randfloat();
+    // set a pad value to detect invalid use of padding.
+    // this will be nan for scalar/ppu implementations, max. float for spu
+    union { float f; unsigned int u; } tmp;  // reinterpret an integer bit pattern as a float
+    tmp.u = 0x7fffffff;  // every bit set except the sign bit
+    pad = tmp.f;
+    rndflt1 = randfloat();  // NOTE(review): draws go into locals first, presumably so their order does not depend on argument evaluation order
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathV3MakeFromElems( &a_Vector3, rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathV3MakeFromElems( &b_Vector3, rndflt1, rndflt2, rndflt3 );
+    vmathV4MakeFromV3Scalar( &tmpV4, &a_Vector3, pad );  // round-trip a_Vector3 through a Vector4 whose 4th element is pad
+    vmathV4GetXYZ( &a_Vector3, &tmpV4 );
+    vmathV4MakeFromV3Scalar( &tmpV4, &b_Vector3, pad );  // same round trip for b_Vector3
+    vmathV4GetXYZ( &b_Vector3, &tmpV4 );
+    vmathV3Prints( &a_Vector3, "set Vector3 with floats" );
+    vmathV3Prints( &b_Vector3, "set Vector3 with floats" );
+    vmathV3MakeFromScalar( &c_Vector3, 0.0f );
+    vmathV3MakeFromScalar( &d_Vector3, 0.0f );
+    vmathV3MakeFromScalar( &e_Vector3, 0.0f );
+    vmathV3Prints( &c_Vector3, "set Vector3 elements to zero" );
+    vmathV3Prints( &d_Vector3, "set Vector3 elements to zero" );
+    vmathV3Prints( &e_Vector3, "set Vector3 elements to zero" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathV4MakeFromElems( &a_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathV4MakeFromElems( &b_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    vmathV4Prints( &a_Vector4, "set Vector4 with floats" );
+    vmathV4Prints( &b_Vector4, "set Vector4 with floats" );
+    vmathV4MakeFromScalar( &c_Vector4, 0.0f );
+    vmathV4MakeFromScalar( &d_Vector4, 0.0f );
+    vmathV4MakeFromScalar( &e_Vector4, 0.0f );
+    vmathV4Prints( &c_Vector4, "set Vector4 elements to zero" );
+    vmathV4Prints( &d_Vector4, "set Vector4 elements to zero" );
+    vmathV4Prints( &e_Vector4, "set Vector4 elements to zero" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathP3MakeFromElems( &a_Point3, rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathP3MakeFromElems( &b_Point3, rndflt1, rndflt2, rndflt3 );
+    vmathV3MakeFromP3( &tmpV3_0, &a_Point3 );  // a_Point3 -> Vector3 -> Vector4 (4th element = pad) -> Vector3 -> a_Point3
+    vmathV4MakeFromV3Scalar( &tmpV4, &tmpV3_0, pad );
+    vmathV4GetXYZ( &tmpV3_1, &tmpV4 );
+    vmathP3MakeFromV3( &a_Point3, &tmpV3_1 );
+    vmathV3MakeFromP3( &tmpV3_2, &b_Point3 );  // same pad round trip for b_Point3
+    vmathV4MakeFromV3Scalar( &tmpV4, &tmpV3_2, pad );
+    vmathV4GetXYZ( &tmpV3_3, &tmpV4 );
+    vmathP3MakeFromV3( &b_Point3, &tmpV3_3 );
+    vmathP3Prints( &a_Point3, "set Point3 with floats" );
+    vmathP3Prints( &b_Point3, "set Point3 with floats" );
+    vmathP3MakeFromScalar( &c_Point3, 0.0f );
+    vmathP3MakeFromScalar( &d_Point3, 0.0f );
+    vmathP3MakeFromScalar( &e_Point3, 0.0f );
+    vmathP3Prints( &c_Point3, "set Point3 elements to zero" );
+    vmathP3Prints( &d_Point3, "set Point3 elements to zero" );
+    vmathP3Prints( &e_Point3, "set Point3 elements to zero" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathQMakeFromElems( &a_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathQMakeFromElems( &b_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    vmathQPrints( &a_Quat, "set Quat with floats" );
+    vmathQPrints( &b_Quat, "set Quat with floats" );
+    vmathQMakeFromScalar( &c_Quat, 0.0f );
+    vmathQMakeFromScalar( &d_Quat, 0.0f );
+    vmathQMakeFromScalar( &e_Quat, 0.0f );
+    vmathQPrints( &c_Quat, "set Quat elements to zero" );
+    vmathQPrints( &d_Quat, "set Quat elements to zero" );
+    vmathQPrints( &e_Quat, "set Quat elements to zero" );
+    vmathP3MakeFromV3( &a_Point3, &a_Vector3 );  // common setup ends above; Point3-specific checks start here
+    vmathP3Prints( &a_Point3, "construct Point3 with Vector3" );
+    vmathP3MakeFromScalar( &a_Point3, randfloat() );
+    vmathP3Prints( &a_Point3, "set Point3 with float" );
+    vmathP3MakeFromScalar( &a_Point3, randfloat() );
+    vmathP3Prints( &a_Point3, "set Point3 with float" );
+    vmathP3MakeFromElems( &aos_Point3_0, 0.0f, 1.0f, 2.0f );
+    vmathP3MakeFromElems( &aos_Point3_1, 3.0f, 4.0f, 5.0f );
+    vmathP3MakeFromElems( &aos_Point3_2, 6.0f, 7.0f, 8.0f );
+    vmathP3MakeFromElems( &aos_Point3_3, 9.0f, 10.0f, 11.0f );
+    vmathP3Prints( &aos_Point3_3, "aos type 0" );  // printed in reverse order: _3, _2, _1, _0
+    vmathP3Prints( &aos_Point3_2, "aos type 1" );
+    vmathP3Prints( &aos_Point3_1, "aos type 2" );
+    vmathP3Prints( &aos_Point3_0, "aos type 3" );
+    vmathP3Select( &tmpP3_0, &a_Point3, &b_Point3, 0 );  // selector values tried in turn: 0, 1, 0 again, (unsigned int)-1
+    vmathP3Prints( &tmpP3_0, "select 0" );
+    vmathP3Select( &tmpP3_1, &a_Point3, &b_Point3, 1 );
+    vmathP3Prints( &tmpP3_1, "select 1" );
+    vmathP3Select( &tmpP3_2, &a_Point3, &b_Point3, 0 );
+    vmathP3Prints( &tmpP3_2, "select 2" );
+    vmathP3Select( &tmpP3_3, &a_Point3, &b_Point3, (unsigned int)-1 );
+    vmathP3Prints( &tmpP3_3, "select 3" );
+    vmathP3MakeFromElems( &a_Point3, xyz4[0], xyz4[1], xyz4[2] );
+    vmathP3Prints( &a_Point3, "load XYZ array" );
+    xyz4[0] = -xyz4[0];  // negate every element of xyz4 in place
+    xyz4[1] = -xyz4[1];
+    xyz4[2] = -xyz4[2];
+    xyz4[3] = -xyz4[3];
+    xyz4[4] = -xyz4[4];
+    xyz4[5] = -xyz4[5];
+    xyz4[6] = -xyz4[6];
+    xyz4[7] = -xyz4[7];
+    xyz4[8] = -xyz4[8];
+    xyz4[9] = -xyz4[9];
+    xyz4[10] = -xyz4[10];
+    xyz4[11] = -xyz4[11];
+    vmathV4MakeFromElems( &aos_Vector4_0, xyz4[0], xyz4[1], xyz4[2], xyz4[3] );  // print the negated array as three Vector4s
+    vmathV4MakeFromElems( &aos_Vector4_1, xyz4[4], xyz4[5], xyz4[6], xyz4[7] );
+    vmathV4MakeFromElems( &aos_Vector4_2, xyz4[8], xyz4[9], xyz4[10], xyz4[11] );
+    vmathV4Prints( &aos_Vector4_0, "xyzx" );
+    vmathV4Prints( &aos_Vector4_1, "yzxy" );
+    vmathV4Prints( &aos_Vector4_2, "zxyz" );
+#ifndef _VECTORMATH_SCALAR_TEST
+    vmathP3LoadXYZArray( &aos_Point3_0, &aos_Point3_1, &aos_Point3_2, &aos_Point3_3, (const vec_float4 *)xyz4 );  // read four points out of xyz4
+    xyz4[0] = 0;  // clobber xyz4 with 0..11 before the loaded points are stored back into it
+    xyz4[1] = 1;
+    xyz4[2] = 2;
+    xyz4[3] = 3;
+    xyz4[4] = 4;
+    xyz4[5] = 5;
+    xyz4[6] = 6;
+    xyz4[7] = 7;
+    xyz4[8] = 8;
+    xyz4[9] = 9;
+    xyz4[10] = 10;
+    xyz4[11] = 11;
+    vmathP3StoreXYZArray( &aos_Point3_0, &aos_Point3_1, &aos_Point3_2, &aos_Point3_3, (vec_float4 *)xyz4 );  // write the four points back into xyz4
+#endif
+    vmathV4MakeFromElems( &aos_Vector4_0, xyz4[0], xyz4[1], xyz4[2], xyz4[3] );  // scalar build: xyz4 is unchanged since the negation, so these repeat the three prints above
+    vmathV4MakeFromElems( &aos_Vector4_1, xyz4[4], xyz4[5], xyz4[6], xyz4[7] );
+    vmathV4MakeFromElems( &aos_Vector4_2, xyz4[8], xyz4[9], xyz4[10], xyz4[11] );
+    vmathV4Prints( &aos_Vector4_0, "xyzx" );
+    vmathV4Prints( &aos_Vector4_1, "yzxy" );
+    vmathV4Prints( &aos_Vector4_2, "zxyz" );
+#ifdef _VECTORMATH_SCALAR_TEST
+    printf("storeXYZ:-1.0 -2.0 -3.0 0.4\n");  // scalar build prints this fixed text instead of calling vmathP3StoreXYZ
+#else
+    quad = (vec_float4){-1.0f, -2.0f, -3.0f, -4.0f};
+    a_Point3.vec128 = quad;  // set the point's raw vec128 member directly, 4th element included
+    quad = (vec_float4){0.1f, 0.2f, 0.3f, 0.4f};
+    vmathP3StoreXYZ( &a_Point3, &quad );  // NOTE(review): the fixed scalar line suggests x,y,z are overwritten and the 4th float (0.4) is kept - confirm
+    printf("storeXYZ:%f %f %f %f\n", ((float *)&quad)[0], ((float *)&quad)[1], ((float *)&quad)[2], ((float *)&quad)[3]);
+#endif
+    vmathP3Copy( &a_Point3, &b_Point3 );
+    vmathP3Prints( &a_Point3, "assign to Point3 from Point3" );
+    vmathP3MakeFromScalar( &a_Point3, 0.0f );
+    vmathP3Prints( &a_Point3, "set Point3 elements to zero" );
+    vmathP3SetElem( &a_Point3, 0, randfloat() );
+    vmathP3Prints( &a_Point3, "Point3::set( 0, float )" );
+    vmathP3SetElem( &a_Point3, 0, randfloat() );  // element 0: assign, then read-modify-write with *, /, +, -
+    vmathP3SetElem( &a_Point3, 0, ( vmathP3GetElem( &a_Point3, 0 ) * randfloat() ) );
+    vmathP3SetElem( &a_Point3, 0, ( vmathP3GetElem( &a_Point3, 0 ) / randfloat() ) );
+    vmathP3SetElem( &a_Point3, 0, ( vmathP3GetElem( &a_Point3, 0 ) + randfloat() ) );
+    vmathP3SetElem( &a_Point3, 0, ( vmathP3GetElem( &a_Point3, 0 ) - randfloat() ) );
+    vmathP3Prints( &a_Point3, "Point3::operator [](0)" );  // NOTE(review): the "Point3::" / "operator []" label text presumably mirrors the C++ variant of this test
+    vmathP3SetX( &a_Point3, randfloat() );
+    vmathP3Prints( &a_Point3, "Point3::setX()" );
+    vmathP3SetElem( &a_Point3, 1, randfloat() );
+    vmathP3Prints( &a_Point3, "Point3::set( 1, float )" );
+    vmathP3SetElem( &a_Point3, 1, randfloat() );  // element 1: same assign / *, /, +, - sequence
+    vmathP3SetElem( &a_Point3, 1, ( vmathP3GetElem( &a_Point3, 1 ) * randfloat() ) );
+    vmathP3SetElem( &a_Point3, 1, ( vmathP3GetElem( &a_Point3, 1 ) / randfloat() ) );
+    vmathP3SetElem( &a_Point3, 1, ( vmathP3GetElem( &a_Point3, 1 ) + randfloat() ) );
+    vmathP3SetElem( &a_Point3, 1, ( vmathP3GetElem( &a_Point3, 1 ) - randfloat() ) );
+    vmathP3Prints( &a_Point3, "Point3::operator [](1)" );
+    vmathP3SetY( &a_Point3, randfloat() );
+    vmathP3Prints( &a_Point3, "Point3::setY()" );
+    vmathP3SetElem( &a_Point3, 2, randfloat() );
+    vmathP3Prints( &a_Point3, "Point3::set( 2, float )" );
+    vmathP3SetElem( &a_Point3, 2, randfloat() );  // element 2: same assign / *, /, +, - sequence
+    vmathP3SetElem( &a_Point3, 2, ( vmathP3GetElem( &a_Point3, 2 ) * randfloat() ) );
+    vmathP3SetElem( &a_Point3, 2, ( vmathP3GetElem( &a_Point3, 2 ) / randfloat() ) );
+    vmathP3SetElem( &a_Point3, 2, ( vmathP3GetElem( &a_Point3, 2 ) + randfloat() ) );
+    vmathP3SetElem( &a_Point3, 2, ( vmathP3GetElem( &a_Point3, 2 ) - randfloat() ) );
+    vmathP3Prints( &a_Point3, "Point3::operator [](2)" );
+    vmathP3SetZ( &a_Point3, randfloat() );
+    vmathP3Prints( &a_Point3, "Point3::setZ()" );
+    printf("Point3::get( 0 ): %f\n", getfloat(vmathP3GetElem( &a_Point3, 0 )) );  // each element is read by index (printed under two labels) and by its named getter
+    printf("Point3::operator []( 0 ): %f\n", getfloat(vmathP3GetElem( &a_Point3, 0 )) );
+    printf("Point3::getX(): %f\n", getfloat(vmathP3GetX( &a_Point3 )) );
+    printf("Point3::get( 1 ): %f\n", getfloat(vmathP3GetElem( &a_Point3, 1 )) );
+    printf("Point3::operator []( 1 ): %f\n", getfloat(vmathP3GetElem( &a_Point3, 1 )) );
+    printf("Point3::getY(): %f\n", getfloat(vmathP3GetY( &a_Point3 )) );
+    printf("Point3::get( 2 ): %f\n", getfloat(vmathP3GetElem( &a_Point3, 2 )) );
+    printf("Point3::operator []( 2 ): %f\n", getfloat(vmathP3GetElem( &a_Point3, 2 )) );
+    printf("Point3::getZ(): %f\n", getfloat(vmathP3GetZ( &a_Point3 )) );
+    vmathP3Sub( &tmpV3_4, &a_Point3, &b_Point3 );  // the result of Point3 - Point3 is stored in a Vector3
+    vmathV3Prints( &tmpV3_4, "Point3 - Point3" );
+    vmathP3AddV3( &tmpP3_4, &a_Point3, &b_Vector3 );
+    vmathP3Prints( &tmpP3_4, "Point3 + Vector3" );
+    vmathP3SubV3( &tmpP3_5, &a_Point3, &b_Vector3 );
+    vmathP3Prints( &tmpP3_5, "Point3 - Vector3" );
+    vmathP3MulPerElem( &tmpP3_6, &a_Point3, &b_Point3 );
+    vmathP3Prints( &tmpP3_6, "mulPerElem( Point3, Point3 )" );
+    vmathP3DivPerElem( &tmpP3_7, &a_Point3, &b_Point3 );
+    vmathP3Prints( &tmpP3_7, "divPerElem( Point3, Point3 )" );
+    vmathP3RecipPerElem( &tmpP3_8, &a_Point3 );
+    vmathP3Prints( &tmpP3_8, "Point3 recip" );
+    vmathP3AbsPerElem( &tmpP3_9, &a_Point3 );  // abs is taken first; presumably so sqrt never sees a negative element
+    vmathP3SqrtPerElem( &tmpP3_10, &tmpP3_9 );
+    vmathP3Prints( &tmpP3_10, "Point3 sqrt" );
+    vmathP3AbsPerElem( &tmpP3_11, &a_Point3 );  // same abs-first step before rsqrt
+    vmathP3RsqrtPerElem( &tmpP3_12, &tmpP3_11 );
+    vmathP3Prints( &tmpP3_12, "Point3 rsqrt" );
+    vmathP3AbsPerElem( &tmpP3_13, &a_Point3 );
+    vmathP3Prints( &tmpP3_13, "Point3 abs" );
+    vmathP3CopySignPerElem( &tmpP3_14, &a_Point3, &b_Point3 );
+    vmathP3Prints( &tmpP3_14, "Point3 copySign" );
+    vmathP3MaxPerElem( &tmpP3_15, &a_Point3, &b_Point3 );
+    vmathP3Prints( &tmpP3_15, "Point3 maximum Point3" );
+    vmathP3MinPerElem( &tmpP3_16, &a_Point3, &b_Point3 );
+    vmathP3Prints( &tmpP3_16, "Point3 minimum Point3" );
+    printf("Point3 maximum of elements: %f\n", getfloat(vmathP3MaxElem( &a_Point3 )));  // scalar-valued queries are printed directly with printf
+    printf("Point3 minimum of elements: %f\n", getfloat(vmathP3MinElem( &a_Point3 )));
+    printf("Point3 sum of elements: %f\n", getfloat(vmathP3Sum( &a_Point3 )));
+    printf("Point projection: %f\n", getfloat(vmathP3Projection( &a_Point3, &b_Vector3 )));
+    printf("Point distSqrFromOrigin: %f\n", getfloat(vmathP3DistSqrFromOrigin( &a_Point3 )) );
+    printf("Point distFromOrigin: %f\n", getfloat(vmathP3DistFromOrigin( &a_Point3 )) );
+    printf("Point distSqr: %f\n", getfloat(vmathP3DistSqr( &a_Point3, &b_Point3 )) );
+    printf("Point dist: %f\n", getfloat(vmathP3Dist( &a_Point3, &b_Point3 )) );
+    rndflt1 = randfloat();  // fresh random values for b..e; only b and c are read by the lerp at the end
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathP3MakeFromElems( &b_Point3, rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathP3MakeFromElems( &c_Point3, rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathP3MakeFromElems( &d_Point3, rndflt1, rndflt2, rndflt3 );  // d_Point3 is not read again in this function
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathP3MakeFromElems( &e_Point3, rndflt1, rndflt2, rndflt3 );  // e_Point3 is not read again in this function
+    vmathP3Lerp( &a_Point3, randfloat(), &b_Point3, &c_Point3 );  // random interpolation parameter between b and c
+    vmathP3Prints( &a_Point3, "Point3 lerp" );
+}
+
+void  // Exercises the VmathQuat C API (vmathQ* functions) and reports every result.
+Quat_methods_test()  // Takes no arguments and returns nothing; results go through printf and the vmath*Prints helpers.
+{  // NOTE(review): randfloat() is defined elsewhere; if it is a stateful generator, the call order below fixes the printed values - confirm before reordering.
+    VmathVector3 a_Vector3, b_Vector3, c_Vector3, d_Vector3, e_Vector3;
+    VmathVector4 a_Vector4, b_Vector4, c_Vector4, d_Vector4, e_Vector4;
+    VmathPoint3 a_Point3, b_Point3, c_Point3, d_Point3, e_Point3;
+    VmathQuat a_Quat, b_Quat, c_Quat, d_Quat, e_Quat;
+    VmathVector4 tmpV4;
+    VmathVector3 tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3;
+    VmathQuat aos_Quat_0, aos_Quat_1, aos_Quat_2, aos_Quat_3, tmpQ_0, tmpQ_1, tmpQ_2, tmpQ_3;
+    VmathVector3 tmpV3_4;
+    VmathQuat tmpQ_4, tmpQ_5, tmpQ_6, tmpQ_7, tmpQ_8, tmpQ_9, tmpQ_10, tmpQ_11;
+    VmathVector3 tmpV3_5;
+    VmathQuat tmpQ_12;
+    float rndflt1, rndflt2, rndflt3, rndflt4, pad;  // rndflt1..4 hold random draws; pad is the marker value built below
+    // set a pad value to detect invalid use of padding.
+    // this will be nan for scalar/ppu implementations, max. float for spu
+    union { float f; unsigned int u; } tmp;  // reinterpret an integer bit pattern as a float
+    tmp.u = 0x7fffffff;  // every bit set except the sign bit
+    pad = tmp.f;
+    rndflt1 = randfloat();  // NOTE(review): draws go into locals first, presumably so their order does not depend on argument evaluation order
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathV3MakeFromElems( &a_Vector3, rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathV3MakeFromElems( &b_Vector3, rndflt1, rndflt2, rndflt3 );
+    vmathV4MakeFromV3Scalar( &tmpV4, &a_Vector3, pad );  // round-trip a_Vector3 through a Vector4 whose 4th element is pad
+    vmathV4GetXYZ( &a_Vector3, &tmpV4 );
+    vmathV4MakeFromV3Scalar( &tmpV4, &b_Vector3, pad );  // same round trip for b_Vector3
+    vmathV4GetXYZ( &b_Vector3, &tmpV4 );
+    vmathV3Prints( &a_Vector3, "set Vector3 with floats" );
+    vmathV3Prints( &b_Vector3, "set Vector3 with floats" );
+    vmathV3MakeFromScalar( &c_Vector3, 0.0f );
+    vmathV3MakeFromScalar( &d_Vector3, 0.0f );
+    vmathV3MakeFromScalar( &e_Vector3, 0.0f );
+    vmathV3Prints( &c_Vector3, "set Vector3 elements to zero" );
+    vmathV3Prints( &d_Vector3, "set Vector3 elements to zero" );
+    vmathV3Prints( &e_Vector3, "set Vector3 elements to zero" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathV4MakeFromElems( &a_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathV4MakeFromElems( &b_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    vmathV4Prints( &a_Vector4, "set Vector4 with floats" );
+    vmathV4Prints( &b_Vector4, "set Vector4 with floats" );
+    vmathV4MakeFromScalar( &c_Vector4, 0.0f );
+    vmathV4MakeFromScalar( &d_Vector4, 0.0f );
+    vmathV4MakeFromScalar( &e_Vector4, 0.0f );
+    vmathV4Prints( &c_Vector4, "set Vector4 elements to zero" );
+    vmathV4Prints( &d_Vector4, "set Vector4 elements to zero" );
+    vmathV4Prints( &e_Vector4, "set Vector4 elements to zero" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathP3MakeFromElems( &a_Point3, rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathP3MakeFromElems( &b_Point3, rndflt1, rndflt2, rndflt3 );
+    vmathV3MakeFromP3( &tmpV3_0, &a_Point3 );  // a_Point3 -> Vector3 -> Vector4 (4th element = pad) -> Vector3 -> a_Point3
+    vmathV4MakeFromV3Scalar( &tmpV4, &tmpV3_0, pad );
+    vmathV4GetXYZ( &tmpV3_1, &tmpV4 );
+    vmathP3MakeFromV3( &a_Point3, &tmpV3_1 );
+    vmathV3MakeFromP3( &tmpV3_2, &b_Point3 );  // same pad round trip for b_Point3
+    vmathV4MakeFromV3Scalar( &tmpV4, &tmpV3_2, pad );
+    vmathV4GetXYZ( &tmpV3_3, &tmpV4 );
+    vmathP3MakeFromV3( &b_Point3, &tmpV3_3 );
+    vmathP3Prints( &a_Point3, "set Point3 with floats" );  // the Point3 locals are not referenced again after this setup section
+    vmathP3Prints( &b_Point3, "set Point3 with floats" );
+    vmathP3MakeFromScalar( &c_Point3, 0.0f );
+    vmathP3MakeFromScalar( &d_Point3, 0.0f );
+    vmathP3MakeFromScalar( &e_Point3, 0.0f );
+    vmathP3Prints( &c_Point3, "set Point3 elements to zero" );
+    vmathP3Prints( &d_Point3, "set Point3 elements to zero" );
+    vmathP3Prints( &e_Point3, "set Point3 elements to zero" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathQMakeFromElems( &a_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathQMakeFromElems( &b_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    vmathQPrints( &a_Quat, "set Quat with floats" );
+    vmathQPrints( &b_Quat, "set Quat with floats" );
+    vmathQMakeFromScalar( &c_Quat, 0.0f );
+    vmathQMakeFromScalar( &d_Quat, 0.0f );
+    vmathQMakeFromScalar( &e_Quat, 0.0f );
+    vmathQPrints( &c_Quat, "set Quat elements to zero" );
+    vmathQPrints( &d_Quat, "set Quat elements to zero" );
+    vmathQPrints( &e_Quat, "set Quat elements to zero" );
+    vmathQMakeFromV3Scalar( &a_Quat, &a_Vector3, randfloat() );  // common setup ends above; Quat-specific checks start here
+    vmathQPrints( &a_Quat, "set Quat with Vector3, float" );
+    vmathQMakeFromV4( &a_Quat, &a_Vector4 );
+    vmathQPrints( &a_Quat, "construct Quat with Vector4" );
+    vmathQMakeFromScalar( &a_Quat, randfloat() );
+    vmathQPrints( &a_Quat, "set Quat with float" );
+    vmathQMakeFromScalar( &a_Quat, randfloat() );
+    vmathQPrints( &a_Quat, "set Quat with float" );
+    vmathQMakeFromElems( &aos_Quat_0, 0.0f, 1.0f, 2.0f, 3.0f );
+    vmathQMakeFromElems( &aos_Quat_1, 4.0f, 5.0f, 6.0f, 7.0f );
+    vmathQMakeFromElems( &aos_Quat_2, 8.0f, 9.0f, 10.0f, 11.0f );
+    vmathQMakeFromElems( &aos_Quat_3, 12.0f, 13.0f, 14.0f, 15.0f );
+    vmathQPrints( &aos_Quat_3, "aos type 0" );  // printed in reverse order: _3, _2, _1, _0
+    vmathQPrints( &aos_Quat_2, "aos type 1" );
+    vmathQPrints( &aos_Quat_1, "aos type 2" );
+    vmathQPrints( &aos_Quat_0, "aos type 3" );
+    vmathQSelect( &tmpQ_0, &a_Quat, &b_Quat, 0 );  // selector values tried in turn: 0, 1, 0 again, (unsigned int)-1
+    vmathQPrints( &tmpQ_0, "select 0" );
+    vmathQSelect( &tmpQ_1, &a_Quat, &b_Quat, 1 );
+    vmathQPrints( &tmpQ_1, "select 1" );
+    vmathQSelect( &tmpQ_2, &a_Quat, &b_Quat, 0 );
+    vmathQPrints( &tmpQ_2, "select 2" );
+    vmathQSelect( &tmpQ_3, &a_Quat, &b_Quat, (unsigned int)-1 );
+    vmathQPrints( &tmpQ_3, "select 3" );
+    vmathQCopy( &a_Quat, &b_Quat );
+    vmathQPrints( &a_Quat, "assign to Quat from Quat" );
+    vmathQSetXYZ( &a_Quat, &a_Vector3 );
+    vmathQPrints( &a_Quat, "set Quat xyz" );
+    vmathQGetXYZ( &tmpV3_4, &a_Quat );
+    vmathV3Prints( &tmpV3_4, "get Quat xyz" );
+    vmathQMakeFromScalar( &a_Quat, 0.0f );
+    vmathQPrints( &a_Quat, "set Quat elements to zero" );
+    vmathQSetElem( &a_Quat, 0, randfloat() );
+    vmathQPrints( &a_Quat, "Quat::set( 0, float )" );
+    vmathQSetElem( &a_Quat, 0, randfloat() );  // element 0: assign, then read-modify-write with *, /, +, -
+    vmathQSetElem( &a_Quat, 0, ( vmathQGetElem( &a_Quat, 0 ) * randfloat() ) );
+    vmathQSetElem( &a_Quat, 0, ( vmathQGetElem( &a_Quat, 0 ) / randfloat() ) );
+    vmathQSetElem( &a_Quat, 0, ( vmathQGetElem( &a_Quat, 0 ) + randfloat() ) );
+    vmathQSetElem( &a_Quat, 0, ( vmathQGetElem( &a_Quat, 0 ) - randfloat() ) );
+    vmathQPrints( &a_Quat, "Quat::operator [](0)" );  // NOTE(review): the "Quat::" / "operator []" label text presumably mirrors the C++ variant of this test
+    vmathQSetX( &a_Quat, randfloat() );
+    vmathQPrints( &a_Quat, "Quat::setX()" );
+    vmathQSetElem( &a_Quat, 1, randfloat() );
+    vmathQPrints( &a_Quat, "Quat::set( 1, float )" );
+    vmathQSetElem( &a_Quat, 1, randfloat() );  // element 1: same assign / *, /, +, - sequence
+    vmathQSetElem( &a_Quat, 1, ( vmathQGetElem( &a_Quat, 1 ) * randfloat() ) );
+    vmathQSetElem( &a_Quat, 1, ( vmathQGetElem( &a_Quat, 1 ) / randfloat() ) );
+    vmathQSetElem( &a_Quat, 1, ( vmathQGetElem( &a_Quat, 1 ) + randfloat() ) );
+    vmathQSetElem( &a_Quat, 1, ( vmathQGetElem( &a_Quat, 1 ) - randfloat() ) );
+    vmathQPrints( &a_Quat, "Quat::operator [](1)" );
+    vmathQSetY( &a_Quat, randfloat() );
+    vmathQPrints( &a_Quat, "Quat::setY()" );
+    vmathQSetElem( &a_Quat, 2, randfloat() );
+    vmathQPrints( &a_Quat, "Quat::set( 2, float )" );
+    vmathQSetElem( &a_Quat, 2, randfloat() );  // element 2: same assign / *, /, +, - sequence
+    vmathQSetElem( &a_Quat, 2, ( vmathQGetElem( &a_Quat, 2 ) * randfloat() ) );
+    vmathQSetElem( &a_Quat, 2, ( vmathQGetElem( &a_Quat, 2 ) / randfloat() ) );
+    vmathQSetElem( &a_Quat, 2, ( vmathQGetElem( &a_Quat, 2 ) + randfloat() ) );
+    vmathQSetElem( &a_Quat, 2, ( vmathQGetElem( &a_Quat, 2 ) - randfloat() ) );
+    vmathQPrints( &a_Quat, "Quat::operator [](2)" );
+    vmathQSetZ( &a_Quat, randfloat() );
+    vmathQPrints( &a_Quat, "Quat::setZ()" );
+    vmathQSetElem( &a_Quat, 3, randfloat() );
+    vmathQPrints( &a_Quat, "Quat::set( 3, float )" );
+    vmathQSetElem( &a_Quat, 3, randfloat() );  // element 3: same assign / *, /, +, - sequence
+    vmathQSetElem( &a_Quat, 3, ( vmathQGetElem( &a_Quat, 3 ) * randfloat() ) );
+    vmathQSetElem( &a_Quat, 3, ( vmathQGetElem( &a_Quat, 3 ) / randfloat() ) );
+    vmathQSetElem( &a_Quat, 3, ( vmathQGetElem( &a_Quat, 3 ) + randfloat() ) );
+    vmathQSetElem( &a_Quat, 3, ( vmathQGetElem( &a_Quat, 3 ) - randfloat() ) );
+    vmathQPrints( &a_Quat, "Quat::operator [](3)" );
+    vmathQSetW( &a_Quat, randfloat() );
+    vmathQPrints( &a_Quat, "Quat::setW()" );
+    printf("Quat::get( 0 ): %f\n", getfloat(vmathQGetElem( &a_Quat, 0 )) );  // each element is read by index (printed under two labels) and by its named getter
+    printf("Quat::operator []( 0 ): %f\n", getfloat(vmathQGetElem( &a_Quat, 0 )) );
+    printf("Quat::getX(): %f\n", getfloat(vmathQGetX( &a_Quat )) );
+    printf("Quat::get( 1 ): %f\n", getfloat(vmathQGetElem( &a_Quat, 1 )) );
+    printf("Quat::operator []( 1 ): %f\n", getfloat(vmathQGetElem( &a_Quat, 1 )) );
+    printf("Quat::getY(): %f\n", getfloat(vmathQGetY( &a_Quat )) );
+    printf("Quat::get( 2 ): %f\n", getfloat(vmathQGetElem( &a_Quat, 2 )) );
+    printf("Quat::operator []( 2 ): %f\n", getfloat(vmathQGetElem( &a_Quat, 2 )) );
+    printf("Quat::getZ(): %f\n", getfloat(vmathQGetZ( &a_Quat )) );
+    printf("Quat::get( 3 ): %f\n", getfloat(vmathQGetElem( &a_Quat, 3 )) );
+    printf("Quat::operator []( 3 ): %f\n", getfloat(vmathQGetElem( &a_Quat, 3 )) );
+    printf("Quat::getW(): %f\n", getfloat(vmathQGetW( &a_Quat )) );
+    vmathQAdd( &tmpQ_4, &a_Quat, &b_Quat );  // arithmetic: each result goes to its own temporary, then is printed
+    vmathQPrints( &tmpQ_4, "Quat + Quat" );
+    vmathQSub( &tmpQ_5, &a_Quat, &b_Quat );
+    vmathQPrints( &tmpQ_5, "Quat - Quat" );
+    vmathQMul( &tmpQ_6, &a_Quat, &b_Quat );
+    vmathQPrints( &tmpQ_6, "Quat * Quat" );
+    vmathQScalarMul( &tmpQ_7, &a_Quat, randfloat() );
+    vmathQPrints( &tmpQ_7, "Quat * float" );
+    vmathQScalarDiv( &tmpQ_8, &a_Quat, randfloat() );
+    vmathQPrints( &tmpQ_8, "Quat / float" );
+    vmathQScalarMul( &tmpQ_9, &a_Quat, randfloat() );  // same function as "Quat * float" above, printed under the "float * Quat" label
+    vmathQPrints( &tmpQ_9, "float * Quat" );
+    vmathQNeg( &tmpQ_10, &a_Quat );
+    vmathQPrints( &tmpQ_10, "Quat negate" );
+    printf("Quat dot Quat: %f\n", getfloat(vmathQDot( &a_Quat, &b_Quat )));
+    printf("Quat lengthSqr: %f\n", getfloat(vmathQNorm( &a_Quat )));  // NOTE(review): label says lengthSqr but the call is vmathQNorm; presumably it returns the squared length - confirm
+    printf("Quat length: %f\n", getfloat(vmathQLength( &a_Quat )));
+    vmathQNormalize( &tmpQ_11, &a_Quat );
+    vmathQPrints( &tmpQ_11, "Quat normalized" );
+    vmathQMakeIdentity( &a_Quat );  // from here a_Quat is overwritten by each constructor in turn
+    vmathQPrints( &a_Quat, "set to identity" );
+    vmathQMakeRotationArc( &a_Quat, &a_Vector3, &b_Vector3 );
+    vmathQPrints( &a_Quat, "Quat rotation between vectors" );
+    vmathQMakeRotationAxis( &a_Quat, randfloat(), &a_Vector3 );
+    vmathQPrints( &a_Quat, "Quat rotation axis angle" );
+    vmathQMakeRotationX( &a_Quat, randfloat() );
+    vmathQPrints( &a_Quat, "Quat rotationX" );
+    vmathQMakeRotationY( &a_Quat, randfloat() );
+    vmathQPrints( &a_Quat, "Quat rotationY" );
+    vmathQMakeRotationZ( &a_Quat, randfloat() );
+    vmathQPrints( &a_Quat, "Quat rotationZ" );
+    vmathQRotate( &tmpV3_5, &a_Quat, &a_Vector3 );  // a_Quat still holds the rotationZ value made just above
+    vmathV3Prints( &tmpV3_5, "Quat rotate Vector3" );
+    vmathQConj( &tmpQ_12, &a_Quat );
+    vmathQPrints( &tmpQ_12, "Quat conj" );
+    rndflt1 = randfloat();  // fresh random values for b..e, used by the interpolation checks at the end
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathQMakeFromElems( &b_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathQMakeFromElems( &c_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathQMakeFromElems( &d_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathQMakeFromElems( &e_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    vmathQNormalize( &b_Quat, &b_Quat );  // normalise b..e in place (result and input are the same object)
+    vmathQNormalize( &c_Quat, &c_Quat );
+    vmathQNormalize( &d_Quat, &d_Quat );
+    vmathQNormalize( &e_Quat, &e_Quat );
+    vmathQLerp( &a_Quat, randfloat(), &b_Quat, &c_Quat );  // each interpolation draws its own random parameter
+    vmathQPrints( &a_Quat, "Quat lerp" );
+    vmathQSlerp( &a_Quat, randfloat(), &b_Quat, &c_Quat );
+    vmathQPrints( &a_Quat, "Quat slerp" );
+    vmathQSquad( &a_Quat, randfloat(), &b_Quat, &c_Quat, &d_Quat, &e_Quat );  // the only call that reads d_Quat and e_Quat
+    vmathQPrints( &a_Quat, "Quat squad" );
+}
+
+int main()  // Test driver: prints a begin marker, runs the four method tests twice, prints an end marker, returns 0.
+{
+    printf("\n __begin__ \n");
+    iteration = 0;  // pass counter; declared outside this function
+    while ( iteration < 2 ) {  // same test order on every pass
+        Vector3_methods_test(); Vector4_methods_test();
+        Point3_methods_test(); Quat_methods_test();
+        ++iteration;
+    }
+    printf("\n __end__ \n");
+    return 0;
+}
diff --git a/Extras/vectormathlibrary/tests/test1_aos_cpp.cpp b/Extras/vectormathlibrary/tests/test1_aos_cpp.cpp
new file mode 100644
index 000000000..2a960787d
--- /dev/null
+++ b/Extras/vectormathlibrary/tests/test1_aos_cpp.cpp
@@ -0,0 +1,1102 @@
+/*
+  Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+  All rights reserved.
+
+  Redistribution and use in source and binary forms,
+  with or without modification, are permitted provided that the
+  following conditions are met:
+   * Redistributions of source code must retain the above copyright
+     notice, this list of conditions and the following disclaimer.
+   * Redistributions in binary form must reproduce the above copyright
+     notice, this list of conditions and the following disclaimer in the
+     documentation and/or other materials provided with the distribution.
+   * Neither the name of the Sony Computer Entertainment Inc nor the names
+     of its contributors may be used to endorse or promote products derived
+     from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+  POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#define _VECTORMATH_AOS_TEST
+
+#include "vectormath_aos.h"
+#include "test.h"
+
+int iteration = 0;
+
+using namespace Vectormath;
+using namespace Vectormath::Aos;
+
+void
+Vector3_methods_test()  // Exercises Vector3 constructors, element accessors, operators and interpolation, printing every result.
+{
+    Vector3 a_Vector3, b_Vector3, c_Vector3, d_Vector3, e_Vector3;
+    Vector4 a_Vector4, b_Vector4, c_Vector4, d_Vector4, e_Vector4;
+    Point3 a_Point3, b_Point3, c_Point3, d_Point3, e_Point3;
+    Quat a_Quat, b_Quat, c_Quat, d_Quat, e_Quat;
+    Vector4 tmpV4;
+    Vector3 aos_Vector3_0, aos_Vector3_1, aos_Vector3_2, aos_Vector3_3;
+    Vector4 aos_Vector4_0, aos_Vector4_1, aos_Vector4_2;
+    float rndflt1, rndflt2, rndflt3, rndflt4, pad;
+    float xyz4[12] __attribute__ ((aligned(16)));  // 12 floats, 16-byte aligned; cast to vec_float4* below in the non-scalar build
+#ifndef _VECTORMATH_SCALAR_TEST
+    vec_float4 quad;
+#endif
+    xyz4[0] = randfloat();  // fill all 12 array slots with randfloat() values, in index order
+    xyz4[1] = randfloat();
+    xyz4[2] = randfloat();
+    xyz4[3] = randfloat();
+    xyz4[4] = randfloat();
+    xyz4[5] = randfloat();
+    xyz4[6] = randfloat();
+    xyz4[7] = randfloat();
+    xyz4[8] = randfloat();
+    xyz4[9] = randfloat();
+    xyz4[10] = randfloat();
+    xyz4[11] = randfloat();
+    // set a pad value to detect invalid use of padding.
+    // this will be nan for scalar/ppu implementations, max. float for spu
+    union { float f; unsigned int u; } tmp;
+    tmp.u = 0x7fffffff;  // bit pattern written as an unsigned int, then read back as a float through the union
+    pad = tmp.f;
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    a_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    b_Vector3 = Vector3( scalar_float(rndflt1), scalar_float(rndflt2), scalar_float(rndflt3) );  // same constructor shape, arguments wrapped in scalar_float()
+    tmpV4 = Vector4( a_Vector3, pad );  // round-trip through a Vector4 whose w is pad (presumably to plant pad in the unused lane -- confirm)
+    a_Vector3 = tmpV4.getXYZ( );
+    tmpV4 = Vector4( b_Vector3, pad );
+    b_Vector3 = tmpV4.getXYZ( );
+    print( a_Vector3, "set Vector3 with floats" );
+    print( b_Vector3, "set Vector3 with floats" );
+    c_Vector3 = Vector3( 0.0f );
+    d_Vector3 = Vector3( 0.0f );
+    e_Vector3 = Vector3( 0.0f );
+    print( c_Vector3, "set Vector3 elements to zero" );
+    print( d_Vector3, "set Vector3 elements to zero" );
+    print( e_Vector3, "set Vector3 elements to zero" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    a_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    b_Vector4 = Vector4( scalar_float(rndflt1), scalar_float(rndflt2), scalar_float(rndflt3), scalar_float(rndflt4) );
+    print( a_Vector4, "set Vector4 with floats" );
+    print( b_Vector4, "set Vector4 with floats" );
+    c_Vector4 = Vector4( 0.0f );
+    d_Vector4 = Vector4( 0.0f );
+    e_Vector4 = Vector4( 0.0f );
+    print( c_Vector4, "set Vector4 elements to zero" );
+    print( d_Vector4, "set Vector4 elements to zero" );
+    print( e_Vector4, "set Vector4 elements to zero" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    a_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    b_Point3 = Point3( scalar_float(rndflt1), scalar_float(rndflt2), scalar_float(rndflt3) );
+    tmpV4 = Vector4( Vector3( a_Point3 ), pad );  // same pad round-trip as for the Vector3 values above
+    a_Point3 = Point3( tmpV4.getXYZ( ) );
+    tmpV4 = Vector4( Vector3( b_Point3 ), pad );
+    b_Point3 = Point3( tmpV4.getXYZ( ) );
+    print( a_Point3, "set Point3 with floats" );
+    print( b_Point3, "set Point3 with floats" );
+    c_Point3 = Point3( 0.0f );
+    d_Point3 = Point3( 0.0f );
+    e_Point3 = Point3( 0.0f );
+    print( c_Point3, "set Point3 elements to zero" );
+    print( d_Point3, "set Point3 elements to zero" );
+    print( e_Point3, "set Point3 elements to zero" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    a_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    b_Quat = Quat( scalar_float(rndflt1), scalar_float(rndflt2), scalar_float(rndflt3), scalar_float(rndflt4) );
+    print( a_Quat, "set Quat with floats" );
+    print( b_Quat, "set Quat with floats" );
+    c_Quat = Quat( 0.0f );
+    d_Quat = Quat( 0.0f );
+    e_Quat = Quat( 0.0f );
+    print( c_Quat, "set Quat elements to zero" );
+    print( d_Quat, "set Quat elements to zero" );
+    print( e_Quat, "set Quat elements to zero" );
+    a_Vector3 = Vector3( a_Point3 );  // from here on the Vector3-specific checks begin
+    print( a_Vector3, "construct Vector3 with Point3" );
+    a_Vector3 = Vector3( randfloat() );
+    print( a_Vector3, "set Vector3 with float" );
+    a_Vector3 = Vector3( scalar_float(randfloat()) );
+    print( a_Vector3, "set Vector3 with float" );
+    aos_Vector3_0 = Vector3( 0.0f, 1.0f, 2.0f );
+    aos_Vector3_1 = Vector3( 3.0f, 4.0f, 5.0f );
+    aos_Vector3_2 = Vector3( 6.0f, 7.0f, 8.0f );
+    aos_Vector3_3 = Vector3( 9.0f, 10.0f, 11.0f );
+    print( aos_Vector3_3, "aos type 0" );  // printed in reverse order: _3 under label 0 down to _0 under label 3
+    print( aos_Vector3_2, "aos type 1" );
+    print( aos_Vector3_1, "aos type 2" );
+    print( aos_Vector3_0, "aos type 3" );
+    print( select( a_Vector3, b_Vector3, 0 ), "select 0" );
+    print( select( a_Vector3, b_Vector3, 1 ), "select 1" );
+    print( select( a_Vector3, b_Vector3, 0 ), "select 2" );  // same selector value (0) as "select 0"
+    print( select( a_Vector3, b_Vector3, (unsigned int)-1 ), "select 3" );  // selector with all bits set
+    a_Vector3 = Vector3( xyz4[0], xyz4[1], xyz4[2] );
+    print( a_Vector3, "load XYZ array" );
+    xyz4[0] = -xyz4[0];  // negate all 12 array values before they are printed as three Vector4s
+    xyz4[1] = -xyz4[1];
+    xyz4[2] = -xyz4[2];
+    xyz4[3] = -xyz4[3];
+    xyz4[4] = -xyz4[4];
+    xyz4[5] = -xyz4[5];
+    xyz4[6] = -xyz4[6];
+    xyz4[7] = -xyz4[7];
+    xyz4[8] = -xyz4[8];
+    xyz4[9] = -xyz4[9];
+    xyz4[10] = -xyz4[10];
+    xyz4[11] = -xyz4[11];
+    aos_Vector4_0 = Vector4( xyz4[0], xyz4[1], xyz4[2], xyz4[3] );
+    aos_Vector4_1 = Vector4( xyz4[4], xyz4[5], xyz4[6], xyz4[7] );
+    aos_Vector4_2 = Vector4( xyz4[8], xyz4[9], xyz4[10], xyz4[11] );
+    print( aos_Vector4_0, "xyzx" );
+    print( aos_Vector4_1, "yzxy" );
+    print( aos_Vector4_2, "zxyz" );
+#ifndef _VECTORMATH_SCALAR_TEST
+    loadXYZArray( aos_Vector3_0, aos_Vector3_1, aos_Vector3_2, aos_Vector3_3, (const vec_float4 *)xyz4 );  // non-scalar build only: array viewed as vec_float4[]
+    xyz4[0] = 0;  // overwrite the array with 0..11 (presumably so the store below is observable -- confirm)
+    xyz4[1] = 1;
+    xyz4[2] = 2;
+    xyz4[3] = 3;
+    xyz4[4] = 4;
+    xyz4[5] = 5;
+    xyz4[6] = 6;
+    xyz4[7] = 7;
+    xyz4[8] = 8;
+    xyz4[9] = 9;
+    xyz4[10] = 10;
+    xyz4[11] = 11;
+    storeXYZArray( aos_Vector3_0, aos_Vector3_1, aos_Vector3_2, aos_Vector3_3, (vec_float4 *)xyz4 );
+#endif
+    aos_Vector4_0 = Vector4( xyz4[0], xyz4[1], xyz4[2], xyz4[3] );  // re-read the array; in the scalar build it still holds the negated values
+    aos_Vector4_1 = Vector4( xyz4[4], xyz4[5], xyz4[6], xyz4[7] );
+    aos_Vector4_2 = Vector4( xyz4[8], xyz4[9], xyz4[10], xyz4[11] );
+    print( aos_Vector4_0, "xyzx" );
+    print( aos_Vector4_1, "yzxy" );
+    print( aos_Vector4_2, "zxyz" );
+#ifdef _VECTORMATH_SCALAR_TEST
+    printf("storeXYZ:-1.0 -2.0 -3.0 0.4\n");  // scalar build prints a fixed line; NOTE(review): digits differ from the %f formatting used below -- confirm the comparison tolerates it
+#else
+    quad = (vec_float4){-1.0f, -2.0f, -3.0f, -4.0f};
+    a_Vector3 = Vector3( quad );
+    quad = (vec_float4){0.1f, 0.2f, 0.3f, 0.4f};  // refill quad before storeXYZ writes a_Vector3 into it
+    storeXYZ( a_Vector3, &quad );
+    printf("storeXYZ:%f %f %f %f\n", ((float *)&quad)[0], ((float *)&quad)[1], ((float *)&quad)[2], ((float *)&quad)[3]);
+#endif
+    a_Vector3 = b_Vector3;
+    print( a_Vector3, "assign to Vector3 from Vector3" );
+    a_Vector3 = Vector3( 0.0f );
+    print( a_Vector3, "set Vector3 elements to zero" );
+    a_Vector3 = Vector3::xAxis( );
+    print( a_Vector3, "set to x axis" );
+    a_Vector3 = Vector3::yAxis( );
+    print( a_Vector3, "set to y axis" );
+    a_Vector3 = Vector3::zAxis( );
+    print( a_Vector3, "set to z axis" );
+    if (iteration % 2) {  // odd passes hand setElem the raw randfloat() value, even passes wrap it in scalar_float()
+    a_Vector3.setElem( 0, randfloat() );
+    } else {
+    a_Vector3.setElem( 0, scalar_float(randfloat()) );
+    }
+    print( a_Vector3, "Vector3::set( 0, float )" );
+    a_Vector3[0] = randfloat();  // assignment and compound assignments through operator[]
+    a_Vector3[0] *= randfloat();
+    a_Vector3[0] /= randfloat();
+    a_Vector3[0] += randfloat();
+    a_Vector3[0] -= randfloat();
+    print( a_Vector3, "Vector3::operator [](0)" );
+    a_Vector3.setX( randfloat() );
+    print( a_Vector3, "Vector3::setX()" );
+    if (iteration % 2) {
+    a_Vector3.setElem( 1, randfloat() );
+    } else {
+    a_Vector3.setElem( 1, scalar_float(randfloat()) );
+    }
+    print( a_Vector3, "Vector3::set( 1, float )" );
+    a_Vector3[1] = randfloat();
+    a_Vector3[1] *= randfloat();
+    a_Vector3[1] /= randfloat();
+    a_Vector3[1] += randfloat();
+    a_Vector3[1] -= randfloat();
+    print( a_Vector3, "Vector3::operator [](1)" );
+    a_Vector3.setY( randfloat() );
+    print( a_Vector3, "Vector3::setY()" );
+    if (iteration % 2) {
+    a_Vector3.setElem( 2, randfloat() );
+    } else {
+    a_Vector3.setElem( 2, scalar_float(randfloat()) );
+    }
+    print( a_Vector3, "Vector3::set( 2, float )" );
+    a_Vector3[2] = randfloat();
+    a_Vector3[2] *= randfloat();
+    a_Vector3[2] /= randfloat();
+    a_Vector3[2] += randfloat();
+    a_Vector3[2] -= randfloat();
+    print( a_Vector3, "Vector3::operator [](2)" );
+    a_Vector3.setZ( randfloat() );
+    print( a_Vector3, "Vector3::setZ()" );
+    printf("Vector3::get( 0 ): %f\n", getfloat(a_Vector3.getElem( 0 )) );  // each element is read three ways: getElem, operator[], named getter
+    printf("Vector3::operator []( 0 ): %f\n", getfloat((float)a_Vector3[0]) );
+    printf("Vector3::getX(): %f\n", getfloat(a_Vector3.getX( )) );
+    printf("Vector3::get( 1 ): %f\n", getfloat(a_Vector3.getElem( 1 )) );
+    printf("Vector3::operator []( 1 ): %f\n", getfloat((float)a_Vector3[1]) );
+    printf("Vector3::getY(): %f\n", getfloat(a_Vector3.getY( )) );
+    printf("Vector3::get( 2 ): %f\n", getfloat(a_Vector3.getElem( 2 )) );
+    printf("Vector3::operator []( 2 ): %f\n", getfloat((float)a_Vector3[2]) );
+    printf("Vector3::getZ(): %f\n", getfloat(a_Vector3.getZ( )) );
+    print( ( a_Vector3 + b_Vector3 ), "Vector3 + Vector3" );
+    print( ( a_Vector3 - b_Vector3 ), "Vector3 - Vector3" );
+    print( ( a_Vector3 + b_Point3 ), "Vector3 + Point3" );
+    print( ( a_Vector3 * randfloat() ), "Vector3 * float" );
+    print( ( a_Vector3 / randfloat() ), "Vector3 / float" );
+    print( ( randfloat() * a_Vector3 ), "float * Vector3" );
+    print( ( -a_Vector3 ), "Vector3 negate" );
+    print( mulPerElem( a_Vector3, b_Vector3 ), "mulPerElem( Vector3, Vector3 )" );
+    print( divPerElem( a_Vector3, b_Vector3 ), "divPerElem( Vector3, Vector3 )" );
+    print( recipPerElem( a_Vector3 ), "Vector3 recip" );
+    print( sqrtPerElem( absPerElem( a_Vector3 ) ), "Vector3 sqrt" );  // absPerElem is applied first, so the sqrt/rsqrt input is the absolute value
+    print( rsqrtPerElem( absPerElem( a_Vector3 ) ), "Vector3 rsqrt" );
+    print( absPerElem( a_Vector3 ), "Vector3 abs" );
+    print( copySignPerElem( a_Vector3, b_Vector3 ), "Vector3 copySign" );
+    print( maxPerElem( a_Vector3, b_Vector3 ), "Vector3 maximum Vector3" );
+    print( minPerElem( a_Vector3, b_Vector3 ), "Vector3 minimum Vector3" );
+    printf("Vector3 maximum of elements: %f\n", getfloat(maxElem( a_Vector3 )));
+    printf("Vector3 minimum of elements: %f\n", getfloat(minElem( a_Vector3 )));
+    printf("Vector3 sum of elements: %f\n", getfloat(sum( a_Vector3 )));
+    printf("Vector3 dot Vector3: %f\n", getfloat(dot( a_Vector3, b_Vector3 )));
+    printf("Vector3 lengthSqr: %f\n", getfloat(lengthSqr( a_Vector3 )));
+    printf("Vector3 length: %f\n", getfloat(length( a_Vector3 )));
+    print( normalize( a_Vector3 ), "Vector3 normalized" );
+    rndflt1 = randfloat();  // build four fresh random vectors b..e, then normalize them for the interpolation calls
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    b_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    c_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    d_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    e_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 );
+    b_Vector3 = normalize( b_Vector3 );
+    c_Vector3 = normalize( c_Vector3 );
+    d_Vector3 = normalize( d_Vector3 );  // d_Vector3 and e_Vector3 are normalized but not read again in this function
+    e_Vector3 = normalize( e_Vector3 );
+    a_Vector3 = lerp( randfloat(), b_Vector3, c_Vector3 );
+    print( a_Vector3, "Vector3 lerp" );
+    a_Vector3 = slerp( randfloat(), b_Vector3, c_Vector3 );
+    print( a_Vector3, "Vector3 slerp" );
+}
+
+void
+Vector4_methods_test()  // Exercises Vector4 constructors, element accessors, operators and interpolation, printing every result.
+{
+    Vector3 a_Vector3, b_Vector3, c_Vector3, d_Vector3, e_Vector3;
+    Vector4 a_Vector4, b_Vector4, c_Vector4, d_Vector4, e_Vector4;
+    Point3 a_Point3, b_Point3, c_Point3, d_Point3, e_Point3;
+    Quat a_Quat, b_Quat, c_Quat, d_Quat, e_Quat;
+    Vector4 tmpV4, aos_Vector4_0, aos_Vector4_1, aos_Vector4_2, aos_Vector4_3;
+    float rndflt1, rndflt2, rndflt3, rndflt4, pad;
+    // set a pad value to detect invalid use of padding.
+    // this will be nan for scalar/ppu implementations, max. float for spu
+    union { float f; unsigned int u; } tmp;
+    tmp.u = 0x7fffffff;  // bit pattern written as an unsigned int, then read back as a float through the union
+    pad = tmp.f;
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    a_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    b_Vector3 = Vector3( scalar_float(rndflt1), scalar_float(rndflt2), scalar_float(rndflt3) );  // same constructor shape, arguments wrapped in scalar_float()
+    tmpV4 = Vector4( a_Vector3, pad );  // round-trip through a Vector4 whose w is pad (presumably to plant pad in the unused lane -- confirm)
+    a_Vector3 = tmpV4.getXYZ( );
+    tmpV4 = Vector4( b_Vector3, pad );
+    b_Vector3 = tmpV4.getXYZ( );
+    print( a_Vector3, "set Vector3 with floats" );
+    print( b_Vector3, "set Vector3 with floats" );
+    c_Vector3 = Vector3( 0.0f );
+    d_Vector3 = Vector3( 0.0f );
+    e_Vector3 = Vector3( 0.0f );
+    print( c_Vector3, "set Vector3 elements to zero" );
+    print( d_Vector3, "set Vector3 elements to zero" );
+    print( e_Vector3, "set Vector3 elements to zero" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    a_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    b_Vector4 = Vector4( scalar_float(rndflt1), scalar_float(rndflt2), scalar_float(rndflt3), scalar_float(rndflt4) );
+    print( a_Vector4, "set Vector4 with floats" );
+    print( b_Vector4, "set Vector4 with floats" );
+    c_Vector4 = Vector4( 0.0f );
+    d_Vector4 = Vector4( 0.0f );
+    e_Vector4 = Vector4( 0.0f );
+    print( c_Vector4, "set Vector4 elements to zero" );
+    print( d_Vector4, "set Vector4 elements to zero" );
+    print( e_Vector4, "set Vector4 elements to zero" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    a_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    b_Point3 = Point3( scalar_float(rndflt1), scalar_float(rndflt2), scalar_float(rndflt3) );
+    tmpV4 = Vector4( Vector3( a_Point3 ), pad );  // same pad round-trip as for the Vector3 values above
+    a_Point3 = Point3( tmpV4.getXYZ( ) );
+    tmpV4 = Vector4( Vector3( b_Point3 ), pad );
+    b_Point3 = Point3( tmpV4.getXYZ( ) );
+    print( a_Point3, "set Point3 with floats" );
+    print( b_Point3, "set Point3 with floats" );
+    c_Point3 = Point3( 0.0f );
+    d_Point3 = Point3( 0.0f );
+    e_Point3 = Point3( 0.0f );
+    print( c_Point3, "set Point3 elements to zero" );
+    print( d_Point3, "set Point3 elements to zero" );
+    print( e_Point3, "set Point3 elements to zero" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    a_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    b_Quat = Quat( scalar_float(rndflt1), scalar_float(rndflt2), scalar_float(rndflt3), scalar_float(rndflt4) );
+    print( a_Quat, "set Quat with floats" );
+    print( b_Quat, "set Quat with floats" );
+    c_Quat = Quat( 0.0f );
+    d_Quat = Quat( 0.0f );
+    e_Quat = Quat( 0.0f );
+    print( c_Quat, "set Quat elements to zero" );
+    print( d_Quat, "set Quat elements to zero" );
+    print( e_Quat, "set Quat elements to zero" );
+    a_Vector4 = Vector4( a_Vector3, randfloat() );  // from here on the Vector4-specific checks begin
+    print( a_Vector4, "set Vector4 with Vector3, float" );
+    a_Vector4 = Vector4( a_Vector3 );
+    print( a_Vector4, "set Vector4 with Vector3" );
+    a_Vector4 = Vector4( a_Point3 );
+    print( a_Vector4, "set Vector4 with Point3" );
+    a_Vector4 = Vector4( a_Quat );
+    print( a_Vector4, "construct Vector4 with Quat" );
+    a_Vector4 = Vector4( randfloat() );
+    print( a_Vector4, "set Vector4 with float" );
+    a_Vector4 = Vector4( scalar_float(randfloat()) );
+    print( a_Vector4, "set Vector4 with float" );
+    aos_Vector4_0 = Vector4( 0.0f, 1.0f, 2.0f, 3.0f );
+    aos_Vector4_1 = Vector4( 4.0f, 5.0f, 6.0f, 7.0f );
+    aos_Vector4_2 = Vector4( 8.0f, 9.0f, 10.0f, 11.0f );
+    aos_Vector4_3 = Vector4( 12.0f, 13.0f, 14.0f, 15.0f );
+    print( aos_Vector4_3, "aos type 0" );  // printed in reverse order: _3 under label 0 down to _0 under label 3
+    print( aos_Vector4_2, "aos type 1" );
+    print( aos_Vector4_1, "aos type 2" );
+    print( aos_Vector4_0, "aos type 3" );
+    print( select( a_Vector4, b_Vector4, 0 ), "select 0" );
+    print( select( a_Vector4, b_Vector4, 1 ), "select 1" );
+    print( select( a_Vector4, b_Vector4, 0 ), "select 2" );  // same selector value (0) as "select 0"
+    print( select( a_Vector4, b_Vector4, (unsigned int)-1 ), "select 3" );  // selector with all bits set
+    a_Vector4 = b_Vector4;
+    print( a_Vector4, "assign to Vector4 from Vector4" );
+    a_Vector4.setXYZ( a_Vector3 );
+    print( a_Vector4, "set Vector4 xyz" );
+    print( a_Vector4.getXYZ( ), "get Vector4 xyz" );
+    a_Vector4 = Vector4( 0.0f );
+    print( a_Vector4, "set Vector4 elements to zero" );
+    a_Vector4 = Vector4::xAxis( );
+    print( a_Vector4, "set to x axis" );
+    a_Vector4 = Vector4::yAxis( );
+    print( a_Vector4, "set to y axis" );
+    a_Vector4 = Vector4::zAxis( );
+    print( a_Vector4, "set to z axis" );
+    a_Vector4 = Vector4::wAxis( );
+    print( a_Vector4, "set to w axis" );
+    if (iteration % 2) {  // odd passes hand setElem the raw randfloat() value, even passes wrap it in scalar_float()
+    a_Vector4.setElem( 0, randfloat() );
+    } else {
+    a_Vector4.setElem( 0, scalar_float(randfloat()) );
+    }
+    print( a_Vector4, "Vector4::set( 0, float )" );
+    a_Vector4[0] = randfloat();  // assignment and compound assignments through operator[]
+    a_Vector4[0] *= randfloat();
+    a_Vector4[0] /= randfloat();
+    a_Vector4[0] += randfloat();
+    a_Vector4[0] -= randfloat();
+    print( a_Vector4, "Vector4::operator [](0)" );
+    a_Vector4.setX( randfloat() );
+    print( a_Vector4, "Vector4::setX()" );
+    if (iteration % 2) {
+    a_Vector4.setElem( 1, randfloat() );
+    } else {
+    a_Vector4.setElem( 1, scalar_float(randfloat()) );
+    }
+    print( a_Vector4, "Vector4::set( 1, float )" );
+    a_Vector4[1] = randfloat();
+    a_Vector4[1] *= randfloat();
+    a_Vector4[1] /= randfloat();
+    a_Vector4[1] += randfloat();
+    a_Vector4[1] -= randfloat();
+    print( a_Vector4, "Vector4::operator [](1)" );
+    a_Vector4.setY( randfloat() );
+    print( a_Vector4, "Vector4::setY()" );
+    if (iteration % 2) {
+    a_Vector4.setElem( 2, randfloat() );
+    } else {
+    a_Vector4.setElem( 2, scalar_float(randfloat()) );
+    }
+    print( a_Vector4, "Vector4::set( 2, float )" );
+    a_Vector4[2] = randfloat();
+    a_Vector4[2] *= randfloat();
+    a_Vector4[2] /= randfloat();
+    a_Vector4[2] += randfloat();
+    a_Vector4[2] -= randfloat();
+    print( a_Vector4, "Vector4::operator [](2)" );
+    a_Vector4.setZ( randfloat() );
+    print( a_Vector4, "Vector4::setZ()" );
+    if (iteration % 2) {
+    a_Vector4.setElem( 3, randfloat() );
+    } else {
+    a_Vector4.setElem( 3, scalar_float(randfloat()) );
+    }
+    print( a_Vector4, "Vector4::set( 3, float )" );
+    a_Vector4[3] = randfloat();
+    a_Vector4[3] *= randfloat();
+    a_Vector4[3] /= randfloat();
+    a_Vector4[3] += randfloat();
+    a_Vector4[3] -= randfloat();
+    print( a_Vector4, "Vector4::operator [](3)" );
+    a_Vector4.setW( randfloat() );
+    print( a_Vector4, "Vector4::setW()" );
+    printf("Vector4::get( 0 ): %f\n", getfloat(a_Vector4.getElem( 0 )) );  // each element is read three ways: getElem, operator[], named getter
+    printf("Vector4::operator []( 0 ): %f\n", getfloat((float)a_Vector4[0]) );
+    printf("Vector4::getX(): %f\n", getfloat(a_Vector4.getX( )) );
+    printf("Vector4::get( 1 ): %f\n", getfloat(a_Vector4.getElem( 1 )) );
+    printf("Vector4::operator []( 1 ): %f\n", getfloat((float)a_Vector4[1]) );
+    printf("Vector4::getY(): %f\n", getfloat(a_Vector4.getY( )) );
+    printf("Vector4::get( 2 ): %f\n", getfloat(a_Vector4.getElem( 2 )) );
+    printf("Vector4::operator []( 2 ): %f\n", getfloat((float)a_Vector4[2]) );
+    printf("Vector4::getZ(): %f\n", getfloat(a_Vector4.getZ( )) );
+    printf("Vector4::get( 3 ): %f\n", getfloat(a_Vector4.getElem( 3 )) );
+    printf("Vector4::operator []( 3 ): %f\n", getfloat((float)a_Vector4[3]) );
+    printf("Vector4::getW(): %f\n", getfloat(a_Vector4.getW( )) );
+    print( ( a_Vector4 + b_Vector4 ), "Vector4 + Vector4" );
+    print( ( a_Vector4 - b_Vector4 ), "Vector4 - Vector4" );
+    print( ( a_Vector4 * randfloat() ), "Vector4 * float" );
+    print( ( a_Vector4 / randfloat() ), "Vector4 / float" );
+    print( ( randfloat() * a_Vector4 ), "float * Vector4" );
+    print( ( -a_Vector4 ), "Vector4 negate" );
+    print( mulPerElem( a_Vector4, b_Vector4 ), "mulPerElem( Vector4, Vector4 )" );
+    print( divPerElem( a_Vector4, b_Vector4 ), "divPerElem( Vector4, Vector4 )" );
+    print( recipPerElem( a_Vector4 ), "Vector4 recip" );
+    print( sqrtPerElem( absPerElem( a_Vector4 ) ), "Vector4 sqrt" );  // absPerElem is applied first, so the sqrt/rsqrt input is the absolute value
+    print( rsqrtPerElem( absPerElem( a_Vector4 ) ), "Vector4 rsqrt" );
+    print( absPerElem( a_Vector4 ), "Vector4 abs" );
+    print( copySignPerElem( a_Vector4, b_Vector4 ), "Vector4 copySign" );
+    print( maxPerElem( a_Vector4, b_Vector4 ), "Vector4 maximum Vector4" );
+    print( minPerElem( a_Vector4, b_Vector4 ), "Vector4 minimum Vector4" );
+    printf("Vector4 maximum of elements: %f\n", getfloat(maxElem( a_Vector4 )));
+    printf("Vector4 minimum of elements: %f\n", getfloat(minElem( a_Vector4 )));
+    printf("Vector4 sum of elements: %f\n", getfloat(sum( a_Vector4 )));
+    printf("Vector4 dot Vector4: %f\n", getfloat(dot( a_Vector4, b_Vector4 )));
+    printf("Vector4 lengthSqr: %f\n", getfloat(lengthSqr( a_Vector4 )));
+    printf("Vector4 length: %f\n", getfloat(length( a_Vector4 )));
+    print( normalize( a_Vector4 ), "Vector4 normalized" );
+    rndflt1 = randfloat();  // build four fresh random vectors b..e, then normalize them for the interpolation calls
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    b_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    c_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    d_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    e_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 );
+    b_Vector4 = normalize( b_Vector4 );
+    c_Vector4 = normalize( c_Vector4 );
+    d_Vector4 = normalize( d_Vector4 );  // d_Vector4 and e_Vector4 are normalized but not read again in this function
+    e_Vector4 = normalize( e_Vector4 );
+    a_Vector4 = lerp( randfloat(), b_Vector4, c_Vector4 );
+    print( a_Vector4, "Vector4 lerp" );
+    a_Vector4 = slerp( randfloat(), b_Vector4, c_Vector4 );
+    print( a_Vector4, "Vector4 slerp" );
+}
+
+void
+Point3_methods_test()
+{
+    Vector3 a_Vector3, b_Vector3, c_Vector3, d_Vector3, e_Vector3;
+    Vector4 a_Vector4, b_Vector4, c_Vector4, d_Vector4, e_Vector4;
+    Point3 a_Point3, b_Point3, c_Point3, d_Point3, e_Point3;
+    Quat a_Quat, b_Quat, c_Quat, d_Quat, e_Quat;
+    Vector4 tmpV4;
+    Point3 aos_Point3_0, aos_Point3_1, aos_Point3_2, aos_Point3_3;
+    Vector4 aos_Vector4_0, aos_Vector4_1, aos_Vector4_2;
+    float rndflt1, rndflt2, rndflt3, rndflt4, pad;
+    float xyz4[12] __attribute__ ((aligned(16)));
+#ifndef _VECTORMATH_SCALAR_TEST
+    vec_float4 quad;
+#endif
+    xyz4[0] = randfloat();
+    xyz4[1] = randfloat();
+    xyz4[2] = randfloat();
+    xyz4[3] = randfloat();
+    xyz4[4] = randfloat();
+    xyz4[5] = randfloat();
+    xyz4[6] = randfloat();
+    xyz4[7] = randfloat();
+    xyz4[8] = randfloat();
+    xyz4[9] = randfloat();
+    xyz4[10] = randfloat();
+    xyz4[11] = randfloat();
+    // set a pad value to detect invalid use of padding.
+    // this will be nan for scalar/ppu implementations, max. float for spu
+    union { float f; unsigned int u; } tmp;
+    tmp.u = 0x7fffffff;
+    pad = tmp.f;
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    a_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    b_Vector3 = Vector3( scalar_float(rndflt1), scalar_float(rndflt2), scalar_float(rndflt3) );
+    tmpV4 = Vector4( a_Vector3, pad );
+    a_Vector3 = tmpV4.getXYZ( );
+    tmpV4 = Vector4( b_Vector3, pad );
+    b_Vector3 = tmpV4.getXYZ( );
+    print( a_Vector3, "set Vector3 with floats" );
+    print( b_Vector3, "set Vector3 with floats" );
+    c_Vector3 = Vector3( 0.0f );
+    d_Vector3 = Vector3( 0.0f );
+    e_Vector3 = Vector3( 0.0f );
+    print( c_Vector3, "set Vector3 elements to zero" );
+    print( d_Vector3, "set Vector3 elements to zero" );
+    print( e_Vector3, "set Vector3 elements to zero" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    a_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    b_Vector4 = Vector4( scalar_float(rndflt1), scalar_float(rndflt2), scalar_float(rndflt3), scalar_float(rndflt4) );
+    print( a_Vector4, "set Vector4 with floats" );
+    print( b_Vector4, "set Vector4 with floats" );
+    c_Vector4 = Vector4( 0.0f );
+    d_Vector4 = Vector4( 0.0f );
+    e_Vector4 = Vector4( 0.0f );
+    print( c_Vector4, "set Vector4 elements to zero" );
+    print( d_Vector4, "set Vector4 elements to zero" );
+    print( e_Vector4, "set Vector4 elements to zero" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    a_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    b_Point3 = Point3( scalar_float(rndflt1), scalar_float(rndflt2), scalar_float(rndflt3) );
+    tmpV4 = Vector4( Vector3( a_Point3 ), pad );
+    a_Point3 = Point3( tmpV4.getXYZ( ) );
+    tmpV4 = Vector4( Vector3( b_Point3 ), pad );
+    b_Point3 = Point3( tmpV4.getXYZ( ) );
+    print( a_Point3, "set Point3 with floats" );
+    print( b_Point3, "set Point3 with floats" );
+    c_Point3 = Point3( 0.0f );
+    d_Point3 = Point3( 0.0f );
+    e_Point3 = Point3( 0.0f );
+    print( c_Point3, "set Point3 elements to zero" );
+    print( d_Point3, "set Point3 elements to zero" );
+    print( e_Point3, "set Point3 elements to zero" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    a_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    b_Quat = Quat( scalar_float(rndflt1), scalar_float(rndflt2), scalar_float(rndflt3), scalar_float(rndflt4) );
+    print( a_Quat, "set Quat with floats" );
+    print( b_Quat, "set Quat with floats" );
+    c_Quat = Quat( 0.0f );
+    d_Quat = Quat( 0.0f );
+    e_Quat = Quat( 0.0f );
+    print( c_Quat, "set Quat elements to zero" );
+    print( d_Quat, "set Quat elements to zero" );
+    print( e_Quat, "set Quat elements to zero" );
+    a_Point3 = Point3( a_Vector3 );
+    print( a_Point3, "construct Point3 with Vector3" );
+    a_Point3 = Point3( randfloat() );
+    print( a_Point3, "set Point3 with float" );
+    a_Point3 = Point3( scalar_float(randfloat()) );
+    print( a_Point3, "set Point3 with float" );
+    aos_Point3_0 = Point3( 0.0f, 1.0f, 2.0f );
+    aos_Point3_1 = Point3( 3.0f, 4.0f, 5.0f );
+    aos_Point3_2 = Point3( 6.0f, 7.0f, 8.0f );
+    aos_Point3_3 = Point3( 9.0f, 10.0f, 11.0f );
+    print( aos_Point3_3, "aos type 0" );
+    print( aos_Point3_2, "aos type 1" );
+    print( aos_Point3_1, "aos type 2" );
+    print( aos_Point3_0, "aos type 3" );
+    print( select( a_Point3, b_Point3, 0 ), "select 0" );
+    print( select( a_Point3, b_Point3, 1 ), "select 1" );
+    print( select( a_Point3, b_Point3, 0 ), "select 2" );
+    print( select( a_Point3, b_Point3, (unsigned int)-1 ), "select 3" );
+    a_Point3 = Point3( xyz4[0], xyz4[1], xyz4[2] );
+    print( a_Point3, "load XYZ array" );
+    xyz4[0] = -xyz4[0];
+    xyz4[1] = -xyz4[1];
+    xyz4[2] = -xyz4[2];
+    xyz4[3] = -xyz4[3];
+    xyz4[4] = -xyz4[4];
+    xyz4[5] = -xyz4[5];
+    xyz4[6] = -xyz4[6];
+    xyz4[7] = -xyz4[7];
+    xyz4[8] = -xyz4[8];
+    xyz4[9] = -xyz4[9];
+    xyz4[10] = -xyz4[10];
+    xyz4[11] = -xyz4[11];
+    aos_Vector4_0 = Vector4( xyz4[0], xyz4[1], xyz4[2], xyz4[3] );
+    aos_Vector4_1 = Vector4( xyz4[4], xyz4[5], xyz4[6], xyz4[7] );
+    aos_Vector4_2 = Vector4( xyz4[8], xyz4[9], xyz4[10], xyz4[11] );
+    print( aos_Vector4_0, "xyzx" );
+    print( aos_Vector4_1, "yzxy" );
+    print( aos_Vector4_2, "zxyz" );
+#ifndef _VECTORMATH_SCALAR_TEST
+    loadXYZArray( aos_Point3_0, aos_Point3_1, aos_Point3_2, aos_Point3_3, (const vec_float4 *)xyz4 );
+    xyz4[0] = 0;
+    xyz4[1] = 1;
+    xyz4[2] = 2;
+    xyz4[3] = 3;
+    xyz4[4] = 4;
+    xyz4[5] = 5;
+    xyz4[6] = 6;
+    xyz4[7] = 7;
+    xyz4[8] = 8;
+    xyz4[9] = 9;
+    xyz4[10] = 10;
+    xyz4[11] = 11;
+    storeXYZArray( aos_Point3_0, aos_Point3_1, aos_Point3_2, aos_Point3_3, (vec_float4 *)xyz4 );
+#endif
+    aos_Vector4_0 = Vector4( xyz4[0], xyz4[1], xyz4[2], xyz4[3] );
+    aos_Vector4_1 = Vector4( xyz4[4], xyz4[5], xyz4[6], xyz4[7] );
+    aos_Vector4_2 = Vector4( xyz4[8], xyz4[9], xyz4[10], xyz4[11] );
+    print( aos_Vector4_0, "xyzx" );
+    print( aos_Vector4_1, "yzxy" );
+    print( aos_Vector4_2, "zxyz" );
+#ifdef _VECTORMATH_SCALAR_TEST
+    printf("storeXYZ:-1.0 -2.0 -3.0 0.4\n");
+#else
+    quad = (vec_float4){-1.0f, -2.0f, -3.0f, -4.0f};
+    a_Point3 = Point3( quad );
+    quad = (vec_float4){0.1f, 0.2f, 0.3f, 0.4f};
+    storeXYZ( a_Point3, &quad );
+    printf("storeXYZ:%f %f %f %f\n", ((float *)&quad)[0], ((float *)&quad)[1], ((float *)&quad)[2], ((float *)&quad)[3]);
+#endif
+    a_Point3 = b_Point3;
+    print( a_Point3, "assign to Point3 from Point3" );
+    a_Point3 = Point3( 0.0f );
+    print( a_Point3, "set Point3 elements to zero" );
+    if (iteration % 2) {
+    a_Point3.setElem( 0, randfloat() );
+    } else {
+    a_Point3.setElem( 0, scalar_float(randfloat()) );
+    }
+    print( a_Point3, "Point3::set( 0, float )" );
+    a_Point3[0] = randfloat();
+    a_Point3[0] *= randfloat();
+    a_Point3[0] /= randfloat();
+    a_Point3[0] += randfloat();
+    a_Point3[0] -= randfloat();
+    print( a_Point3, "Point3::operator [](0)" );
+    a_Point3.setX( randfloat() );
+    print( a_Point3, "Point3::setX()" );
+    if (iteration % 2) {
+    a_Point3.setElem( 1, randfloat() );
+    } else {
+    a_Point3.setElem( 1, scalar_float(randfloat()) );
+    }
+    print( a_Point3, "Point3::set( 1, float )" );
+    a_Point3[1] = randfloat();
+    a_Point3[1] *= randfloat();
+    a_Point3[1] /= randfloat();
+    a_Point3[1] += randfloat();
+    a_Point3[1] -= randfloat();
+    print( a_Point3, "Point3::operator [](1)" );
+    a_Point3.setY( randfloat() );
+    print( a_Point3, "Point3::setY()" );
+    if (iteration % 2) {
+    a_Point3.setElem( 2, randfloat() );
+    } else {
+    a_Point3.setElem( 2, scalar_float(randfloat()) );
+    }
+    print( a_Point3, "Point3::set( 2, float )" );
+    a_Point3[2] = randfloat();
+    a_Point3[2] *= randfloat();
+    a_Point3[2] /= randfloat();
+    a_Point3[2] += randfloat();
+    a_Point3[2] -= randfloat();
+    print( a_Point3, "Point3::operator [](2)" );
+    a_Point3.setZ( randfloat() );
+    print( a_Point3, "Point3::setZ()" );
+    printf("Point3::get( 0 ): %f\n", getfloat(a_Point3.getElem( 0 )) );
+    printf("Point3::operator []( 0 ): %f\n", getfloat((float)a_Point3[0]) );
+    printf("Point3::getX(): %f\n", getfloat(a_Point3.getX( )) );
+    printf("Point3::get( 1 ): %f\n", getfloat(a_Point3.getElem( 1 )) );
+    printf("Point3::operator []( 1 ): %f\n", getfloat((float)a_Point3[1]) );
+    printf("Point3::getY(): %f\n", getfloat(a_Point3.getY( )) );
+    printf("Point3::get( 2 ): %f\n", getfloat(a_Point3.getElem( 2 )) );
+    printf("Point3::operator []( 2 ): %f\n", getfloat((float)a_Point3[2]) );
+    printf("Point3::getZ(): %f\n", getfloat(a_Point3.getZ( )) );
+    print( ( a_Point3 - b_Point3 ), "Point3 - Point3" );
+    print( ( a_Point3 + b_Vector3 ), "Point3 + Vector3" );
+    print( ( a_Point3 - b_Vector3 ), "Point3 - Vector3" );
+    print( mulPerElem( a_Point3, b_Point3 ), "mulPerElem( Point3, Point3 )" );
+    print( divPerElem( a_Point3, b_Point3 ), "divPerElem( Point3, Point3 )" );
+    print( recipPerElem( a_Point3 ), "Point3 recip" );
+    print( sqrtPerElem( absPerElem( a_Point3 ) ), "Point3 sqrt" );
+    print( rsqrtPerElem( absPerElem( a_Point3 ) ), "Point3 rsqrt" );
+    print( absPerElem( a_Point3 ), "Point3 abs" );
+    print( copySignPerElem( a_Point3, b_Point3 ), "Point3 copySign" );
+    print( maxPerElem( a_Point3, b_Point3 ), "Point3 maximum Point3" );
+    print( minPerElem( a_Point3, b_Point3 ), "Point3 minimum Point3" );
+    printf("Point3 maximum of elements: %f\n", getfloat(maxElem( a_Point3 )));
+    printf("Point3 minimum of elements: %f\n", getfloat(minElem( a_Point3 )));
+    printf("Point3 sum of elements: %f\n", getfloat(sum( a_Point3 )));
+    printf("Point projection: %f\n", getfloat(projection( a_Point3, b_Vector3 )));
+    printf("Point distSqrFromOrigin: %f\n", getfloat(distSqrFromOrigin( a_Point3 )) );
+    printf("Point distFromOrigin: %f\n", getfloat(distFromOrigin( a_Point3 )) );
+    printf("Point distSqr: %f\n", getfloat(distSqr( a_Point3, b_Point3 )) );
+    printf("Point dist: %f\n", getfloat(dist( a_Point3, b_Point3 )) );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    b_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    c_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    d_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    e_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    a_Point3 = lerp( randfloat(), b_Point3, c_Point3 );
+    print( a_Point3, "Point3 lerp" );
+}
+
+void // Quat test suite: builds Vector3/Vector4/Point3/Quat fixtures, then prints the labelled result of each Quat operation.
+Quat_methods_test() // NOTE(review): output presumably must match tests/test1_reference.txt, so the order of randfloat() calls matters - confirm before reordering.
+{
+    Vector3 a_Vector3, b_Vector3, c_Vector3, d_Vector3, e_Vector3;
+    Vector4 a_Vector4, b_Vector4, c_Vector4, d_Vector4, e_Vector4;
+    Point3 a_Point3, b_Point3, c_Point3, d_Point3, e_Point3;
+    Quat a_Quat, b_Quat, c_Quat, d_Quat, e_Quat;
+    Vector4 tmpV4;
+    Quat aos_Quat_0, aos_Quat_1, aos_Quat_2, aos_Quat_3;
+    float rndflt1, rndflt2, rndflt3, rndflt4, pad; // rndfltN each hold one randfloat() draw, taken one per statement before being passed to a constructor
+    // set a pad value to detect invalid use of padding.
+    // this will be nan for scalar/ppu implementations, max. float for spu
+    union { float f; unsigned int u; } tmp; // used to write an integer bit pattern and read it back as a float
+    tmp.u = 0x7fffffff; // every bit set except the top (sign) bit
+    pad = tmp.f;
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    a_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 ); // built from plain floats
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    b_Vector3 = Vector3( scalar_float(rndflt1), scalar_float(rndflt2), scalar_float(rndflt3) ); // same, but each component wrapped in scalar_float()
+    tmpV4 = Vector4( a_Vector3, pad ); // round-trip through a Vector4 whose w is pad, then read xyz back (presumably plants pad in the unused lane on SIMD builds - confirm)
+    a_Vector3 = tmpV4.getXYZ( );
+    tmpV4 = Vector4( b_Vector3, pad ); // same round-trip for b_Vector3
+    b_Vector3 = tmpV4.getXYZ( );
+    print( a_Vector3, "set Vector3 with floats" );
+    print( b_Vector3, "set Vector3 with floats" );
+    c_Vector3 = Vector3( 0.0f ); // c/d/e Vector3 are only zero-filled and printed in this function
+    d_Vector3 = Vector3( 0.0f );
+    e_Vector3 = Vector3( 0.0f );
+    print( c_Vector3, "set Vector3 elements to zero" );
+    print( d_Vector3, "set Vector3 elements to zero" );
+    print( e_Vector3, "set Vector3 elements to zero" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    a_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 ); // later used as the source for Quat( a_Vector4 )
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    b_Vector4 = Vector4( scalar_float(rndflt1), scalar_float(rndflt2), scalar_float(rndflt3), scalar_float(rndflt4) );
+    print( a_Vector4, "set Vector4 with floats" );
+    print( b_Vector4, "set Vector4 with floats" );
+    c_Vector4 = Vector4( 0.0f ); // c/d/e Vector4 are only zero-filled and printed in this function
+    d_Vector4 = Vector4( 0.0f );
+    e_Vector4 = Vector4( 0.0f );
+    print( c_Vector4, "set Vector4 elements to zero" );
+    print( d_Vector4, "set Vector4 elements to zero" );
+    print( e_Vector4, "set Vector4 elements to zero" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    a_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    b_Point3 = Point3( scalar_float(rndflt1), scalar_float(rndflt2), scalar_float(rndflt3) );
+    tmpV4 = Vector4( Vector3( a_Point3 ), pad ); // same pad round-trip as for the Vector3 fixtures, going through a Vector3 conversion
+    a_Point3 = Point3( tmpV4.getXYZ( ) );
+    tmpV4 = Vector4( Vector3( b_Point3 ), pad );
+    b_Point3 = Point3( tmpV4.getXYZ( ) );
+    print( a_Point3, "set Point3 with floats" );
+    print( b_Point3, "set Point3 with floats" );
+    c_Point3 = Point3( 0.0f ); // c/d/e Point3 are only zero-filled and printed in this function
+    d_Point3 = Point3( 0.0f );
+    e_Point3 = Point3( 0.0f );
+    print( c_Point3, "set Point3 elements to zero" );
+    print( d_Point3, "set Point3 elements to zero" );
+    print( e_Point3, "set Point3 elements to zero" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    a_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    b_Quat = Quat( scalar_float(rndflt1), scalar_float(rndflt2), scalar_float(rndflt3), scalar_float(rndflt4) );
+    print( a_Quat, "set Quat with floats" );
+    print( b_Quat, "set Quat with floats" );
+    c_Quat = Quat( 0.0f );
+    d_Quat = Quat( 0.0f );
+    e_Quat = Quat( 0.0f );
+    print( c_Quat, "set Quat elements to zero" );
+    print( d_Quat, "set Quat elements to zero" );
+    print( e_Quat, "set Quat elements to zero" );
+    a_Quat = Quat( a_Vector3, randfloat() ); // fixtures done; from here each Quat operation is exercised and printed
+    print( a_Quat, "set Quat with Vector3, float" );
+    a_Quat = Quat( a_Vector4 );
+    print( a_Quat, "construct Quat with Vector4" );
+    a_Quat = Quat( randfloat() ); // single-float constructor, plain float argument
+    print( a_Quat, "set Quat with float" );
+    a_Quat = Quat( scalar_float(randfloat()) ); // single-float constructor, scalar_float argument
+    print( a_Quat, "set Quat with float" );
+    aos_Quat_0 = Quat( 0.0f, 1.0f, 2.0f, 3.0f ); // the four aos quats hold the fixed values 0..15
+    aos_Quat_1 = Quat( 4.0f, 5.0f, 6.0f, 7.0f );
+    aos_Quat_2 = Quat( 8.0f, 9.0f, 10.0f, 11.0f );
+    aos_Quat_3 = Quat( 12.0f, 13.0f, 14.0f, 15.0f );
+    print( aos_Quat_3, "aos type 0" ); // printed in reverse order: _3 carries the label "aos type 0"
+    print( aos_Quat_2, "aos type 1" );
+    print( aos_Quat_1, "aos type 2" );
+    print( aos_Quat_0, "aos type 3" );
+    print( select( a_Quat, b_Quat, 0 ), "select 0" );
+    print( select( a_Quat, b_Quat, 1 ), "select 1" );
+    print( select( a_Quat, b_Quat, 0 ), "select 2" ); // NOTE(review): flag is 0 again, so this repeats "select 0" despite its label
+    print( select( a_Quat, b_Quat, (unsigned int)-1 ), "select 3" ); // flag with every bit set
+    a_Quat = b_Quat;
+    print( a_Quat, "assign to Quat from Quat" );
+    a_Quat.setXYZ( a_Vector3 ); // replace x, y, z with a_Vector3
+    print( a_Quat, "set Quat xyz" );
+    print( a_Quat.getXYZ( ), "get Quat xyz" );
+    a_Quat = Quat( 0.0f );
+    print( a_Quat, "set Quat elements to zero" );
+    if (iteration % 2) { // odd iteration: setElem gets a plain float; even: a scalar_float (iteration is declared outside this function)
+    a_Quat.setElem( 0, randfloat() );
+    } else {
+    a_Quat.setElem( 0, scalar_float(randfloat()) );
+    }
+    print( a_Quat, "Quat::set( 0, float )" );
+    a_Quat[0] = randfloat(); // assignment and each compound assignment through operator[], printed once afterwards
+    a_Quat[0] *= randfloat();
+    a_Quat[0] /= randfloat();
+    a_Quat[0] += randfloat();
+    a_Quat[0] -= randfloat();
+    print( a_Quat, "Quat::operator [](0)" );
+    a_Quat.setX( randfloat() );
+    print( a_Quat, "Quat::setX()" );
+    if (iteration % 2) { // same setElem / operator[] / named-setter sequence for element 1
+    a_Quat.setElem( 1, randfloat() );
+    } else {
+    a_Quat.setElem( 1, scalar_float(randfloat()) );
+    }
+    print( a_Quat, "Quat::set( 1, float )" );
+    a_Quat[1] = randfloat();
+    a_Quat[1] *= randfloat();
+    a_Quat[1] /= randfloat();
+    a_Quat[1] += randfloat();
+    a_Quat[1] -= randfloat();
+    print( a_Quat, "Quat::operator [](1)" );
+    a_Quat.setY( randfloat() );
+    print( a_Quat, "Quat::setY()" );
+    if (iteration % 2) { // same sequence for element 2
+    a_Quat.setElem( 2, randfloat() );
+    } else {
+    a_Quat.setElem( 2, scalar_float(randfloat()) );
+    }
+    print( a_Quat, "Quat::set( 2, float )" );
+    a_Quat[2] = randfloat();
+    a_Quat[2] *= randfloat();
+    a_Quat[2] /= randfloat();
+    a_Quat[2] += randfloat();
+    a_Quat[2] -= randfloat();
+    print( a_Quat, "Quat::operator [](2)" );
+    a_Quat.setZ( randfloat() );
+    print( a_Quat, "Quat::setZ()" );
+    if (iteration % 2) { // same sequence for element 3
+    a_Quat.setElem( 3, randfloat() );
+    } else {
+    a_Quat.setElem( 3, scalar_float(randfloat()) );
+    }
+    print( a_Quat, "Quat::set( 3, float )" );
+    a_Quat[3] = randfloat();
+    a_Quat[3] *= randfloat();
+    a_Quat[3] /= randfloat();
+    a_Quat[3] += randfloat();
+    a_Quat[3] -= randfloat();
+    print( a_Quat, "Quat::operator [](3)" );
+    a_Quat.setW( randfloat() );
+    print( a_Quat, "Quat::setW()" );
+    printf("Quat::get( 0 ): %f\n", getfloat(a_Quat.getElem( 0 )) ); // read-back: getElem(i), operator[] and the named getter are printed for each element
+    printf("Quat::operator []( 0 ): %f\n", getfloat((float)a_Quat[0]) );
+    printf("Quat::getX(): %f\n", getfloat(a_Quat.getX( )) );
+    printf("Quat::get( 1 ): %f\n", getfloat(a_Quat.getElem( 1 )) );
+    printf("Quat::operator []( 1 ): %f\n", getfloat((float)a_Quat[1]) );
+    printf("Quat::getY(): %f\n", getfloat(a_Quat.getY( )) );
+    printf("Quat::get( 2 ): %f\n", getfloat(a_Quat.getElem( 2 )) );
+    printf("Quat::operator []( 2 ): %f\n", getfloat((float)a_Quat[2]) );
+    printf("Quat::getZ(): %f\n", getfloat(a_Quat.getZ( )) );
+    printf("Quat::get( 3 ): %f\n", getfloat(a_Quat.getElem( 3 )) );
+    printf("Quat::operator []( 3 ): %f\n", getfloat((float)a_Quat[3]) );
+    printf("Quat::getW(): %f\n", getfloat(a_Quat.getW( )) );
+    print( ( a_Quat + b_Quat ), "Quat + Quat" ); // arithmetic operators; b_Quat is still the scalar_float-built fixture
+    print( ( a_Quat - b_Quat ), "Quat - Quat" );
+    print( ( a_Quat * b_Quat ), "Quat * Quat" );
+    print( ( a_Quat * randfloat() ), "Quat * float" );
+    print( ( a_Quat / randfloat() ), "Quat / float" );
+    print( ( randfloat() * a_Quat ), "float * Quat" );
+    print( ( -a_Quat ), "Quat negate" );
+    printf("Quat dot Quat: %f\n", getfloat(dot( a_Quat, b_Quat )));
+    printf("Quat lengthSqr: %f\n", getfloat(norm( a_Quat ))); // note: the value printed under the "lengthSqr" label comes from norm()
+    printf("Quat length: %f\n", getfloat(length( a_Quat )));
+    print( normalize( a_Quat ), "Quat normalized" );
+    a_Quat = Quat::identity( ); // static factory functions follow, each overwriting a_Quat
+    print( a_Quat, "set to identity" );
+    a_Quat = Quat::rotation( a_Vector3, b_Vector3 );
+    print( a_Quat, "Quat rotation between vectors" );
+    a_Quat = Quat::rotation( randfloat(), a_Vector3 );
+    print( a_Quat, "Quat rotation axis angle" );
+    a_Quat = Quat::rotationX( randfloat() );
+    print( a_Quat, "Quat rotationX" );
+    a_Quat = Quat::rotationY( randfloat() );
+    print( a_Quat, "Quat rotationY" );
+    a_Quat = Quat::rotationZ( randfloat() );
+    print( a_Quat, "Quat rotationZ" );
+    print( rotate( a_Quat, a_Vector3 ), "Quat rotate Vector3" ); // a_Quat here is the rotationZ result assigned just above
+    print( conj( a_Quat ), "Quat conj" );
+    rndflt1 = randfloat(); // fresh random b/c/d/e quats for the interpolation calls below
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    b_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    c_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    d_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    e_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    b_Quat = normalize( b_Quat ); // all four are normalized before being passed to lerp/slerp/squad
+    c_Quat = normalize( c_Quat );
+    d_Quat = normalize( d_Quat );
+    e_Quat = normalize( e_Quat );
+    a_Quat = lerp( randfloat(), b_Quat, c_Quat ); // each interpolation call takes a fresh randfloat() as its first argument
+    print( a_Quat, "Quat lerp" );
+    a_Quat = slerp( randfloat(), b_Quat, c_Quat );
+    print( a_Quat, "Quat slerp" );
+    a_Quat = squad( randfloat(), b_Quat, c_Quat, d_Quat, e_Quat ); // the only call that consumes the random d_Quat and e_Quat
+    print( a_Quat, "Quat squad" );
+}
+
+int main() // Test driver: prints a __begin__ marker, runs the four per-type suites twice in a fixed order, prints __end__, returns 0.
+{
+    void (* const suites[])() = { Vector3_methods_test, Vector4_methods_test, Point3_methods_test, Quat_methods_test }; // table order is the call order
+    printf("\n __begin__ \n");
+    for ( iteration = 0; iteration < 2; ++iteration ) { // iteration is declared outside main; Point3_methods_test and Quat_methods_test branch on iteration % 2
+        for ( unsigned int idx = 0; idx < sizeof( suites ) / sizeof( suites[0] ); ++idx ) {
+            suites[idx](); // run one suite; suites print their own results
+        }
+    }
+    printf("\n __end__ \n");
+    return 0;
+}
diff --git a/Extras/vectormathlibrary/tests/test1_reference.txt b/Extras/vectormathlibrary/tests/test1_reference.txt
new file mode 100644
index 000000000..69af2d5b6
--- /dev/null
+++ b/Extras/vectormathlibrary/tests/test1_reference.txt
@@ -0,0 +1,680 @@
+set Vector3 with floats: ( 0.465039 -0.479556 -0.211412 )
+set Vector3 with floats: ( 0.553580 0.690070 0.151576 )
+set Vector3 elements to zero: ( 0.000000 0.000000 0.000000 )
+set Vector3 elements to zero: ( 0.000000 0.000000 0.000000 )
+set Vector3 elements to zero: ( 0.000000 0.000000 0.000000 )
+set Vector4 with floats: ( 0.431077 -0.833992 -0.088350 -0.780106 )
+set Vector4 with floats: ( 0.090456 -0.218627 0.137171 0.918133 )
+set Vector4 elements to zero: ( 0.000000 0.000000 0.000000 0.000000 )
+set Vector4 elements to zero: ( 0.000000 0.000000 0.000000 0.000000 )
+set Vector4 elements to zero: ( 0.000000 0.000000 0.000000 0.000000 )
+set Point3 with floats: ( 0.735438 -0.673621 -0.448982 )
+set Point3 with floats: ( -0.479278 0.848189 -0.128155 )
+set Point3 elements to zero: ( 0.000000 0.000000 0.000000 )
+set Point3 elements to zero: ( 0.000000 0.000000 0.000000 )
+set Point3 elements to zero: ( 0.000000 0.000000 0.000000 )
+set Quat with floats: ( 0.578922 -0.744766 -0.835589 0.881284 )
+set Quat with floats: ( -0.948850 -0.691578 -0.235635 -0.690527 )
+set Quat elements to zero: ( 0.000000 0.000000 0.000000 0.000000 )
+set Quat elements to zero: ( 0.000000 0.000000 0.000000 0.000000 )
+set Quat elements to zero: ( 0.000000 0.000000 0.000000 0.000000 )
+construct Vector3 with Point3: ( 0.735438 -0.673621 -0.448982 )
+set Vector3 with float: ( 0.058667 0.058667 0.058667 )
+set Vector3 with float: ( 0.753697 0.753697 0.753697 )
+aos type 0: ( 9.000000 10.000000 11.000000 )
+aos type 1: ( 6.000000 7.000000 8.000000 )
+aos type 2: ( 3.000000 4.000000 5.000000 )
+aos type 3: ( 0.000000 1.000000 2.000000 )
+select 0: ( 0.753697 0.753697 0.753697 )
+select 1: ( 0.553580 0.690070 0.151576 )
+select 2: ( 0.753697 0.753697 0.753697 )
+select 3: ( 0.553580 0.690070 0.151576 )
+load XYZ array: ( -0.658344 0.499804 -0.807257 )
+xyzx: ( 0.658344 -0.499804 0.807257 -0.740930 )
+yzxy: ( -0.154607 -0.571599 -0.384388 0.262467 )
+zxyz: ( -0.747808 -0.490190 0.107908 0.292544 )
+xyzx: ( 0.658344 -0.499804 0.807257 -0.740930 )
+yzxy: ( -0.154607 -0.571599 -0.384388 0.262467 )
+zxyz: ( -0.747808 -0.490190 0.107908 0.292544 )
+storeXYZ:-1.0 -2.0 -3.0 0.4
+assign to Vector3 from Vector3: ( 0.553580 0.690070 0.151576 )
+set Vector3 elements to zero: ( 0.000000 0.000000 0.000000 )
+set to x axis: ( 1.000000 0.000000 0.000000 )
+set to y axis: ( 0.000000 1.000000 0.000000 )
+set to z axis: ( 0.000000 0.000000 1.000000 )
+Vector3::set( 0, float ): ( -0.138777 0.000000 1.000000 )
+Vector3::operator [](0): ( -1.001420 0.000000 1.000000 )
+Vector3::setX(): ( 0.410391 0.000000 1.000000 )
+Vector3::set( 1, float ): ( 0.410391 -0.562721 1.000000 )
+Vector3::operator [](1): ( 0.410391 0.544473 1.000000 )
+Vector3::setY(): ( 0.410391 0.374881 1.000000 )
+Vector3::set( 2, float ): ( 0.410391 0.374881 -0.127818 )
+Vector3::operator [](2): ( 0.410391 0.374881 -0.212598 )
+Vector3::setZ(): ( 0.410391 0.374881 -0.723410 )
+Vector3::get( 0 ): 0.410391
+Vector3::operator []( 0 ): 0.410391
+Vector3::getX(): 0.410391
+Vector3::get( 1 ): 0.374881
+Vector3::operator []( 1 ): 0.374881
+Vector3::getY(): 0.374881
+Vector3::get( 2 ): -0.723410
+Vector3::operator []( 2 ): -0.723410
+Vector3::getZ(): -0.723410
+Vector3 + Vector3: ( 0.963971 1.064952 -0.571834 )
+Vector3 - Vector3: ( -0.143189 -0.315189 -0.874987 )
+Vector3 + Point3: ( -0.068887 1.223071 -0.851565 )
+Vector3 * float: ( 0.378185 0.345462 -0.666639 )
+Vector3 / float: ( -0.577000 -0.527074 1.017097 )
+float * Vector3: ( -0.043762 -0.039975 0.077140 )
+Vector3 negate: ( -0.410391 -0.374881 0.723410 )
+mulPerElem( Vector3, Vector3 ): ( 0.227184 0.258694 -0.109652 )
+divPerElem( Vector3, Vector3 ): ( 0.741340 0.543251 -4.772578 )
+Vector3 recip: ( 2.436700 2.667511 -1.382342 )
+Vector3 sqrt: ( 0.640618 0.612276 0.850535 )
+Vector3 rsqrt: ( 1.560993 1.633252 1.175730 )
+Vector3 abs: ( 0.410391 0.374881 0.723410 )
+Vector3 copySign: ( 0.410391 0.374881 0.723410 )
+Vector3 maximum Vector3: ( 0.553580 0.690070 0.151576 )
+Vector3 minimum Vector3: ( 0.410391 0.374881 -0.723410 )
+Vector3 maximum of elements: 0.410391
+Vector3 minimum of elements: -0.723410
+Vector3 sum of elements: 0.061862
+Vector3 dot Vector3: 0.376227
+Vector3 lengthSqr: 0.832279
+Vector3 length: 0.912293
+Vector3 normalized: ( 0.449846 0.410922 -0.792958 )
+Vector3 lerp: ( -0.504755 2.242539 -1.172064 )
+Vector3 slerp: ( -0.308956 -0.503372 0.806947 )
+set Vector3 with floats: ( 0.932526 0.571087 0.610330 )
+set Vector3 with floats: ( 0.142507 -0.434829 0.925102 )
+set Vector3 elements to zero: ( 0.000000 0.000000 0.000000 )
+set Vector3 elements to zero: ( 0.000000 0.000000 0.000000 )
+set Vector3 elements to zero: ( 0.000000 0.000000 0.000000 )
+set Vector4 with floats: ( 0.158954 -0.126283 -0.249128 0.846815 )
+set Vector4 with floats: ( -0.942601 0.537720 0.446214 0.181939 )
+set Vector4 elements to zero: ( 0.000000 0.000000 0.000000 0.000000 )
+set Vector4 elements to zero: ( 0.000000 0.000000 0.000000 0.000000 )
+set Vector4 elements to zero: ( 0.000000 0.000000 0.000000 0.000000 )
+set Point3 with floats: ( -0.148223 0.284286 0.493525 )
+set Point3 with floats: ( -0.861963 -0.893410 0.548627 )
+set Point3 elements to zero: ( 0.000000 0.000000 0.000000 )
+set Point3 elements to zero: ( 0.000000 0.000000 0.000000 )
+set Point3 elements to zero: ( 0.000000 0.000000 0.000000 )
+set Quat with floats: ( 0.407007 -0.757467 -0.393126 -0.850984 )
+set Quat with floats: ( 0.375720 -0.270088 0.458888 -0.610828 )
+set Quat elements to zero: ( 0.000000 0.000000 0.000000 0.000000 )
+set Quat elements to zero: ( 0.000000 0.000000 0.000000 0.000000 )
+set Quat elements to zero: ( 0.000000 0.000000 0.000000 0.000000 )
+set Vector4 with Vector3, float: ( 0.932526 0.571087 0.610330 -0.690816 )
+set Vector4 with Vector3: ( 0.932526 0.571087 0.610330 0.000000 )
+set Vector4 with Point3: ( -0.148223 0.284286 0.493525 1.000000 )
+construct Vector4 with Quat: ( 0.407007 -0.757467 -0.393126 -0.850984 )
+set Vector4 with float: ( -0.676415 -0.676415 -0.676415 -0.676415 )
+set Vector4 with float: ( 0.664466 0.664466 0.664466 0.664466 )
+aos type 0: ( 12.000000 13.000000 14.000000 15.000000 )
+aos type 1: ( 8.000000 9.000000 10.000000 11.000000 )
+aos type 2: ( 4.000000 5.000000 6.000000 7.000000 )
+aos type 3: ( 0.000000 1.000000 2.000000 3.000000 )
+select 0: ( 0.664466 0.664466 0.664466 0.664466 )
+select 1: ( -0.942601 0.537720 0.446214 0.181939 )
+select 2: ( 0.664466 0.664466 0.664466 0.664466 )
+select 3: ( -0.942601 0.537720 0.446214 0.181939 )
+assign to Vector4 from Vector4: ( -0.942601 0.537720 0.446214 0.181939 )
+set Vector4 xyz: ( 0.932526 0.571087 0.610330 0.181939 )
+get Vector4 xyz: ( 0.932526 0.571087 0.610330 )
+set Vector4 elements to zero: ( 0.000000 0.000000 0.000000 0.000000 )
+set to x axis: ( 1.000000 0.000000 0.000000 0.000000 )
+set to y axis: ( 0.000000 1.000000 0.000000 0.000000 )
+set to z axis: ( 0.000000 0.000000 1.000000 0.000000 )
+set to w axis: ( 0.000000 0.000000 0.000000 1.000000 )
+Vector4::set( 0, float ): ( 0.101874 0.000000 0.000000 1.000000 )
+Vector4::operator [](0): ( -0.879952 0.000000 0.000000 1.000000 )
+Vector4::setX(): ( -0.997261 0.000000 0.000000 1.000000 )
+Vector4::set( 1, float ): ( -0.997261 0.172409 0.000000 1.000000 )
+Vector4::operator [](1): ( -0.997261 0.150201 0.000000 1.000000 )
+Vector4::setY(): ( -0.997261 -0.000013 0.000000 1.000000 )
+Vector4::set( 2, float ): ( -0.997261 -0.000013 0.689543 1.000000 )
+Vector4::operator [](2): ( -0.997261 -0.000013 -0.384253 1.000000 )
+Vector4::setZ(): ( -0.997261 -0.000013 -0.801022 1.000000 )
+Vector4::set( 3, float ): ( -0.997261 -0.000013 -0.801022 0.656335 )
+Vector4::operator [](3): ( -0.997261 -0.000013 -0.801022 16.472569 )
+Vector4::setW(): ( -0.997261 -0.000013 -0.801022 0.133022 )
+Vector4::get( 0 ): -0.997261
+Vector4::operator []( 0 ): -0.997261
+Vector4::getX(): -0.997261
+Vector4::get( 1 ): -0.000013
+Vector4::operator []( 1 ): -0.000013
+Vector4::getY(): -0.000013
+Vector4::get( 2 ): -0.801022
+Vector4::operator []( 2 ): -0.801022
+Vector4::getZ(): -0.801022
+Vector4::get( 3 ): 0.133022
+Vector4::operator []( 3 ): 0.133022
+Vector4::getW(): 0.133022
+Vector4 + Vector4: ( -1.939863 0.537707 -0.354808 0.314960 )
+Vector4 - Vector4: ( -0.054660 -0.537733 -1.247235 -0.048917 )
+Vector4 * float: ( 0.052076 0.000001 0.041829 -0.006946 )
+Vector4 / float: ( 6.048177 0.000081 4.858026 -0.806748 )
+float * Vector4: ( -0.299867 -0.000004 -0.240859 0.039998 )
+Vector4 negate: ( 0.997261 0.000013 0.801022 -0.133022 )
+mulPerElem( Vector4, Vector4 ): ( 0.940020 -0.000007 -0.357427 0.024202 )
+divPerElem( Vector4, Vector4 ): ( 1.057989 -0.000025 -1.795154 0.731134 )
+Vector4 recip: ( -1.002746 -75309.887339 -1.248405 7.517576 )
+Vector4 sqrt: ( 0.998630 0.003644 0.894998 0.364721 )
+Vector4 rsqrt: ( 1.001372 274.426483 1.117321 2.741820 )
+Vector4 abs: ( 0.997261 0.000013 0.801022 0.133022 )
+Vector4 copySign: ( -0.997261 0.000013 0.801022 0.133022 )
+Vector4 maximum Vector4: ( -0.942601 0.537720 0.446214 0.181939 )
+Vector4 minimum Vector4: ( -0.997261 -0.000013 -0.801022 0.133022 )
+Vector4 maximum of elements: 0.133022
+Vector4 minimum of elements: -0.997261
+Vector4 sum of elements: -1.665275
+Vector4 dot Vector4: 0.606788
+Vector4 lengthSqr: 1.653861
+Vector4 length: 1.286025
+Vector4 normalized: ( -0.775460 -0.000010 -0.622866 0.103436 )
+Vector4 lerp: ( -0.226675 0.250339 0.113208 0.735255 )
+Vector4 slerp: ( 0.351058 0.199306 -0.276993 0.871958 )
+set Vector3 with floats: ( 0.137637 -0.111879 -0.929543 )
+set Vector3 with floats: ( -0.336303 -0.146740 0.165140 )
+set Vector3 elements to zero: ( 0.000000 0.000000 0.000000 )
+set Vector3 elements to zero: ( 0.000000 0.000000 0.000000 )
+set Vector3 elements to zero: ( 0.000000 0.000000 0.000000 )
+set Vector4 with floats: ( -0.823874 0.349776 0.174872 -0.528584 )
+set Vector4 with floats: ( 0.489292 0.916708 0.728511 -0.851140 )
+set Vector4 elements to zero: ( 0.000000 0.000000 0.000000 0.000000 )
+set Vector4 elements to zero: ( 0.000000 0.000000 0.000000 0.000000 )
+set Vector4 elements to zero: ( 0.000000 0.000000 0.000000 0.000000 )
+set Point3 with floats: ( 0.079620 -0.234370 -0.996308 )
+set Point3 with floats: ( 0.433229 -0.892684 -0.957911 )
+set Point3 elements to zero: ( 0.000000 0.000000 0.000000 )
+set Point3 elements to zero: ( 0.000000 0.000000 0.000000 )
+set Point3 elements to zero: ( 0.000000 0.000000 0.000000 )
+set Quat with floats: ( 0.517122 0.257921 0.862028 0.095881 )
+set Quat with floats: ( -0.171933 -0.214078 -0.604841 -0.383831 )
+set Quat elements to zero: ( 0.000000 0.000000 0.000000 0.000000 )
+set Quat elements to zero: ( 0.000000 0.000000 0.000000 0.000000 )
+set Quat elements to zero: ( 0.000000 0.000000 0.000000 0.000000 )
+construct Point3 with Vector3: ( 0.137637 -0.111879 -0.929543 )
+set Point3 with float: ( -0.581500 -0.581500 -0.581500 )
+set Point3 with float: ( 0.222183 0.222183 0.222183 )
+aos type 0: ( 9.000000 10.000000 11.000000 )
+aos type 1: ( 6.000000 7.000000 8.000000 )
+aos type 2: ( 3.000000 4.000000 5.000000 )
+aos type 3: ( 0.000000 1.000000 2.000000 )
+select 0: ( 0.222183 0.222183 0.222183 )
+select 1: ( 0.433229 -0.892684 -0.957911 )
+select 2: ( 0.222183 0.222183 0.222183 )
+select 3: ( 0.433229 -0.892684 -0.957911 )
+load XYZ array: ( -0.160082 0.962714 0.737794 )
+xyzx: ( 0.160082 -0.962714 -0.737794 0.071926 )
+yzxy: ( 0.506313 -0.689277 -0.686485 -0.473013 )
+zxyz: ( 0.735610 0.046390 -0.568674 0.004815 )
+xyzx: ( 0.160082 -0.962714 -0.737794 0.071926 )
+yzxy: ( 0.506313 -0.689277 -0.686485 -0.473013 )
+zxyz: ( 0.735610 0.046390 -0.568674 0.004815 )
+storeXYZ:-1.0 -2.0 -3.0 0.4
+assign to Point3 from Point3: ( 0.433229 -0.892684 -0.957911 )
+set Point3 elements to zero: ( 0.000000 0.000000 0.000000 )
+Point3::set( 0, float ): ( -0.256120 0.000000 0.000000 )
+Point3::operator [](0): ( -0.979920 0.000000 0.000000 )
+Point3::setX(): ( -0.046627 0.000000 0.000000 )
+Point3::set( 1, float ): ( -0.046627 -0.716491 0.000000 )
+Point3::operator [](1): ( -0.046627 1.719236 0.000000 )
+Point3::setY(): ( -0.046627 0.508814 0.000000 )
+Point3::set( 2, float ): ( -0.046627 0.508814 -0.238839 )
+Point3::operator [](2): ( -0.046627 0.508814 0.061402 )
+Point3::setZ(): ( -0.046627 0.508814 -0.157280 )
+Point3::get( 0 ): -0.046627
+Point3::operator []( 0 ): -0.046627
+Point3::getX(): -0.046627
+Point3::get( 1 ): 0.508814
+Point3::operator []( 1 ): 0.508814
+Point3::getY(): 0.508814
+Point3::get( 2 ): -0.157280
+Point3::operator []( 2 ): -0.157280
+Point3::getZ(): -0.157280
+Point3 - Point3: ( -0.479856 1.401498 0.800631 )
+Point3 + Vector3: ( -0.382931 0.362074 0.007860 )
+Point3 - Vector3: ( 0.289676 0.655554 -0.322420 )
+mulPerElem( Point3, Point3 ): ( -0.020200 -0.454210 0.150660 )
+divPerElem( Point3, Point3 ): ( -0.107627 -0.569982 0.164191 )
+Point3 recip: ( -21.446777 1.965355 -6.358085 )
+Point3 sqrt: ( 0.215933 0.713312 0.396586 )
+Point3 rsqrt: ( 4.631066 1.401911 2.521524 )
+Point3 abs: ( 0.046627 0.508814 0.157280 )
+Point3 copySign: ( 0.046627 -0.508814 -0.157280 )
+Point3 maximum Point3: ( 0.433229 0.508814 -0.157280 )
+Point3 minimum Point3: ( -0.046627 -0.892684 -0.957911 )
+Point3 maximum of elements: 0.508814
+Point3 minimum of elements: -0.157280
+Point3 sum of elements: 0.304907
+Point projection: -0.084956
+Point distSqrFromOrigin: 0.285803
+Point distFromOrigin: 0.534605
+Point distSqr: 2.835468
+Point dist: 1.683885
+Point3 lerp: ( 0.508235 -0.017312 0.360111 )
+set Vector3 with floats: ( 0.219317 -0.118359 0.413442 )
+set Vector3 with floats: ( -0.567698 0.531358 -0.387226 )
+set Vector3 elements to zero: ( 0.000000 0.000000 0.000000 )
+set Vector3 elements to zero: ( 0.000000 0.000000 0.000000 )
+set Vector3 elements to zero: ( 0.000000 0.000000 0.000000 )
+set Vector4 with floats: ( 0.572490 -0.820417 0.797191 0.867178 )
+set Vector4 with floats: ( 0.934764 0.237092 -0.866162 -0.773939 )
+set Vector4 elements to zero: ( 0.000000 0.000000 0.000000 0.000000 )
+set Vector4 elements to zero: ( 0.000000 0.000000 0.000000 0.000000 )
+set Vector4 elements to zero: ( 0.000000 0.000000 0.000000 0.000000 )
+set Point3 with floats: ( 0.261311 -0.851570 0.114814 )
+set Point3 with floats: ( -0.531592 0.223925 0.869105 )
+set Point3 elements to zero: ( 0.000000 0.000000 0.000000 )
+set Point3 elements to zero: ( 0.000000 0.000000 0.000000 )
+set Point3 elements to zero: ( 0.000000 0.000000 0.000000 )
+set Quat with floats: ( 0.143405 0.148518 -0.071136 -0.758292 )
+set Quat with floats: ( -0.527633 0.997215 0.114440 0.727558 )
+set Quat elements to zero: ( 0.000000 0.000000 0.000000 0.000000 )
+set Quat elements to zero: ( 0.000000 0.000000 0.000000 0.000000 )
+set Quat elements to zero: ( 0.000000 0.000000 0.000000 0.000000 )
+set Quat with Vector3, float: ( 0.219317 -0.118359 0.413442 -0.425760 )
+construct Quat with Vector4: ( 0.572490 -0.820417 0.797191 0.867178 )
+set Quat with float: ( 0.459888 0.459888 0.459888 0.459888 )
+set Quat with float: ( 0.642516 0.642516 0.642516 0.642516 )
+aos type 0: ( 12.000000 13.000000 14.000000 15.000000 )
+aos type 1: ( 8.000000 9.000000 10.000000 11.000000 )
+aos type 2: ( 4.000000 5.000000 6.000000 7.000000 )
+aos type 3: ( 0.000000 1.000000 2.000000 3.000000 )
+select 0: ( 0.642516 0.642516 0.642516 0.642516 )
+select 1: ( -0.527633 0.997215 0.114440 0.727558 )
+select 2: ( 0.642516 0.642516 0.642516 0.642516 )
+select 3: ( -0.527633 0.997215 0.114440 0.727558 )
+assign to Quat from Quat: ( -0.527633 0.997215 0.114440 0.727558 )
+set Quat xyz: ( 0.219317 -0.118359 0.413442 0.727558 )
+get Quat xyz: ( 0.219317 -0.118359 0.413442 )
+set Quat elements to zero: ( 0.000000 0.000000 0.000000 0.000000 )
+Quat::set( 0, float ): ( -0.022534 0.000000 0.000000 0.000000 )
+Quat::operator [](0): ( 0.419714 0.000000 0.000000 0.000000 )
+Quat::setX(): ( 0.765986 0.000000 0.000000 0.000000 )
+Quat::set( 1, float ): ( 0.765986 -0.137795 0.000000 0.000000 )
+Quat::operator [](1): ( 0.765986 0.890273 0.000000 0.000000 )
+Quat::setY(): ( 0.765986 -0.553800 0.000000 0.000000 )
+Quat::set( 2, float ): ( 0.765986 -0.553800 -0.014688 0.000000 )
+Quat::operator [](2): ( 0.765986 -0.553800 -0.802660 0.000000 )
+Quat::setZ(): ( 0.765986 -0.553800 0.499529 0.000000 )
+Quat::set( 3, float ): ( 0.765986 -0.553800 0.499529 0.385180 )
+Quat::operator [](3): ( 0.765986 -0.553800 0.499529 -0.150467 )
+Quat::setW(): ( 0.765986 -0.553800 0.499529 0.067637 )
+Quat::get( 0 ): 0.765986
+Quat::operator []( 0 ): 0.765986
+Quat::getX(): 0.765986
+Quat::get( 1 ): -0.553800
+Quat::operator []( 1 ): -0.553800
+Quat::getY(): -0.553800
+Quat::get( 2 ): 0.499529
+Quat::operator []( 2 ): 0.499529
+Quat::getZ(): 0.499529
+Quat::get( 3 ): 0.067637
+Quat::operator []( 3 ): 0.067637
+Quat::getW(): 0.067637
+Quat + Quat: ( 0.238353 0.443416 0.613969 0.795196 )
+Quat - Quat: ( 1.293620 -1.551015 0.385089 -0.659921 )
+Quat * Quat: ( -0.039902 -0.686700 0.842827 0.948461 )
+Quat * float: ( 0.611545 -0.442140 0.398812 0.054000 )
+Quat / float: ( 2.464705 -1.781954 1.607327 0.217635 )
+float * Quat: ( 0.659770 -0.477006 0.430261 0.058258 )
+Quat negate: ( -0.765986 0.553800 -0.499529 -0.067637 )
+Quat dot Quat: -0.850041
+Quat lengthSqr: 1.147533
+Quat length: 1.071229
+Quat normalized: ( 0.715053 -0.516976 0.466313 0.063140 )
+set to identity: ( 0.000000 0.000000 0.000000 1.000000 )
+Quat rotation between vectors: ( -0.152187 -0.131117 0.043194 0.571186 )
+Quat rotation axis angle: ( -0.103250 0.055721 -0.194639 0.882252 )
+Quat rotationX: ( -0.321727 0.000000 0.000000 0.946833 )
+Quat rotationY: ( 0.000000 0.142892 0.000000 0.989738 )
+Quat rotationZ: ( 0.000000 0.000000 0.262912 0.964820 )
+Quat rotate Vector3: ( 0.249044 0.009268 0.413442 )
+Quat conj: ( -0.000000 -0.000000 -0.262912 0.964820 )
+Quat lerp: ( -0.053419 0.446373 0.392075 0.581368 )
+Quat slerp: ( -0.630711 0.472429 0.365730 0.495233 )
+Quat squad: ( 0.119307 -0.523957 -0.534187 -0.652594 )
+set Vector3 with floats: ( 0.531906 0.271995 -0.862601 )
+set Vector3 with floats: ( -0.738694 0.514248 -0.039363 )
+set Vector3 elements to zero: ( 0.000000 0.000000 0.000000 )
+set Vector3 elements to zero: ( 0.000000 0.000000 0.000000 )
+set Vector3 elements to zero: ( 0.000000 0.000000 0.000000 )
+set Vector4 with floats: ( 0.429390 -0.769469 0.281336 -0.203301 )
+set Vector4 with floats: ( 0.412586 0.567926 0.410131 -0.462918 )
+set Vector4 elements to zero: ( 0.000000 0.000000 0.000000 0.000000 )
+set Vector4 elements to zero: ( 0.000000 0.000000 0.000000 0.000000 )
+set Vector4 elements to zero: ( 0.000000 0.000000 0.000000 0.000000 )
+set Point3 with floats: ( 0.560953 -0.731715 -0.446158 )
+set Point3 with floats: ( -0.837491 -0.573480 -0.607820 )
+set Point3 elements to zero: ( 0.000000 0.000000 0.000000 )
+set Point3 elements to zero: ( 0.000000 0.000000 0.000000 )
+set Point3 elements to zero: ( 0.000000 0.000000 0.000000 )
+set Quat with floats: ( 0.238415 0.213445 0.098612 0.135072 )
+set Quat with floats: ( -0.749274 -0.855977 0.765675 -0.693447 )
+set Quat elements to zero: ( 0.000000 0.000000 0.000000 0.000000 )
+set Quat elements to zero: ( 0.000000 0.000000 0.000000 0.000000 )
+set Quat elements to zero: ( 0.000000 0.000000 0.000000 0.000000 )
+construct Vector3 with Point3: ( 0.560953 -0.731715 -0.446158 )
+set Vector3 with float: ( 0.131554 0.131554 0.131554 )
+set Vector3 with float: ( -0.366757 -0.366757 -0.366757 )
+aos type 0: ( 9.000000 10.000000 11.000000 )
+aos type 1: ( 6.000000 7.000000 8.000000 )
+aos type 2: ( 3.000000 4.000000 5.000000 )
+aos type 3: ( 0.000000 1.000000 2.000000 )
+select 0: ( -0.366757 -0.366757 -0.366757 )
+select 1: ( -0.738694 0.514248 -0.039363 )
+select 2: ( -0.366757 -0.366757 -0.366757 )
+select 3: ( -0.738694 0.514248 -0.039363 )
+load XYZ array: ( -0.913636 0.675222 0.144053 )
+xyzx: ( 0.913636 -0.675222 -0.144053 0.632329 )
+yzxy: ( 0.947120 0.049367 -0.126333 0.664206 )
+zxyz: ( -0.220879 -0.284219 0.387216 -0.913568 )
+xyzx: ( 0.913636 -0.675222 -0.144053 0.632329 )
+yzxy: ( 0.947120 0.049367 -0.126333 0.664206 )
+zxyz: ( -0.220879 -0.284219 0.387216 -0.913568 )
+storeXYZ:-1.0 -2.0 -3.0 0.4
+assign to Vector3 from Vector3: ( -0.738694 0.514248 -0.039363 )
+set Vector3 elements to zero: ( 0.000000 0.000000 0.000000 )
+set to x axis: ( 1.000000 0.000000 0.000000 )
+set to y axis: ( 0.000000 1.000000 0.000000 )
+set to z axis: ( 0.000000 0.000000 1.000000 )
+Vector3::set( 0, float ): ( -0.330409 0.000000 1.000000 )
+Vector3::operator [](0): ( -1.321680 0.000000 1.000000 )
+Vector3::setX(): ( -0.870739 0.000000 1.000000 )
+Vector3::set( 1, float ): ( -0.870739 0.415605 1.000000 )
+Vector3::operator [](1): ( -0.870739 -0.855146 1.000000 )
+Vector3::setY(): ( -0.870739 0.876855 1.000000 )
+Vector3::set( 2, float ): ( -0.870739 0.876855 0.982846 )
+Vector3::operator [](2): ( -0.870739 0.876855 -0.725814 )
+Vector3::setZ(): ( -0.870739 0.876855 -0.123567 )
+Vector3::get( 0 ): -0.870739
+Vector3::operator []( 0 ): -0.870739
+Vector3::getX(): -0.870739
+Vector3::get( 1 ): 0.876855
+Vector3::operator []( 1 ): 0.876855
+Vector3::getY(): 0.876855
+Vector3::get( 2 ): -0.123567
+Vector3::operator []( 2 ): -0.123567
+Vector3::getZ(): -0.123567
+Vector3 + Vector3: ( -1.609433 1.391104 -0.162930 )
+Vector3 - Vector3: ( -0.132046 0.362607 -0.084203 )
+Vector3 + Point3: ( -1.708231 0.303375 -0.731387 )
+Vector3 * float: ( -0.354427 0.356917 -0.050297 )
+Vector3 / float: ( -6.166518 6.209829 -0.875092 )
+float * Vector3: ( 0.690557 -0.695407 0.097997 )
+Vector3 negate: ( 0.870739 -0.876855 0.123567 )
+mulPerElem( Vector3, Vector3 ): ( 0.643210 0.450921 0.004864 )
+divPerElem( Vector3, Vector3 ): ( 1.178756 1.705119 3.139138 )
+Vector3 recip: ( -1.148449 1.140439 -8.092791 )
+Vector3 sqrt: ( 0.933134 0.936405 0.351521 )
+Vector3 rsqrt: ( 1.071657 1.067914 2.844783 )
+Vector3 abs: ( 0.870739 0.876855 0.123567 )
+Vector3 copySign: ( -0.870739 0.876855 -0.123567 )
+Vector3 maximum Vector3: ( -0.738694 0.876855 -0.039363 )
+Vector3 minimum Vector3: ( -0.870739 0.514248 -0.123567 )
+Vector3 maximum of elements: 0.876855
+Vector3 minimum of elements: -0.870739
+Vector3 sum of elements: -0.117451
+Vector3 dot Vector3: 1.098995
+Vector3 lengthSqr: 1.542330
+Vector3 length: 1.241906
+Vector3 normalized: ( -0.701131 0.706056 -0.099498 )
+Vector3 lerp: ( -0.118997 -0.338092 0.499550 )
+Vector3 slerp: ( -0.899759 -0.236062 -0.367027 )
+set Vector3 with floats: ( 0.060414 -0.867395 -0.702364 )
+set Vector3 with floats: ( -0.182602 -0.832807 0.278191 )
+set Vector3 elements to zero: ( 0.000000 0.000000 0.000000 )
+set Vector3 elements to zero: ( 0.000000 0.000000 0.000000 )
+set Vector3 elements to zero: ( 0.000000 0.000000 0.000000 )
+set Vector4 with floats: ( -0.967562 -0.520296 0.160191 -0.677990 )
+set Vector4 with floats: ( -0.470750 -0.846580 -0.705751 -0.825368 )
+set Vector4 elements to zero: ( 0.000000 0.000000 0.000000 0.000000 )
+set Vector4 elements to zero: ( 0.000000 0.000000 0.000000 0.000000 )
+set Vector4 elements to zero: ( 0.000000 0.000000 0.000000 0.000000 )
+set Point3 with floats: ( -0.712890 -0.064487 0.444065 )
+set Point3 with floats: ( -0.045226 0.116544 -0.007285 )
+set Point3 elements to zero: ( 0.000000 0.000000 0.000000 )
+set Point3 elements to zero: ( 0.000000 0.000000 0.000000 )
+set Point3 elements to zero: ( 0.000000 0.000000 0.000000 )
+set Quat with floats: ( -0.838230 -0.410767 -0.409299 -0.336683 )
+set Quat with floats: ( -0.830700 -0.801729 -0.595153 -0.784672 )
+set Quat elements to zero: ( 0.000000 0.000000 0.000000 0.000000 )
+set Quat elements to zero: ( 0.000000 0.000000 0.000000 0.000000 )
+set Quat elements to zero: ( 0.000000 0.000000 0.000000 0.000000 )
+set Vector4 with Vector3, float: ( 0.060414 -0.867395 -0.702364 -0.653655 )
+set Vector4 with Vector3: ( 0.060414 -0.867395 -0.702364 0.000000 )
+set Vector4 with Point3: ( -0.712890 -0.064487 0.444065 1.000000 )
+construct Vector4 with Quat: ( -0.838230 -0.410767 -0.409299 -0.336683 )
+set Vector4 with float: ( 0.670791 0.670791 0.670791 0.670791 )
+set Vector4 with float: ( 0.653571 0.653571 0.653571 0.653571 )
+aos type 0: ( 12.000000 13.000000 14.000000 15.000000 )
+aos type 1: ( 8.000000 9.000000 10.000000 11.000000 )
+aos type 2: ( 4.000000 5.000000 6.000000 7.000000 )
+aos type 3: ( 0.000000 1.000000 2.000000 3.000000 )
+select 0: ( 0.653571 0.653571 0.653571 0.653571 )
+select 1: ( -0.470750 -0.846580 -0.705751 -0.825368 )
+select 2: ( 0.653571 0.653571 0.653571 0.653571 )
+select 3: ( -0.470750 -0.846580 -0.705751 -0.825368 )
+assign to Vector4 from Vector4: ( -0.470750 -0.846580 -0.705751 -0.825368 )
+set Vector4 xyz: ( 0.060414 -0.867395 -0.702364 -0.825368 )
+get Vector4 xyz: ( 0.060414 -0.867395 -0.702364 )
+set Vector4 elements to zero: ( 0.000000 0.000000 0.000000 0.000000 )
+set to x axis: ( 1.000000 0.000000 0.000000 0.000000 )
+set to y axis: ( 0.000000 1.000000 0.000000 0.000000 )
+set to z axis: ( 0.000000 0.000000 1.000000 0.000000 )
+set to w axis: ( 0.000000 0.000000 0.000000 1.000000 )
+Vector4::set( 0, float ): ( 0.850716 0.000000 0.000000 1.000000 )
+Vector4::operator [](0): ( -0.528836 0.000000 0.000000 1.000000 )
+Vector4::setX(): ( 0.965901 0.000000 0.000000 1.000000 )
+Vector4::set( 1, float ): ( 0.965901 -0.072675 0.000000 1.000000 )
+Vector4::operator [](1): ( 0.965901 0.450700 0.000000 1.000000 )
+Vector4::setY(): ( 0.965901 -0.825793 0.000000 1.000000 )
+Vector4::set( 2, float ): ( 0.965901 -0.825793 0.597719 1.000000 )
+Vector4::operator [](2): ( 0.965901 -0.825793 0.279622 1.000000 )
+Vector4::setZ(): ( 0.965901 -0.825793 -0.884427 1.000000 )
+Vector4::set( 3, float ): ( 0.965901 -0.825793 -0.884427 -0.756791 )
+Vector4::operator [](3): ( 0.965901 -0.825793 -0.884427 -0.685697 )
+Vector4::setW(): ( 0.965901 -0.825793 -0.884427 -0.684626 )
+Vector4::get( 0 ): 0.965901
+Vector4::operator []( 0 ): 0.965901
+Vector4::getX(): 0.965901
+Vector4::get( 1 ): -0.825793
+Vector4::operator []( 1 ): -0.825793
+Vector4::getY(): -0.825793
+Vector4::get( 2 ): -0.884427
+Vector4::operator []( 2 ): -0.884427
+Vector4::getZ(): -0.884427
+Vector4::get( 3 ): -0.684626
+Vector4::operator []( 3 ): -0.684626
+Vector4::getW(): -0.684626
+Vector4 + Vector4: ( 0.495151 -1.672373 -1.590178 -1.509993 )
+Vector4 - Vector4: ( 1.436652 0.020787 -0.178676 0.140742 )
+Vector4 * float: ( -0.960151 0.820877 0.879161 0.680550 )
+Vector4 / float: ( 1.206825 -1.031770 -1.105029 -0.855391 )
+float * Vector4: ( -0.546353 0.467102 0.500267 0.387252 )
+Vector4 negate: ( -0.965901 0.825793 0.884427 0.684626 )
+mulPerElem( Vector4, Vector4 ): ( -0.454698 0.699100 0.624185 0.565068 )
+divPerElem( Vector4, Vector4 ): ( -2.051834 0.975446 1.253172 0.829480 )
+Vector4 recip: ( 1.035302 -1.210957 -1.130676 -1.460652 )
+Vector4 sqrt: ( 0.982803 0.908731 0.940440 0.827421 )
+Vector4 rsqrt: ( 1.017498 1.100435 1.063332 1.208575 )
+Vector4 abs: ( 0.965901 0.825793 0.884427 0.684626 )
+Vector4 copySign: ( -0.965901 -0.825793 -0.884427 -0.684626 )
+Vector4 maximum Vector4: ( 0.965901 -0.825793 -0.705751 -0.684626 )
+Vector4 minimum Vector4: ( -0.470750 -0.846580 -0.884427 -0.825368 )
+Vector4 maximum of elements: 0.965901
+Vector4 minimum of elements: -0.884427
+Vector4 sum of elements: -1.428944
+Vector4 dot Vector4: 1.433654
+Vector4 lengthSqr: 2.865822
+Vector4 length: 1.692874
+Vector4 normalized: ( 0.570569 -0.487805 -0.522441 -0.404416 )
+Vector4 lerp: ( -0.436271 0.676982 0.475175 0.275280 )
+Vector4 slerp: ( -0.493672 0.661673 0.416222 0.381100 )
+set Vector3 with floats: ( -0.287826 0.942655 -0.634432 )
+set Vector3 with floats: ( -0.140438 0.570869 -0.764965 )
+set Vector3 elements to zero: ( 0.000000 0.000000 0.000000 )
+set Vector3 elements to zero: ( 0.000000 0.000000 0.000000 )
+set Vector3 elements to zero: ( 0.000000 0.000000 0.000000 )
+set Vector4 with floats: ( 0.067523 -0.514589 0.233091 0.554488 )
+set Vector4 with floats: ( -0.633529 -0.019374 0.869259 -0.369818 )
+set Vector4 elements to zero: ( 0.000000 0.000000 0.000000 0.000000 )
+set Vector4 elements to zero: ( 0.000000 0.000000 0.000000 0.000000 )
+set Vector4 elements to zero: ( 0.000000 0.000000 0.000000 0.000000 )
+set Point3 with floats: ( -0.280690 -0.797209 -0.255233 )
+set Point3 with floats: ( 0.780605 0.789803 0.974262 )
+set Point3 elements to zero: ( 0.000000 0.000000 0.000000 )
+set Point3 elements to zero: ( 0.000000 0.000000 0.000000 )
+set Point3 elements to zero: ( 0.000000 0.000000 0.000000 )
+set Quat with floats: ( -0.785980 -0.701386 0.871088 0.566743 )
+set Quat with floats: ( 0.752273 -0.476301 -0.747342 0.077386 )
+set Quat elements to zero: ( 0.000000 0.000000 0.000000 0.000000 )
+set Quat elements to zero: ( 0.000000 0.000000 0.000000 0.000000 )
+set Quat elements to zero: ( 0.000000 0.000000 0.000000 0.000000 )
+construct Point3 with Vector3: ( -0.287826 0.942655 -0.634432 )
+set Point3 with float: ( -0.305600 -0.305600 -0.305600 )
+set Point3 with float: ( -0.229194 -0.229194 -0.229194 )
+aos type 0: ( 9.000000 10.000000 11.000000 )
+aos type 1: ( 6.000000 7.000000 8.000000 )
+aos type 2: ( 3.000000 4.000000 5.000000 )
+aos type 3: ( 0.000000 1.000000 2.000000 )
+select 0: ( -0.229194 -0.229194 -0.229194 )
+select 1: ( 0.780605 0.789803 0.974262 )
+select 2: ( -0.229194 -0.229194 -0.229194 )
+select 3: ( 0.780605 0.789803 0.974262 )
+load XYZ array: ( 0.376026 -0.935045 -0.189804 )
+xyzx: ( -0.376026 0.935045 0.189804 -0.007649 )
+yzxy: ( 0.440298 -0.994796 0.271484 0.259070 )
+zxyz: ( -0.902149 0.836501 -0.229093 -0.586393 )
+xyzx: ( -0.376026 0.935045 0.189804 -0.007649 )
+yzxy: ( 0.440298 -0.994796 0.271484 0.259070 )
+zxyz: ( -0.902149 0.836501 -0.229093 -0.586393 )
+storeXYZ:-1.0 -2.0 -3.0 0.4
+assign to Point3 from Point3: ( 0.780605 0.789803 0.974262 )
+set Point3 elements to zero: ( 0.000000 0.000000 0.000000 )
+Point3::set( 0, float ): ( -0.260009 0.000000 0.000000 )
+Point3::operator [](0): ( 0.017680 0.000000 0.000000 )
+Point3::setX(): ( 0.912886 0.000000 0.000000 )
+Point3::set( 1, float ): ( 0.912886 -0.402380 0.000000 )
+Point3::operator [](1): ( 0.912886 1.043836 0.000000 )
+Point3::setY(): ( 0.912886 0.321576 0.000000 )
+Point3::set( 2, float ): ( 0.912886 0.321576 0.724026 )
+Point3::operator [](2): ( 0.912886 0.321576 -0.074816 )
+Point3::setZ(): ( 0.912886 0.321576 0.137730 )
+Point3::get( 0 ): 0.912886
+Point3::operator []( 0 ): 0.912886
+Point3::getX(): 0.912886
+Point3::get( 1 ): 0.321576
+Point3::operator []( 1 ): 0.321576
+Point3::getY(): 0.321576
+Point3::get( 2 ): 0.137730
+Point3::operator []( 2 ): 0.137730
+Point3::getZ(): 0.137730
+Point3 - Point3: ( 0.132280 -0.468228 -0.836532 )
+Point3 + Vector3: ( 0.772448 0.892445 -0.627235 )
+Point3 - Vector3: ( 1.053323 -0.249293 0.902695 )
+mulPerElem( Point3, Point3 ): ( 0.712603 0.253982 0.134185 )
+divPerElem( Point3, Point3 ): ( 1.169459 0.407159 0.141368 )
+Point3 recip: ( 1.095427 3.109689 7.260596 )
+Point3 sqrt: ( 0.955451 0.567076 0.371120 )
+Point3 rsqrt: ( 1.046627 1.763431 2.694549 )
+Point3 abs: ( 0.912886 0.321576 0.137730 )
+Point3 copySign: ( 0.912886 0.321576 0.137730 )
+Point3 maximum Point3: ( 0.912886 0.789803 0.974262 )
+Point3 minimum Point3: ( 0.780605 0.321576 0.137730 )
+Point3 maximum of elements: 0.912886
+Point3 minimum of elements: 0.137730
+Point3 sum of elements: 1.372191
+Point projection: -0.049984
+Point distSqrFromOrigin: 0.955741
+Point distFromOrigin: 0.977620
+Point distSqr: 0.936521
+Point dist: 0.967740
+Point3 lerp: ( -0.148533 -0.240318 0.263234 )
+set Vector3 with floats: ( -0.950919 -0.196258 -0.599808 )
+set Vector3 with floats: ( -0.794413 -0.927636 -0.187478 )
+set Vector3 elements to zero: ( 0.000000 0.000000 0.000000 )
+set Vector3 elements to zero: ( 0.000000 0.000000 0.000000 )
+set Vector3 elements to zero: ( 0.000000 0.000000 0.000000 )
+set Vector4 with floats: ( -0.113038 0.366120 -0.483786 0.622670 )
+set Vector4 with floats: ( -0.801806 0.295383 0.022005 -0.000678 )
+set Vector4 elements to zero: ( 0.000000 0.000000 0.000000 0.000000 )
+set Vector4 elements to zero: ( 0.000000 0.000000 0.000000 0.000000 )
+set Vector4 elements to zero: ( 0.000000 0.000000 0.000000 0.000000 )
+set Point3 with floats: ( -0.689334 0.266538 -0.785099 )
+set Point3 with floats: ( -0.572665 0.772760 0.971896 )
+set Point3 elements to zero: ( 0.000000 0.000000 0.000000 )
+set Point3 elements to zero: ( 0.000000 0.000000 0.000000 )
+set Point3 elements to zero: ( 0.000000 0.000000 0.000000 )
+set Quat with floats: ( -0.961363 0.723449 -0.758442 -0.733158 )
+set Quat with floats: ( 0.139667 -0.738147 0.727224 0.089921 )
+set Quat elements to zero: ( 0.000000 0.000000 0.000000 0.000000 )
+set Quat elements to zero: ( 0.000000 0.000000 0.000000 0.000000 )
+set Quat elements to zero: ( 0.000000 0.000000 0.000000 0.000000 )
+set Quat with Vector3, float: ( -0.950919 -0.196258 -0.599808 -0.113118 )
+construct Quat with Vector4: ( -0.113038 0.366120 -0.483786 0.622670 )
+set Quat with float: ( -0.364554 -0.364554 -0.364554 -0.364554 )
+set Quat with float: ( -0.137645 -0.137645 -0.137645 -0.137645 )
+aos type 0: ( 12.000000 13.000000 14.000000 15.000000 )
+aos type 1: ( 8.000000 9.000000 10.000000 11.000000 )
+aos type 2: ( 4.000000 5.000000 6.000000 7.000000 )
+aos type 3: ( 0.000000 1.000000 2.000000 3.000000 )
+select 0: ( -0.137645 -0.137645 -0.137645 -0.137645 )
+select 1: ( 0.139667 -0.738147 0.727224 0.089921 )
+select 2: ( -0.137645 -0.137645 -0.137645 -0.137645 )
+select 3: ( 0.139667 -0.738147 0.727224 0.089921 )
+assign to Quat from Quat: ( 0.139667 -0.738147 0.727224 0.089921 )
+set Quat xyz: ( -0.950919 -0.196258 -0.599808 0.089921 )
+get Quat xyz: ( -0.950919 -0.196258 -0.599808 )
+set Quat elements to zero: ( 0.000000 0.000000 0.000000 0.000000 )
+Quat::set( 0, float ): ( 0.933847 0.000000 0.000000 0.000000 )
+Quat::operator [](0): ( 2.026584 0.000000 0.000000 0.000000 )
+Quat::setX(): ( -0.069558 0.000000 0.000000 0.000000 )
+Quat::set( 1, float ): ( -0.069558 -0.216369 0.000000 0.000000 )
+Quat::operator [](1): ( -0.069558 -0.152646 0.000000 0.000000 )
+Quat::setY(): ( -0.069558 -0.022250 0.000000 0.000000 )
+Quat::set( 2, float ): ( -0.069558 -0.022250 0.035410 0.000000 )
+Quat::operator [](2): ( -0.069558 -0.022250 1.428823 0.000000 )
+Quat::setZ(): ( -0.069558 -0.022250 0.804061 0.000000 )
+Quat::set( 3, float ): ( -0.069558 -0.022250 0.804061 0.161693 )
+Quat::operator [](3): ( -0.069558 -0.022250 0.804061 -0.332169 )
+Quat::setW(): ( -0.069558 -0.022250 0.804061 -0.586017 )
+Quat::get( 0 ): -0.069558
+Quat::operator []( 0 ): -0.069558
+Quat::getX(): -0.069558
+Quat::get( 1 ): -0.022250
+Quat::operator []( 1 ): -0.022250
+Quat::getY(): -0.022250
+Quat::get( 2 ): 0.804061
+Quat::operator []( 2 ): 0.804061
+Quat::getZ(): 0.804061
+Quat::get( 3 ): -0.586017
+Quat::operator []( 3 ): -0.586017
+Quat::getW(): -0.586017
+Quat + Quat: ( 0.070108 -0.760397 1.531285 -0.496095 )
+Quat - Quat: ( -0.209225 0.715896 0.076837 -0.675938 )
+Quat * Quat: ( 0.489232 0.593451 -0.299411 -0.644137 )
+Quat * float: ( -0.031036 -0.009928 0.358759 -0.261471 )
+Quat / float: ( -0.103711 -0.033175 1.198842 -0.873742 )
+float * Quat: ( 0.021745 0.006956 -0.251360 0.183196 )
+Quat negate: ( 0.069558 0.022250 -0.804061 0.586017 )
+Quat dot Quat: 0.538746
+Quat lengthSqr: 0.995263
+Quat length: 0.997629
+Quat normalized: ( -0.069724 -0.022303 0.805972 -0.587410 )
+set to identity: ( 0.000000 0.000000 0.000000 1.000000 )
+Quat rotation between vectors: ( -0.256621 0.147283 0.358649 1.012405 )
+Quat rotation axis angle: ( 0.163854 0.033817 0.103354 0.985043 )
+Quat rotationX: ( -0.160489 0.000000 0.000000 0.987038 )
+Quat rotationY: ( 0.000000 0.030795 0.000000 0.999526 )
+Quat rotationZ: ( 0.000000 0.000000 0.054406 0.998519 )
+Quat rotate Vector3: ( -0.923966 -0.298415 -0.599808 )
+Quat conj: ( -0.000000 -0.000000 -0.054406 0.998519 )
+Quat lerp: ( -0.860671 -0.101017 0.008389 -0.459099 )
+Quat slerp: ( -0.935192 0.097684 -0.079386 -0.331015 )
+Quat squad: ( -0.208138 -0.281808 0.458751 -0.816585 )
+
+ __end__ 
diff --git a/Extras/vectormathlibrary/tests/test1_soa_c.c b/Extras/vectormathlibrary/tests/test1_soa_c.c
new file mode 100644
index 000000000..985488f1c
--- /dev/null
+++ b/Extras/vectormathlibrary/tests/test1_soa_c.c
@@ -0,0 +1,1006 @@
+/*
+  Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+  All rights reserved.
+
+  Redistribution and use in source and binary forms,
+  with or without modification, are permitted provided that the
+  following conditions are met:
+   * Redistributions of source code must retain the above copyright
+     notice, this list of conditions and the following disclaimer.
+   * Redistributions in binary form must reproduce the above copyright
+     notice, this list of conditions and the following disclaimer in the
+     documentation and/or other materials provided with the distribution.
+   * Neither the name of the Sony Computer Entertainment Inc nor the names
+     of its contributors may be used to endorse or promote products derived
+     from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+  POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#define _VECTORMATH_SOA_TEST  /* defined ahead of the includes below so both headers see it; presumably switches on SoA-specific test helpers -- TODO confirm in test.h */
+
+#include "vectormath_soa.h"
+#include "test.h"
+
+int iteration = 0;  /* file-scope int with external linkage, initialised to 0; NOTE(review): presumably a test-iteration counter consumed by test.h or the driver -- confirm */
+
+void
+Vector3_methods_test()
+{
+    VmathSoaVector3 a_Vector3, b_Vector3, c_Vector3, d_Vector3, e_Vector3;
+    VmathSoaVector4 a_Vector4, b_Vector4, c_Vector4, d_Vector4, e_Vector4;
+    VmathSoaPoint3 a_Point3, b_Point3, c_Point3, d_Point3, e_Point3;
+    VmathSoaQuat a_Quat, b_Quat, c_Quat, d_Quat, e_Quat;
+    VmathVector3 aos_Vector3_0, aos_Vector3_1, aos_Vector3_2, aos_Vector3_3;
+    VmathSoaVector3 soa_Vector3;
+    VmathVector4 aos_Vector4_0, aos_Vector4_1, aos_Vector4_2;
+    VmathSoaVector3 tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3;
+    VmathSoaPoint3 tmpP3_0;
+    VmathSoaVector3 tmpV3_4, tmpV3_5, tmpV3_6, tmpV3_7, tmpV3_8, tmpV3_9, tmpV3_10, tmpV3_11, tmpV3_12, tmpV3_13, tmpV3_14, tmpV3_15, tmpV3_16, tmpV3_17, tmpV3_18, tmpV3_19;
+    vec_float4 rndflt1, rndflt2, rndflt3, rndflt4;
+    float xyz4[12] __attribute__ ((aligned(16)));
+    xyz4[0] = getfloat(randfloat());
+    xyz4[1] = getfloat(randfloat());
+    xyz4[2] = getfloat(randfloat());
+    xyz4[3] = getfloat(randfloat());
+    xyz4[4] = getfloat(randfloat());
+    xyz4[5] = getfloat(randfloat());
+    xyz4[6] = getfloat(randfloat());
+    xyz4[7] = getfloat(randfloat());
+    xyz4[8] = getfloat(randfloat());
+    xyz4[9] = getfloat(randfloat());
+    xyz4[10] = getfloat(randfloat());
+    xyz4[11] = getfloat(randfloat());
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaV3MakeFromElems( &a_Vector3, rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaV3MakeFromElems( &b_Vector3, rndflt1, rndflt2, rndflt3 );
+    vmathSoaV3Prints( &a_Vector3, "set Vector3 with floats" );
+    vmathSoaV3Prints( &b_Vector3, "set Vector3 with floats" );
+    vmathSoaV3MakeFromScalar( &c_Vector3, (vec_float4){0.0f} );
+    vmathSoaV3MakeFromScalar( &d_Vector3, (vec_float4){0.0f} );
+    vmathSoaV3MakeFromScalar( &e_Vector3, (vec_float4){0.0f} );
+    vmathSoaV3Prints( &c_Vector3, "set Vector3 elements to zero" );
+    vmathSoaV3Prints( &d_Vector3, "set Vector3 elements to zero" );
+    vmathSoaV3Prints( &e_Vector3, "set Vector3 elements to zero" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaV4MakeFromElems( &a_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaV4MakeFromElems( &b_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    vmathSoaV4Prints( &a_Vector4, "set Vector4 with floats" );
+    vmathSoaV4Prints( &b_Vector4, "set Vector4 with floats" );
+    vmathSoaV4MakeFromScalar( &c_Vector4, (vec_float4){0.0f} );
+    vmathSoaV4MakeFromScalar( &d_Vector4, (vec_float4){0.0f} );
+    vmathSoaV4MakeFromScalar( &e_Vector4, (vec_float4){0.0f} );
+    vmathSoaV4Prints( &c_Vector4, "set Vector4 elements to zero" );
+    vmathSoaV4Prints( &d_Vector4, "set Vector4 elements to zero" );
+    vmathSoaV4Prints( &e_Vector4, "set Vector4 elements to zero" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaP3MakeFromElems( &a_Point3, rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaP3MakeFromElems( &b_Point3, rndflt1, rndflt2, rndflt3 );
+    vmathSoaP3Prints( &a_Point3, "set Point3 with floats" );
+    vmathSoaP3Prints( &b_Point3, "set Point3 with floats" );
+    vmathSoaP3MakeFromScalar( &c_Point3, (vec_float4){0.0f} );
+    vmathSoaP3MakeFromScalar( &d_Point3, (vec_float4){0.0f} );
+    vmathSoaP3MakeFromScalar( &e_Point3, (vec_float4){0.0f} );
+    vmathSoaP3Prints( &c_Point3, "set Point3 elements to zero" );
+    vmathSoaP3Prints( &d_Point3, "set Point3 elements to zero" );
+    vmathSoaP3Prints( &e_Point3, "set Point3 elements to zero" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaQMakeFromElems( &a_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaQMakeFromElems( &b_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    vmathSoaQPrints( &a_Quat, "set Quat with floats" );
+    vmathSoaQPrints( &b_Quat, "set Quat with floats" );
+    vmathSoaQMakeFromScalar( &c_Quat, (vec_float4){0.0f} );
+    vmathSoaQMakeFromScalar( &d_Quat, (vec_float4){0.0f} );
+    vmathSoaQMakeFromScalar( &e_Quat, (vec_float4){0.0f} );
+    vmathSoaQPrints( &c_Quat, "set Quat elements to zero" );
+    vmathSoaQPrints( &d_Quat, "set Quat elements to zero" );
+    vmathSoaQPrints( &e_Quat, "set Quat elements to zero" );
+    vmathSoaV3MakeFromP3( &a_Vector3, &a_Point3 );
+    vmathSoaV3Prints( &a_Vector3, "construct Vector3 with Point3" );
+    vmathSoaV3MakeFromScalar( &a_Vector3, randfloat() );
+    vmathSoaV3Prints( &a_Vector3, "set Vector3 with float" );
+    vmathSoaV3MakeFromScalar( &a_Vector3, randfloat() );
+    vmathSoaV3Prints( &a_Vector3, "set Vector3 with float" );
+    vmathV3MakeFromElems( &aos_Vector3_0, 0.0f, 1.0f, 2.0f );
+    vmathV3MakeFromElems( &aos_Vector3_1, 3.0f, 4.0f, 5.0f );
+    vmathV3MakeFromElems( &aos_Vector3_2, 6.0f, 7.0f, 8.0f );
+    vmathV3MakeFromElems( &aos_Vector3_3, 9.0f, 10.0f, 11.0f );
+    vmathSoaV3MakeFrom4Aos( &soa_Vector3, &aos_Vector3_0, &aos_Vector3_1, &aos_Vector3_2, &aos_Vector3_3 );
+    vmathSoaV3Get4Aos( &soa_Vector3, &aos_Vector3_3, &aos_Vector3_2, &aos_Vector3_1, &aos_Vector3_0 );
+    vmathV3Prints( &aos_Vector3_0, "aos type 0" );
+    vmathV3Prints( &aos_Vector3_1, "aos type 1" );
+    vmathV3Prints( &aos_Vector3_2, "aos type 2" );
+    vmathV3Prints( &aos_Vector3_3, "aos type 3" );
+    vmathSoaV3Select( &a_Vector3, &a_Vector3, &b_Vector3, ((vec_uint4){0,0xffffffff,0,0xffffffff}) );
+    vmathSoaV3Get4Aos( &a_Vector3, &aos_Vector3_0, &aos_Vector3_1, &aos_Vector3_2, &aos_Vector3_3 );
+    vmathV3Prints( &aos_Vector3_0, "select 0" );
+    vmathV3Prints( &aos_Vector3_1, "select 1" );
+    vmathV3Prints( &aos_Vector3_2, "select 2" );
+    vmathV3Prints( &aos_Vector3_3, "select 3" );
+    vmathSoaV3LoadXYZArray( &a_Vector3, (const vec_float4 *)xyz4 );
+    vmathSoaV3Prints( &a_Vector3, "load XYZ array" );
+    vmathSoaV3Copy( &tmpV3_0, &a_Vector3 );
+    vmathSoaV3Neg( &tmpV3_1, &tmpV3_0 );
+    vmathSoaV3Copy( &a_Vector3, &tmpV3_1 );
+    vmathSoaV3StoreXYZArray( &a_Vector3, (vec_float4 *)xyz4 );
+    vmathV4MakeFromElems( &aos_Vector4_0, xyz4[0], xyz4[1], xyz4[2], xyz4[3] );
+    vmathV4MakeFromElems( &aos_Vector4_1, xyz4[4], xyz4[5], xyz4[6], xyz4[7] );
+    vmathV4MakeFromElems( &aos_Vector4_2, xyz4[8], xyz4[9], xyz4[10], xyz4[11] );
+    vmathV4Prints( &aos_Vector4_0, "xyzx" );
+    vmathV4Prints( &aos_Vector4_1, "yzxy" );
+    vmathV4Prints( &aos_Vector4_2, "zxyz" );
+    vmathV4Prints( &aos_Vector4_0, "xyzx" );
+    vmathV4Prints( &aos_Vector4_1, "yzxy" );
+    vmathV4Prints( &aos_Vector4_2, "zxyz" );
+    printf("storeXYZ:-1.0 -2.0 -3.0 0.4\n");
+    vmathSoaV3Copy( &a_Vector3, &b_Vector3 );
+    vmathSoaV3Prints( &a_Vector3, "assign to Vector3 from Vector3" );
+    vmathSoaV3MakeFromScalar( &a_Vector3, (vec_float4){0.0f} );
+    vmathSoaV3Prints( &a_Vector3, "set Vector3 elements to zero" );
+    vmathSoaV3MakeXAxis( &a_Vector3 );
+    vmathSoaV3Prints( &a_Vector3, "set to x axis" );
+    vmathSoaV3MakeYAxis( &a_Vector3 );
+    vmathSoaV3Prints( &a_Vector3, "set to y axis" );
+    vmathSoaV3MakeZAxis( &a_Vector3 );
+    vmathSoaV3Prints( &a_Vector3, "set to z axis" );
+    vmathSoaV3SetElem( &a_Vector3, 0, randfloat() );
+    vmathSoaV3Prints( &a_Vector3, "Vector3::set( 0, float )" );
+    vmathSoaV3SetElem( &a_Vector3, 0, randfloat() );
+    vmathSoaV3SetElem( &a_Vector3, 0, vec_mul_float( vmathSoaV3GetElem( &a_Vector3, 0 ), randfloat() ) );
+    vmathSoaV3SetElem( &a_Vector3, 0, divf4( vmathSoaV3GetElem( &a_Vector3, 0 ), randfloat() ) );
+    vmathSoaV3SetElem( &a_Vector3, 0, vec_add_float( vmathSoaV3GetElem( &a_Vector3, 0 ), randfloat() ) );
+    vmathSoaV3SetElem( &a_Vector3, 0, vec_sub_float( vmathSoaV3GetElem( &a_Vector3, 0 ), randfloat() ) );
+    vmathSoaV3Prints( &a_Vector3, "Vector3::operator [](0)" );
+    vmathSoaV3SetX( &a_Vector3, randfloat() );
+    vmathSoaV3Prints( &a_Vector3, "Vector3::setX()" );
+    vmathSoaV3SetElem( &a_Vector3, 1, randfloat() );
+    vmathSoaV3Prints( &a_Vector3, "Vector3::set( 1, float )" );
+    vmathSoaV3SetElem( &a_Vector3, 1, randfloat() );
+    vmathSoaV3SetElem( &a_Vector3, 1, vec_mul_float( vmathSoaV3GetElem( &a_Vector3, 1 ), randfloat() ) );
+    vmathSoaV3SetElem( &a_Vector3, 1, divf4( vmathSoaV3GetElem( &a_Vector3, 1 ), randfloat() ) );
+    vmathSoaV3SetElem( &a_Vector3, 1, vec_add_float( vmathSoaV3GetElem( &a_Vector3, 1 ), randfloat() ) );
+    vmathSoaV3SetElem( &a_Vector3, 1, vec_sub_float( vmathSoaV3GetElem( &a_Vector3, 1 ), randfloat() ) );
+    vmathSoaV3Prints( &a_Vector3, "Vector3::operator [](1)" );
+    vmathSoaV3SetY( &a_Vector3, randfloat() );
+    vmathSoaV3Prints( &a_Vector3, "Vector3::setY()" );
+    vmathSoaV3SetElem( &a_Vector3, 2, randfloat() );
+    vmathSoaV3Prints( &a_Vector3, "Vector3::set( 2, float )" );
+    vmathSoaV3SetElem( &a_Vector3, 2, randfloat() );
+    vmathSoaV3SetElem( &a_Vector3, 2, vec_mul_float( vmathSoaV3GetElem( &a_Vector3, 2 ), randfloat() ) );
+    vmathSoaV3SetElem( &a_Vector3, 2, divf4( vmathSoaV3GetElem( &a_Vector3, 2 ), randfloat() ) );
+    vmathSoaV3SetElem( &a_Vector3, 2, vec_add_float( vmathSoaV3GetElem( &a_Vector3, 2 ), randfloat() ) );
+    vmathSoaV3SetElem( &a_Vector3, 2, vec_sub_float( vmathSoaV3GetElem( &a_Vector3, 2 ), randfloat() ) );
+    vmathSoaV3Prints( &a_Vector3, "Vector3::operator [](2)" );
+    vmathSoaV3SetZ( &a_Vector3, randfloat() );
+    vmathSoaV3Prints( &a_Vector3, "Vector3::setZ()" );
+    printf("Vector3::get( 0 ): %f\n", getfloat(vmathSoaV3GetElem( &a_Vector3, 0 )) );
+    printf("Vector3::operator []( 0 ): %f\n", getfloat(vmathSoaV3GetElem( &a_Vector3, 0 )) );
+    printf("Vector3::getX(): %f\n", getfloat(vmathSoaV3GetX( &a_Vector3 )) );
+    printf("Vector3::get( 1 ): %f\n", getfloat(vmathSoaV3GetElem( &a_Vector3, 1 )) );
+    printf("Vector3::operator []( 1 ): %f\n", getfloat(vmathSoaV3GetElem( &a_Vector3, 1 )) );
+    printf("Vector3::getY(): %f\n", getfloat(vmathSoaV3GetY( &a_Vector3 )) );
+    printf("Vector3::get( 2 ): %f\n", getfloat(vmathSoaV3GetElem( &a_Vector3, 2 )) );
+    printf("Vector3::operator []( 2 ): %f\n", getfloat(vmathSoaV3GetElem( &a_Vector3, 2 )) );
+    printf("Vector3::getZ(): %f\n", getfloat(vmathSoaV3GetZ( &a_Vector3 )) );
+    vmathSoaV3Add( &tmpV3_2, &a_Vector3, &b_Vector3 );
+    vmathSoaV3Prints( &tmpV3_2, "Vector3 + Vector3" );
+    vmathSoaV3Sub( &tmpV3_3, &a_Vector3, &b_Vector3 );
+    vmathSoaV3Prints( &tmpV3_3, "Vector3 - Vector3" );
+    vmathSoaV3AddP3( &tmpP3_0, &a_Vector3, &b_Point3 );
+    vmathSoaP3Prints( &tmpP3_0, "Vector3 + Point3" );
+    vmathSoaV3ScalarMul( &tmpV3_4, &a_Vector3, randfloat() );
+    vmathSoaV3Prints( &tmpV3_4, "Vector3 * float" );
+    vmathSoaV3ScalarDiv( &tmpV3_5, &a_Vector3, randfloat() );
+    vmathSoaV3Prints( &tmpV3_5, "Vector3 / float" );
+    vmathSoaV3ScalarMul( &tmpV3_6, &a_Vector3, randfloat() );
+    vmathSoaV3Prints( &tmpV3_6, "float * Vector3" );
+    vmathSoaV3Neg( &tmpV3_7, &a_Vector3 );
+    vmathSoaV3Prints( &tmpV3_7, "Vector3 negate" );
+    vmathSoaV3MulPerElem( &tmpV3_8, &a_Vector3, &b_Vector3 );
+    vmathSoaV3Prints( &tmpV3_8, "mulPerElem( Vector3, Vector3 )" );
+    vmathSoaV3DivPerElem( &tmpV3_9, &a_Vector3, &b_Vector3 );
+    vmathSoaV3Prints( &tmpV3_9, "divPerElem( Vector3, Vector3 )" );
+    vmathSoaV3RecipPerElem( &tmpV3_10, &a_Vector3 );
+    vmathSoaV3Prints( &tmpV3_10, "Vector3 recip" );
+    vmathSoaV3AbsPerElem( &tmpV3_11, &a_Vector3 );
+    vmathSoaV3SqrtPerElem( &tmpV3_12, &tmpV3_11 );
+    vmathSoaV3Prints( &tmpV3_12, "Vector3 sqrt" );
+    vmathSoaV3AbsPerElem( &tmpV3_13, &a_Vector3 );
+    vmathSoaV3RsqrtPerElem( &tmpV3_14, &tmpV3_13 );
+    vmathSoaV3Prints( &tmpV3_14, "Vector3 rsqrt" );
+    vmathSoaV3AbsPerElem( &tmpV3_15, &a_Vector3 );
+    vmathSoaV3Prints( &tmpV3_15, "Vector3 abs" );
+    vmathSoaV3CopySignPerElem( &tmpV3_16, &a_Vector3, &b_Vector3 );
+    vmathSoaV3Prints( &tmpV3_16, "Vector3 copySign" );
+    vmathSoaV3MaxPerElem( &tmpV3_17, &a_Vector3, &b_Vector3 );
+    vmathSoaV3Prints( &tmpV3_17, "Vector3 maximum Vector3" );
+    vmathSoaV3MinPerElem( &tmpV3_18, &a_Vector3, &b_Vector3 );
+    vmathSoaV3Prints( &tmpV3_18, "Vector3 minimum Vector3" );
+    printf("Vector3 maximum of elements: %f\n", getfloat(vmathSoaV3MaxElem( &a_Vector3 )));
+    printf("Vector3 minimum of elements: %f\n", getfloat(vmathSoaV3MinElem( &a_Vector3 )));
+    printf("Vector3 sum of elements: %f\n", getfloat(vmathSoaV3Sum( &a_Vector3 )));
+    printf("Vector3 dot Vector3: %f\n", getfloat(vmathSoaV3Dot( &a_Vector3, &b_Vector3 )));
+    printf("Vector3 lengthSqr: %f\n", getfloat(vmathSoaV3LengthSqr( &a_Vector3 )));
+    printf("Vector3 length: %f\n", getfloat(vmathSoaV3Length( &a_Vector3 )));
+    vmathSoaV3Normalize( &tmpV3_19, &a_Vector3 );
+    vmathSoaV3Prints( &tmpV3_19, "Vector3 normalized" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaV3MakeFromElems( &b_Vector3, rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaV3MakeFromElems( &c_Vector3, rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaV3MakeFromElems( &d_Vector3, rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaV3MakeFromElems( &e_Vector3, rndflt1, rndflt2, rndflt3 );
+    vmathSoaV3Normalize( &b_Vector3, &b_Vector3 );
+    vmathSoaV3Normalize( &c_Vector3, &c_Vector3 );
+    vmathSoaV3Normalize( &d_Vector3, &d_Vector3 );
+    vmathSoaV3Normalize( &e_Vector3, &e_Vector3 );
+    vmathSoaV3Lerp( &a_Vector3, randfloat(), &b_Vector3, &c_Vector3 );
+    vmathSoaV3Prints( &a_Vector3, "Vector3 lerp" );
+    vmathSoaV3Slerp( &a_Vector3, randfloat(), &b_Vector3, &c_Vector3 );
+    vmathSoaV3Prints( &a_Vector3, "Vector3 slerp" );
+}
+
+void
+Vector4_methods_test()
+{   /* Print-based exercise of the SoA Vector4 C API (vmathSoaV4*): builds
+     * fixtures, calls each Vector4 operation once and prints the result
+     * under a label.  Nothing is asserted in this function; results are
+     * only printed.
+     * NOTE(review): presumably the printed output is compared against a
+     * reference elsewhere; if so, the number and order of randfloat() calls
+     * and every label string must stay exactly as they are -- confirm.
+     */
+    VmathSoaVector3 a_Vector3, b_Vector3, c_Vector3, d_Vector3, e_Vector3; /* a..e fixtures, one set per SoA type */
+    VmathSoaVector4 a_Vector4, b_Vector4, c_Vector4, d_Vector4, e_Vector4;
+    VmathSoaPoint3 a_Point3, b_Point3, c_Point3, d_Point3, e_Point3;
+    VmathSoaQuat a_Quat, b_Quat, c_Quat, d_Quat, e_Quat;
+    VmathVector4 aos_Vector4_0, aos_Vector4_1, aos_Vector4_2, aos_Vector4_3; /* AoS vectors for the pack/unpack and select sections */
+    VmathSoaVector4 soa_Vector4;
+    VmathSoaVector3 tmpV3_0;
+    VmathSoaVector4 tmpV4_0, tmpV4_1, tmpV4_2, tmpV4_3, tmpV4_4, tmpV4_5, tmpV4_6, tmpV4_7, tmpV4_8, tmpV4_9, tmpV4_10, tmpV4_11, tmpV4_12, tmpV4_13, tmpV4_14, tmpV4_15, tmpV4_16, tmpV4_17; /* destinations for the operations below */
+    vec_float4 rndflt1, rndflt2, rndflt3, rndflt4;
+    rndflt1 = randfloat(); /* fixture setup: a_/b_Vector3 are built from randfloat() values */
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaV3MakeFromElems( &a_Vector3, rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaV3MakeFromElems( &b_Vector3, rndflt1, rndflt2, rndflt3 );
+    vmathSoaV3Prints( &a_Vector3, "set Vector3 with floats" );
+    vmathSoaV3Prints( &b_Vector3, "set Vector3 with floats" );
+    vmathSoaV3MakeFromScalar( &c_Vector3, (vec_float4){0.0f} ); /* c/d/e are built from a {0.0f} vec_float4 literal */
+    vmathSoaV3MakeFromScalar( &d_Vector3, (vec_float4){0.0f} );
+    vmathSoaV3MakeFromScalar( &e_Vector3, (vec_float4){0.0f} );
+    vmathSoaV3Prints( &c_Vector3, "set Vector3 elements to zero" );
+    vmathSoaV3Prints( &d_Vector3, "set Vector3 elements to zero" );
+    vmathSoaV3Prints( &e_Vector3, "set Vector3 elements to zero" );
+    rndflt1 = randfloat(); /* same setup pattern for the Vector4 fixtures */
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaV4MakeFromElems( &a_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaV4MakeFromElems( &b_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    vmathSoaV4Prints( &a_Vector4, "set Vector4 with floats" );
+    vmathSoaV4Prints( &b_Vector4, "set Vector4 with floats" );
+    vmathSoaV4MakeFromScalar( &c_Vector4, (vec_float4){0.0f} );
+    vmathSoaV4MakeFromScalar( &d_Vector4, (vec_float4){0.0f} );
+    vmathSoaV4MakeFromScalar( &e_Vector4, (vec_float4){0.0f} );
+    vmathSoaV4Prints( &c_Vector4, "set Vector4 elements to zero" );
+    vmathSoaV4Prints( &d_Vector4, "set Vector4 elements to zero" );
+    vmathSoaV4Prints( &e_Vector4, "set Vector4 elements to zero" );
+    rndflt1 = randfloat(); /* same setup pattern for the Point3 fixtures */
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaP3MakeFromElems( &a_Point3, rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaP3MakeFromElems( &b_Point3, rndflt1, rndflt2, rndflt3 );
+    vmathSoaP3Prints( &a_Point3, "set Point3 with floats" );
+    vmathSoaP3Prints( &b_Point3, "set Point3 with floats" );
+    vmathSoaP3MakeFromScalar( &c_Point3, (vec_float4){0.0f} );
+    vmathSoaP3MakeFromScalar( &d_Point3, (vec_float4){0.0f} );
+    vmathSoaP3MakeFromScalar( &e_Point3, (vec_float4){0.0f} );
+    vmathSoaP3Prints( &c_Point3, "set Point3 elements to zero" );
+    vmathSoaP3Prints( &d_Point3, "set Point3 elements to zero" );
+    vmathSoaP3Prints( &e_Point3, "set Point3 elements to zero" );
+    rndflt1 = randfloat(); /* same setup pattern for the Quat fixtures */
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaQMakeFromElems( &a_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaQMakeFromElems( &b_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    vmathSoaQPrints( &a_Quat, "set Quat with floats" );
+    vmathSoaQPrints( &b_Quat, "set Quat with floats" );
+    vmathSoaQMakeFromScalar( &c_Quat, (vec_float4){0.0f} );
+    vmathSoaQMakeFromScalar( &d_Quat, (vec_float4){0.0f} );
+    vmathSoaQMakeFromScalar( &e_Quat, (vec_float4){0.0f} );
+    vmathSoaQPrints( &c_Quat, "set Quat elements to zero" );
+    vmathSoaQPrints( &d_Quat, "set Quat elements to zero" );
+    vmathSoaQPrints( &e_Quat, "set Quat elements to zero" );
+    vmathSoaV4MakeFromV3Scalar( &a_Vector4, &a_Vector3, randfloat() ); /* construction of a Vector4 from the other fixture types */
+    vmathSoaV4Prints( &a_Vector4, "set Vector4 with Vector3, float" );
+    vmathSoaV4MakeFromV3( &a_Vector4, &a_Vector3 );
+    vmathSoaV4Prints( &a_Vector4, "set Vector4 with Vector3" );
+    vmathSoaV4MakeFromP3( &a_Vector4, &a_Point3 );
+    vmathSoaV4Prints( &a_Vector4, "set Vector4 with Point3" );
+    vmathSoaV4MakeFromQ( &a_Vector4, &a_Quat );
+    vmathSoaV4Prints( &a_Vector4, "construct Vector4 with Quat" );
+    vmathSoaV4MakeFromScalar( &a_Vector4, randfloat() );
+    vmathSoaV4Prints( &a_Vector4, "set Vector4 with float" );
+    vmathSoaV4MakeFromScalar( &a_Vector4, randfloat() ); /* same call and label as the two lines above, with a new randfloat() value */
+    vmathSoaV4Prints( &a_Vector4, "set Vector4 with float" );
+    vmathV4MakeFromElems( &aos_Vector4_0, 0.0f, 1.0f, 2.0f, 3.0f ); /* four AoS vectors holding the constants 0..15 */
+    vmathV4MakeFromElems( &aos_Vector4_1, 4.0f, 5.0f, 6.0f, 7.0f );
+    vmathV4MakeFromElems( &aos_Vector4_2, 8.0f, 9.0f, 10.0f, 11.0f );
+    vmathV4MakeFromElems( &aos_Vector4_3, 12.0f, 13.0f, 14.0f, 15.0f );
+    vmathSoaV4MakeFrom4Aos( &soa_Vector4, &aos_Vector4_0, &aos_Vector4_1, &aos_Vector4_2, &aos_Vector4_3 );
+    vmathSoaV4Get4Aos( &soa_Vector4, &aos_Vector4_3, &aos_Vector4_2, &aos_Vector4_1, &aos_Vector4_0 ); /* output pointers are passed in reverse order (3,2,1,0) */
+    vmathV4Prints( &aos_Vector4_0, "aos type 0" );
+    vmathV4Prints( &aos_Vector4_1, "aos type 1" );
+    vmathV4Prints( &aos_Vector4_2, "aos type 2" );
+    vmathV4Prints( &aos_Vector4_3, "aos type 3" );
+    vmathSoaV4Select( &a_Vector4, &a_Vector4, &b_Vector4, ((vec_uint4){0,0xffffffff,0,0xffffffff}) ); /* alternating lane mask; presumably all-ones lanes take b_Vector4 -- confirm against vmathSoaV4Select */
+    vmathSoaV4Get4Aos( &a_Vector4, &aos_Vector4_0, &aos_Vector4_1, &aos_Vector4_2, &aos_Vector4_3 ); /* unpack the selected result so it can be printed per AoS vector */
+    vmathV4Prints( &aos_Vector4_0, "select 0" );
+    vmathV4Prints( &aos_Vector4_1, "select 1" );
+    vmathV4Prints( &aos_Vector4_2, "select 2" );
+    vmathV4Prints( &aos_Vector4_3, "select 3" );
+    vmathSoaV4Copy( &a_Vector4, &b_Vector4 );
+    vmathSoaV4Prints( &a_Vector4, "assign to Vector4 from Vector4" );
+    vmathSoaV4SetXYZ( &a_Vector4, &a_Vector3 );
+    vmathSoaV4Prints( &a_Vector4, "set Vector4 xyz" );
+    vmathSoaV4GetXYZ( &tmpV3_0, &a_Vector4 );
+    vmathSoaV3Prints( &tmpV3_0, "get Vector4 xyz" );
+    vmathSoaV4MakeFromScalar( &a_Vector4, (vec_float4){0.0f} );
+    vmathSoaV4Prints( &a_Vector4, "set Vector4 elements to zero" );
+    vmathSoaV4MakeXAxis( &a_Vector4 );
+    vmathSoaV4Prints( &a_Vector4, "set to x axis" );
+    vmathSoaV4MakeYAxis( &a_Vector4 );
+    vmathSoaV4Prints( &a_Vector4, "set to y axis" );
+    vmathSoaV4MakeZAxis( &a_Vector4 );
+    vmathSoaV4Prints( &a_Vector4, "set to z axis" );
+    vmathSoaV4MakeWAxis( &a_Vector4 );
+    vmathSoaV4Prints( &a_Vector4, "set to w axis" );
+    vmathSoaV4SetElem( &a_Vector4, 0, randfloat() ); /* element 0: indexed set, then get/modify/set, then the named setter */
+    vmathSoaV4Prints( &a_Vector4, "Vector4::set( 0, float )" );
+    vmathSoaV4SetElem( &a_Vector4, 0, randfloat() );
+    vmathSoaV4SetElem( &a_Vector4, 0, vec_mul_float( vmathSoaV4GetElem( &a_Vector4, 0 ), randfloat() ) ); /* mul, div, add, sub are applied in turn to the stored element */
+    vmathSoaV4SetElem( &a_Vector4, 0, divf4( vmathSoaV4GetElem( &a_Vector4, 0 ), randfloat() ) );
+    vmathSoaV4SetElem( &a_Vector4, 0, vec_add_float( vmathSoaV4GetElem( &a_Vector4, 0 ), randfloat() ) );
+    vmathSoaV4SetElem( &a_Vector4, 0, vec_sub_float( vmathSoaV4GetElem( &a_Vector4, 0 ), randfloat() ) );
+    vmathSoaV4Prints( &a_Vector4, "Vector4::operator [](0)" );
+    vmathSoaV4SetX( &a_Vector4, randfloat() );
+    vmathSoaV4Prints( &a_Vector4, "Vector4::setX()" );
+    vmathSoaV4SetElem( &a_Vector4, 1, randfloat() ); /* element 1: same sequence as element 0 */
+    vmathSoaV4Prints( &a_Vector4, "Vector4::set( 1, float )" );
+    vmathSoaV4SetElem( &a_Vector4, 1, randfloat() );
+    vmathSoaV4SetElem( &a_Vector4, 1, vec_mul_float( vmathSoaV4GetElem( &a_Vector4, 1 ), randfloat() ) );
+    vmathSoaV4SetElem( &a_Vector4, 1, divf4( vmathSoaV4GetElem( &a_Vector4, 1 ), randfloat() ) );
+    vmathSoaV4SetElem( &a_Vector4, 1, vec_add_float( vmathSoaV4GetElem( &a_Vector4, 1 ), randfloat() ) );
+    vmathSoaV4SetElem( &a_Vector4, 1, vec_sub_float( vmathSoaV4GetElem( &a_Vector4, 1 ), randfloat() ) );
+    vmathSoaV4Prints( &a_Vector4, "Vector4::operator [](1)" );
+    vmathSoaV4SetY( &a_Vector4, randfloat() );
+    vmathSoaV4Prints( &a_Vector4, "Vector4::setY()" );
+    vmathSoaV4SetElem( &a_Vector4, 2, randfloat() ); /* element 2: same sequence as element 0 */
+    vmathSoaV4Prints( &a_Vector4, "Vector4::set( 2, float )" );
+    vmathSoaV4SetElem( &a_Vector4, 2, randfloat() );
+    vmathSoaV4SetElem( &a_Vector4, 2, vec_mul_float( vmathSoaV4GetElem( &a_Vector4, 2 ), randfloat() ) );
+    vmathSoaV4SetElem( &a_Vector4, 2, divf4( vmathSoaV4GetElem( &a_Vector4, 2 ), randfloat() ) );
+    vmathSoaV4SetElem( &a_Vector4, 2, vec_add_float( vmathSoaV4GetElem( &a_Vector4, 2 ), randfloat() ) );
+    vmathSoaV4SetElem( &a_Vector4, 2, vec_sub_float( vmathSoaV4GetElem( &a_Vector4, 2 ), randfloat() ) );
+    vmathSoaV4Prints( &a_Vector4, "Vector4::operator [](2)" );
+    vmathSoaV4SetZ( &a_Vector4, randfloat() );
+    vmathSoaV4Prints( &a_Vector4, "Vector4::setZ()" );
+    vmathSoaV4SetElem( &a_Vector4, 3, randfloat() ); /* element 3: same sequence as element 0 */
+    vmathSoaV4Prints( &a_Vector4, "Vector4::set( 3, float )" );
+    vmathSoaV4SetElem( &a_Vector4, 3, randfloat() );
+    vmathSoaV4SetElem( &a_Vector4, 3, vec_mul_float( vmathSoaV4GetElem( &a_Vector4, 3 ), randfloat() ) );
+    vmathSoaV4SetElem( &a_Vector4, 3, divf4( vmathSoaV4GetElem( &a_Vector4, 3 ), randfloat() ) );
+    vmathSoaV4SetElem( &a_Vector4, 3, vec_add_float( vmathSoaV4GetElem( &a_Vector4, 3 ), randfloat() ) );
+    vmathSoaV4SetElem( &a_Vector4, 3, vec_sub_float( vmathSoaV4GetElem( &a_Vector4, 3 ), randfloat() ) );
+    vmathSoaV4Prints( &a_Vector4, "Vector4::operator [](3)" );
+    vmathSoaV4SetW( &a_Vector4, randfloat() );
+    vmathSoaV4Prints( &a_Vector4, "Vector4::setW()" );
+    printf("Vector4::get( 0 ): %f\n", getfloat(vmathSoaV4GetElem( &a_Vector4, 0 )) ); /* read-back: each index is printed via GetElem under two labels, then via the named getter */
+    printf("Vector4::operator []( 0 ): %f\n", getfloat(vmathSoaV4GetElem( &a_Vector4, 0 )) );
+    printf("Vector4::getX(): %f\n", getfloat(vmathSoaV4GetX( &a_Vector4 )) );
+    printf("Vector4::get( 1 ): %f\n", getfloat(vmathSoaV4GetElem( &a_Vector4, 1 )) );
+    printf("Vector4::operator []( 1 ): %f\n", getfloat(vmathSoaV4GetElem( &a_Vector4, 1 )) );
+    printf("Vector4::getY(): %f\n", getfloat(vmathSoaV4GetY( &a_Vector4 )) );
+    printf("Vector4::get( 2 ): %f\n", getfloat(vmathSoaV4GetElem( &a_Vector4, 2 )) );
+    printf("Vector4::operator []( 2 ): %f\n", getfloat(vmathSoaV4GetElem( &a_Vector4, 2 )) );
+    printf("Vector4::getZ(): %f\n", getfloat(vmathSoaV4GetZ( &a_Vector4 )) );
+    printf("Vector4::get( 3 ): %f\n", getfloat(vmathSoaV4GetElem( &a_Vector4, 3 )) );
+    printf("Vector4::operator []( 3 ): %f\n", getfloat(vmathSoaV4GetElem( &a_Vector4, 3 )) );
+    printf("Vector4::getW(): %f\n", getfloat(vmathSoaV4GetW( &a_Vector4 )) );
+    vmathSoaV4Add( &tmpV4_0, &a_Vector4, &b_Vector4 ); /* arithmetic: each result goes to its own tmpV4_N and is printed */
+    vmathSoaV4Prints( &tmpV4_0, "Vector4 + Vector4" );
+    vmathSoaV4Sub( &tmpV4_1, &a_Vector4, &b_Vector4 );
+    vmathSoaV4Prints( &tmpV4_1, "Vector4 - Vector4" );
+    vmathSoaV4ScalarMul( &tmpV4_2, &a_Vector4, randfloat() );
+    vmathSoaV4Prints( &tmpV4_2, "Vector4 * float" );
+    vmathSoaV4ScalarDiv( &tmpV4_3, &a_Vector4, randfloat() );
+    vmathSoaV4Prints( &tmpV4_3, "Vector4 / float" );
+    vmathSoaV4ScalarMul( &tmpV4_4, &a_Vector4, randfloat() ); /* same vmathSoaV4ScalarMul call as "Vector4 * float"; only the label differs */
+    vmathSoaV4Prints( &tmpV4_4, "float * Vector4" );
+    vmathSoaV4Neg( &tmpV4_5, &a_Vector4 );
+    vmathSoaV4Prints( &tmpV4_5, "Vector4 negate" );
+    vmathSoaV4MulPerElem( &tmpV4_6, &a_Vector4, &b_Vector4 );
+    vmathSoaV4Prints( &tmpV4_6, "mulPerElem( Vector4, Vector4 )" );
+    vmathSoaV4DivPerElem( &tmpV4_7, &a_Vector4, &b_Vector4 );
+    vmathSoaV4Prints( &tmpV4_7, "divPerElem( Vector4, Vector4 )" );
+    vmathSoaV4RecipPerElem( &tmpV4_8, &a_Vector4 );
+    vmathSoaV4Prints( &tmpV4_8, "Vector4 recip" );
+    vmathSoaV4AbsPerElem( &tmpV4_9, &a_Vector4 ); /* sqrt is applied to the AbsPerElem result, not to a_Vector4 directly */
+    vmathSoaV4SqrtPerElem( &tmpV4_10, &tmpV4_9 );
+    vmathSoaV4Prints( &tmpV4_10, "Vector4 sqrt" );
+    vmathSoaV4AbsPerElem( &tmpV4_11, &a_Vector4 ); /* likewise, rsqrt is applied to the AbsPerElem result */
+    vmathSoaV4RsqrtPerElem( &tmpV4_12, &tmpV4_11 );
+    vmathSoaV4Prints( &tmpV4_12, "Vector4 rsqrt" );
+    vmathSoaV4AbsPerElem( &tmpV4_13, &a_Vector4 );
+    vmathSoaV4Prints( &tmpV4_13, "Vector4 abs" );
+    vmathSoaV4CopySignPerElem( &tmpV4_14, &a_Vector4, &b_Vector4 );
+    vmathSoaV4Prints( &tmpV4_14, "Vector4 copySign" );
+    vmathSoaV4MaxPerElem( &tmpV4_15, &a_Vector4, &b_Vector4 );
+    vmathSoaV4Prints( &tmpV4_15, "Vector4 maximum Vector4" );
+    vmathSoaV4MinPerElem( &tmpV4_16, &a_Vector4, &b_Vector4 );
+    vmathSoaV4Prints( &tmpV4_16, "Vector4 minimum Vector4" );
+    printf("Vector4 maximum of elements: %f\n", getfloat(vmathSoaV4MaxElem( &a_Vector4 ))); /* reductions: results are passed through getfloat() and printed with %f */
+    printf("Vector4 minimum of elements: %f\n", getfloat(vmathSoaV4MinElem( &a_Vector4 )));
+    printf("Vector4 sum of elements: %f\n", getfloat(vmathSoaV4Sum( &a_Vector4 )));
+    printf("Vector4 dot Vector4: %f\n", getfloat(vmathSoaV4Dot( &a_Vector4, &b_Vector4 )));
+    printf("Vector4 lengthSqr: %f\n", getfloat(vmathSoaV4LengthSqr( &a_Vector4 )));
+    printf("Vector4 length: %f\n", getfloat(vmathSoaV4Length( &a_Vector4 )));
+    vmathSoaV4Normalize( &tmpV4_17, &a_Vector4 );
+    vmathSoaV4Prints( &tmpV4_17, "Vector4 normalized" );
+    rndflt1 = randfloat(); /* rebuild b..e from new randfloat() values, then normalize each in place */
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaV4MakeFromElems( &b_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaV4MakeFromElems( &c_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaV4MakeFromElems( &d_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaV4MakeFromElems( &e_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    vmathSoaV4Normalize( &b_Vector4, &b_Vector4 );
+    vmathSoaV4Normalize( &c_Vector4, &c_Vector4 );
+    vmathSoaV4Normalize( &d_Vector4, &d_Vector4 );
+    vmathSoaV4Normalize( &e_Vector4, &e_Vector4 );
+    vmathSoaV4Lerp( &a_Vector4, randfloat(), &b_Vector4, &c_Vector4 ); /* only b_ and c_Vector4 feed lerp/slerp; d_ and e_ are not read again in this function */
+    vmathSoaV4Prints( &a_Vector4, "Vector4 lerp" );
+    vmathSoaV4Slerp( &a_Vector4, randfloat(), &b_Vector4, &c_Vector4 );
+    vmathSoaV4Prints( &a_Vector4, "Vector4 slerp" );
+}
+
+void
+Point3_methods_test()
+{   /* Print-based exercise of the SoA Point3 C API (vmathSoaP3*): builds
+     * fixtures, calls each Point3 operation once and prints the result
+     * under a label.  Nothing is asserted in this function; results are
+     * only printed.
+     * NOTE(review): presumably the printed output is compared against a
+     * reference elsewhere; if so, the number and order of randfloat() calls
+     * and every label string must stay exactly as they are -- confirm.
+     */
+    VmathSoaVector3 a_Vector3, b_Vector3, c_Vector3, d_Vector3, e_Vector3; /* a..e fixtures, one set per SoA type */
+    VmathSoaVector4 a_Vector4, b_Vector4, c_Vector4, d_Vector4, e_Vector4;
+    VmathSoaPoint3 a_Point3, b_Point3, c_Point3, d_Point3, e_Point3;
+    VmathSoaQuat a_Quat, b_Quat, c_Quat, d_Quat, e_Quat;
+    VmathPoint3 aos_Point3_0, aos_Point3_1, aos_Point3_2, aos_Point3_3; /* AoS points for the pack/unpack and select sections */
+    VmathSoaPoint3 soa_Point3;
+    VmathVector4 aos_Vector4_0, aos_Vector4_1, aos_Vector4_2; /* used only to print the xyz4 array four floats at a time */
+    VmathSoaVector3 tmpV3_0, tmpV3_1, tmpV3_2;
+    VmathSoaPoint3 tmpP3_0, tmpP3_1, tmpP3_2, tmpP3_3, tmpP3_4, tmpP3_5, tmpP3_6, tmpP3_7, tmpP3_8, tmpP3_9, tmpP3_10, tmpP3_11, tmpP3_12; /* destinations for the operations below */
+    vec_float4 rndflt1, rndflt2, rndflt3, rndflt4;
+    float xyz4[12] __attribute__ ((aligned(16))); /* 12 floats, 16-byte aligned; accessed below through vec_float4 pointer casts */
+    xyz4[0] = getfloat(randfloat()); /* fill all 12 slots with getfloat(randfloat()) values */
+    xyz4[1] = getfloat(randfloat());
+    xyz4[2] = getfloat(randfloat());
+    xyz4[3] = getfloat(randfloat());
+    xyz4[4] = getfloat(randfloat());
+    xyz4[5] = getfloat(randfloat());
+    xyz4[6] = getfloat(randfloat());
+    xyz4[7] = getfloat(randfloat());
+    xyz4[8] = getfloat(randfloat());
+    xyz4[9] = getfloat(randfloat());
+    xyz4[10] = getfloat(randfloat());
+    xyz4[11] = getfloat(randfloat());
+    rndflt1 = randfloat(); /* fixture setup: a_/b_Vector3 are built from randfloat() values */
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaV3MakeFromElems( &a_Vector3, rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaV3MakeFromElems( &b_Vector3, rndflt1, rndflt2, rndflt3 );
+    vmathSoaV3Prints( &a_Vector3, "set Vector3 with floats" );
+    vmathSoaV3Prints( &b_Vector3, "set Vector3 with floats" );
+    vmathSoaV3MakeFromScalar( &c_Vector3, (vec_float4){0.0f} ); /* c/d/e are built from a {0.0f} vec_float4 literal */
+    vmathSoaV3MakeFromScalar( &d_Vector3, (vec_float4){0.0f} );
+    vmathSoaV3MakeFromScalar( &e_Vector3, (vec_float4){0.0f} );
+    vmathSoaV3Prints( &c_Vector3, "set Vector3 elements to zero" );
+    vmathSoaV3Prints( &d_Vector3, "set Vector3 elements to zero" );
+    vmathSoaV3Prints( &e_Vector3, "set Vector3 elements to zero" );
+    rndflt1 = randfloat(); /* Vector4 fixtures: only built and printed in this function */
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaV4MakeFromElems( &a_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaV4MakeFromElems( &b_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    vmathSoaV4Prints( &a_Vector4, "set Vector4 with floats" );
+    vmathSoaV4Prints( &b_Vector4, "set Vector4 with floats" );
+    vmathSoaV4MakeFromScalar( &c_Vector4, (vec_float4){0.0f} );
+    vmathSoaV4MakeFromScalar( &d_Vector4, (vec_float4){0.0f} );
+    vmathSoaV4MakeFromScalar( &e_Vector4, (vec_float4){0.0f} );
+    vmathSoaV4Prints( &c_Vector4, "set Vector4 elements to zero" );
+    vmathSoaV4Prints( &d_Vector4, "set Vector4 elements to zero" );
+    vmathSoaV4Prints( &e_Vector4, "set Vector4 elements to zero" );
+    rndflt1 = randfloat(); /* same setup pattern for the Point3 fixtures */
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaP3MakeFromElems( &a_Point3, rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaP3MakeFromElems( &b_Point3, rndflt1, rndflt2, rndflt3 );
+    vmathSoaP3Prints( &a_Point3, "set Point3 with floats" );
+    vmathSoaP3Prints( &b_Point3, "set Point3 with floats" );
+    vmathSoaP3MakeFromScalar( &c_Point3, (vec_float4){0.0f} );
+    vmathSoaP3MakeFromScalar( &d_Point3, (vec_float4){0.0f} );
+    vmathSoaP3MakeFromScalar( &e_Point3, (vec_float4){0.0f} );
+    vmathSoaP3Prints( &c_Point3, "set Point3 elements to zero" );
+    vmathSoaP3Prints( &d_Point3, "set Point3 elements to zero" );
+    vmathSoaP3Prints( &e_Point3, "set Point3 elements to zero" );
+    rndflt1 = randfloat(); /* Quat fixtures: only built and printed in this function */
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaQMakeFromElems( &a_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaQMakeFromElems( &b_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    vmathSoaQPrints( &a_Quat, "set Quat with floats" );
+    vmathSoaQPrints( &b_Quat, "set Quat with floats" );
+    vmathSoaQMakeFromScalar( &c_Quat, (vec_float4){0.0f} );
+    vmathSoaQMakeFromScalar( &d_Quat, (vec_float4){0.0f} );
+    vmathSoaQMakeFromScalar( &e_Quat, (vec_float4){0.0f} );
+    vmathSoaQPrints( &c_Quat, "set Quat elements to zero" );
+    vmathSoaQPrints( &d_Quat, "set Quat elements to zero" );
+    vmathSoaQPrints( &e_Quat, "set Quat elements to zero" );
+    vmathSoaP3MakeFromV3( &a_Point3, &a_Vector3 ); /* construction of a Point3 from a Vector3 and from a scalar */
+    vmathSoaP3Prints( &a_Point3, "construct Point3 with Vector3" );
+    vmathSoaP3MakeFromScalar( &a_Point3, randfloat() );
+    vmathSoaP3Prints( &a_Point3, "set Point3 with float" );
+    vmathSoaP3MakeFromScalar( &a_Point3, randfloat() ); /* same call and label as the two lines above, with a new randfloat() value */
+    vmathSoaP3Prints( &a_Point3, "set Point3 with float" );
+    vmathP3MakeFromElems( &aos_Point3_0, 0.0f, 1.0f, 2.0f ); /* four AoS points holding the constants 0..11 */
+    vmathP3MakeFromElems( &aos_Point3_1, 3.0f, 4.0f, 5.0f );
+    vmathP3MakeFromElems( &aos_Point3_2, 6.0f, 7.0f, 8.0f );
+    vmathP3MakeFromElems( &aos_Point3_3, 9.0f, 10.0f, 11.0f );
+    vmathSoaP3MakeFrom4Aos( &soa_Point3, &aos_Point3_0, &aos_Point3_1, &aos_Point3_2, &aos_Point3_3 );
+    vmathSoaP3Get4Aos( &soa_Point3, &aos_Point3_3, &aos_Point3_2, &aos_Point3_1, &aos_Point3_0 ); /* output pointers are passed in reverse order (3,2,1,0) */
+    vmathP3Prints( &aos_Point3_0, "aos type 0" );
+    vmathP3Prints( &aos_Point3_1, "aos type 1" );
+    vmathP3Prints( &aos_Point3_2, "aos type 2" );
+    vmathP3Prints( &aos_Point3_3, "aos type 3" );
+    vmathSoaP3Select( &a_Point3, &a_Point3, &b_Point3, ((vec_uint4){0,0xffffffff,0,0xffffffff}) ); /* alternating lane mask; presumably all-ones lanes take b_Point3 -- confirm against vmathSoaP3Select */
+    vmathSoaP3Get4Aos( &a_Point3, &aos_Point3_0, &aos_Point3_1, &aos_Point3_2, &aos_Point3_3 ); /* unpack the selected result so it can be printed per AoS point */
+    vmathP3Prints( &aos_Point3_0, "select 0" );
+    vmathP3Prints( &aos_Point3_1, "select 1" );
+    vmathP3Prints( &aos_Point3_2, "select 2" );
+    vmathP3Prints( &aos_Point3_3, "select 3" );
+    vmathSoaP3LoadXYZArray( &a_Point3, (const vec_float4 *)xyz4 ); /* load a_Point3 from the xyz4 array through a vec_float4 pointer cast */
+    vmathSoaP3Prints( &a_Point3, "load XYZ array" );
+    vmathSoaV3MakeFromP3( &tmpV3_0, &a_Point3 ); /* negate the point via Point3 -> Vector3 -> Neg -> Point3 */
+    vmathSoaV3Neg( &tmpV3_1, &tmpV3_0 );
+    vmathSoaP3MakeFromV3( &a_Point3, &tmpV3_1 );
+    vmathSoaP3StoreXYZArray( &a_Point3, (vec_float4 *)xyz4 ); /* store the negated point back into the same xyz4 array */
+    vmathV4MakeFromElems( &aos_Vector4_0, xyz4[0], xyz4[1], xyz4[2], xyz4[3] ); /* regroup the 12 stored floats into three 4-float vectors for printing */
+    vmathV4MakeFromElems( &aos_Vector4_1, xyz4[4], xyz4[5], xyz4[6], xyz4[7] );
+    vmathV4MakeFromElems( &aos_Vector4_2, xyz4[8], xyz4[9], xyz4[10], xyz4[11] );
+    vmathV4Prints( &aos_Vector4_0, "xyzx" );
+    vmathV4Prints( &aos_Vector4_1, "yzxy" );
+    vmathV4Prints( &aos_Vector4_2, "zxyz" );
+    vmathV4Prints( &aos_Vector4_0, "xyzx" ); /* NOTE(review): repeats the three prints above verbatim; looks redundant, but removing it would change the output -- confirm intent */
+    vmathV4Prints( &aos_Vector4_1, "yzxy" );
+    vmathV4Prints( &aos_Vector4_2, "zxyz" );
+    printf("storeXYZ:-1.0 -2.0 -3.0 0.4\n"); /* fixed literal text; not computed from xyz4 or a_Point3 */
+    vmathSoaP3Copy( &a_Point3, &b_Point3 );
+    vmathSoaP3Prints( &a_Point3, "assign to Point3 from Point3" );
+    vmathSoaP3MakeFromScalar( &a_Point3, (vec_float4){0.0f} );
+    vmathSoaP3Prints( &a_Point3, "set Point3 elements to zero" );
+    vmathSoaP3SetElem( &a_Point3, 0, randfloat() ); /* element 0: indexed set, then get/modify/set, then the named setter */
+    vmathSoaP3Prints( &a_Point3, "Point3::set( 0, float )" );
+    vmathSoaP3SetElem( &a_Point3, 0, randfloat() );
+    vmathSoaP3SetElem( &a_Point3, 0, vec_mul_float( vmathSoaP3GetElem( &a_Point3, 0 ), randfloat() ) ); /* mul, div, add, sub are applied in turn to the stored element */
+    vmathSoaP3SetElem( &a_Point3, 0, divf4( vmathSoaP3GetElem( &a_Point3, 0 ), randfloat() ) );
+    vmathSoaP3SetElem( &a_Point3, 0, vec_add_float( vmathSoaP3GetElem( &a_Point3, 0 ), randfloat() ) );
+    vmathSoaP3SetElem( &a_Point3, 0, vec_sub_float( vmathSoaP3GetElem( &a_Point3, 0 ), randfloat() ) );
+    vmathSoaP3Prints( &a_Point3, "Point3::operator [](0)" );
+    vmathSoaP3SetX( &a_Point3, randfloat() );
+    vmathSoaP3Prints( &a_Point3, "Point3::setX()" );
+    vmathSoaP3SetElem( &a_Point3, 1, randfloat() ); /* element 1: same sequence as element 0 */
+    vmathSoaP3Prints( &a_Point3, "Point3::set( 1, float )" );
+    vmathSoaP3SetElem( &a_Point3, 1, randfloat() );
+    vmathSoaP3SetElem( &a_Point3, 1, vec_mul_float( vmathSoaP3GetElem( &a_Point3, 1 ), randfloat() ) );
+    vmathSoaP3SetElem( &a_Point3, 1, divf4( vmathSoaP3GetElem( &a_Point3, 1 ), randfloat() ) );
+    vmathSoaP3SetElem( &a_Point3, 1, vec_add_float( vmathSoaP3GetElem( &a_Point3, 1 ), randfloat() ) );
+    vmathSoaP3SetElem( &a_Point3, 1, vec_sub_float( vmathSoaP3GetElem( &a_Point3, 1 ), randfloat() ) );
+    vmathSoaP3Prints( &a_Point3, "Point3::operator [](1)" );
+    vmathSoaP3SetY( &a_Point3, randfloat() );
+    vmathSoaP3Prints( &a_Point3, "Point3::setY()" );
+    vmathSoaP3SetElem( &a_Point3, 2, randfloat() ); /* element 2: same sequence as element 0 */
+    vmathSoaP3Prints( &a_Point3, "Point3::set( 2, float )" );
+    vmathSoaP3SetElem( &a_Point3, 2, randfloat() );
+    vmathSoaP3SetElem( &a_Point3, 2, vec_mul_float( vmathSoaP3GetElem( &a_Point3, 2 ), randfloat() ) );
+    vmathSoaP3SetElem( &a_Point3, 2, divf4( vmathSoaP3GetElem( &a_Point3, 2 ), randfloat() ) );
+    vmathSoaP3SetElem( &a_Point3, 2, vec_add_float( vmathSoaP3GetElem( &a_Point3, 2 ), randfloat() ) );
+    vmathSoaP3SetElem( &a_Point3, 2, vec_sub_float( vmathSoaP3GetElem( &a_Point3, 2 ), randfloat() ) );
+    vmathSoaP3Prints( &a_Point3, "Point3::operator [](2)" );
+    vmathSoaP3SetZ( &a_Point3, randfloat() );
+    vmathSoaP3Prints( &a_Point3, "Point3::setZ()" );
+    printf("Point3::get( 0 ): %f\n", getfloat(vmathSoaP3GetElem( &a_Point3, 0 )) ); /* read-back: each index is printed via GetElem under two labels, then via the named getter */
+    printf("Point3::operator []( 0 ): %f\n", getfloat(vmathSoaP3GetElem( &a_Point3, 0 )) );
+    printf("Point3::getX(): %f\n", getfloat(vmathSoaP3GetX( &a_Point3 )) );
+    printf("Point3::get( 1 ): %f\n", getfloat(vmathSoaP3GetElem( &a_Point3, 1 )) );
+    printf("Point3::operator []( 1 ): %f\n", getfloat(vmathSoaP3GetElem( &a_Point3, 1 )) );
+    printf("Point3::getY(): %f\n", getfloat(vmathSoaP3GetY( &a_Point3 )) );
+    printf("Point3::get( 2 ): %f\n", getfloat(vmathSoaP3GetElem( &a_Point3, 2 )) );
+    printf("Point3::operator []( 2 ): %f\n", getfloat(vmathSoaP3GetElem( &a_Point3, 2 )) );
+    printf("Point3::getZ(): %f\n", getfloat(vmathSoaP3GetZ( &a_Point3 )) );
+    vmathSoaP3Sub( &tmpV3_2, &a_Point3, &b_Point3 ); /* the Point3 - Point3 result is stored in a Vector3 */
+    vmathSoaV3Prints( &tmpV3_2, "Point3 - Point3" );
+    vmathSoaP3AddV3( &tmpP3_0, &a_Point3, &b_Vector3 );
+    vmathSoaP3Prints( &tmpP3_0, "Point3 + Vector3" );
+    vmathSoaP3SubV3( &tmpP3_1, &a_Point3, &b_Vector3 );
+    vmathSoaP3Prints( &tmpP3_1, "Point3 - Vector3" );
+    vmathSoaP3MulPerElem( &tmpP3_2, &a_Point3, &b_Point3 );
+    vmathSoaP3Prints( &tmpP3_2, "mulPerElem( Point3, Point3 )" );
+    vmathSoaP3DivPerElem( &tmpP3_3, &a_Point3, &b_Point3 );
+    vmathSoaP3Prints( &tmpP3_3, "divPerElem( Point3, Point3 )" );
+    vmathSoaP3RecipPerElem( &tmpP3_4, &a_Point3 );
+    vmathSoaP3Prints( &tmpP3_4, "Point3 recip" );
+    vmathSoaP3AbsPerElem( &tmpP3_5, &a_Point3 ); /* sqrt is applied to the AbsPerElem result, not to a_Point3 directly */
+    vmathSoaP3SqrtPerElem( &tmpP3_6, &tmpP3_5 );
+    vmathSoaP3Prints( &tmpP3_6, "Point3 sqrt" );
+    vmathSoaP3AbsPerElem( &tmpP3_7, &a_Point3 ); /* likewise, rsqrt is applied to the AbsPerElem result */
+    vmathSoaP3RsqrtPerElem( &tmpP3_8, &tmpP3_7 );
+    vmathSoaP3Prints( &tmpP3_8, "Point3 rsqrt" );
+    vmathSoaP3AbsPerElem( &tmpP3_9, &a_Point3 );
+    vmathSoaP3Prints( &tmpP3_9, "Point3 abs" );
+    vmathSoaP3CopySignPerElem( &tmpP3_10, &a_Point3, &b_Point3 );
+    vmathSoaP3Prints( &tmpP3_10, "Point3 copySign" );
+    vmathSoaP3MaxPerElem( &tmpP3_11, &a_Point3, &b_Point3 );
+    vmathSoaP3Prints( &tmpP3_11, "Point3 maximum Point3" );
+    vmathSoaP3MinPerElem( &tmpP3_12, &a_Point3, &b_Point3 );
+    vmathSoaP3Prints( &tmpP3_12, "Point3 minimum Point3" );
+    printf("Point3 maximum of elements: %f\n", getfloat(vmathSoaP3MaxElem( &a_Point3 ))); /* reductions: results are passed through getfloat() and printed with %f */
+    printf("Point3 minimum of elements: %f\n", getfloat(vmathSoaP3MinElem( &a_Point3 )));
+    printf("Point3 sum of elements: %f\n", getfloat(vmathSoaP3Sum( &a_Point3 )));
+    printf("Point projection: %f\n", getfloat(vmathSoaP3Projection( &a_Point3, &b_Vector3 )));
+    printf("Point distSqrFromOrigin: %f\n", getfloat(vmathSoaP3DistSqrFromOrigin( &a_Point3 )) );
+    printf("Point distFromOrigin: %f\n", getfloat(vmathSoaP3DistFromOrigin( &a_Point3 )) );
+    printf("Point distSqr: %f\n", getfloat(vmathSoaP3DistSqr( &a_Point3, &b_Point3 )) );
+    printf("Point dist: %f\n", getfloat(vmathSoaP3Dist( &a_Point3, &b_Point3 )) );
+    rndflt1 = randfloat(); /* rebuild b..e from new randfloat() values before the lerp */
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaP3MakeFromElems( &b_Point3, rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaP3MakeFromElems( &c_Point3, rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaP3MakeFromElems( &d_Point3, rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaP3MakeFromElems( &e_Point3, rndflt1, rndflt2, rndflt3 );
+    vmathSoaP3Lerp( &a_Point3, randfloat(), &b_Point3, &c_Point3 ); /* only b_ and c_Point3 feed the lerp; d_ and e_ are not read again in this function */
+    vmathSoaP3Prints( &a_Point3, "Point3 lerp" );
+}
+
+void  /* Exercises the C SoA quaternion API (vmathSoaQ*): builds operands from randfloat(), calls each routine once and prints the result. */
+Quat_methods_test()
+{
+    VmathSoaVector3 a_Vector3, b_Vector3, c_Vector3, d_Vector3, e_Vector3;
+    VmathSoaVector4 a_Vector4, b_Vector4, c_Vector4, d_Vector4, e_Vector4;
+    VmathSoaPoint3 a_Point3, b_Point3, c_Point3, d_Point3, e_Point3;
+    VmathSoaQuat a_Quat, b_Quat, c_Quat, d_Quat, e_Quat;
+    VmathQuat aos_Quat_0, aos_Quat_1, aos_Quat_2, aos_Quat_3;  /* AoS quats used for the pack/unpack and select checks */
+    VmathSoaQuat soa_Quat;
+    VmathSoaVector3 tmpV3_0;
+    VmathSoaQuat tmpQ_0, tmpQ_1, tmpQ_2, tmpQ_3, tmpQ_4, tmpQ_5, tmpQ_6, tmpQ_7;  /* one result slot per arithmetic call below */
+    VmathSoaVector3 tmpV3_1;
+    VmathSoaQuat tmpQ_8;
+    vec_float4 rndflt1, rndflt2, rndflt3, rndflt4;  /* randfloat() results are staged here so the calls happen in a fixed order (argument evaluation order is unspecified in C) */
+    rndflt1 = randfloat();  /* --- operand setup: a/b of each type are random, c/d/e are built from a zero scalar --- */
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaV3MakeFromElems( &a_Vector3, rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaV3MakeFromElems( &b_Vector3, rndflt1, rndflt2, rndflt3 );
+    vmathSoaV3Prints( &a_Vector3, "set Vector3 with floats" );
+    vmathSoaV3Prints( &b_Vector3, "set Vector3 with floats" );
+    vmathSoaV3MakeFromScalar( &c_Vector3, (vec_float4){0.0f} );
+    vmathSoaV3MakeFromScalar( &d_Vector3, (vec_float4){0.0f} );
+    vmathSoaV3MakeFromScalar( &e_Vector3, (vec_float4){0.0f} );
+    vmathSoaV3Prints( &c_Vector3, "set Vector3 elements to zero" );
+    vmathSoaV3Prints( &d_Vector3, "set Vector3 elements to zero" );
+    vmathSoaV3Prints( &e_Vector3, "set Vector3 elements to zero" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaV4MakeFromElems( &a_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaV4MakeFromElems( &b_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    vmathSoaV4Prints( &a_Vector4, "set Vector4 with floats" );
+    vmathSoaV4Prints( &b_Vector4, "set Vector4 with floats" );
+    vmathSoaV4MakeFromScalar( &c_Vector4, (vec_float4){0.0f} );
+    vmathSoaV4MakeFromScalar( &d_Vector4, (vec_float4){0.0f} );
+    vmathSoaV4MakeFromScalar( &e_Vector4, (vec_float4){0.0f} );
+    vmathSoaV4Prints( &c_Vector4, "set Vector4 elements to zero" );
+    vmathSoaV4Prints( &d_Vector4, "set Vector4 elements to zero" );
+    vmathSoaV4Prints( &e_Vector4, "set Vector4 elements to zero" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaP3MakeFromElems( &a_Point3, rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaP3MakeFromElems( &b_Point3, rndflt1, rndflt2, rndflt3 );
+    vmathSoaP3Prints( &a_Point3, "set Point3 with floats" );
+    vmathSoaP3Prints( &b_Point3, "set Point3 with floats" );
+    vmathSoaP3MakeFromScalar( &c_Point3, (vec_float4){0.0f} );
+    vmathSoaP3MakeFromScalar( &d_Point3, (vec_float4){0.0f} );
+    vmathSoaP3MakeFromScalar( &e_Point3, (vec_float4){0.0f} );
+    vmathSoaP3Prints( &c_Point3, "set Point3 elements to zero" );
+    vmathSoaP3Prints( &d_Point3, "set Point3 elements to zero" );
+    vmathSoaP3Prints( &e_Point3, "set Point3 elements to zero" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaQMakeFromElems( &a_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaQMakeFromElems( &b_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    vmathSoaQPrints( &a_Quat, "set Quat with floats" );
+    vmathSoaQPrints( &b_Quat, "set Quat with floats" );
+    vmathSoaQMakeFromScalar( &c_Quat, (vec_float4){0.0f} );
+    vmathSoaQMakeFromScalar( &d_Quat, (vec_float4){0.0f} );
+    vmathSoaQMakeFromScalar( &e_Quat, (vec_float4){0.0f} );
+    vmathSoaQPrints( &c_Quat, "set Quat elements to zero" );
+    vmathSoaQPrints( &d_Quat, "set Quat elements to zero" );
+    vmathSoaQPrints( &e_Quat, "set Quat elements to zero" );
+    vmathSoaQMakeFromV3Scalar( &a_Quat, &a_Vector3, randfloat() );  /* --- quaternion construction variants, each overwriting a_Quat --- */
+    vmathSoaQPrints( &a_Quat, "set Quat with Vector3, float" );
+    vmathSoaQMakeFromV4( &a_Quat, &a_Vector4 );
+    vmathSoaQPrints( &a_Quat, "construct Quat with Vector4" );
+    vmathSoaQMakeFromScalar( &a_Quat, randfloat() );
+    vmathSoaQPrints( &a_Quat, "set Quat with float" );
+    vmathSoaQMakeFromScalar( &a_Quat, randfloat() );  /* same call and label as the pair above; it consumes a fresh randfloat() value */
+    vmathSoaQPrints( &a_Quat, "set Quat with float" );
+    vmathQMakeFromElems( &aos_Quat_0, 0.0f, 1.0f, 2.0f, 3.0f );  /* four AoS quats holding the constants 0..15 */
+    vmathQMakeFromElems( &aos_Quat_1, 4.0f, 5.0f, 6.0f, 7.0f );
+    vmathQMakeFromElems( &aos_Quat_2, 8.0f, 9.0f, 10.0f, 11.0f );
+    vmathQMakeFromElems( &aos_Quat_3, 12.0f, 13.0f, 14.0f, 15.0f );
+    vmathSoaQMakeFrom4Aos( &soa_Quat, &aos_Quat_0, &aos_Quat_1, &aos_Quat_2, &aos_Quat_3 );  /* pack the four AoS quats into one SoA quat */
+    vmathSoaQGet4Aos( &soa_Quat, &aos_Quat_3, &aos_Quat_2, &aos_Quat_1, &aos_Quat_0 );  /* AoS arguments are passed in reverse order (3,2,1,0) relative to the pack call */
+    vmathQPrints( &aos_Quat_0, "aos type 0" );
+    vmathQPrints( &aos_Quat_1, "aos type 1" );
+    vmathQPrints( &aos_Quat_2, "aos type 2" );
+    vmathQPrints( &aos_Quat_3, "aos type 3" );
+    vmathSoaQSelect( &a_Quat, &a_Quat, &b_Quat, ((vec_uint4){0,0xffffffff,0,0xffffffff}) );  /* mask literal alternates 0 and 0xffffffff over its four elements; a_Quat is both an input and the result */
+    vmathSoaQGet4Aos( &a_Quat, &aos_Quat_0, &aos_Quat_1, &aos_Quat_2, &aos_Quat_3 );  /* result is unpacked into the AoS quats, which are printed next */
+    vmathQPrints( &aos_Quat_0, "select 0" );
+    vmathQPrints( &aos_Quat_1, "select 1" );
+    vmathQPrints( &aos_Quat_2, "select 2" );
+    vmathQPrints( &aos_Quat_3, "select 3" );
+    vmathSoaQCopy( &a_Quat, &b_Quat );
+    vmathSoaQPrints( &a_Quat, "assign to Quat from Quat" );
+    vmathSoaQSetXYZ( &a_Quat, &a_Vector3 );
+    vmathSoaQPrints( &a_Quat, "set Quat xyz" );
+    vmathSoaQGetXYZ( &tmpV3_0, &a_Quat );
+    vmathSoaV3Prints( &tmpV3_0, "get Quat xyz" );
+    vmathSoaQMakeFromScalar( &a_Quat, (vec_float4){0.0f} );
+    vmathSoaQPrints( &a_Quat, "set Quat elements to zero" );
+    vmathSoaQSetElem( &a_Quat, 0, randfloat() );  /* --- element setters: the same pattern is repeated for indices 0..3 and SetX/Y/Z/W --- */
+    vmathSoaQPrints( &a_Quat, "Quat::set( 0, float )" );  /* NOTE(review): labels use "Quat::..." names, presumably to match a C++ variant of this test - confirm */
+    vmathSoaQSetElem( &a_Quat, 0, randfloat() );
+    vmathSoaQSetElem( &a_Quat, 0, vec_mul_float( vmathSoaQGetElem( &a_Quat, 0 ), randfloat() ) );  /* read-modify-write of element 0: multiply, then divide, add, subtract */
+    vmathSoaQSetElem( &a_Quat, 0, divf4( vmathSoaQGetElem( &a_Quat, 0 ), randfloat() ) );
+    vmathSoaQSetElem( &a_Quat, 0, vec_add_float( vmathSoaQGetElem( &a_Quat, 0 ), randfloat() ) );
+    vmathSoaQSetElem( &a_Quat, 0, vec_sub_float( vmathSoaQGetElem( &a_Quat, 0 ), randfloat() ) );
+    vmathSoaQPrints( &a_Quat, "Quat::operator [](0)" );
+    vmathSoaQSetX( &a_Quat, randfloat() );
+    vmathSoaQPrints( &a_Quat, "Quat::setX()" );
+    vmathSoaQSetElem( &a_Quat, 1, randfloat() );
+    vmathSoaQPrints( &a_Quat, "Quat::set( 1, float )" );
+    vmathSoaQSetElem( &a_Quat, 1, randfloat() );
+    vmathSoaQSetElem( &a_Quat, 1, vec_mul_float( vmathSoaQGetElem( &a_Quat, 1 ), randfloat() ) );
+    vmathSoaQSetElem( &a_Quat, 1, divf4( vmathSoaQGetElem( &a_Quat, 1 ), randfloat() ) );
+    vmathSoaQSetElem( &a_Quat, 1, vec_add_float( vmathSoaQGetElem( &a_Quat, 1 ), randfloat() ) );
+    vmathSoaQSetElem( &a_Quat, 1, vec_sub_float( vmathSoaQGetElem( &a_Quat, 1 ), randfloat() ) );
+    vmathSoaQPrints( &a_Quat, "Quat::operator [](1)" );
+    vmathSoaQSetY( &a_Quat, randfloat() );
+    vmathSoaQPrints( &a_Quat, "Quat::setY()" );
+    vmathSoaQSetElem( &a_Quat, 2, randfloat() );
+    vmathSoaQPrints( &a_Quat, "Quat::set( 2, float )" );
+    vmathSoaQSetElem( &a_Quat, 2, randfloat() );
+    vmathSoaQSetElem( &a_Quat, 2, vec_mul_float( vmathSoaQGetElem( &a_Quat, 2 ), randfloat() ) );
+    vmathSoaQSetElem( &a_Quat, 2, divf4( vmathSoaQGetElem( &a_Quat, 2 ), randfloat() ) );
+    vmathSoaQSetElem( &a_Quat, 2, vec_add_float( vmathSoaQGetElem( &a_Quat, 2 ), randfloat() ) );
+    vmathSoaQSetElem( &a_Quat, 2, vec_sub_float( vmathSoaQGetElem( &a_Quat, 2 ), randfloat() ) );
+    vmathSoaQPrints( &a_Quat, "Quat::operator [](2)" );
+    vmathSoaQSetZ( &a_Quat, randfloat() );
+    vmathSoaQPrints( &a_Quat, "Quat::setZ()" );
+    vmathSoaQSetElem( &a_Quat, 3, randfloat() );
+    vmathSoaQPrints( &a_Quat, "Quat::set( 3, float )" );
+    vmathSoaQSetElem( &a_Quat, 3, randfloat() );
+    vmathSoaQSetElem( &a_Quat, 3, vec_mul_float( vmathSoaQGetElem( &a_Quat, 3 ), randfloat() ) );
+    vmathSoaQSetElem( &a_Quat, 3, divf4( vmathSoaQGetElem( &a_Quat, 3 ), randfloat() ) );
+    vmathSoaQSetElem( &a_Quat, 3, vec_add_float( vmathSoaQGetElem( &a_Quat, 3 ), randfloat() ) );
+    vmathSoaQSetElem( &a_Quat, 3, vec_sub_float( vmathSoaQGetElem( &a_Quat, 3 ), randfloat() ) );
+    vmathSoaQPrints( &a_Quat, "Quat::operator [](3)" );
+    vmathSoaQSetW( &a_Quat, randfloat() );
+    vmathSoaQPrints( &a_Quat, "Quat::setW()" );
+    printf("Quat::get( 0 ): %f\n", getfloat(vmathSoaQGetElem( &a_Quat, 0 )) );  /* --- element getters; getfloat() (defined elsewhere) supplies the value handed to %f --- */
+    printf("Quat::operator []( 0 ): %f\n", getfloat(vmathSoaQGetElem( &a_Quat, 0 )) );  /* the "get" and "operator []" lines both go through vmathSoaQGetElem here */
+    printf("Quat::getX(): %f\n", getfloat(vmathSoaQGetX( &a_Quat )) );
+    printf("Quat::get( 1 ): %f\n", getfloat(vmathSoaQGetElem( &a_Quat, 1 )) );
+    printf("Quat::operator []( 1 ): %f\n", getfloat(vmathSoaQGetElem( &a_Quat, 1 )) );
+    printf("Quat::getY(): %f\n", getfloat(vmathSoaQGetY( &a_Quat )) );
+    printf("Quat::get( 2 ): %f\n", getfloat(vmathSoaQGetElem( &a_Quat, 2 )) );
+    printf("Quat::operator []( 2 ): %f\n", getfloat(vmathSoaQGetElem( &a_Quat, 2 )) );
+    printf("Quat::getZ(): %f\n", getfloat(vmathSoaQGetZ( &a_Quat )) );
+    printf("Quat::get( 3 ): %f\n", getfloat(vmathSoaQGetElem( &a_Quat, 3 )) );
+    printf("Quat::operator []( 3 ): %f\n", getfloat(vmathSoaQGetElem( &a_Quat, 3 )) );
+    printf("Quat::getW(): %f\n", getfloat(vmathSoaQGetW( &a_Quat )) );
+    vmathSoaQAdd( &tmpQ_0, &a_Quat, &b_Quat );  /* --- arithmetic on a_Quat / b_Quat, each result written to its own temporary --- */
+    vmathSoaQPrints( &tmpQ_0, "Quat + Quat" );
+    vmathSoaQSub( &tmpQ_1, &a_Quat, &b_Quat );
+    vmathSoaQPrints( &tmpQ_1, "Quat - Quat" );
+    vmathSoaQMul( &tmpQ_2, &a_Quat, &b_Quat );
+    vmathSoaQPrints( &tmpQ_2, "Quat * Quat" );
+    vmathSoaQScalarMul( &tmpQ_3, &a_Quat, randfloat() );
+    vmathSoaQPrints( &tmpQ_3, "Quat * float" );
+    vmathSoaQScalarDiv( &tmpQ_4, &a_Quat, randfloat() );
+    vmathSoaQPrints( &tmpQ_4, "Quat / float" );
+    vmathSoaQScalarMul( &tmpQ_5, &a_Quat, randfloat() );  /* "float * Quat" uses the same vmathSoaQScalarMul call as "Quat * float" above */
+    vmathSoaQPrints( &tmpQ_5, "float * Quat" );
+    vmathSoaQNeg( &tmpQ_6, &a_Quat );
+    vmathSoaQPrints( &tmpQ_6, "Quat negate" );
+    printf("Quat dot Quat: %f\n", getfloat(vmathSoaQDot( &a_Quat, &b_Quat )));
+    printf("Quat lengthSqr: %f\n", getfloat(vmathSoaQNorm( &a_Quat )));  /* printed under the label "lengthSqr" but obtained from vmathSoaQNorm */
+    printf("Quat length: %f\n", getfloat(vmathSoaQLength( &a_Quat )));
+    vmathSoaQNormalize( &tmpQ_7, &a_Quat );
+    vmathSoaQPrints( &tmpQ_7, "Quat normalized" );
+    vmathSoaQMakeIdentity( &a_Quat );  /* --- identity and rotation constructors, each overwriting a_Quat --- */
+    vmathSoaQPrints( &a_Quat, "set to identity" );
+    vmathSoaQMakeRotationArc( &a_Quat, &a_Vector3, &b_Vector3 );
+    vmathSoaQPrints( &a_Quat, "Quat rotation between vectors" );
+    vmathSoaQMakeRotationAxis( &a_Quat, randfloat(), &a_Vector3 );
+    vmathSoaQPrints( &a_Quat, "Quat rotation axis angle" );
+    vmathSoaQMakeRotationX( &a_Quat, randfloat() );
+    vmathSoaQPrints( &a_Quat, "Quat rotationX" );
+    vmathSoaQMakeRotationY( &a_Quat, randfloat() );
+    vmathSoaQPrints( &a_Quat, "Quat rotationY" );
+    vmathSoaQMakeRotationZ( &a_Quat, randfloat() );
+    vmathSoaQPrints( &a_Quat, "Quat rotationZ" );
+    vmathSoaQRotate( &tmpV3_1, &a_Quat, &a_Vector3 );  /* a_Quat here is the rotationZ quaternion built just above */
+    vmathSoaV3Prints( &tmpV3_1, "Quat rotate Vector3" );
+    vmathSoaQConj( &tmpQ_8, &a_Quat );
+    vmathSoaQPrints( &tmpQ_8, "Quat conj" );
+    rndflt1 = randfloat();  /* --- interpolation: b..e are re-randomised and normalised in place first --- */
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaQMakeFromElems( &b_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaQMakeFromElems( &c_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaQMakeFromElems( &d_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaQMakeFromElems( &e_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    vmathSoaQNormalize( &b_Quat, &b_Quat );  /* result and input are the same object */
+    vmathSoaQNormalize( &c_Quat, &c_Quat );
+    vmathSoaQNormalize( &d_Quat, &d_Quat );
+    vmathSoaQNormalize( &e_Quat, &e_Quat );
+    vmathSoaQLerp( &a_Quat, randfloat(), &b_Quat, &c_Quat );
+    vmathSoaQPrints( &a_Quat, "Quat lerp" );
+    vmathSoaQSlerp( &a_Quat, randfloat(), &b_Quat, &c_Quat );
+    vmathSoaQPrints( &a_Quat, "Quat slerp" );
+    vmathSoaQSquad( &a_Quat, randfloat(), &b_Quat, &c_Quat, &d_Quat, &e_Quat );  /* the only call that consumes d_Quat and e_Quat */
+    vmathSoaQPrints( &a_Quat, "Quat squad" );
+}
+
+int main()  /* Test driver: runs the four *_methods_test functions twice between "__begin__" and "__end__" marker lines; always returns 0. */
+{
+    printf("\n __begin__ \n");  /* start marker for the test output */
+    for ( iteration = 0; iteration < 2; iteration++ ) {  /* `iteration` is not declared in this function; it is a name from an enclosing scope */
+        Vector3_methods_test();
+        Vector4_methods_test();
+        Point3_methods_test();
+        Quat_methods_test();
+    }
+    printf("\n __end__ \n");  /* end marker for the test output */
+    return 0;
+}
diff --git a/Extras/vectormathlibrary/tests/test1_soa_cpp.cpp b/Extras/vectormathlibrary/tests/test1_soa_cpp.cpp
new file mode 100644
index 000000000..daaeec45c
--- /dev/null
+++ b/Extras/vectormathlibrary/tests/test1_soa_cpp.cpp
@@ -0,0 +1,931 @@
+/*
+  Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+  All rights reserved.
+
+  Redistribution and use in source and binary forms,
+  with or without modification, are permitted provided that the
+  following conditions are met:
+   * Redistributions of source code must retain the above copyright
+     notice, this list of conditions and the following disclaimer.
+   * Redistributions in binary form must reproduce the above copyright
+     notice, this list of conditions and the following disclaimer in the
+     documentation and/or other materials provided with the distribution.
+   * Neither the name of the Sony Computer Entertainment Inc nor the names
+     of its contributors may be used to endorse or promote products derived
+     from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+  POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#define _VECTORMATH_SOA_TEST  // defined before the headers below are included; NOTE(review): presumably enables test-only helpers in them - confirm in vectormath_soa.h / test.h
+
+#include "vectormath_soa.h"
+#include "test.h"
+
+int iteration = 0;  // file-scope counter, starts at 0; NOTE(review): presumably the loop index used by this file's main() - confirm
+
+using namespace Vectormath;  // lets the tests below write Aos::Vector3 etc. without the Vectormath:: prefix
+using namespace Vectormath::Soa;  // unqualified Vector3/Vector4/Point3/Quat in the tests below can then resolve into Vectormath::Soa
+
+void  // Exercises the SoA Vector3 C++ API: construction, AoS pack/unpack, XYZ array load/store, element access, arithmetic and interpolation, printing each result.
+Vector3_methods_test()
+{
+    Vector3 a_Vector3, b_Vector3, c_Vector3, d_Vector3, e_Vector3;
+    Vector4 a_Vector4, b_Vector4, c_Vector4, d_Vector4, e_Vector4;
+    Point3 a_Point3, b_Point3, c_Point3, d_Point3, e_Point3;
+    Quat a_Quat, b_Quat, c_Quat, d_Quat, e_Quat;
+    Aos::Vector3 aos_Vector3_0, aos_Vector3_1, aos_Vector3_2, aos_Vector3_3;  // AoS vectors used for the pack/unpack and select checks
+    Vector3 soa_Vector3;
+    Aos::Vector4 aos_Vector4_0, aos_Vector4_1, aos_Vector4_2;  // used to print the xyz4 buffer four floats at a time
+    vec_float4 rndflt1, rndflt2, rndflt3, rndflt4;  // randfloat() results are staged here so the calls happen in a fixed order, independent of argument evaluation order
+    float xyz4[12] __attribute__ ((aligned(16)));  // 12 floats, 16-byte aligned; later passed to loadXYZArray/storeXYZArray as vec_float4 pointers
+    xyz4[0] = getfloat(randfloat());  // fill the buffer with one getfloat(randfloat()) value per slot
+    xyz4[1] = getfloat(randfloat());
+    xyz4[2] = getfloat(randfloat());
+    xyz4[3] = getfloat(randfloat());
+    xyz4[4] = getfloat(randfloat());
+    xyz4[5] = getfloat(randfloat());
+    xyz4[6] = getfloat(randfloat());
+    xyz4[7] = getfloat(randfloat());
+    xyz4[8] = getfloat(randfloat());
+    xyz4[9] = getfloat(randfloat());
+    xyz4[10] = getfloat(randfloat());
+    xyz4[11] = getfloat(randfloat());
+    rndflt1 = randfloat();  // --- operand setup: a/b of each type are random, c/d/e are built from a zero scalar ---
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    a_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    b_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 );
+    print( a_Vector3, "set Vector3 with floats" );
+    print( b_Vector3, "set Vector3 with floats" );
+    c_Vector3 = Vector3( (vec_float4){0.0f} );
+    d_Vector3 = Vector3( (vec_float4){0.0f} );
+    e_Vector3 = Vector3( (vec_float4){0.0f} );
+    print( c_Vector3, "set Vector3 elements to zero" );
+    print( d_Vector3, "set Vector3 elements to zero" );
+    print( e_Vector3, "set Vector3 elements to zero" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    a_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    b_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 );
+    print( a_Vector4, "set Vector4 with floats" );
+    print( b_Vector4, "set Vector4 with floats" );
+    c_Vector4 = Vector4( (vec_float4){0.0f} );
+    d_Vector4 = Vector4( (vec_float4){0.0f} );
+    e_Vector4 = Vector4( (vec_float4){0.0f} );
+    print( c_Vector4, "set Vector4 elements to zero" );
+    print( d_Vector4, "set Vector4 elements to zero" );
+    print( e_Vector4, "set Vector4 elements to zero" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    a_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    b_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    print( a_Point3, "set Point3 with floats" );
+    print( b_Point3, "set Point3 with floats" );
+    c_Point3 = Point3( (vec_float4){0.0f} );
+    d_Point3 = Point3( (vec_float4){0.0f} );
+    e_Point3 = Point3( (vec_float4){0.0f} );
+    print( c_Point3, "set Point3 elements to zero" );
+    print( d_Point3, "set Point3 elements to zero" );
+    print( e_Point3, "set Point3 elements to zero" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    a_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    b_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    print( a_Quat, "set Quat with floats" );
+    print( b_Quat, "set Quat with floats" );
+    c_Quat = Quat( (vec_float4){0.0f} );
+    d_Quat = Quat( (vec_float4){0.0f} );
+    e_Quat = Quat( (vec_float4){0.0f} );
+    print( c_Quat, "set Quat elements to zero" );
+    print( d_Quat, "set Quat elements to zero" );
+    print( e_Quat, "set Quat elements to zero" );
+    a_Vector3 = Vector3( a_Point3 );  // --- Vector3 construction variants, each overwriting a_Vector3 ---
+    print( a_Vector3, "construct Vector3 with Point3" );
+    a_Vector3 = Vector3( randfloat() );
+    print( a_Vector3, "set Vector3 with float" );
+    a_Vector3 = Vector3( randfloat() );  // same statement and label as the pair above; it consumes a fresh randfloat() value
+    print( a_Vector3, "set Vector3 with float" );
+    aos_Vector3_0 = Aos::Vector3( 0.0f, 1.0f, 2.0f );  // four AoS vectors holding the constants 0..11
+    aos_Vector3_1 = Aos::Vector3( 3.0f, 4.0f, 5.0f );
+    aos_Vector3_2 = Aos::Vector3( 6.0f, 7.0f, 8.0f );
+    aos_Vector3_3 = Aos::Vector3( 9.0f, 10.0f, 11.0f );
+    soa_Vector3 = Vector3( aos_Vector3_0, aos_Vector3_1, aos_Vector3_2, aos_Vector3_3 );  // pack the four AoS vectors into one SoA vector
+    soa_Vector3.get4Aos( aos_Vector3_3, aos_Vector3_2, aos_Vector3_1, aos_Vector3_0 );  // AoS arguments are passed in reverse order (3,2,1,0) relative to the pack call
+    Aos::print( aos_Vector3_0, "aos type 0" );
+    Aos::print( aos_Vector3_1, "aos type 1" );
+    Aos::print( aos_Vector3_2, "aos type 2" );
+    Aos::print( aos_Vector3_3, "aos type 3" );
+    a_Vector3 = select( a_Vector3, b_Vector3, ((vec_uint4){0,0xffffffff,0,0xffffffff}) );  // mask literal alternates 0 and 0xffffffff over its four elements
+    a_Vector3.get4Aos( aos_Vector3_0, aos_Vector3_1, aos_Vector3_2, aos_Vector3_3 );  // result is unpacked into the AoS vectors, which are printed next
+    Aos::print( aos_Vector3_0, "select 0" );
+    Aos::print( aos_Vector3_1, "select 1" );
+    Aos::print( aos_Vector3_2, "select 2" );
+    Aos::print( aos_Vector3_3, "select 3" );
+    loadXYZArray( a_Vector3, (const vec_float4 *)xyz4 );  // the float buffer is reinterpreted as vec_float4 data for the load
+    print( a_Vector3, "load XYZ array" );
+    a_Vector3 = Vector3( ( -Vector3( a_Vector3 ) ) );  // negate (through temporary copies) before storing back into the same buffer
+    storeXYZArray( a_Vector3, (vec_float4 *)xyz4 );
+    aos_Vector4_0 = Aos::Vector4( xyz4[0], xyz4[1], xyz4[2], xyz4[3] );  // view the 12 stored floats as three groups of four for printing
+    aos_Vector4_1 = Aos::Vector4( xyz4[4], xyz4[5], xyz4[6], xyz4[7] );
+    aos_Vector4_2 = Aos::Vector4( xyz4[8], xyz4[9], xyz4[10], xyz4[11] );
+    Aos::print( aos_Vector4_0, "xyzx" );
+    Aos::print( aos_Vector4_1, "yzxy" );
+    Aos::print( aos_Vector4_2, "zxyz" );
+    Aos::print( aos_Vector4_0, "xyzx" );  // the same three prints are issued a second time with unchanged values
+    Aos::print( aos_Vector4_1, "yzxy" );
+    Aos::print( aos_Vector4_2, "zxyz" );
+    printf("storeXYZ:-1.0 -2.0 -3.0 0.4\n");  // fixed text; not derived from any value computed above
+    a_Vector3 = b_Vector3;
+    print( a_Vector3, "assign to Vector3 from Vector3" );
+    a_Vector3 = Vector3( (vec_float4){0.0f} );
+    print( a_Vector3, "set Vector3 elements to zero" );
+    a_Vector3 = Vector3::xAxis( );
+    print( a_Vector3, "set to x axis" );
+    a_Vector3 = Vector3::yAxis( );
+    print( a_Vector3, "set to y axis" );
+    a_Vector3 = Vector3::zAxis( );
+    print( a_Vector3, "set to z axis" );
+    a_Vector3.setElem( 0, randfloat() );  // --- element setters: the same pattern is repeated for indices 0..2 and setX/Y/Z ---
+    print( a_Vector3, "Vector3::set( 0, float )" );
+    a_Vector3[0] = randfloat();  // assignment through operator[]
+    a_Vector3[0] = vec_mul_float( a_Vector3[0], randfloat() );  // read-modify-write of element 0: multiply, then divide, add, subtract
+    a_Vector3[0] = divf4( a_Vector3[0], randfloat() );
+    a_Vector3[0] = vec_add_float( a_Vector3[0], randfloat() );
+    a_Vector3[0] = vec_sub_float( a_Vector3[0], randfloat() );
+    print( a_Vector3, "Vector3::operator [](0)" );
+    a_Vector3.setX( randfloat() );
+    print( a_Vector3, "Vector3::setX()" );
+    a_Vector3.setElem( 1, randfloat() );
+    print( a_Vector3, "Vector3::set( 1, float )" );
+    a_Vector3[1] = randfloat();
+    a_Vector3[1] = vec_mul_float( a_Vector3[1], randfloat() );
+    a_Vector3[1] = divf4( a_Vector3[1], randfloat() );
+    a_Vector3[1] = vec_add_float( a_Vector3[1], randfloat() );
+    a_Vector3[1] = vec_sub_float( a_Vector3[1], randfloat() );
+    print( a_Vector3, "Vector3::operator [](1)" );
+    a_Vector3.setY( randfloat() );
+    print( a_Vector3, "Vector3::setY()" );
+    a_Vector3.setElem( 2, randfloat() );
+    print( a_Vector3, "Vector3::set( 2, float )" );
+    a_Vector3[2] = randfloat();
+    a_Vector3[2] = vec_mul_float( a_Vector3[2], randfloat() );
+    a_Vector3[2] = divf4( a_Vector3[2], randfloat() );
+    a_Vector3[2] = vec_add_float( a_Vector3[2], randfloat() );
+    a_Vector3[2] = vec_sub_float( a_Vector3[2], randfloat() );
+    print( a_Vector3, "Vector3::operator [](2)" );
+    a_Vector3.setZ( randfloat() );
+    print( a_Vector3, "Vector3::setZ()" );
+    printf("Vector3::get( 0 ): %f\n", getfloat(a_Vector3.getElem( 0 )) );  // --- element getters; getfloat() (defined elsewhere) supplies the value handed to %f ---
+    printf("Vector3::operator []( 0 ): %f\n", getfloat((vec_float4)a_Vector3[0]) );  // the operator[] result is explicitly cast to vec_float4 before getfloat()
+    printf("Vector3::getX(): %f\n", getfloat(a_Vector3.getX( )) );
+    printf("Vector3::get( 1 ): %f\n", getfloat(a_Vector3.getElem( 1 )) );
+    printf("Vector3::operator []( 1 ): %f\n", getfloat((vec_float4)a_Vector3[1]) );
+    printf("Vector3::getY(): %f\n", getfloat(a_Vector3.getY( )) );
+    printf("Vector3::get( 2 ): %f\n", getfloat(a_Vector3.getElem( 2 )) );
+    printf("Vector3::operator []( 2 ): %f\n", getfloat((vec_float4)a_Vector3[2]) );
+    printf("Vector3::getZ(): %f\n", getfloat(a_Vector3.getZ( )) );
+    print( ( a_Vector3 + b_Vector3 ), "Vector3 + Vector3" );  // --- arithmetic and per-element operations; results are printed directly ---
+    print( ( a_Vector3 - b_Vector3 ), "Vector3 - Vector3" );
+    print( ( a_Vector3 + b_Point3 ), "Vector3 + Point3" );
+    print( ( a_Vector3 * randfloat() ), "Vector3 * float" );
+    print( ( a_Vector3 / randfloat() ), "Vector3 / float" );
+    print( ( randfloat() * a_Vector3 ), "float * Vector3" );  // scalar on the left-hand side this time
+    print( ( -a_Vector3 ), "Vector3 negate" );
+    print( mulPerElem( a_Vector3, b_Vector3 ), "mulPerElem( Vector3, Vector3 )" );
+    print( divPerElem( a_Vector3, b_Vector3 ), "divPerElem( Vector3, Vector3 )" );
+    print( recipPerElem( a_Vector3 ), "Vector3 recip" );
+    print( sqrtPerElem( absPerElem( a_Vector3 ) ), "Vector3 sqrt" );  // sqrt and rsqrt are applied to absPerElem(a_Vector3), not to a_Vector3 itself
+    print( rsqrtPerElem( absPerElem( a_Vector3 ) ), "Vector3 rsqrt" );
+    print( absPerElem( a_Vector3 ), "Vector3 abs" );
+    print( copySignPerElem( a_Vector3, b_Vector3 ), "Vector3 copySign" );
+    print( maxPerElem( a_Vector3, b_Vector3 ), "Vector3 maximum Vector3" );
+    print( minPerElem( a_Vector3, b_Vector3 ), "Vector3 minimum Vector3" );
+    printf("Vector3 maximum of elements: %f\n", getfloat(maxElem( a_Vector3 )));
+    printf("Vector3 minimum of elements: %f\n", getfloat(minElem( a_Vector3 )));
+    printf("Vector3 sum of elements: %f\n", getfloat(sum( a_Vector3 )));
+    printf("Vector3 dot Vector3: %f\n", getfloat(dot( a_Vector3, b_Vector3 )));
+    printf("Vector3 lengthSqr: %f\n", getfloat(lengthSqr( a_Vector3 )));
+    printf("Vector3 length: %f\n", getfloat(length( a_Vector3 )));
+    print( normalize( a_Vector3 ), "Vector3 normalized" );
+    rndflt1 = randfloat();  // --- interpolation: b..e are re-randomised and normalised first ---
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    b_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    c_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    d_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    e_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 );
+    b_Vector3 = normalize( b_Vector3 );
+    c_Vector3 = normalize( c_Vector3 );
+    d_Vector3 = normalize( d_Vector3 );  // d and e are normalised but not read again in this function; their setup still consumes randfloat() values
+    e_Vector3 = normalize( e_Vector3 );
+    a_Vector3 = lerp( randfloat(), b_Vector3, c_Vector3 );
+    print( a_Vector3, "Vector3 lerp" );
+    a_Vector3 = slerp( randfloat(), b_Vector3, c_Vector3 );
+    print( a_Vector3, "Vector3 slerp" );
+}
+
+void
+Vector4_methods_test()
+{
+    Vector3 a_Vector3, b_Vector3, c_Vector3, d_Vector3, e_Vector3;
+    Vector4 a_Vector4, b_Vector4, c_Vector4, d_Vector4, e_Vector4;
+    Point3 a_Point3, b_Point3, c_Point3, d_Point3, e_Point3;
+    Quat a_Quat, b_Quat, c_Quat, d_Quat, e_Quat;
+    Aos::Vector4 aos_Vector4_0, aos_Vector4_1, aos_Vector4_2, aos_Vector4_3;
+    Vector4 soa_Vector4;
+    vec_float4 rndflt1, rndflt2, rndflt3, rndflt4;
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    a_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    b_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 );
+    print( a_Vector3, "set Vector3 with floats" );
+    print( b_Vector3, "set Vector3 with floats" );
+    c_Vector3 = Vector3( (vec_float4){0.0f} );
+    d_Vector3 = Vector3( (vec_float4){0.0f} );
+    e_Vector3 = Vector3( (vec_float4){0.0f} );
+    print( c_Vector3, "set Vector3 elements to zero" );
+    print( d_Vector3, "set Vector3 elements to zero" );
+    print( e_Vector3, "set Vector3 elements to zero" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    a_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    b_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 );
+    print( a_Vector4, "set Vector4 with floats" );
+    print( b_Vector4, "set Vector4 with floats" );
+    c_Vector4 = Vector4( (vec_float4){0.0f} );
+    d_Vector4 = Vector4( (vec_float4){0.0f} );
+    e_Vector4 = Vector4( (vec_float4){0.0f} );
+    print( c_Vector4, "set Vector4 elements to zero" );
+    print( d_Vector4, "set Vector4 elements to zero" );
+    print( e_Vector4, "set Vector4 elements to zero" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    a_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    b_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    print( a_Point3, "set Point3 with floats" );
+    print( b_Point3, "set Point3 with floats" );
+    c_Point3 = Point3( (vec_float4){0.0f} );
+    d_Point3 = Point3( (vec_float4){0.0f} );
+    e_Point3 = Point3( (vec_float4){0.0f} );
+    print( c_Point3, "set Point3 elements to zero" );
+    print( d_Point3, "set Point3 elements to zero" );
+    print( e_Point3, "set Point3 elements to zero" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    a_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    b_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    print( a_Quat, "set Quat with floats" );
+    print( b_Quat, "set Quat with floats" );
+    c_Quat = Quat( (vec_float4){0.0f} );
+    d_Quat = Quat( (vec_float4){0.0f} );
+    e_Quat = Quat( (vec_float4){0.0f} );
+    print( c_Quat, "set Quat elements to zero" );
+    print( d_Quat, "set Quat elements to zero" );
+    print( e_Quat, "set Quat elements to zero" );
+    a_Vector4 = Vector4( a_Vector3, randfloat() );
+    print( a_Vector4, "set Vector4 with Vector3, float" );
+    a_Vector4 = Vector4( a_Vector3 );
+    print( a_Vector4, "set Vector4 with Vector3" );
+    a_Vector4 = Vector4( a_Point3 );
+    print( a_Vector4, "set Vector4 with Point3" );
+    a_Vector4 = Vector4( a_Quat );
+    print( a_Vector4, "construct Vector4 with Quat" );
+    a_Vector4 = Vector4( randfloat() );
+    print( a_Vector4, "set Vector4 with float" );
+    a_Vector4 = Vector4( randfloat() );
+    print( a_Vector4, "set Vector4 with float" );
+    aos_Vector4_0 = Aos::Vector4( 0.0f, 1.0f, 2.0f, 3.0f );
+    aos_Vector4_1 = Aos::Vector4( 4.0f, 5.0f, 6.0f, 7.0f );
+    aos_Vector4_2 = Aos::Vector4( 8.0f, 9.0f, 10.0f, 11.0f );
+    aos_Vector4_3 = Aos::Vector4( 12.0f, 13.0f, 14.0f, 15.0f );
+    soa_Vector4 = Vector4( aos_Vector4_0, aos_Vector4_1, aos_Vector4_2, aos_Vector4_3 );
+    soa_Vector4.get4Aos( aos_Vector4_3, aos_Vector4_2, aos_Vector4_1, aos_Vector4_0 );
+    Aos::print( aos_Vector4_0, "aos type 0" );
+    Aos::print( aos_Vector4_1, "aos type 1" );
+    Aos::print( aos_Vector4_2, "aos type 2" );
+    Aos::print( aos_Vector4_3, "aos type 3" );
+    a_Vector4 = select( a_Vector4, b_Vector4, ((vec_uint4){0,0xffffffff,0,0xffffffff}) );
+    a_Vector4.get4Aos( aos_Vector4_0, aos_Vector4_1, aos_Vector4_2, aos_Vector4_3 );
+    Aos::print( aos_Vector4_0, "select 0" );
+    Aos::print( aos_Vector4_1, "select 1" );
+    Aos::print( aos_Vector4_2, "select 2" );
+    Aos::print( aos_Vector4_3, "select 3" );
+    a_Vector4 = b_Vector4;
+    print( a_Vector4, "assign to Vector4 from Vector4" );
+    a_Vector4.setXYZ( a_Vector3 );
+    print( a_Vector4, "set Vector4 xyz" );
+    print( a_Vector4.getXYZ( ), "get Vector4 xyz" );
+    a_Vector4 = Vector4( (vec_float4){0.0f} );
+    print( a_Vector4, "set Vector4 elements to zero" );
+    a_Vector4 = Vector4::xAxis( );
+    print( a_Vector4, "set to x axis" );
+    a_Vector4 = Vector4::yAxis( );
+    print( a_Vector4, "set to y axis" );
+    a_Vector4 = Vector4::zAxis( );
+    print( a_Vector4, "set to z axis" );
+    a_Vector4 = Vector4::wAxis( );
+    print( a_Vector4, "set to w axis" );
+    a_Vector4.setElem( 0, randfloat() );
+    print( a_Vector4, "Vector4::set( 0, float )" );
+    a_Vector4[0] = randfloat();
+    a_Vector4[0] = vec_mul_float( a_Vector4[0], randfloat() );
+    a_Vector4[0] = divf4( a_Vector4[0], randfloat() );
+    a_Vector4[0] = vec_add_float( a_Vector4[0], randfloat() );
+    a_Vector4[0] = vec_sub_float( a_Vector4[0], randfloat() );
+    print( a_Vector4, "Vector4::operator [](0)" );
+    a_Vector4.setX( randfloat() );
+    print( a_Vector4, "Vector4::setX()" );
+    a_Vector4.setElem( 1, randfloat() );
+    print( a_Vector4, "Vector4::set( 1, float )" );
+    a_Vector4[1] = randfloat();
+    a_Vector4[1] = vec_mul_float( a_Vector4[1], randfloat() );
+    a_Vector4[1] = divf4( a_Vector4[1], randfloat() );
+    a_Vector4[1] = vec_add_float( a_Vector4[1], randfloat() );
+    a_Vector4[1] = vec_sub_float( a_Vector4[1], randfloat() );
+    print( a_Vector4, "Vector4::operator [](1)" );
+    a_Vector4.setY( randfloat() );
+    print( a_Vector4, "Vector4::setY()" );
+    a_Vector4.setElem( 2, randfloat() );
+    print( a_Vector4, "Vector4::set( 2, float )" );
+    a_Vector4[2] = randfloat();
+    a_Vector4[2] = vec_mul_float( a_Vector4[2], randfloat() );
+    a_Vector4[2] = divf4( a_Vector4[2], randfloat() );
+    a_Vector4[2] = vec_add_float( a_Vector4[2], randfloat() );
+    a_Vector4[2] = vec_sub_float( a_Vector4[2], randfloat() );
+    print( a_Vector4, "Vector4::operator [](2)" );
+    a_Vector4.setZ( randfloat() );
+    print( a_Vector4, "Vector4::setZ()" );
+    a_Vector4.setElem( 3, randfloat() );
+    print( a_Vector4, "Vector4::set( 3, float )" );
+    a_Vector4[3] = randfloat();
+    a_Vector4[3] = vec_mul_float( a_Vector4[3], randfloat() );
+    a_Vector4[3] = divf4( a_Vector4[3], randfloat() );
+    a_Vector4[3] = vec_add_float( a_Vector4[3], randfloat() );
+    a_Vector4[3] = vec_sub_float( a_Vector4[3], randfloat() );
+    print( a_Vector4, "Vector4::operator [](3)" );
+    a_Vector4.setW( randfloat() );
+    print( a_Vector4, "Vector4::setW()" );
+    printf("Vector4::get( 0 ): %f\n", getfloat(a_Vector4.getElem( 0 )) );
+    printf("Vector4::operator []( 0 ): %f\n", getfloat((vec_float4)a_Vector4[0]) );
+    printf("Vector4::getX(): %f\n", getfloat(a_Vector4.getX( )) );
+    printf("Vector4::get( 1 ): %f\n", getfloat(a_Vector4.getElem( 1 )) );
+    printf("Vector4::operator []( 1 ): %f\n", getfloat((vec_float4)a_Vector4[1]) );
+    printf("Vector4::getY(): %f\n", getfloat(a_Vector4.getY( )) );
+    printf("Vector4::get( 2 ): %f\n", getfloat(a_Vector4.getElem( 2 )) );
+    printf("Vector4::operator []( 2 ): %f\n", getfloat((vec_float4)a_Vector4[2]) );
+    printf("Vector4::getZ(): %f\n", getfloat(a_Vector4.getZ( )) );
+    printf("Vector4::get( 3 ): %f\n", getfloat(a_Vector4.getElem( 3 )) );
+    printf("Vector4::operator []( 3 ): %f\n", getfloat((vec_float4)a_Vector4[3]) );
+    printf("Vector4::getW(): %f\n", getfloat(a_Vector4.getW( )) );
+    print( ( a_Vector4 + b_Vector4 ), "Vector4 + Vector4" );
+    print( ( a_Vector4 - b_Vector4 ), "Vector4 - Vector4" );
+    print( ( a_Vector4 * randfloat() ), "Vector4 * float" );
+    print( ( a_Vector4 / randfloat() ), "Vector4 / float" );
+    print( ( randfloat() * a_Vector4 ), "float * Vector4" );
+    print( ( -a_Vector4 ), "Vector4 negate" );
+    print( mulPerElem( a_Vector4, b_Vector4 ), "mulPerElem( Vector4, Vector4 )" );
+    print( divPerElem( a_Vector4, b_Vector4 ), "divPerElem( Vector4, Vector4 )" );
+    print( recipPerElem( a_Vector4 ), "Vector4 recip" );
+    print( sqrtPerElem( absPerElem( a_Vector4 ) ), "Vector4 sqrt" );
+    print( rsqrtPerElem( absPerElem( a_Vector4 ) ), "Vector4 rsqrt" );
+    print( absPerElem( a_Vector4 ), "Vector4 abs" );
+    print( copySignPerElem( a_Vector4, b_Vector4 ), "Vector4 copySign" );
+    print( maxPerElem( a_Vector4, b_Vector4 ), "Vector4 maximum Vector4" );
+    print( minPerElem( a_Vector4, b_Vector4 ), "Vector4 minimum Vector4" );
+    printf("Vector4 maximum of elements: %f\n", getfloat(maxElem( a_Vector4 )));
+    printf("Vector4 minimum of elements: %f\n", getfloat(minElem( a_Vector4 )));
+    printf("Vector4 sum of elements: %f\n", getfloat(sum( a_Vector4 )));
+    printf("Vector4 dot Vector4: %f\n", getfloat(dot( a_Vector4, b_Vector4 )));
+    printf("Vector4 lengthSqr: %f\n", getfloat(lengthSqr( a_Vector4 )));
+    printf("Vector4 length: %f\n", getfloat(length( a_Vector4 )));
+    print( normalize( a_Vector4 ), "Vector4 normalized" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    b_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    c_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    d_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    e_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 );
+    b_Vector4 = normalize( b_Vector4 );
+    c_Vector4 = normalize( c_Vector4 );
+    d_Vector4 = normalize( d_Vector4 );
+    e_Vector4 = normalize( e_Vector4 );
+    a_Vector4 = lerp( randfloat(), b_Vector4, c_Vector4 );
+    print( a_Vector4, "Vector4 lerp" );
+    a_Vector4 = slerp( randfloat(), b_Vector4, c_Vector4 );
+    print( a_Vector4, "Vector4 slerp" );
+}
+
+void
+Point3_methods_test() // Exercises the Point3 API and prints each result with a label; takes no arguments and returns nothing.
+{ // NOTE(review): statement order fixes the sequence of randfloat() draws and printed lines - presumably compared against reference output, confirm before reordering.
+    Vector3 a_Vector3, b_Vector3, c_Vector3, d_Vector3, e_Vector3; // for each type: a_/b_ get random values below, c_/d_/e_ are zeroed
+    Vector4 a_Vector4, b_Vector4, c_Vector4, d_Vector4, e_Vector4;
+    Point3 a_Point3, b_Point3, c_Point3, d_Point3, e_Point3;
+    Quat a_Quat, b_Quat, c_Quat, d_Quat, e_Quat;
+    Aos::Point3 aos_Point3_0, aos_Point3_1, aos_Point3_2, aos_Point3_3; // Aos-namespace points used by the 4-point constructor / get4Aos round trip
+    Point3 soa_Point3;
+    Aos::Vector4 aos_Vector4_0, aos_Vector4_1, aos_Vector4_2; // used only to print the xyz4 buffer as three 4-float rows
+    vec_float4 rndflt1, rndflt2, rndflt3, rndflt4; // hold randfloat() results so that each draw is its own statement
+    float xyz4[12] __attribute__ ((aligned(16))); // 12 floats, 16-byte aligned; handed to loadXYZArray/storeXYZArray through a vec_float4* cast
+    xyz4[0] = getfloat(randfloat()); // fill all 12 slots from randfloat(), converted with getfloat()
+    xyz4[1] = getfloat(randfloat());
+    xyz4[2] = getfloat(randfloat());
+    xyz4[3] = getfloat(randfloat());
+    xyz4[4] = getfloat(randfloat());
+    xyz4[5] = getfloat(randfloat());
+    xyz4[6] = getfloat(randfloat());
+    xyz4[7] = getfloat(randfloat());
+    xyz4[8] = getfloat(randfloat());
+    xyz4[9] = getfloat(randfloat());
+    xyz4[10] = getfloat(randfloat());
+    xyz4[11] = getfloat(randfloat());
+    rndflt1 = randfloat(); // separate statements fix the call order; as constructor arguments the evaluation order would be unspecified
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    a_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    b_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 );
+    print( a_Vector3, "set Vector3 with floats" );
+    print( b_Vector3, "set Vector3 with floats" );
+    c_Vector3 = Vector3( (vec_float4){0.0f} );
+    d_Vector3 = Vector3( (vec_float4){0.0f} );
+    e_Vector3 = Vector3( (vec_float4){0.0f} );
+    print( c_Vector3, "set Vector3 elements to zero" );
+    print( d_Vector3, "set Vector3 elements to zero" );
+    print( e_Vector3, "set Vector3 elements to zero" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    a_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    b_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 );
+    print( a_Vector4, "set Vector4 with floats" );
+    print( b_Vector4, "set Vector4 with floats" );
+    c_Vector4 = Vector4( (vec_float4){0.0f} );
+    d_Vector4 = Vector4( (vec_float4){0.0f} );
+    e_Vector4 = Vector4( (vec_float4){0.0f} );
+    print( c_Vector4, "set Vector4 elements to zero" );
+    print( d_Vector4, "set Vector4 elements to zero" );
+    print( e_Vector4, "set Vector4 elements to zero" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    a_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    b_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    print( a_Point3, "set Point3 with floats" );
+    print( b_Point3, "set Point3 with floats" );
+    c_Point3 = Point3( (vec_float4){0.0f} );
+    d_Point3 = Point3( (vec_float4){0.0f} );
+    e_Point3 = Point3( (vec_float4){0.0f} );
+    print( c_Point3, "set Point3 elements to zero" );
+    print( d_Point3, "set Point3 elements to zero" );
+    print( e_Point3, "set Point3 elements to zero" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    a_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    b_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    print( a_Quat, "set Quat with floats" );
+    print( b_Quat, "set Quat with floats" );
+    c_Quat = Quat( (vec_float4){0.0f} );
+    d_Quat = Quat( (vec_float4){0.0f} );
+    e_Quat = Quat( (vec_float4){0.0f} );
+    print( c_Quat, "set Quat elements to zero" );
+    print( d_Quat, "set Quat elements to zero" );
+    print( e_Quat, "set Quat elements to zero" );
+    a_Point3 = Point3( a_Vector3 );
+    print( a_Point3, "construct Point3 with Vector3" );
+    a_Point3 = Point3( randfloat() );
+    print( a_Point3, "set Point3 with float" );
+    a_Point3 = Point3( randfloat() ); // NOTE(review): same call and label as the pair just above - looks intentional (output alignment?), confirm before removing
+    print( a_Point3, "set Point3 with float" );
+    aos_Point3_0 = Aos::Point3( 0.0f, 1.0f, 2.0f );
+    aos_Point3_1 = Aos::Point3( 3.0f, 4.0f, 5.0f );
+    aos_Point3_2 = Aos::Point3( 6.0f, 7.0f, 8.0f );
+    aos_Point3_3 = Aos::Point3( 9.0f, 10.0f, 11.0f );
+    soa_Point3 = Point3( aos_Point3_0, aos_Point3_1, aos_Point3_2, aos_Point3_3 ); // one Point3 built from four Aos::Point3 values
+    soa_Point3.get4Aos( aos_Point3_3, aos_Point3_2, aos_Point3_1, aos_Point3_0 ); // outputs passed in reverse order (3,2,1,0) relative to the constructor arguments
+    Aos::print( aos_Point3_0, "aos type 0" );
+    Aos::print( aos_Point3_1, "aos type 1" );
+    Aos::print( aos_Point3_2, "aos type 2" );
+    Aos::print( aos_Point3_3, "aos type 3" );
+    a_Point3 = select( a_Point3, b_Point3, ((vec_uint4){0,0xffffffff,0,0xffffffff}) ); // mask alternates zero / all-ones words; which operand each picks is defined by select() (not visible here)
+    a_Point3.get4Aos( aos_Point3_0, aos_Point3_1, aos_Point3_2, aos_Point3_3 );
+    Aos::print( aos_Point3_0, "select 0" );
+    Aos::print( aos_Point3_1, "select 1" );
+    Aos::print( aos_Point3_2, "select 2" );
+    Aos::print( aos_Point3_3, "select 3" );
+    loadXYZArray( a_Point3, (const vec_float4 *)xyz4 ); // a_Point3 is loaded from the 12-float buffer
+    print( a_Point3, "load XYZ array" );
+    a_Point3 = Point3( ( -Vector3( a_Point3 ) ) ); // negate: convert to Vector3, apply unary minus, convert back
+    storeXYZArray( a_Point3, (vec_float4 *)xyz4 ); // negated value is stored back into xyz4
+    aos_Vector4_0 = Aos::Vector4( xyz4[0], xyz4[1], xyz4[2], xyz4[3] ); // regroup the 12 floats as three 4-float rows for printing
+    aos_Vector4_1 = Aos::Vector4( xyz4[4], xyz4[5], xyz4[6], xyz4[7] );
+    aos_Vector4_2 = Aos::Vector4( xyz4[8], xyz4[9], xyz4[10], xyz4[11] );
+    Aos::print( aos_Vector4_0, "xyzx" );
+    Aos::print( aos_Vector4_1, "yzxy" );
+    Aos::print( aos_Vector4_2, "zxyz" );
+    Aos::print( aos_Vector4_0, "xyzx" ); // NOTE(review): these three prints repeat the three above verbatim - presumably deliberate output padding, confirm before removing
+    Aos::print( aos_Vector4_1, "yzxy" );
+    Aos::print( aos_Vector4_2, "zxyz" );
+    printf("storeXYZ:-1.0 -2.0 -3.0 0.4\n"); // fixed literal; no computed value is formatted here
+    a_Point3 = b_Point3;
+    print( a_Point3, "assign to Point3 from Point3" );
+    a_Point3 = Point3( (vec_float4){0.0f} );
+    print( a_Point3, "set Point3 elements to zero" );
+    a_Point3.setElem( 0, randfloat() );
+    print( a_Point3, "Point3::set( 0, float )" );
+    a_Point3[0] = randfloat(); // operator[] used as an assignment target, then read back and updated via mul/div/add/sub helpers
+    a_Point3[0] = vec_mul_float( a_Point3[0], randfloat() );
+    a_Point3[0] = divf4( a_Point3[0], randfloat() );
+    a_Point3[0] = vec_add_float( a_Point3[0], randfloat() );
+    a_Point3[0] = vec_sub_float( a_Point3[0], randfloat() );
+    print( a_Point3, "Point3::operator [](0)" );
+    a_Point3.setX( randfloat() );
+    print( a_Point3, "Point3::setX()" );
+    a_Point3.setElem( 1, randfloat() ); // same setElem / operator[] / setter sequence for element 1
+    print( a_Point3, "Point3::set( 1, float )" );
+    a_Point3[1] = randfloat();
+    a_Point3[1] = vec_mul_float( a_Point3[1], randfloat() );
+    a_Point3[1] = divf4( a_Point3[1], randfloat() );
+    a_Point3[1] = vec_add_float( a_Point3[1], randfloat() );
+    a_Point3[1] = vec_sub_float( a_Point3[1], randfloat() );
+    print( a_Point3, "Point3::operator [](1)" );
+    a_Point3.setY( randfloat() );
+    print( a_Point3, "Point3::setY()" );
+    a_Point3.setElem( 2, randfloat() ); // same sequence for element 2
+    print( a_Point3, "Point3::set( 2, float )" );
+    a_Point3[2] = randfloat();
+    a_Point3[2] = vec_mul_float( a_Point3[2], randfloat() );
+    a_Point3[2] = divf4( a_Point3[2], randfloat() );
+    a_Point3[2] = vec_add_float( a_Point3[2], randfloat() );
+    a_Point3[2] = vec_sub_float( a_Point3[2], randfloat() );
+    print( a_Point3, "Point3::operator [](2)" );
+    a_Point3.setZ( randfloat() );
+    print( a_Point3, "Point3::setZ()" );
+    printf("Point3::get( 0 ): %f\n", getfloat(a_Point3.getElem( 0 )) ); // read each element back three ways: getElem, operator[], named getter
+    printf("Point3::operator []( 0 ): %f\n", getfloat((vec_float4)a_Point3[0]) ); // operator[] result is explicitly cast to vec_float4 before getfloat()
+    printf("Point3::getX(): %f\n", getfloat(a_Point3.getX( )) );
+    printf("Point3::get( 1 ): %f\n", getfloat(a_Point3.getElem( 1 )) );
+    printf("Point3::operator []( 1 ): %f\n", getfloat((vec_float4)a_Point3[1]) );
+    printf("Point3::getY(): %f\n", getfloat(a_Point3.getY( )) );
+    printf("Point3::get( 2 ): %f\n", getfloat(a_Point3.getElem( 2 )) );
+    printf("Point3::operator []( 2 ): %f\n", getfloat((vec_float4)a_Point3[2]) );
+    printf("Point3::getZ(): %f\n", getfloat(a_Point3.getZ( )) );
+    print( ( a_Point3 - b_Point3 ), "Point3 - Point3" ); // arithmetic, per-element and reduction operations on a_Point3 with b_Point3 / b_Vector3
+    print( ( a_Point3 + b_Vector3 ), "Point3 + Vector3" );
+    print( ( a_Point3 - b_Vector3 ), "Point3 - Vector3" );
+    print( mulPerElem( a_Point3, b_Point3 ), "mulPerElem( Point3, Point3 )" );
+    print( divPerElem( a_Point3, b_Point3 ), "divPerElem( Point3, Point3 )" );
+    print( recipPerElem( a_Point3 ), "Point3 recip" );
+    print( sqrtPerElem( absPerElem( a_Point3 ) ), "Point3 sqrt" ); // absPerElem is applied first, so the sqrt/rsqrt inputs are absolute values
+    print( rsqrtPerElem( absPerElem( a_Point3 ) ), "Point3 rsqrt" );
+    print( absPerElem( a_Point3 ), "Point3 abs" );
+    print( copySignPerElem( a_Point3, b_Point3 ), "Point3 copySign" );
+    print( maxPerElem( a_Point3, b_Point3 ), "Point3 maximum Point3" );
+    print( minPerElem( a_Point3, b_Point3 ), "Point3 minimum Point3" );
+    printf("Point3 maximum of elements: %f\n", getfloat(maxElem( a_Point3 )));
+    printf("Point3 minimum of elements: %f\n", getfloat(minElem( a_Point3 )));
+    printf("Point3 sum of elements: %f\n", getfloat(sum( a_Point3 )));
+    printf("Point projection: %f\n", getfloat(projection( a_Point3, b_Vector3 )));
+    printf("Point distSqrFromOrigin: %f\n", getfloat(distSqrFromOrigin( a_Point3 )) );
+    printf("Point distFromOrigin: %f\n", getfloat(distFromOrigin( a_Point3 )) );
+    printf("Point distSqr: %f\n", getfloat(distSqr( a_Point3, b_Point3 )) );
+    printf("Point dist: %f\n", getfloat(dist( a_Point3, b_Point3 )) );
+    rndflt1 = randfloat(); // re-randomise b_..e_Point3; only b_ and c_ are read afterwards (by lerp)
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    b_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    c_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat(); // NOTE(review): d_/e_ are never read after this, but the draws presumably still advance the randfloat() sequence - confirm before removing
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    d_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    e_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    a_Point3 = lerp( randfloat(), b_Point3, c_Point3 ); // interpolation parameter is a fresh randfloat() draw
+    print( a_Point3, "Point3 lerp" );
+}
+
+void
+Quat_methods_test() // Exercises the Quat API and prints each result with a label; takes no arguments and returns nothing.
+{ // NOTE(review): statement order fixes the sequence of randfloat() draws and printed lines - presumably compared against reference output, confirm before reordering.
+    Vector3 a_Vector3, b_Vector3, c_Vector3, d_Vector3, e_Vector3; // for each type: a_/b_ get random values below, c_/d_/e_ are zeroed
+    Vector4 a_Vector4, b_Vector4, c_Vector4, d_Vector4, e_Vector4;
+    Point3 a_Point3, b_Point3, c_Point3, d_Point3, e_Point3; // Point3 values are only constructed and printed in this function
+    Quat a_Quat, b_Quat, c_Quat, d_Quat, e_Quat;
+    Aos::Quat aos_Quat_0, aos_Quat_1, aos_Quat_2, aos_Quat_3; // Aos-namespace quats used by the 4-quat constructor / get4Aos round trip
+    Quat soa_Quat;
+    vec_float4 rndflt1, rndflt2, rndflt3, rndflt4; // hold randfloat() results so that each draw is its own statement
+    rndflt1 = randfloat(); // separate statements fix the call order; as constructor arguments the evaluation order would be unspecified
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    a_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    b_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 );
+    print( a_Vector3, "set Vector3 with floats" );
+    print( b_Vector3, "set Vector3 with floats" );
+    c_Vector3 = Vector3( (vec_float4){0.0f} );
+    d_Vector3 = Vector3( (vec_float4){0.0f} );
+    e_Vector3 = Vector3( (vec_float4){0.0f} );
+    print( c_Vector3, "set Vector3 elements to zero" );
+    print( d_Vector3, "set Vector3 elements to zero" );
+    print( e_Vector3, "set Vector3 elements to zero" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    a_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    b_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 );
+    print( a_Vector4, "set Vector4 with floats" );
+    print( b_Vector4, "set Vector4 with floats" );
+    c_Vector4 = Vector4( (vec_float4){0.0f} );
+    d_Vector4 = Vector4( (vec_float4){0.0f} );
+    e_Vector4 = Vector4( (vec_float4){0.0f} );
+    print( c_Vector4, "set Vector4 elements to zero" );
+    print( d_Vector4, "set Vector4 elements to zero" );
+    print( e_Vector4, "set Vector4 elements to zero" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    a_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    b_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    print( a_Point3, "set Point3 with floats" );
+    print( b_Point3, "set Point3 with floats" );
+    c_Point3 = Point3( (vec_float4){0.0f} );
+    d_Point3 = Point3( (vec_float4){0.0f} );
+    e_Point3 = Point3( (vec_float4){0.0f} );
+    print( c_Point3, "set Point3 elements to zero" );
+    print( d_Point3, "set Point3 elements to zero" );
+    print( e_Point3, "set Point3 elements to zero" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    a_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    b_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    print( a_Quat, "set Quat with floats" );
+    print( b_Quat, "set Quat with floats" );
+    c_Quat = Quat( (vec_float4){0.0f} );
+    d_Quat = Quat( (vec_float4){0.0f} );
+    e_Quat = Quat( (vec_float4){0.0f} );
+    print( c_Quat, "set Quat elements to zero" );
+    print( d_Quat, "set Quat elements to zero" );
+    print( e_Quat, "set Quat elements to zero" );
+    a_Quat = Quat( a_Vector3, randfloat() ); // constructor variants: (Vector3, scalar), Vector4, single scalar
+    print( a_Quat, "set Quat with Vector3, float" );
+    a_Quat = Quat( a_Vector4 );
+    print( a_Quat, "construct Quat with Vector4" );
+    a_Quat = Quat( randfloat() );
+    print( a_Quat, "set Quat with float" );
+    a_Quat = Quat( randfloat() ); // NOTE(review): same call and label as the pair just above - looks intentional (output alignment?), confirm before removing
+    print( a_Quat, "set Quat with float" );
+    aos_Quat_0 = Aos::Quat( 0.0f, 1.0f, 2.0f, 3.0f );
+    aos_Quat_1 = Aos::Quat( 4.0f, 5.0f, 6.0f, 7.0f );
+    aos_Quat_2 = Aos::Quat( 8.0f, 9.0f, 10.0f, 11.0f );
+    aos_Quat_3 = Aos::Quat( 12.0f, 13.0f, 14.0f, 15.0f );
+    soa_Quat = Quat( aos_Quat_0, aos_Quat_1, aos_Quat_2, aos_Quat_3 ); // one Quat built from four Aos::Quat values
+    soa_Quat.get4Aos( aos_Quat_3, aos_Quat_2, aos_Quat_1, aos_Quat_0 ); // outputs passed in reverse order (3,2,1,0) relative to the constructor arguments
+    Aos::print( aos_Quat_0, "aos type 0" );
+    Aos::print( aos_Quat_1, "aos type 1" );
+    Aos::print( aos_Quat_2, "aos type 2" );
+    Aos::print( aos_Quat_3, "aos type 3" );
+    a_Quat = select( a_Quat, b_Quat, ((vec_uint4){0,0xffffffff,0,0xffffffff}) ); // mask alternates zero / all-ones words; which operand each picks is defined by select() (not visible here)
+    a_Quat.get4Aos( aos_Quat_0, aos_Quat_1, aos_Quat_2, aos_Quat_3 );
+    Aos::print( aos_Quat_0, "select 0" );
+    Aos::print( aos_Quat_1, "select 1" );
+    Aos::print( aos_Quat_2, "select 2" );
+    Aos::print( aos_Quat_3, "select 3" );
+    a_Quat = b_Quat;
+    print( a_Quat, "assign to Quat from Quat" );
+    a_Quat.setXYZ( a_Vector3 );
+    print( a_Quat, "set Quat xyz" );
+    print( a_Quat.getXYZ( ), "get Quat xyz" );
+    a_Quat = Quat( (vec_float4){0.0f} );
+    print( a_Quat, "set Quat elements to zero" );
+    a_Quat.setElem( 0, randfloat() );
+    print( a_Quat, "Quat::set( 0, float )" );
+    a_Quat[0] = randfloat(); // operator[] used as an assignment target, then read back and updated via mul/div/add/sub helpers
+    a_Quat[0] = vec_mul_float( a_Quat[0], randfloat() );
+    a_Quat[0] = divf4( a_Quat[0], randfloat() );
+    a_Quat[0] = vec_add_float( a_Quat[0], randfloat() );
+    a_Quat[0] = vec_sub_float( a_Quat[0], randfloat() );
+    print( a_Quat, "Quat::operator [](0)" );
+    a_Quat.setX( randfloat() );
+    print( a_Quat, "Quat::setX()" );
+    a_Quat.setElem( 1, randfloat() ); // same setElem / operator[] / setter sequence for element 1
+    print( a_Quat, "Quat::set( 1, float )" );
+    a_Quat[1] = randfloat();
+    a_Quat[1] = vec_mul_float( a_Quat[1], randfloat() );
+    a_Quat[1] = divf4( a_Quat[1], randfloat() );
+    a_Quat[1] = vec_add_float( a_Quat[1], randfloat() );
+    a_Quat[1] = vec_sub_float( a_Quat[1], randfloat() );
+    print( a_Quat, "Quat::operator [](1)" );
+    a_Quat.setY( randfloat() );
+    print( a_Quat, "Quat::setY()" );
+    a_Quat.setElem( 2, randfloat() ); // same sequence for element 2
+    print( a_Quat, "Quat::set( 2, float )" );
+    a_Quat[2] = randfloat();
+    a_Quat[2] = vec_mul_float( a_Quat[2], randfloat() );
+    a_Quat[2] = divf4( a_Quat[2], randfloat() );
+    a_Quat[2] = vec_add_float( a_Quat[2], randfloat() );
+    a_Quat[2] = vec_sub_float( a_Quat[2], randfloat() );
+    print( a_Quat, "Quat::operator [](2)" );
+    a_Quat.setZ( randfloat() );
+    print( a_Quat, "Quat::setZ()" );
+    a_Quat.setElem( 3, randfloat() ); // same sequence for element 3
+    print( a_Quat, "Quat::set( 3, float )" );
+    a_Quat[3] = randfloat();
+    a_Quat[3] = vec_mul_float( a_Quat[3], randfloat() );
+    a_Quat[3] = divf4( a_Quat[3], randfloat() );
+    a_Quat[3] = vec_add_float( a_Quat[3], randfloat() );
+    a_Quat[3] = vec_sub_float( a_Quat[3], randfloat() );
+    print( a_Quat, "Quat::operator [](3)" );
+    a_Quat.setW( randfloat() );
+    print( a_Quat, "Quat::setW()" );
+    printf("Quat::get( 0 ): %f\n", getfloat(a_Quat.getElem( 0 )) ); // read each element back three ways: getElem, operator[], named getter
+    printf("Quat::operator []( 0 ): %f\n", getfloat((vec_float4)a_Quat[0]) ); // operator[] result is explicitly cast to vec_float4 before getfloat()
+    printf("Quat::getX(): %f\n", getfloat(a_Quat.getX( )) );
+    printf("Quat::get( 1 ): %f\n", getfloat(a_Quat.getElem( 1 )) );
+    printf("Quat::operator []( 1 ): %f\n", getfloat((vec_float4)a_Quat[1]) );
+    printf("Quat::getY(): %f\n", getfloat(a_Quat.getY( )) );
+    printf("Quat::get( 2 ): %f\n", getfloat(a_Quat.getElem( 2 )) );
+    printf("Quat::operator []( 2 ): %f\n", getfloat((vec_float4)a_Quat[2]) );
+    printf("Quat::getZ(): %f\n", getfloat(a_Quat.getZ( )) );
+    printf("Quat::get( 3 ): %f\n", getfloat(a_Quat.getElem( 3 )) );
+    printf("Quat::operator []( 3 ): %f\n", getfloat((vec_float4)a_Quat[3]) );
+    printf("Quat::getW(): %f\n", getfloat(a_Quat.getW( )) );
+    print( ( a_Quat + b_Quat ), "Quat + Quat" ); // arithmetic operators on a_Quat with b_Quat and with scalar draws
+    print( ( a_Quat - b_Quat ), "Quat - Quat" );
+    print( ( a_Quat * b_Quat ), "Quat * Quat" );
+    print( ( a_Quat * randfloat() ), "Quat * float" );
+    print( ( a_Quat / randfloat() ), "Quat / float" );
+    print( ( randfloat() * a_Quat ), "float * Quat" );
+    print( ( -a_Quat ), "Quat negate" );
+    printf("Quat dot Quat: %f\n", getfloat(dot( a_Quat, b_Quat )));
+    printf("Quat lengthSqr: %f\n", getfloat(norm( a_Quat ))); // label says lengthSqr but the call is norm(); NOTE(review): presumably norm() is the squared length - confirm
+    printf("Quat length: %f\n", getfloat(length( a_Quat )));
+    print( normalize( a_Quat ), "Quat normalized" );
+    a_Quat = Quat::identity( ); // static factory functions: identity and the rotation builders
+    print( a_Quat, "set to identity" );
+    a_Quat = Quat::rotation( a_Vector3, b_Vector3 );
+    print( a_Quat, "Quat rotation between vectors" );
+    a_Quat = Quat::rotation( randfloat(), a_Vector3 );
+    print( a_Quat, "Quat rotation axis angle" );
+    a_Quat = Quat::rotationX( randfloat() );
+    print( a_Quat, "Quat rotationX" );
+    a_Quat = Quat::rotationY( randfloat() );
+    print( a_Quat, "Quat rotationY" );
+    a_Quat = Quat::rotationZ( randfloat() );
+    print( a_Quat, "Quat rotationZ" );
+    print( rotate( a_Quat, a_Vector3 ), "Quat rotate Vector3" ); // a_Quat still holds the rotationZ result assigned just above
+    print( conj( a_Quat ), "Quat conj" );
+    rndflt1 = randfloat(); // re-randomise b_..e_Quat as inputs for lerp / slerp / squad below
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    b_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    c_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    d_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    e_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    b_Quat = normalize( b_Quat ); // all four inputs are normalized before being interpolated
+    c_Quat = normalize( c_Quat );
+    d_Quat = normalize( d_Quat );
+    e_Quat = normalize( e_Quat );
+    a_Quat = lerp( randfloat(), b_Quat, c_Quat ); // each interpolation takes a fresh randfloat() draw as its parameter
+    print( a_Quat, "Quat lerp" );
+    a_Quat = slerp( randfloat(), b_Quat, c_Quat );
+    print( a_Quat, "Quat slerp" );
+    a_Quat = squad( randfloat(), b_Quat, c_Quat, d_Quat, e_Quat ); // squad is the only call here that reads d_Quat and e_Quat
+    print( a_Quat, "Quat squad" );
+}
+
+int main() // Test entry point: runs the four *_methods_test() routines twice between marker lines and returns 0.
+{
+    printf("\n __begin__ \n"); // start marker printed before any test output
+    for ( iteration = 0; iteration < 2; iteration++ ) { // 'iteration' is not declared in this function (defined elsewhere); it is 0 on the first pass and 1 on the second
+        Vector3_methods_test();
+        Vector4_methods_test();
+        Point3_methods_test();
+        Quat_methods_test();
+    }
+    printf("\n __end__ \n"); // end marker printed after both passes
+    return 0; // unconditional: nothing here checks the test results
+}
diff --git a/Extras/vectormathlibrary/tests/test2_aos_c.c b/Extras/vectormathlibrary/tests/test2_aos_c.c
new file mode 100644
index 000000000..27acec0a1
--- /dev/null
+++ b/Extras/vectormathlibrary/tests/test2_aos_c.c
@@ -0,0 +1,852 @@
+/*
+  Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+  All rights reserved.
+
+  Redistribution and use in source and binary forms,
+  with or without modification, are permitted provided that the
+  following conditions are met:
+   * Redistributions of source code must retain the above copyright
+     notice, this list of conditions and the following disclaimer.
+   * Redistributions in binary form must reproduce the above copyright
+     notice, this list of conditions and the following disclaimer in the
+     documentation and/or other materials provided with the distribution.
+   * Neither the name of the Sony Computer Entertainment Inc nor the names
+     of its contributors may be used to endorse or promote products derived
+     from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+  POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#define _VECTORMATH_AOS_TEST
+
+#include "vectormath_aos.h"
+#include "test.h"
+
+int iteration = 0;
+
+void
+Matrix3_methods_test()  // Builds random vectors/points/quats/matrices, then prints the results of Matrix3 construction and accessor calls.
+{
+    VmathMatrix3 a_Matrix3, b_Matrix3;
+    VmathMatrix4 a_Matrix4, b_Matrix4;
+    VmathTransform3 a_Transform3, b_Transform3;
+    VmathVector3 a_Vector3, b_Vector3, c_Vector3, d_Vector3;
+    VmathVector4 a_Vector4, b_Vector4, c_Vector4, d_Vector4;
+    VmathPoint3 a_Point3, b_Point3, c_Point3, d_Point3;
+    VmathQuat a_Quat, b_Quat, c_Quat, d_Quat;
+    VmathVector4 tmpV4;
+    VmathVector3 tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3, tmpV3_4, tmpV3_5, tmpV3_6, tmpV3_7;
+    VmathQuat tmpQ_0;
+    VmathVector3 tmpV3_8, tmpV3_9, tmpV3_10, tmpV3_11, tmpV3_12, tmpV3_13, tmpV3_14, tmpV3_15, tmpV3_16, tmpV3_17, tmpV3_18, tmpV3_19, tmpV3_20, tmpV3_21;
+    float rndflt1, rndflt2, rndflt3, rndflt4, rndflt5, rndflt6, pad;
+    // Set a pad value (float with bit pattern 0x7fffffff) to detect invalid use of padding.
+    // This will be NaN for scalar/PPU implementations, max. float for SPU.
+    union { float f; unsigned int u; } tmp;  // reinterprets the integer bit pattern as a float
+    tmp.u = 0x7fffffff;
+    pad = tmp.f;
+    rndflt1 = randfloat();  // randfloat() is not defined in this block (presumably from test.h); call order presumably determines the printed values - confirm
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathV3MakeFromElems( &a_Vector3, rndflt1, rndflt2, rndflt3 );
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathV3MakeFromElems( &b_Vector3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathV3MakeFromElems( &c_Vector3, rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathV3MakeFromElems( &d_Vector3, rndflt1, rndflt2, rndflt3 );
+    vmathV4MakeFromV3Scalar( &tmpV4, &a_Vector3, pad );  // round-trip each Vector3 through tmpV4 with pad as the scalar;
+    vmathV4GetXYZ( &a_Vector3, &tmpV4 );                 // presumably this plants pad in any padding slot - confirm
+    vmathV4MakeFromV3Scalar( &tmpV4, &b_Vector3, pad );
+    vmathV4GetXYZ( &b_Vector3, &tmpV4 );
+    vmathV4MakeFromV3Scalar( &tmpV4, &c_Vector3, pad );
+    vmathV4GetXYZ( &c_Vector3, &tmpV4 );
+    vmathV4MakeFromV3Scalar( &tmpV4, &d_Vector3, pad );
+    vmathV4GetXYZ( &d_Vector3, &tmpV4 );
+    vmathV3Prints( &a_Vector3, "set Vector3 with floats" );
+    vmathV3Prints( &b_Vector3, "set Vector3 with floats" );
+    vmathV3Prints( &c_Vector3, "set Vector3 with floats" );
+    vmathV3Prints( &d_Vector3, "set Vector3 with floats" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathV4MakeFromElems( &a_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathV4MakeFromElems( &b_Vector4, rndflt3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathV4MakeFromElems( &c_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathV4MakeFromElems( &d_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    vmathV4Prints( &a_Vector4, "set Vector4 with floats" );
+    vmathV4Prints( &b_Vector4, "set Vector4 with floats" );
+    vmathV4Prints( &c_Vector4, "set Vector4 with floats" );
+    vmathV4Prints( &d_Vector4, "set Vector4 with floats" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathP3MakeFromElems( &a_Point3, rndflt1, rndflt2, rndflt3 );
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathP3MakeFromElems( &b_Point3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathP3MakeFromElems( &c_Point3, rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathP3MakeFromElems( &d_Point3, rndflt1, rndflt2, rndflt3 );
+    vmathV3MakeFromP3( &tmpV3_0, &a_Point3 );  // same pad round-trip for each Point3: Point3 -> Vector3 -> Vector4(pad) -> Vector3 -> Point3
+    vmathV4MakeFromV3Scalar( &tmpV4, &tmpV3_0, pad );
+    vmathV4GetXYZ( &tmpV3_1, &tmpV4 );
+    vmathP3MakeFromV3( &a_Point3, &tmpV3_1 );
+    vmathV3MakeFromP3( &tmpV3_2, &b_Point3 );
+    vmathV4MakeFromV3Scalar( &tmpV4, &tmpV3_2, pad );
+    vmathV4GetXYZ( &tmpV3_3, &tmpV4 );
+    vmathP3MakeFromV3( &b_Point3, &tmpV3_3 );
+    vmathV3MakeFromP3( &tmpV3_4, &c_Point3 );
+    vmathV4MakeFromV3Scalar( &tmpV4, &tmpV3_4, pad );
+    vmathV4GetXYZ( &tmpV3_5, &tmpV4 );
+    vmathP3MakeFromV3( &c_Point3, &tmpV3_5 );
+    vmathV3MakeFromP3( &tmpV3_6, &d_Point3 );
+    vmathV4MakeFromV3Scalar( &tmpV4, &tmpV3_6, pad );
+    vmathV4GetXYZ( &tmpV3_7, &tmpV4 );
+    vmathP3MakeFromV3( &d_Point3, &tmpV3_7 );
+    vmathP3Prints( &a_Point3, "set Point3 with floats" );
+    vmathP3Prints( &b_Point3, "set Point3 with floats" );
+    vmathP3Prints( &c_Point3, "set Point3 with floats" );
+    vmathP3Prints( &d_Point3, "set Point3 with floats" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathQMakeFromElems( &a_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathQMakeFromElems( &b_Quat, rndflt3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathQMakeFromElems( &c_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathQMakeFromElems( &d_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    vmathQPrints( &a_Quat, "set Quat with floats" );
+    vmathQPrints( &b_Quat, "set Quat with floats" );
+    vmathQPrints( &c_Quat, "set Quat with floats" );
+    vmathQPrints( &d_Quat, "set Quat with floats" );
+    vmathM3MakeFromCols( &a_Matrix3, &a_Vector3, &b_Vector3, &c_Vector3 );  // matrices and transforms are assembled from the vectors built above
+    vmathM3MakeFromCols( &b_Matrix3, &d_Vector3, &a_Vector3, &b_Vector3 );
+    vmathM3Prints( &a_Matrix3, "set Matrix3 columns" );
+    vmathM3Prints( &b_Matrix3, "set Matrix3 columns" );
+    vmathM4MakeFromCols( &a_Matrix4, &a_Vector4, &b_Vector4, &c_Vector4, &d_Vector4 );
+    vmathM4MakeFromCols( &b_Matrix4, &d_Vector4, &a_Vector4, &b_Vector4, &c_Vector4 );
+    vmathM4Prints( &a_Matrix4, "set Matrix4 columns" );
+    vmathM4Prints( &b_Matrix4, "set Matrix4 columns" );
+    vmathT3MakeFromCols( &a_Transform3, &a_Vector3, &b_Vector3, &c_Vector3, &d_Vector3 );
+    vmathT3MakeFromCols( &b_Transform3, &d_Vector3, &a_Vector3, &b_Vector3, &c_Vector3 );
+    vmathT3Prints( &a_Transform3, "set Transform3 columns" );
+    vmathT3Prints( &b_Transform3, "set Transform3 columns" );
+    vmathQNormalize( &tmpQ_0, &a_Quat );  // a_Quat is normalized into tmpQ_0 before the matrix is built from it
+    vmathM3MakeFromQ( &a_Matrix3, &tmpQ_0 );
+    vmathM3Prints( &a_Matrix3, "construct Matrix3 with Quat" );
+    vmathQMakeFromM3( &a_Quat, &a_Matrix3 );  // overwrites a_Quat with the quat rebuilt from that matrix
+    vmathQPrints( &a_Quat, "construct Quat with Matrix3" );
+    vmathM3Copy( &a_Matrix3, &b_Matrix3 );
+    vmathM3Prints( &a_Matrix3, "assign to Matrix3 from Matrix3" );
+    vmathM3MakeFromScalar( &a_Matrix3, randfloat() );
+    vmathM3Prints( &a_Matrix3, "set Matrix3 with float" );
+    vmathM3MakeFromScalar( &a_Matrix3, randfloat() );
+    vmathM3Prints( &a_Matrix3, "set Matrix3 with float" );
+    vmathM3MakeFromScalar( &a_Matrix3, 0.0f );
+    vmathM3Prints( &a_Matrix3, "set elements to zero" );
+    vmathM3MakeIdentity( &a_Matrix3 );
+    vmathM3Prints( &a_Matrix3, "set to identity" );
+    vmathM3MakeRotationX( &a_Matrix3, randfloat() );
+    vmathM3Prints( &a_Matrix3, "set to rotationX" );
+    vmathM3MakeRotationY( &a_Matrix3, randfloat() );
+    vmathM3Prints( &a_Matrix3, "set to rotationY" );
+    vmathM3MakeRotationZ( &a_Matrix3, randfloat() );
+    vmathM3Prints( &a_Matrix3, "set to rotationZ" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathV3MakeFromElems( &tmpV3_8, rndflt3, rndflt2, rndflt1 );  // note the reversed argument order: rndflt3, rndflt2, rndflt1
+    vmathM3MakeRotationZYX( &a_Matrix3, &tmpV3_8 );
+    vmathM3Prints( &a_Matrix3, "set to rotation from Z,Y,X angles" );
+    vmathV3Normalize( &tmpV3_9, &a_Vector3 );  // normalized copy of a_Vector3 is passed as the rotation axis
+    vmathM3MakeRotationAxis( &a_Matrix3, randfloat(), &tmpV3_9 );
+    vmathM3Prints( &a_Matrix3, "set to rotation from axis angle" );
+    vmathM3SetCol0( &a_Matrix3, &a_Vector3 );  // named per-column setters/getters (SetCol0..2 / GetCol0..2)
+    vmathM3Prints( &a_Matrix3, "Matrix3 set col 0" );
+    vmathM3SetCol1( &a_Matrix3, &a_Vector3 );
+    vmathM3Prints( &a_Matrix3, "Matrix3 set col 1" );
+    vmathM3SetCol2( &a_Matrix3, &a_Vector3 );
+    vmathM3Prints( &a_Matrix3, "Matrix3 set col 2" );
+    vmathM3GetCol0( &tmpV3_10, &a_Matrix3 );
+    vmathV3Prints( &tmpV3_10, "Matrix3 get col 0" );
+    vmathM3GetCol1( &tmpV3_11, &a_Matrix3 );
+    vmathV3Prints( &tmpV3_11, "Matrix3 get col 1" );
+    vmathM3GetCol2( &tmpV3_12, &a_Matrix3 );
+    vmathV3Prints( &tmpV3_12, "Matrix3 get col 2" );
+    vmathM3SetCol( &a_Matrix3, 0, &b_Vector3 );  // indexed column setters/getters, this time writing b_Vector3
+    vmathM3Prints( &a_Matrix3, "Matrix3 set col 0" );
+    vmathM3SetCol( &a_Matrix3, 1, &b_Vector3 );
+    vmathM3Prints( &a_Matrix3, "Matrix3 set col 1" );
+    vmathM3SetCol( &a_Matrix3, 2, &b_Vector3 );
+    vmathM3Prints( &a_Matrix3, "Matrix3 set col 2" );
+    vmathM3GetCol( &tmpV3_13, &a_Matrix3, 0 );
+    vmathV3Prints( &tmpV3_13, "Matrix3 get col 0" );
+    vmathM3GetCol( &tmpV3_14, &a_Matrix3, 1 );
+    vmathV3Prints( &tmpV3_14, "Matrix3 get col 1" );
+    vmathM3GetCol( &tmpV3_15, &a_Matrix3, 2 );
+    vmathV3Prints( &tmpV3_15, "Matrix3 get col 2" );
+    vmathM3SetRow( &a_Matrix3, 0, &a_Vector3 );  // indexed row setters/getters
+    vmathM3Prints( &a_Matrix3, "Matrix3 set row 0" );
+    vmathM3SetRow( &a_Matrix3, 1, &a_Vector3 );
+    vmathM3Prints( &a_Matrix3, "Matrix3 set row 1" );
+    vmathM3SetRow( &a_Matrix3, 2, &a_Vector3 );
+    vmathM3Prints( &a_Matrix3, "Matrix3 set row 2" );
+    vmathM3GetRow( &tmpV3_16, &a_Matrix3, 0 );
+    vmathV3Prints( &tmpV3_16, "Matrix3 get row 0" );
+    vmathM3GetRow( &tmpV3_17, &a_Matrix3, 1 );
+    vmathV3Prints( &tmpV3_17, "Matrix3 get row 1" );
+    vmathM3GetRow( &tmpV3_18, &a_Matrix3, 2 );
+    vmathV3Prints( &tmpV3_18, "Matrix3 get row 2" );
+    vmathM3SetCol( &a_Matrix3, 0, &a_Vector3 );  // SetCol/GetCol once more with a_Vector3, printed under the short "set "/"get " labels
+    vmathM3Prints( &a_Matrix3, "set " );
+    vmathM3SetCol( &a_Matrix3, 1, &a_Vector3 );
+    vmathM3Prints( &a_Matrix3, "set " );
+    vmathM3SetCol( &a_Matrix3, 2, &a_Vector3 );
+    vmathM3Prints( &a_Matrix3, "set " );
+    vmathM3GetCol( &tmpV3_19, &a_Matrix3, 0 );
+    vmathV3Prints( &tmpV3_19, "get " );
+    vmathM3GetCol( &tmpV3_20, &a_Matrix3, 1 );
+    vmathV3Prints( &tmpV3_20, "get " );
+    vmathM3GetCol( &tmpV3_21, &a_Matrix3, 2 );
+    vmathV3Prints( &tmpV3_21, "get " );
+    vmathM3SetElem( &a_Matrix3, 0, 0, randfloat() );  // write all nine index pairs (0..2, 0..2) individually
+    vmathM3SetElem( &a_Matrix3, 0, 1, randfloat() );
+    vmathM3SetElem( &a_Matrix3, 0, 2, randfloat() );
+    vmathM3SetElem( &a_Matrix3, 1, 0, randfloat() );
+    vmathM3SetElem( &a_Matrix3, 1, 1, randfloat() );
+    vmathM3SetElem( &a_Matrix3, 1, 2, randfloat() );
+    vmathM3SetElem( &a_Matrix3, 2, 0, randfloat() );
+    vmathM3SetElem( &a_Matrix3, 2, 1, randfloat() );
+    vmathM3SetElem( &a_Matrix3, 2, 2, randfloat() );
+    vmathM3Prints( &a_Matrix3, "Matrix3 set elements" );
+    printf("%f\n", getfloat(vmathM3GetElem( &a_Matrix3, 0, 0 )) );  // read each element back in the same order; getfloat() is defined outside this block
+    printf("%f\n", getfloat(vmathM3GetElem( &a_Matrix3, 0, 1 )) );
+    printf("%f\n", getfloat(vmathM3GetElem( &a_Matrix3, 0, 2 )) );
+    printf("%f\n", getfloat(vmathM3GetElem( &a_Matrix3, 1, 0 )) );
+    printf("%f\n", getfloat(vmathM3GetElem( &a_Matrix3, 1, 1 )) );
+    printf("%f\n", getfloat(vmathM3GetElem( &a_Matrix3, 1, 2 )) );
+    printf("%f\n", getfloat(vmathM3GetElem( &a_Matrix3, 2, 0 )) );
+    printf("%f\n", getfloat(vmathM3GetElem( &a_Matrix3, 2, 1 )) );
+    printf("%f\n", getfloat(vmathM3GetElem( &a_Matrix3, 2, 2 )) );
+}
+
+void
+Matrix4_methods_test()  // Builds random vectors/points/quats/matrices, then prints the results of Matrix4 construction and accessor calls.
+{
+    VmathMatrix3 a_Matrix3, b_Matrix3;
+    VmathMatrix4 a_Matrix4, b_Matrix4;
+    VmathTransform3 a_Transform3, b_Transform3;
+    VmathVector3 a_Vector3, b_Vector3, c_Vector3, d_Vector3;
+    VmathVector4 a_Vector4, b_Vector4, c_Vector4, d_Vector4;
+    VmathPoint3 a_Point3, b_Point3, c_Point3, d_Point3;
+    VmathQuat a_Quat, b_Quat, c_Quat, d_Quat;
+    VmathVector4 tmpV4;
+    VmathVector3 tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3, tmpV3_4, tmpV3_5, tmpV3_6, tmpV3_7, tmpV3_8, tmpV3_9;
+    VmathVector4 tmpV4_0, tmpV4_1, tmpV4_2, tmpV4_3, tmpV4_4, tmpV4_5, tmpV4_6, tmpV4_7, tmpV4_8, tmpV4_9, tmpV4_10, tmpV4_11, tmpV4_12, tmpV4_13, tmpV4_14, tmpV4_15;
+    float rndflt1, rndflt2, rndflt3, rndflt4, rndflt5, rndflt6, pad;
+    // Set a pad value (float with bit pattern 0x7fffffff) to detect invalid use of padding.
+    // This will be NaN for scalar/PPU implementations, max. float for SPU.
+    union { float f; unsigned int u; } tmp;  // reinterprets the integer bit pattern as a float
+    tmp.u = 0x7fffffff;
+    pad = tmp.f;
+    rndflt1 = randfloat();  // randfloat() is not defined in this block (presumably from test.h); call order presumably determines the printed values - confirm
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathV3MakeFromElems( &a_Vector3, rndflt1, rndflt2, rndflt3 );
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathV3MakeFromElems( &b_Vector3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathV3MakeFromElems( &c_Vector3, rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathV3MakeFromElems( &d_Vector3, rndflt1, rndflt2, rndflt3 );
+    vmathV4MakeFromV3Scalar( &tmpV4, &a_Vector3, pad );  // round-trip each Vector3 through tmpV4 with pad as the scalar;
+    vmathV4GetXYZ( &a_Vector3, &tmpV4 );                 // presumably this plants pad in any padding slot - confirm
+    vmathV4MakeFromV3Scalar( &tmpV4, &b_Vector3, pad );
+    vmathV4GetXYZ( &b_Vector3, &tmpV4 );
+    vmathV4MakeFromV3Scalar( &tmpV4, &c_Vector3, pad );
+    vmathV4GetXYZ( &c_Vector3, &tmpV4 );
+    vmathV4MakeFromV3Scalar( &tmpV4, &d_Vector3, pad );
+    vmathV4GetXYZ( &d_Vector3, &tmpV4 );
+    vmathV3Prints( &a_Vector3, "set Vector3 with floats" );
+    vmathV3Prints( &b_Vector3, "set Vector3 with floats" );
+    vmathV3Prints( &c_Vector3, "set Vector3 with floats" );
+    vmathV3Prints( &d_Vector3, "set Vector3 with floats" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathV4MakeFromElems( &a_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathV4MakeFromElems( &b_Vector4, rndflt3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathV4MakeFromElems( &c_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathV4MakeFromElems( &d_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    vmathV4Prints( &a_Vector4, "set Vector4 with floats" );
+    vmathV4Prints( &b_Vector4, "set Vector4 with floats" );
+    vmathV4Prints( &c_Vector4, "set Vector4 with floats" );
+    vmathV4Prints( &d_Vector4, "set Vector4 with floats" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathP3MakeFromElems( &a_Point3, rndflt1, rndflt2, rndflt3 );
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathP3MakeFromElems( &b_Point3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathP3MakeFromElems( &c_Point3, rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathP3MakeFromElems( &d_Point3, rndflt1, rndflt2, rndflt3 );
+    vmathV3MakeFromP3( &tmpV3_0, &a_Point3 );  // same pad round-trip for each Point3: Point3 -> Vector3 -> Vector4(pad) -> Vector3 -> Point3
+    vmathV4MakeFromV3Scalar( &tmpV4, &tmpV3_0, pad );
+    vmathV4GetXYZ( &tmpV3_1, &tmpV4 );
+    vmathP3MakeFromV3( &a_Point3, &tmpV3_1 );
+    vmathV3MakeFromP3( &tmpV3_2, &b_Point3 );
+    vmathV4MakeFromV3Scalar( &tmpV4, &tmpV3_2, pad );
+    vmathV4GetXYZ( &tmpV3_3, &tmpV4 );
+    vmathP3MakeFromV3( &b_Point3, &tmpV3_3 );
+    vmathV3MakeFromP3( &tmpV3_4, &c_Point3 );
+    vmathV4MakeFromV3Scalar( &tmpV4, &tmpV3_4, pad );
+    vmathV4GetXYZ( &tmpV3_5, &tmpV4 );
+    vmathP3MakeFromV3( &c_Point3, &tmpV3_5 );
+    vmathV3MakeFromP3( &tmpV3_6, &d_Point3 );
+    vmathV4MakeFromV3Scalar( &tmpV4, &tmpV3_6, pad );
+    vmathV4GetXYZ( &tmpV3_7, &tmpV4 );
+    vmathP3MakeFromV3( &d_Point3, &tmpV3_7 );
+    vmathP3Prints( &a_Point3, "set Point3 with floats" );
+    vmathP3Prints( &b_Point3, "set Point3 with floats" );
+    vmathP3Prints( &c_Point3, "set Point3 with floats" );
+    vmathP3Prints( &d_Point3, "set Point3 with floats" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathQMakeFromElems( &a_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathQMakeFromElems( &b_Quat, rndflt3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathQMakeFromElems( &c_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathQMakeFromElems( &d_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    vmathQPrints( &a_Quat, "set Quat with floats" );
+    vmathQPrints( &b_Quat, "set Quat with floats" );
+    vmathQPrints( &c_Quat, "set Quat with floats" );
+    vmathQPrints( &d_Quat, "set Quat with floats" );
+    vmathM3MakeFromCols( &a_Matrix3, &a_Vector3, &b_Vector3, &c_Vector3 );  // matrices and transforms are assembled from the vectors built above
+    vmathM3MakeFromCols( &b_Matrix3, &d_Vector3, &a_Vector3, &b_Vector3 );
+    vmathM3Prints( &a_Matrix3, "set Matrix3 columns" );
+    vmathM3Prints( &b_Matrix3, "set Matrix3 columns" );
+    vmathM4MakeFromCols( &a_Matrix4, &a_Vector4, &b_Vector4, &c_Vector4, &d_Vector4 );
+    vmathM4MakeFromCols( &b_Matrix4, &d_Vector4, &a_Vector4, &b_Vector4, &c_Vector4 );
+    vmathM4Prints( &a_Matrix4, "set Matrix4 columns" );
+    vmathM4Prints( &b_Matrix4, "set Matrix4 columns" );
+    vmathT3MakeFromCols( &a_Transform3, &a_Vector3, &b_Vector3, &c_Vector3, &d_Vector3 );
+    vmathT3MakeFromCols( &b_Transform3, &d_Vector3, &a_Vector3, &b_Vector3, &c_Vector3 );
+    vmathT3Prints( &a_Transform3, "set Transform3 columns" );
+    vmathT3Prints( &b_Transform3, "set Transform3 columns" );
+    vmathM4MakeFromT3( &a_Matrix4, &a_Transform3 );  // from here on a_Matrix4 is overwritten by each constructor/builder and printed
+    vmathM4Prints( &a_Matrix4, "construct Matrix4 with Transform3" );
+    vmathM4MakeFromM3V3( &a_Matrix4, &a_Matrix3, &a_Vector3 );
+    vmathM4Prints( &a_Matrix4, "construct Matrix4 with Matrix3 and Vector3" );
+    vmathM4MakeFromQV3( &a_Matrix4, &a_Quat, &a_Vector3 );  // NOTE(review): a_Quat is not normalized here, unlike the Matrix3 test before vmathM3MakeFromQ - confirm intended
+    vmathM4Prints( &a_Matrix4, "construct Matrix4 with Quat and Vector3" );
+    vmathM4Copy( &a_Matrix4, &b_Matrix4 );
+    vmathM4Prints( &a_Matrix4, "assign to Matrix4 from Matrix4" );
+    vmathM4MakeFromScalar( &a_Matrix4, randfloat() );
+    vmathM4Prints( &a_Matrix4, "set Matrix4 with float" );
+    vmathM4MakeFromScalar( &a_Matrix4, randfloat() );
+    vmathM4Prints( &a_Matrix4, "set Matrix4 with float" );
+    vmathM4MakeFromScalar( &a_Matrix4, 0.0f );
+    vmathM4Prints( &a_Matrix4, "set elements to zero" );
+    vmathM4MakeIdentity( &a_Matrix4 );
+    vmathM4Prints( &a_Matrix4, "set to identity" );
+    vmathM4MakeRotationX( &a_Matrix4, randfloat() );
+    vmathM4Prints( &a_Matrix4, "set to rotationX" );
+    vmathM4MakeRotationY( &a_Matrix4, randfloat() );
+    vmathM4Prints( &a_Matrix4, "set to rotationY" );
+    vmathM4MakeRotationZ( &a_Matrix4, randfloat() );
+    vmathM4Prints( &a_Matrix4, "set to rotationZ" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathV3MakeFromElems( &tmpV3_8, rndflt3, rndflt2, rndflt1 );  // note the reversed argument order: rndflt3, rndflt2, rndflt1
+    vmathM4MakeRotationZYX( &a_Matrix4, &tmpV3_8 );
+    vmathM4Prints( &a_Matrix4, "set to rotation from Z,Y,X angles" );
+    vmathV3Normalize( &tmpV3_9, &a_Vector3 );  // normalized copy of a_Vector3 is passed as the rotation axis
+    vmathM4MakeRotationAxis( &a_Matrix4, randfloat(), &tmpV3_9 );
+    vmathM4Prints( &a_Matrix4, "set to rotation from axis angle" );
+    vmathM4MakeTranslation( &a_Matrix4, &a_Vector3 );
+    vmathM4Prints( &a_Matrix4, "set to translation" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathM4MakePerspective( &a_Matrix4, rndflt1, rndflt2, rndflt3, rndflt4 );  // projection builders are fed unconstrained random floats
+    vmathM4Prints( &a_Matrix4, "set to perspective matrix" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathM4MakeFrustum( &a_Matrix4, rndflt1, rndflt2, rndflt3, rndflt4, rndflt5, rndflt6 );
+    vmathM4Prints( &a_Matrix4, "set to frustum matrix" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathM4MakeOrthographic( &a_Matrix4, rndflt1, rndflt2, rndflt3, rndflt4, rndflt5, rndflt6 );
+    vmathM4Prints( &a_Matrix4, "set to orthographic matrix" );
+    vmathM4MakeLookAt( &a_Matrix4, &a_Point3, &b_Point3, &a_Vector3 );
+    vmathM4Prints( &a_Matrix4, "set to look-at matrix" );
+    vmathM4SetCol0( &a_Matrix4, &a_Vector4 );  // named per-column setters/getters (SetCol0..3 / GetCol0..3)
+    vmathM4Prints( &a_Matrix4, "Matrix4 set col 0" );
+    vmathM4SetCol1( &a_Matrix4, &a_Vector4 );
+    vmathM4Prints( &a_Matrix4, "Matrix4 set col 1" );
+    vmathM4SetCol2( &a_Matrix4, &a_Vector4 );
+    vmathM4Prints( &a_Matrix4, "Matrix4 set col 2" );
+    vmathM4SetCol3( &a_Matrix4, &a_Vector4 );
+    vmathM4Prints( &a_Matrix4, "Matrix4 set col 3" );
+    vmathM4GetCol0( &tmpV4_0, &a_Matrix4 );
+    vmathV4Prints( &tmpV4_0, "Matrix4 get col 0" );
+    vmathM4GetCol1( &tmpV4_1, &a_Matrix4 );
+    vmathV4Prints( &tmpV4_1, "Matrix4 get col 1" );
+    vmathM4GetCol2( &tmpV4_2, &a_Matrix4 );
+    vmathV4Prints( &tmpV4_2, "Matrix4 get col 2" );
+    vmathM4GetCol3( &tmpV4_3, &a_Matrix4 );
+    vmathV4Prints( &tmpV4_3, "Matrix4 get col 3" );
+    vmathM4SetCol( &a_Matrix4, 0, &b_Vector4 );  // indexed column setters/getters, this time writing b_Vector4
+    vmathM4Prints( &a_Matrix4, "Matrix4 set col 0" );
+    vmathM4SetCol( &a_Matrix4, 1, &b_Vector4 );
+    vmathM4Prints( &a_Matrix4, "Matrix4 set col 1" );
+    vmathM4SetCol( &a_Matrix4, 2, &b_Vector4 );
+    vmathM4Prints( &a_Matrix4, "Matrix4 set col 2" );
+    vmathM4SetCol( &a_Matrix4, 3, &b_Vector4 );
+    vmathM4Prints( &a_Matrix4, "Matrix4 set col 3" );
+    vmathM4GetCol( &tmpV4_4, &a_Matrix4, 0 );
+    vmathV4Prints( &tmpV4_4, "Matrix4 get col 0" );
+    vmathM4GetCol( &tmpV4_5, &a_Matrix4, 1 );
+    vmathV4Prints( &tmpV4_5, "Matrix4 get col 1" );
+    vmathM4GetCol( &tmpV4_6, &a_Matrix4, 2 );
+    vmathV4Prints( &tmpV4_6, "Matrix4 get col 2" );
+    vmathM4GetCol( &tmpV4_7, &a_Matrix4, 3 );
+    vmathV4Prints( &tmpV4_7, "Matrix4 get col 3" );
+    vmathM4SetRow( &a_Matrix4, 0, &a_Vector4 );  // indexed row setters/getters
+    vmathM4Prints( &a_Matrix4, "Matrix4 set row 0" );
+    vmathM4SetRow( &a_Matrix4, 1, &a_Vector4 );
+    vmathM4Prints( &a_Matrix4, "Matrix4 set row 1" );
+    vmathM4SetRow( &a_Matrix4, 2, &a_Vector4 );
+    vmathM4Prints( &a_Matrix4, "Matrix4 set row 2" );
+    vmathM4SetRow( &a_Matrix4, 3, &a_Vector4 );
+    vmathM4Prints( &a_Matrix4, "Matrix4 set row 3" );
+    vmathM4GetRow( &tmpV4_8, &a_Matrix4, 0 );
+    vmathV4Prints( &tmpV4_8, "Matrix4 get row 0" );
+    vmathM4GetRow( &tmpV4_9, &a_Matrix4, 1 );
+    vmathV4Prints( &tmpV4_9, "Matrix4 get row 1" );
+    vmathM4GetRow( &tmpV4_10, &a_Matrix4, 2 );
+    vmathV4Prints( &tmpV4_10, "Matrix4 get row 2" );
+    vmathM4GetRow( &tmpV4_11, &a_Matrix4, 3 );
+    vmathV4Prints( &tmpV4_11, "Matrix4 get row 3" );
+    vmathM4SetCol( &a_Matrix4, 0, &a_Vector4 );  // SetCol/GetCol once more with a_Vector4, printed under the short "set "/"get " labels
+    vmathM4Prints( &a_Matrix4, "set " );
+    vmathM4SetCol( &a_Matrix4, 1, &a_Vector4 );
+    vmathM4Prints( &a_Matrix4, "set " );
+    vmathM4SetCol( &a_Matrix4, 2, &a_Vector4 );
+    vmathM4Prints( &a_Matrix4, "set " );
+    vmathM4SetCol( &a_Matrix4, 3, &a_Vector4 );
+    vmathM4Prints( &a_Matrix4, "set " );
+    vmathM4GetCol( &tmpV4_12, &a_Matrix4, 0 );
+    vmathV4Prints( &tmpV4_12, "get " );
+    vmathM4GetCol( &tmpV4_13, &a_Matrix4, 1 );
+    vmathV4Prints( &tmpV4_13, "get " );
+    vmathM4GetCol( &tmpV4_14, &a_Matrix4, 2 );
+    vmathV4Prints( &tmpV4_14, "get " );
+    vmathM4GetCol( &tmpV4_15, &a_Matrix4, 3 );
+    vmathV4Prints( &tmpV4_15, "get " );
+    vmathM4SetElem( &a_Matrix4, 0, 0, randfloat() );  // write all sixteen index pairs (0..3, 0..3) individually
+    vmathM4SetElem( &a_Matrix4, 0, 1, randfloat() );
+    vmathM4SetElem( &a_Matrix4, 0, 2, randfloat() );
+    vmathM4SetElem( &a_Matrix4, 0, 3, randfloat() );
+    vmathM4SetElem( &a_Matrix4, 1, 0, randfloat() );
+    vmathM4SetElem( &a_Matrix4, 1, 1, randfloat() );
+    vmathM4SetElem( &a_Matrix4, 1, 2, randfloat() );
+    vmathM4SetElem( &a_Matrix4, 1, 3, randfloat() );
+    vmathM4SetElem( &a_Matrix4, 2, 0, randfloat() );
+    vmathM4SetElem( &a_Matrix4, 2, 1, randfloat() );
+    vmathM4SetElem( &a_Matrix4, 2, 2, randfloat() );
+    vmathM4SetElem( &a_Matrix4, 2, 3, randfloat() );
+    vmathM4SetElem( &a_Matrix4, 3, 0, randfloat() );
+    vmathM4SetElem( &a_Matrix4, 3, 1, randfloat() );
+    vmathM4SetElem( &a_Matrix4, 3, 2, randfloat() );
+    vmathM4SetElem( &a_Matrix4, 3, 3, randfloat() );
+    vmathM4Prints( &a_Matrix4, "Matrix4 set elements" );
+    printf("%f\n", getfloat(vmathM4GetElem( &a_Matrix4, 0, 0 )) );  // read each element back in the same order; getfloat() is defined outside this block
+    printf("%f\n", getfloat(vmathM4GetElem( &a_Matrix4, 0, 1 )) );
+    printf("%f\n", getfloat(vmathM4GetElem( &a_Matrix4, 0, 2 )) );
+    printf("%f\n", getfloat(vmathM4GetElem( &a_Matrix4, 0, 3 )) );
+    printf("%f\n", getfloat(vmathM4GetElem( &a_Matrix4, 1, 0 )) );
+    printf("%f\n", getfloat(vmathM4GetElem( &a_Matrix4, 1, 1 )) );
+    printf("%f\n", getfloat(vmathM4GetElem( &a_Matrix4, 1, 2 )) );
+    printf("%f\n", getfloat(vmathM4GetElem( &a_Matrix4, 1, 3 )) );
+    printf("%f\n", getfloat(vmathM4GetElem( &a_Matrix4, 2, 0 )) );
+    printf("%f\n", getfloat(vmathM4GetElem( &a_Matrix4, 2, 1 )) );
+    printf("%f\n", getfloat(vmathM4GetElem( &a_Matrix4, 2, 2 )) );
+    printf("%f\n", getfloat(vmathM4GetElem( &a_Matrix4, 2, 3 )) );
+    printf("%f\n", getfloat(vmathM4GetElem( &a_Matrix4, 3, 0 )) );
+    printf("%f\n", getfloat(vmathM4GetElem( &a_Matrix4, 3, 1 )) );
+    printf("%f\n", getfloat(vmathM4GetElem( &a_Matrix4, 3, 2 )) );
+    printf("%f\n", getfloat(vmathM4GetElem( &a_Matrix4, 3, 3 )) );
+}
+
+void
+Transform3_methods_test()
+{
+    VmathMatrix3 a_Matrix3, b_Matrix3;
+    VmathMatrix4 a_Matrix4, b_Matrix4;
+    VmathTransform3 a_Transform3, b_Transform3;
+    VmathVector3 a_Vector3, b_Vector3, c_Vector3, d_Vector3;
+    VmathVector4 a_Vector4, b_Vector4, c_Vector4, d_Vector4;
+    VmathPoint3 a_Point3, b_Point3, c_Point3, d_Point3;
+    VmathQuat a_Quat, b_Quat, c_Quat, d_Quat;
+    VmathVector4 tmpV4;
+    VmathVector3 tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3, tmpV3_4, tmpV3_5, tmpV3_6, tmpV3_7, tmpV3_8, tmpV3_9, tmpV3_10, tmpV3_11, tmpV3_12, tmpV3_13, tmpV3_14, tmpV3_15, tmpV3_16, tmpV3_17;
+    VmathVector4 tmpV4_0, tmpV4_1, tmpV4_2;
+    VmathVector3 tmpV3_18, tmpV3_19, tmpV3_20, tmpV3_21;
+    float rndflt1, rndflt2, rndflt3, rndflt4, rndflt5, rndflt6, pad;
+    // set a pad value to detect invalid use of padding.
+    // this will be nan for scalar/ppu implementations, max. float for spu
+    union { float f; unsigned int u; } tmp;
+    tmp.u = 0x7fffffff;
+    pad = tmp.f;
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathV3MakeFromElems( &a_Vector3, rndflt1, rndflt2, rndflt3 );
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathV3MakeFromElems( &b_Vector3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathV3MakeFromElems( &c_Vector3, rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathV3MakeFromElems( &d_Vector3, rndflt1, rndflt2, rndflt3 );
+    vmathV4MakeFromV3Scalar( &tmpV4, &a_Vector3, pad );
+    vmathV4GetXYZ( &a_Vector3, &tmpV4 );
+    vmathV4MakeFromV3Scalar( &tmpV4, &b_Vector3, pad );
+    vmathV4GetXYZ( &b_Vector3, &tmpV4 );
+    vmathV4MakeFromV3Scalar( &tmpV4, &c_Vector3, pad );
+    vmathV4GetXYZ( &c_Vector3, &tmpV4 );
+    vmathV4MakeFromV3Scalar( &tmpV4, &d_Vector3, pad );
+    vmathV4GetXYZ( &d_Vector3, &tmpV4 );
+    vmathV3Prints( &a_Vector3, "set Vector3 with floats" );
+    vmathV3Prints( &b_Vector3, "set Vector3 with floats" );
+    vmathV3Prints( &c_Vector3, "set Vector3 with floats" );
+    vmathV3Prints( &d_Vector3, "set Vector3 with floats" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathV4MakeFromElems( &a_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathV4MakeFromElems( &b_Vector4, rndflt3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathV4MakeFromElems( &c_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathV4MakeFromElems( &d_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    vmathV4Prints( &a_Vector4, "set Vector4 with floats" );
+    vmathV4Prints( &b_Vector4, "set Vector4 with floats" );
+    vmathV4Prints( &c_Vector4, "set Vector4 with floats" );
+    vmathV4Prints( &d_Vector4, "set Vector4 with floats" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathP3MakeFromElems( &a_Point3, rndflt1, rndflt2, rndflt3 );
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathP3MakeFromElems( &b_Point3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathP3MakeFromElems( &c_Point3, rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathP3MakeFromElems( &d_Point3, rndflt1, rndflt2, rndflt3 );
+    vmathV3MakeFromP3( &tmpV3_0, &a_Point3 );
+    vmathV4MakeFromV3Scalar( &tmpV4, &tmpV3_0, pad );
+    vmathV4GetXYZ( &tmpV3_1, &tmpV4 );
+    vmathP3MakeFromV3( &a_Point3, &tmpV3_1 );
+    vmathV3MakeFromP3( &tmpV3_2, &b_Point3 );
+    vmathV4MakeFromV3Scalar( &tmpV4, &tmpV3_2, pad );
+    vmathV4GetXYZ( &tmpV3_3, &tmpV4 );
+    vmathP3MakeFromV3( &b_Point3, &tmpV3_3 );
+    vmathV3MakeFromP3( &tmpV3_4, &c_Point3 );
+    vmathV4MakeFromV3Scalar( &tmpV4, &tmpV3_4, pad );
+    vmathV4GetXYZ( &tmpV3_5, &tmpV4 );
+    vmathP3MakeFromV3( &c_Point3, &tmpV3_5 );
+    vmathV3MakeFromP3( &tmpV3_6, &d_Point3 );
+    vmathV4MakeFromV3Scalar( &tmpV4, &tmpV3_6, pad );
+    vmathV4GetXYZ( &tmpV3_7, &tmpV4 );
+    vmathP3MakeFromV3( &d_Point3, &tmpV3_7 );
+    vmathP3Prints( &a_Point3, "set Point3 with floats" );
+    vmathP3Prints( &b_Point3, "set Point3 with floats" );
+    vmathP3Prints( &c_Point3, "set Point3 with floats" );
+    vmathP3Prints( &d_Point3, "set Point3 with floats" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathQMakeFromElems( &a_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathQMakeFromElems( &b_Quat, rndflt3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathQMakeFromElems( &c_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathQMakeFromElems( &d_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    vmathQPrints( &a_Quat, "set Quat with floats" );
+    vmathQPrints( &b_Quat, "set Quat with floats" );
+    vmathQPrints( &c_Quat, "set Quat with floats" );
+    vmathQPrints( &d_Quat, "set Quat with floats" );
+    vmathM3MakeFromCols( &a_Matrix3, &a_Vector3, &b_Vector3, &c_Vector3 );
+    vmathM3MakeFromCols( &b_Matrix3, &d_Vector3, &a_Vector3, &b_Vector3 );
+    vmathM3Prints( &a_Matrix3, "set Matrix3 columns" );
+    vmathM3Prints( &b_Matrix3, "set Matrix3 columns" );
+    vmathM4MakeFromCols( &a_Matrix4, &a_Vector4, &b_Vector4, &c_Vector4, &d_Vector4 );
+    vmathM4MakeFromCols( &b_Matrix4, &d_Vector4, &a_Vector4, &b_Vector4, &c_Vector4 );
+    vmathM4Prints( &a_Matrix4, "set Matrix4 columns" );
+    vmathM4Prints( &b_Matrix4, "set Matrix4 columns" );
+    vmathT3MakeFromCols( &a_Transform3, &a_Vector3, &b_Vector3, &c_Vector3, &d_Vector3 );
+    vmathT3MakeFromCols( &b_Transform3, &d_Vector3, &a_Vector3, &b_Vector3, &c_Vector3 );
+    vmathT3Prints( &a_Transform3, "set Transform3 columns" );
+    vmathT3Prints( &b_Transform3, "set Transform3 columns" );
+    vmathT3MakeFromM3V3( &a_Transform3, &a_Matrix3, &a_Vector3 );
+    vmathT3Prints( &a_Transform3, "construct Transform3 with Matrix3 and Vector3" );
+    vmathT3MakeFromQV3( &a_Transform3, &a_Quat, &a_Vector3 );
+    vmathT3Prints( &a_Transform3, "construct Transform3 with Quat and Vector3" );
+    vmathT3Copy( &a_Transform3, &b_Transform3 );
+    vmathT3Prints( &a_Transform3, "assign to Transform3 from Transform3" );
+    vmathT3MakeFromScalar( &a_Transform3, randfloat() );
+    vmathT3Prints( &a_Transform3, "set Transform3 with float" );
+    vmathT3MakeFromScalar( &a_Transform3, randfloat() );
+    vmathT3Prints( &a_Transform3, "set Transform3 with float" );
+    vmathT3MakeFromScalar( &a_Transform3, 0.0f );
+    vmathT3Prints( &a_Transform3, "set elements to zero" );
+    vmathT3MakeIdentity( &a_Transform3 );
+    vmathT3Prints( &a_Transform3, "set to identity" );
+    vmathT3MakeRotationX( &a_Transform3, randfloat() );
+    vmathT3Prints( &a_Transform3, "set to rotationX" );
+    vmathT3MakeRotationY( &a_Transform3, randfloat() );
+    vmathT3Prints( &a_Transform3, "set to rotationY" );
+    vmathT3MakeRotationZ( &a_Transform3, randfloat() );
+    vmathT3Prints( &a_Transform3, "set to rotationZ" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathV3MakeFromElems( &tmpV3_8, rndflt3, rndflt2, rndflt1 );
+    vmathT3MakeRotationZYX( &a_Transform3, &tmpV3_8 );
+    vmathT3Prints( &a_Transform3, "set to rotation from Z,Y,X angles" );
+    vmathV3Normalize( &tmpV3_9, &a_Vector3 );
+    vmathT3MakeRotationAxis( &a_Transform3, randfloat(), &tmpV3_9 );
+    vmathT3Prints( &a_Transform3, "set to rotation from axis angle" );
+    vmathT3MakeTranslation( &a_Transform3, &a_Vector3 );
+    vmathT3Prints( &a_Transform3, "set to translation" );
+    vmathT3SetCol0( &a_Transform3, &a_Vector3 );
+    vmathT3Prints( &a_Transform3, "Transform3 set col 0" );
+    vmathT3SetCol1( &a_Transform3, &a_Vector3 );
+    vmathT3Prints( &a_Transform3, "Transform3 set col 1" );
+    vmathT3SetCol2( &a_Transform3, &a_Vector3 );
+    vmathT3Prints( &a_Transform3, "Transform3 set col 2" );
+    vmathT3SetCol3( &a_Transform3, &a_Vector3 );
+    vmathT3Prints( &a_Transform3, "Transform3 set col 3" );
+    vmathT3GetCol0( &tmpV3_10, &a_Transform3 );
+    vmathV3Prints( &tmpV3_10, "Transform3 get col 0" );
+    vmathT3GetCol1( &tmpV3_11, &a_Transform3 );
+    vmathV3Prints( &tmpV3_11, "Transform3 get col 1" );
+    vmathT3GetCol2( &tmpV3_12, &a_Transform3 );
+    vmathV3Prints( &tmpV3_12, "Transform3 get col 2" );
+    vmathT3GetCol3( &tmpV3_13, &a_Transform3 );
+    vmathV3Prints( &tmpV3_13, "Transform3 get col 3" );
+    vmathT3SetCol( &a_Transform3, 0, &b_Vector3 );
+    vmathT3Prints( &a_Transform3, "Transform3 set col 0" );
+    vmathT3SetCol( &a_Transform3, 1, &b_Vector3 );
+    vmathT3Prints( &a_Transform3, "Transform3 set col 1" );
+    vmathT3SetCol( &a_Transform3, 2, &b_Vector3 );
+    vmathT3Prints( &a_Transform3, "Transform3 set col 2" );
+    vmathT3SetCol( &a_Transform3, 3, &b_Vector3 );
+    vmathT3Prints( &a_Transform3, "Transform3 set col 3" );
+    vmathT3GetCol( &tmpV3_14, &a_Transform3, 0 );
+    vmathV3Prints( &tmpV3_14, "Transform3 get col 0" );
+    vmathT3GetCol( &tmpV3_15, &a_Transform3, 1 );
+    vmathV3Prints( &tmpV3_15, "Transform3 get col 1" );
+    vmathT3GetCol( &tmpV3_16, &a_Transform3, 2 );
+    vmathV3Prints( &tmpV3_16, "Transform3 get col 2" );
+    vmathT3GetCol( &tmpV3_17, &a_Transform3, 3 );
+    vmathV3Prints( &tmpV3_17, "Transform3 get col 3" );
+    vmathT3SetRow( &a_Transform3, 0, &a_Vector4 );
+    vmathT3Prints( &a_Transform3, "Transform3 set row 0" );
+    vmathT3SetRow( &a_Transform3, 1, &a_Vector4 );
+    vmathT3Prints( &a_Transform3, "Transform3 set row 1" );
+    vmathT3SetRow( &a_Transform3, 2, &a_Vector4 );
+    vmathT3Prints( &a_Transform3, "Transform3 set row 2" );
+    vmathT3GetRow( &tmpV4_0, &a_Transform3, 0 );
+    vmathV4Prints( &tmpV4_0, "Transform3 get row 0" );
+    vmathT3GetRow( &tmpV4_1, &a_Transform3, 1 );
+    vmathV4Prints( &tmpV4_1, "Transform3 get row 1" );
+    vmathT3GetRow( &tmpV4_2, &a_Transform3, 2 );
+    vmathV4Prints( &tmpV4_2, "Transform3 get row 2" );
+    vmathT3SetCol( &a_Transform3, 0, &a_Vector3 );
+    vmathT3Prints( &a_Transform3, "set " );
+    vmathT3SetCol( &a_Transform3, 1, &a_Vector3 );
+    vmathT3Prints( &a_Transform3, "set " );
+    vmathT3SetCol( &a_Transform3, 2, &a_Vector3 );
+    vmathT3Prints( &a_Transform3, "set " );
+    vmathT3SetCol( &a_Transform3, 3, &a_Vector3 );
+    vmathT3Prints( &a_Transform3, "set " );
+    vmathT3GetCol( &tmpV3_18, &a_Transform3, 0 );
+    vmathV3Prints( &tmpV3_18, "get " );
+    vmathT3GetCol( &tmpV3_19, &a_Transform3, 1 );
+    vmathV3Prints( &tmpV3_19, "get " );
+    vmathT3GetCol( &tmpV3_20, &a_Transform3, 2 );
+    vmathV3Prints( &tmpV3_20, "get " );
+    vmathT3GetCol( &tmpV3_21, &a_Transform3, 3 );
+    vmathV3Prints( &tmpV3_21, "get " );
+    vmathT3SetElem( &a_Transform3, 0, 0, randfloat() );
+    vmathT3SetElem( &a_Transform3, 0, 1, randfloat() );
+    vmathT3SetElem( &a_Transform3, 0, 2, randfloat() );
+    vmathT3SetElem( &a_Transform3, 1, 0, randfloat() );
+    vmathT3SetElem( &a_Transform3, 1, 1, randfloat() );
+    vmathT3SetElem( &a_Transform3, 1, 2, randfloat() );
+    vmathT3SetElem( &a_Transform3, 2, 0, randfloat() );
+    vmathT3SetElem( &a_Transform3, 2, 1, randfloat() );
+    vmathT3SetElem( &a_Transform3, 2, 2, randfloat() );
+    vmathT3SetElem( &a_Transform3, 3, 0, randfloat() );
+    vmathT3SetElem( &a_Transform3, 3, 1, randfloat() );
+    vmathT3SetElem( &a_Transform3, 3, 2, randfloat() );
+    vmathT3Prints( &a_Transform3, "Transform3 set elements" );
+    printf("%f\n", getfloat(vmathT3GetElem( &a_Transform3, 0, 0 )) );
+    printf("%f\n", getfloat(vmathT3GetElem( &a_Transform3, 0, 1 )) );
+    printf("%f\n", getfloat(vmathT3GetElem( &a_Transform3, 0, 2 )) );
+    printf("%f\n", getfloat(vmathT3GetElem( &a_Transform3, 1, 0 )) );
+    printf("%f\n", getfloat(vmathT3GetElem( &a_Transform3, 1, 1 )) );
+    printf("%f\n", getfloat(vmathT3GetElem( &a_Transform3, 1, 2 )) );
+    printf("%f\n", getfloat(vmathT3GetElem( &a_Transform3, 2, 0 )) );
+    printf("%f\n", getfloat(vmathT3GetElem( &a_Transform3, 2, 1 )) );
+    printf("%f\n", getfloat(vmathT3GetElem( &a_Transform3, 2, 2 )) );
+    printf("%f\n", getfloat(vmathT3GetElem( &a_Transform3, 3, 0 )) );
+    printf("%f\n", getfloat(vmathT3GetElem( &a_Transform3, 3, 1 )) );
+    printf("%f\n", getfloat(vmathT3GetElem( &a_Transform3, 3, 2 )) );
+}
+
+int main()  /* Test driver: prints a begin marker, runs the three method tests twice, prints an end marker. */
+{
+    int remaining = 2;  /* number of passes still to run */
+    printf("\n __begin__ \n");
+    while ( remaining-- > 0 ) {
+        Matrix3_methods_test();
+        Matrix4_methods_test();
+        Transform3_methods_test();
+    }
+    printf("\n __end__ \n");
+    return 0;
+}
diff --git a/Extras/vectormathlibrary/tests/test2_aos_cpp.cpp b/Extras/vectormathlibrary/tests/test2_aos_cpp.cpp
new file mode 100644
index 000000000..6b8884a1b
--- /dev/null
+++ b/Extras/vectormathlibrary/tests/test2_aos_cpp.cpp
@@ -0,0 +1,784 @@
+/*
+  Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+  All rights reserved.
+
+  Redistribution and use in source and binary forms,
+  with or without modification, are permitted provided that the
+  following conditions are met:
+   * Redistributions of source code must retain the above copyright
+     notice, this list of conditions and the following disclaimer.
+   * Redistributions in binary form must reproduce the above copyright
+     notice, this list of conditions and the following disclaimer in the
+     documentation and/or other materials provided with the distribution.
+   * Neither the name of the Sony Computer Entertainment Inc nor the names
+     of its contributors may be used to endorse or promote products derived
+     from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+  POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#define _VECTORMATH_AOS_TEST
+
+#include "vectormath_aos.h"
+#include "test.h"
+
+int iteration = 0;
+
+using namespace Vectormath;
+using namespace Vectormath::Aos;
+
+void
+Matrix3_methods_test()  // Builds random fixtures, then prints the result of each Matrix3 constructor, factory and accessor call.
+{
+    Matrix3 a_Matrix3, b_Matrix3;
+    Matrix4 a_Matrix4, b_Matrix4;
+    Transform3 a_Transform3, b_Transform3;
+    Vector3 a_Vector3, b_Vector3, c_Vector3, d_Vector3;
+    Vector4 a_Vector4, b_Vector4, c_Vector4, d_Vector4;
+    Point3 a_Point3, b_Point3, c_Point3, d_Point3;
+    Quat a_Quat, b_Quat, c_Quat, d_Quat;
+    Vector4 tmpV4;  // scratch value for the Vector4( xyz, pad ) / getXYZ() round trips below
+    float rndflt1, rndflt2, rndflt3, rndflt4, rndflt5, rndflt6, pad;
+    // set a pad value to detect invalid use of padding.
+    // this will be nan for scalar/ppu implementations, max. float for spu
+    union { float f; unsigned int u; } tmp;
+    tmp.u = 0x7fffffff;  // NOTE(review): tmp.f is read right after tmp.u is written (union type punning) - confirm the target compilers permit it
+    pad = tmp.f;
+    rndflt1 = randfloat();  // --- Vector3 fixtures: each randfloat() result is stored in a local before the constructor call ---
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    a_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 );
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    b_Vector3 = Vector3( rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    c_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    d_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 );
+    tmpV4 = Vector4( a_Vector3, pad );  // round-trip every Vector3 through a Vector4 built with pad as its last argument
+    a_Vector3 = tmpV4.getXYZ( );
+    tmpV4 = Vector4( b_Vector3, pad );
+    b_Vector3 = tmpV4.getXYZ( );
+    tmpV4 = Vector4( c_Vector3, pad );
+    c_Vector3 = tmpV4.getXYZ( );
+    tmpV4 = Vector4( d_Vector3, pad );
+    d_Vector3 = tmpV4.getXYZ( );
+    print( a_Vector3, "set Vector3 with floats" );
+    print( b_Vector3, "set Vector3 with floats" );
+    print( c_Vector3, "set Vector3 with floats" );
+    print( d_Vector3, "set Vector3 with floats" );
+    rndflt1 = randfloat();  // --- Vector4 fixtures ---
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    a_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    b_Vector4 = Vector4( rndflt3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    c_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    d_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 );
+    print( a_Vector4, "set Vector4 with floats" );
+    print( b_Vector4, "set Vector4 with floats" );
+    print( c_Vector4, "set Vector4 with floats" );
+    print( d_Vector4, "set Vector4 with floats" );
+    rndflt1 = randfloat();  // --- Point3 fixtures ---
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    a_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    b_Point3 = Point3( rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    c_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    d_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    tmpV4 = Vector4( Vector3( a_Point3 ), pad );  // same pad round trip as for the Vector3 fixtures, via a Point3 -> Vector3 conversion
+    a_Point3 = Point3( tmpV4.getXYZ( ) );
+    tmpV4 = Vector4( Vector3( b_Point3 ), pad );
+    b_Point3 = Point3( tmpV4.getXYZ( ) );
+    tmpV4 = Vector4( Vector3( c_Point3 ), pad );
+    c_Point3 = Point3( tmpV4.getXYZ( ) );
+    tmpV4 = Vector4( Vector3( d_Point3 ), pad );
+    d_Point3 = Point3( tmpV4.getXYZ( ) );
+    print( a_Point3, "set Point3 with floats" );
+    print( b_Point3, "set Point3 with floats" );
+    print( c_Point3, "set Point3 with floats" );
+    print( d_Point3, "set Point3 with floats" );
+    rndflt1 = randfloat();  // --- Quat fixtures ---
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    a_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    b_Quat = Quat( rndflt3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    c_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    d_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    print( a_Quat, "set Quat with floats" );
+    print( b_Quat, "set Quat with floats" );
+    print( c_Quat, "set Quat with floats" );
+    print( d_Quat, "set Quat with floats" );
+    a_Matrix3 = Matrix3( a_Vector3, b_Vector3, c_Vector3 );  // --- matrix and transform fixtures built from the vector fixtures ---
+    b_Matrix3 = Matrix3( d_Vector3, a_Vector3, b_Vector3 );
+    print( a_Matrix3, "set Matrix3 columns" );
+    print( b_Matrix3, "set Matrix3 columns" );
+    a_Matrix4 = Matrix4( a_Vector4, b_Vector4, c_Vector4, d_Vector4 );
+    b_Matrix4 = Matrix4( d_Vector4, a_Vector4, b_Vector4, c_Vector4 );
+    print( a_Matrix4, "set Matrix4 columns" );
+    print( b_Matrix4, "set Matrix4 columns" );
+    a_Transform3 = Transform3( a_Vector3, b_Vector3, c_Vector3, d_Vector3 );
+    b_Transform3 = Transform3( d_Vector3, a_Vector3, b_Vector3, c_Vector3 );
+    print( a_Transform3, "set Transform3 columns" );
+    print( b_Transform3, "set Transform3 columns" );
+    a_Matrix3 = Matrix3( normalize( a_Quat ) );  // --- Matrix3-specific calls start here; the quaternion is normalized before conversion ---
+    print( a_Matrix3, "construct Matrix3 with Quat" );
+    a_Quat = Quat( a_Matrix3 );  // convert the matrix built above back into a Quat
+    print( a_Quat, "construct Quat with Matrix3" );
+    a_Matrix3 = b_Matrix3;
+    print( a_Matrix3, "assign to Matrix3 from Matrix3" );
+    a_Matrix3 = Matrix3( randfloat() );
+    print( a_Matrix3, "set Matrix3 with float" );
+    a_Matrix3 = Matrix3( scalar_float(randfloat()) );  // scalar_float is declared outside this view - presumably in test.h; verify
+    print( a_Matrix3, "set Matrix3 with float" );
+    a_Matrix3 = Matrix3( 0.0f );
+    print( a_Matrix3, "set elements to zero" );
+    a_Matrix3 = Matrix3::identity( );
+    print( a_Matrix3, "set to identity" );
+    a_Matrix3 = Matrix3::rotationX( randfloat() );
+    print( a_Matrix3, "set to rotationX" );
+    a_Matrix3 = Matrix3::rotationY( randfloat() );
+    print( a_Matrix3, "set to rotationY" );
+    a_Matrix3 = Matrix3::rotationZ( randfloat() );
+    print( a_Matrix3, "set to rotationZ" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    a_Matrix3 = Matrix3::rotationZYX( Vector3( rndflt3, rndflt2, rndflt1 ) );  // the three values are passed in the reverse of the order they were generated
+    print( a_Matrix3, "set to rotation from Z,Y,X angles" );
+    a_Matrix3 = Matrix3::rotation( randfloat(), normalize( a_Vector3 ) );  // second argument is the normalized a_Vector3
+    print( a_Matrix3, "set to rotation from axis angle" );
+    a_Matrix3.setCol0( a_Vector3 );  // named column setters, printing the whole matrix after each one
+    print( a_Matrix3, "Matrix3 set col 0" );
+    a_Matrix3.setCol1( a_Vector3 );
+    print( a_Matrix3, "Matrix3 set col 1" );
+    a_Matrix3.setCol2( a_Vector3 );
+    print( a_Matrix3, "Matrix3 set col 2" );
+    print( a_Matrix3.getCol0( ), "Matrix3 get col 0" );
+    print( a_Matrix3.getCol1( ), "Matrix3 get col 1" );
+    print( a_Matrix3.getCol2( ), "Matrix3 get col 2" );
+    a_Matrix3.setCol( 0, b_Vector3 );  // indexed column setters/getters, this time with b_Vector3
+    print( a_Matrix3, "Matrix3 set col 0" );
+    a_Matrix3.setCol( 1, b_Vector3 );
+    print( a_Matrix3, "Matrix3 set col 1" );
+    a_Matrix3.setCol( 2, b_Vector3 );
+    print( a_Matrix3, "Matrix3 set col 2" );
+    print( a_Matrix3.getCol( 0 ), "Matrix3 get col 0" );
+    print( a_Matrix3.getCol( 1 ), "Matrix3 get col 1" );
+    print( a_Matrix3.getCol( 2 ), "Matrix3 get col 2" );
+    a_Matrix3.setRow( 0, a_Vector3 );  // indexed row setters/getters
+    print( a_Matrix3, "Matrix3 set row 0" );
+    a_Matrix3.setRow( 1, a_Vector3 );
+    print( a_Matrix3, "Matrix3 set row 1" );
+    a_Matrix3.setRow( 2, a_Vector3 );
+    print( a_Matrix3, "Matrix3 set row 2" );
+    print( a_Matrix3.getRow( 0 ), "Matrix3 get row 0" );
+    print( a_Matrix3.getRow( 1 ), "Matrix3 get row 1" );
+    print( a_Matrix3.getRow( 2 ), "Matrix3 get row 2" );
+    a_Matrix3[0] = a_Vector3;  // operator[] used as an assignment target
+    print( a_Matrix3, "set Matrix3[0]" );
+    a_Matrix3[1] = a_Vector3;
+    print( a_Matrix3, "set Matrix3[1]" );
+    a_Matrix3[2] = a_Vector3;
+    print( a_Matrix3, "set Matrix3[2]" );
+    a_Matrix3[0] = a_Vector3;  // each entry is assigned again immediately before it is read back through operator[]
+    print( a_Matrix3[0], "get Matrix3[0]" );
+    a_Matrix3[1] = a_Vector3;
+    print( a_Matrix3[1], "get Matrix3[1]" );
+    a_Matrix3[2] = a_Vector3;
+    print( a_Matrix3[2], "get Matrix3[2]" );
+    a_Matrix3.setElem( 0, 0, randfloat() );  // fill all nine index pairs (0..2, 0..2) with fresh random values
+    a_Matrix3.setElem( 0, 1, randfloat() );
+    a_Matrix3.setElem( 0, 2, randfloat() );
+    a_Matrix3.setElem( 1, 0, randfloat() );
+    a_Matrix3.setElem( 1, 1, randfloat() );
+    a_Matrix3.setElem( 1, 2, randfloat() );
+    a_Matrix3.setElem( 2, 0, randfloat() );
+    a_Matrix3.setElem( 2, 1, randfloat() );
+    a_Matrix3.setElem( 2, 2, randfloat() );
+    print( a_Matrix3, "Matrix3 set elements" );
+    printf("%f\n", getfloat(a_Matrix3.getElem( 0, 0 )) );  // read the nine elements back in the same index order; getfloat is declared outside this view
+    printf("%f\n", getfloat(a_Matrix3.getElem( 0, 1 )) );
+    printf("%f\n", getfloat(a_Matrix3.getElem( 0, 2 )) );
+    printf("%f\n", getfloat(a_Matrix3.getElem( 1, 0 )) );
+    printf("%f\n", getfloat(a_Matrix3.getElem( 1, 1 )) );
+    printf("%f\n", getfloat(a_Matrix3.getElem( 1, 2 )) );
+    printf("%f\n", getfloat(a_Matrix3.getElem( 2, 0 )) );
+    printf("%f\n", getfloat(a_Matrix3.getElem( 2, 1 )) );
+    printf("%f\n", getfloat(a_Matrix3.getElem( 2, 2 )) );
+}
+
+void
+Matrix4_methods_test()  // Builds random fixtures, then prints the result of each Matrix4 constructor, factory and accessor call.
+{
+    Matrix3 a_Matrix3, b_Matrix3;
+    Matrix4 a_Matrix4, b_Matrix4;
+    Transform3 a_Transform3, b_Transform3;
+    Vector3 a_Vector3, b_Vector3, c_Vector3, d_Vector3;
+    Vector4 a_Vector4, b_Vector4, c_Vector4, d_Vector4;
+    Point3 a_Point3, b_Point3, c_Point3, d_Point3;
+    Quat a_Quat, b_Quat, c_Quat, d_Quat;
+    Vector4 tmpV4;  // scratch value for the Vector4( xyz, pad ) / getXYZ() round trips below
+    float rndflt1, rndflt2, rndflt3, rndflt4, rndflt5, rndflt6, pad;
+    // set a pad value to detect invalid use of padding.
+    // this will be nan for scalar/ppu implementations, max. float for spu
+    union { float f; unsigned int u; } tmp;
+    tmp.u = 0x7fffffff;  // NOTE(review): tmp.f is read right after tmp.u is written (union type punning) - confirm the target compilers permit it
+    pad = tmp.f;
+    rndflt1 = randfloat();  // --- Vector3 fixtures: each randfloat() result is stored in a local before the constructor call ---
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    a_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 );
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    b_Vector3 = Vector3( rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    c_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    d_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 );
+    tmpV4 = Vector4( a_Vector3, pad );  // round-trip every Vector3 through a Vector4 built with pad as its last argument
+    a_Vector3 = tmpV4.getXYZ( );
+    tmpV4 = Vector4( b_Vector3, pad );
+    b_Vector3 = tmpV4.getXYZ( );
+    tmpV4 = Vector4( c_Vector3, pad );
+    c_Vector3 = tmpV4.getXYZ( );
+    tmpV4 = Vector4( d_Vector3, pad );
+    d_Vector3 = tmpV4.getXYZ( );
+    print( a_Vector3, "set Vector3 with floats" );
+    print( b_Vector3, "set Vector3 with floats" );
+    print( c_Vector3, "set Vector3 with floats" );
+    print( d_Vector3, "set Vector3 with floats" );
+    rndflt1 = randfloat();  // --- Vector4 fixtures ---
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    a_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    b_Vector4 = Vector4( rndflt3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    c_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    d_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 );
+    print( a_Vector4, "set Vector4 with floats" );
+    print( b_Vector4, "set Vector4 with floats" );
+    print( c_Vector4, "set Vector4 with floats" );
+    print( d_Vector4, "set Vector4 with floats" );
+    rndflt1 = randfloat();  // --- Point3 fixtures ---
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    a_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    b_Point3 = Point3( rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    c_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    d_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    tmpV4 = Vector4( Vector3( a_Point3 ), pad );  // same pad round trip as for the Vector3 fixtures, via a Point3 -> Vector3 conversion
+    a_Point3 = Point3( tmpV4.getXYZ( ) );
+    tmpV4 = Vector4( Vector3( b_Point3 ), pad );
+    b_Point3 = Point3( tmpV4.getXYZ( ) );
+    tmpV4 = Vector4( Vector3( c_Point3 ), pad );
+    c_Point3 = Point3( tmpV4.getXYZ( ) );
+    tmpV4 = Vector4( Vector3( d_Point3 ), pad );
+    d_Point3 = Point3( tmpV4.getXYZ( ) );
+    print( a_Point3, "set Point3 with floats" );
+    print( b_Point3, "set Point3 with floats" );
+    print( c_Point3, "set Point3 with floats" );
+    print( d_Point3, "set Point3 with floats" );
+    rndflt1 = randfloat();  // --- Quat fixtures ---
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    a_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    b_Quat = Quat( rndflt3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    c_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    d_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    print( a_Quat, "set Quat with floats" );
+    print( b_Quat, "set Quat with floats" );
+    print( c_Quat, "set Quat with floats" );
+    print( d_Quat, "set Quat with floats" );
+    a_Matrix3 = Matrix3( a_Vector3, b_Vector3, c_Vector3 );  // --- matrix and transform fixtures built from the vector fixtures ---
+    b_Matrix3 = Matrix3( d_Vector3, a_Vector3, b_Vector3 );
+    print( a_Matrix3, "set Matrix3 columns" );
+    print( b_Matrix3, "set Matrix3 columns" );
+    a_Matrix4 = Matrix4( a_Vector4, b_Vector4, c_Vector4, d_Vector4 );
+    b_Matrix4 = Matrix4( d_Vector4, a_Vector4, b_Vector4, c_Vector4 );
+    print( a_Matrix4, "set Matrix4 columns" );
+    print( b_Matrix4, "set Matrix4 columns" );
+    a_Transform3 = Transform3( a_Vector3, b_Vector3, c_Vector3, d_Vector3 );
+    b_Transform3 = Transform3( d_Vector3, a_Vector3, b_Vector3, c_Vector3 );
+    print( a_Transform3, "set Transform3 columns" );
+    print( b_Transform3, "set Transform3 columns" );
+    a_Matrix4 = Matrix4( a_Transform3 );  // --- Matrix4-specific calls start here: converting constructors first ---
+    print( a_Matrix4, "construct Matrix4 with Transform3" );
+    a_Matrix4 = Matrix4( a_Matrix3, a_Vector3 );
+    print( a_Matrix4, "construct Matrix4 with Matrix3 and Vector3" );
+    a_Matrix4 = Matrix4( a_Quat, a_Vector3 );  // a_Quat is passed as generated; unlike the Matrix3 test, no normalize() call is made here
+    print( a_Matrix4, "construct Matrix4 with Quat and Vector3" );
+    a_Matrix4 = b_Matrix4;
+    print( a_Matrix4, "assign to Matrix4 from Matrix4" );
+    a_Matrix4 = Matrix4( randfloat() );
+    print( a_Matrix4, "set Matrix4 with float" );
+    a_Matrix4 = Matrix4( scalar_float(randfloat()) );  // scalar_float is declared outside this view - presumably in test.h; verify
+    print( a_Matrix4, "set Matrix4 with float" );
+    a_Matrix4 = Matrix4( 0.0f );
+    print( a_Matrix4, "set elements to zero" );
+    a_Matrix4 = Matrix4::identity( );
+    print( a_Matrix4, "set to identity" );
+    a_Matrix4 = Matrix4::rotationX( randfloat() );
+    print( a_Matrix4, "set to rotationX" );
+    a_Matrix4 = Matrix4::rotationY( randfloat() );
+    print( a_Matrix4, "set to rotationY" );
+    a_Matrix4 = Matrix4::rotationZ( randfloat() );
+    print( a_Matrix4, "set to rotationZ" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    a_Matrix4 = Matrix4::rotationZYX( Vector3( rndflt3, rndflt2, rndflt1 ) );  // the three values are passed in the reverse of the order they were generated
+    print( a_Matrix4, "set to rotation from Z,Y,X angles" );
+    a_Matrix4 = Matrix4::rotation( randfloat(), normalize( a_Vector3 ) );  // second argument is the normalized a_Vector3
+    print( a_Matrix4, "set to rotation from axis angle" );
+    a_Matrix4 = Matrix4::translation( a_Vector3 );
+    print( a_Matrix4, "set to translation" );
+    rndflt1 = randfloat();  // four unconstrained random arguments for perspective()
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    a_Matrix4 = Matrix4::perspective( rndflt1, rndflt2, rndflt3, rndflt4 );
+    print( a_Matrix4, "set to perspective matrix" );
+    rndflt1 = randfloat();  // six unconstrained random arguments for frustum()
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    a_Matrix4 = Matrix4::frustum( rndflt1, rndflt2, rndflt3, rndflt4, rndflt5, rndflt6 );
+    print( a_Matrix4, "set to frustum matrix" );
+    rndflt1 = randfloat();  // six unconstrained random arguments for orthographic()
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    a_Matrix4 = Matrix4::orthographic( rndflt1, rndflt2, rndflt3, rndflt4, rndflt5, rndflt6 );
+    print( a_Matrix4, "set to orthographic matrix" );
+    a_Matrix4 = Matrix4::lookAt( a_Point3, b_Point3, a_Vector3 );  // takes two of the Point3 fixtures and one Vector3 fixture
+    print( a_Matrix4, "set to look-at matrix" );
+    a_Matrix4.setCol0( a_Vector4 );  // named column setters, printing the whole matrix after each one
+    print( a_Matrix4, "Matrix4 set col 0" );
+    a_Matrix4.setCol1( a_Vector4 );
+    print( a_Matrix4, "Matrix4 set col 1" );
+    a_Matrix4.setCol2( a_Vector4 );
+    print( a_Matrix4, "Matrix4 set col 2" );
+    a_Matrix4.setCol3( a_Vector4 );
+    print( a_Matrix4, "Matrix4 set col 3" );
+    print( a_Matrix4.getCol0( ), "Matrix4 get col 0" );
+    print( a_Matrix4.getCol1( ), "Matrix4 get col 1" );
+    print( a_Matrix4.getCol2( ), "Matrix4 get col 2" );
+    print( a_Matrix4.getCol3( ), "Matrix4 get col 3" );
+    a_Matrix4.setCol( 0, b_Vector4 );  // indexed column setters/getters, this time with b_Vector4
+    print( a_Matrix4, "Matrix4 set col 0" );
+    a_Matrix4.setCol( 1, b_Vector4 );
+    print( a_Matrix4, "Matrix4 set col 1" );
+    a_Matrix4.setCol( 2, b_Vector4 );
+    print( a_Matrix4, "Matrix4 set col 2" );
+    a_Matrix4.setCol( 3, b_Vector4 );
+    print( a_Matrix4, "Matrix4 set col 3" );
+    print( a_Matrix4.getCol( 0 ), "Matrix4 get col 0" );
+    print( a_Matrix4.getCol( 1 ), "Matrix4 get col 1" );
+    print( a_Matrix4.getCol( 2 ), "Matrix4 get col 2" );
+    print( a_Matrix4.getCol( 3 ), "Matrix4 get col 3" );
+    a_Matrix4.setRow( 0, a_Vector4 );  // indexed row setters/getters
+    print( a_Matrix4, "Matrix4 set row 0" );
+    a_Matrix4.setRow( 1, a_Vector4 );
+    print( a_Matrix4, "Matrix4 set row 1" );
+    a_Matrix4.setRow( 2, a_Vector4 );
+    print( a_Matrix4, "Matrix4 set row 2" );
+    a_Matrix4.setRow( 3, a_Vector4 );
+    print( a_Matrix4, "Matrix4 set row 3" );
+    print( a_Matrix4.getRow( 0 ), "Matrix4 get row 0" );
+    print( a_Matrix4.getRow( 1 ), "Matrix4 get row 1" );
+    print( a_Matrix4.getRow( 2 ), "Matrix4 get row 2" );
+    print( a_Matrix4.getRow( 3 ), "Matrix4 get row 3" );
+    a_Matrix4[0] = a_Vector4;  // operator[] used as an assignment target
+    print( a_Matrix4, "set Matrix4[0]" );
+    a_Matrix4[1] = a_Vector4;
+    print( a_Matrix4, "set Matrix4[1]" );
+    a_Matrix4[2] = a_Vector4;
+    print( a_Matrix4, "set Matrix4[2]" );
+    a_Matrix4[3] = a_Vector4;
+    print( a_Matrix4, "set Matrix4[3]" );
+    a_Matrix4[0] = a_Vector4;  // each entry is assigned again immediately before it is read back through operator[]
+    print( a_Matrix4[0], "get Matrix4[0]" );
+    a_Matrix4[1] = a_Vector4;
+    print( a_Matrix4[1], "get Matrix4[1]" );
+    a_Matrix4[2] = a_Vector4;
+    print( a_Matrix4[2], "get Matrix4[2]" );
+    a_Matrix4[3] = a_Vector4;
+    print( a_Matrix4[3], "get Matrix4[3]" );
+    a_Matrix4.setElem( 0, 0, randfloat() );  // fill all sixteen index pairs (0..3, 0..3) with fresh random values
+    a_Matrix4.setElem( 0, 1, randfloat() );
+    a_Matrix4.setElem( 0, 2, randfloat() );
+    a_Matrix4.setElem( 0, 3, randfloat() );
+    a_Matrix4.setElem( 1, 0, randfloat() );
+    a_Matrix4.setElem( 1, 1, randfloat() );
+    a_Matrix4.setElem( 1, 2, randfloat() );
+    a_Matrix4.setElem( 1, 3, randfloat() );
+    a_Matrix4.setElem( 2, 0, randfloat() );
+    a_Matrix4.setElem( 2, 1, randfloat() );
+    a_Matrix4.setElem( 2, 2, randfloat() );
+    a_Matrix4.setElem( 2, 3, randfloat() );
+    a_Matrix4.setElem( 3, 0, randfloat() );
+    a_Matrix4.setElem( 3, 1, randfloat() );
+    a_Matrix4.setElem( 3, 2, randfloat() );
+    a_Matrix4.setElem( 3, 3, randfloat() );
+    print( a_Matrix4, "Matrix4 set elements" );
+    printf("%f\n", getfloat(a_Matrix4.getElem( 0, 0 )) );  // read the sixteen elements back in the same index order; getfloat is declared outside this view
+    printf("%f\n", getfloat(a_Matrix4.getElem( 0, 1 )) );
+    printf("%f\n", getfloat(a_Matrix4.getElem( 0, 2 )) );
+    printf("%f\n", getfloat(a_Matrix4.getElem( 0, 3 )) );
+    printf("%f\n", getfloat(a_Matrix4.getElem( 1, 0 )) );
+    printf("%f\n", getfloat(a_Matrix4.getElem( 1, 1 )) );
+    printf("%f\n", getfloat(a_Matrix4.getElem( 1, 2 )) );
+    printf("%f\n", getfloat(a_Matrix4.getElem( 1, 3 )) );
+    printf("%f\n", getfloat(a_Matrix4.getElem( 2, 0 )) );
+    printf("%f\n", getfloat(a_Matrix4.getElem( 2, 1 )) );
+    printf("%f\n", getfloat(a_Matrix4.getElem( 2, 2 )) );
+    printf("%f\n", getfloat(a_Matrix4.getElem( 2, 3 )) );
+    printf("%f\n", getfloat(a_Matrix4.getElem( 3, 0 )) );
+    printf("%f\n", getfloat(a_Matrix4.getElem( 3, 1 )) );
+    printf("%f\n", getfloat(a_Matrix4.getElem( 3, 2 )) );
+    printf("%f\n", getfloat(a_Matrix4.getElem( 3, 3 )) );
+}
+
+void
+Transform3_methods_test()  // Exercises Transform3 constructors, assignment, static factories and col/row/element accessors, printing every result.
+{  // NOTE(review): statement order looks significant - each randfloat() presumably advances a shared generator and the printed text is presumably diffed against a reference file; confirm before reordering.
+    Matrix3 a_Matrix3, b_Matrix3;
+    Matrix4 a_Matrix4, b_Matrix4;
+    Transform3 a_Transform3, b_Transform3;
+    Vector3 a_Vector3, b_Vector3, c_Vector3, d_Vector3;
+    Vector4 a_Vector4, b_Vector4, c_Vector4, d_Vector4;
+    Point3 a_Point3, b_Point3, c_Point3, d_Point3;
+    Quat a_Quat, b_Quat, c_Quat, d_Quat;
+    Vector4 tmpV4;  // scratch value for the 'pad' round-trips further down
+    float rndflt1, rndflt2, rndflt3, rndflt4, rndflt5, rndflt6, pad;
+    // set a pad value to detect invalid use of padding.
+    // this will be nan for scalar/ppu implementations, max. float for spu
+    union { float f; unsigned int u; } tmp;
+    tmp.u = 0x7fffffff;  // every bit set except the sign bit
+    pad = tmp.f;  // read the same bits back as a float through the union
+    rndflt1 = randfloat();  // components are drawn into temporaries one statement at a time, which fixes the order of the randfloat() calls
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    a_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 );
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    b_Vector3 = Vector3( rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    c_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    d_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 );
+    tmpV4 = Vector4( a_Vector3, pad );  // round-trip: widen to a Vector4 with 'pad' as the extra component, then take xyz back
+    a_Vector3 = tmpV4.getXYZ( );
+    tmpV4 = Vector4( b_Vector3, pad );
+    b_Vector3 = tmpV4.getXYZ( );
+    tmpV4 = Vector4( c_Vector3, pad );
+    c_Vector3 = tmpV4.getXYZ( );
+    tmpV4 = Vector4( d_Vector3, pad );
+    d_Vector3 = tmpV4.getXYZ( );
+    print( a_Vector3, "set Vector3 with floats" );
+    print( b_Vector3, "set Vector3 with floats" );
+    print( c_Vector3, "set Vector3 with floats" );
+    print( d_Vector3, "set Vector3 with floats" );
+    rndflt1 = randfloat();  // four random Vector4 values (a..d)
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    a_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    b_Vector4 = Vector4( rndflt3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    c_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    d_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 );
+    print( a_Vector4, "set Vector4 with floats" );
+    print( b_Vector4, "set Vector4 with floats" );
+    print( c_Vector4, "set Vector4 with floats" );
+    print( d_Vector4, "set Vector4 with floats" );
+    rndflt1 = randfloat();  // four random Point3 values (a..d)
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    a_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    b_Point3 = Point3( rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    c_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    d_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    tmpV4 = Vector4( Vector3( a_Point3 ), pad );  // same 'pad' round-trip as for the vectors, going through Vector3 conversions
+    a_Point3 = Point3( tmpV4.getXYZ( ) );
+    tmpV4 = Vector4( Vector3( b_Point3 ), pad );
+    b_Point3 = Point3( tmpV4.getXYZ( ) );
+    tmpV4 = Vector4( Vector3( c_Point3 ), pad );
+    c_Point3 = Point3( tmpV4.getXYZ( ) );
+    tmpV4 = Vector4( Vector3( d_Point3 ), pad );
+    d_Point3 = Point3( tmpV4.getXYZ( ) );
+    print( a_Point3, "set Point3 with floats" );
+    print( b_Point3, "set Point3 with floats" );
+    print( c_Point3, "set Point3 with floats" );
+    print( d_Point3, "set Point3 with floats" );
+    rndflt1 = randfloat();  // four random Quat values (a..d); no normalize call is applied to them in this function
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    a_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    b_Quat = Quat( rndflt3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    c_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    d_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    print( a_Quat, "set Quat with floats" );
+    print( b_Quat, "set Quat with floats" );
+    print( c_Quat, "set Quat with floats" );
+    print( d_Quat, "set Quat with floats" );
+    a_Matrix3 = Matrix3( a_Vector3, b_Vector3, c_Vector3 );  // the b_* objects below reuse the same vectors shifted by one position (d, a, b[, c])
+    b_Matrix3 = Matrix3( d_Vector3, a_Vector3, b_Vector3 );
+    print( a_Matrix3, "set Matrix3 columns" );
+    print( b_Matrix3, "set Matrix3 columns" );
+    a_Matrix4 = Matrix4( a_Vector4, b_Vector4, c_Vector4, d_Vector4 );
+    b_Matrix4 = Matrix4( d_Vector4, a_Vector4, b_Vector4, c_Vector4 );
+    print( a_Matrix4, "set Matrix4 columns" );
+    print( b_Matrix4, "set Matrix4 columns" );
+    a_Transform3 = Transform3( a_Vector3, b_Vector3, c_Vector3, d_Vector3 );  // Transform3 from four Vector3 arguments
+    b_Transform3 = Transform3( d_Vector3, a_Vector3, b_Vector3, c_Vector3 );
+    print( a_Transform3, "set Transform3 columns" );
+    print( b_Transform3, "set Transform3 columns" );
+    a_Transform3 = Transform3( a_Matrix3, a_Vector3 );
+    print( a_Transform3, "construct Transform3 with Matrix3 and Vector3" );
+    a_Transform3 = Transform3( a_Quat, a_Vector3 );
+    print( a_Transform3, "construct Transform3 with Quat and Vector3" );
+    a_Transform3 = b_Transform3;
+    print( a_Transform3, "assign to Transform3 from Transform3" );
+    a_Transform3 = Transform3( randfloat() );  // single-scalar constructor, plain float argument
+    print( a_Transform3, "set Transform3 with float" );
+    a_Transform3 = Transform3( scalar_float(randfloat()) );  // same, with the argument wrapped in scalar_float(); printed under the same label
+    print( a_Transform3, "set Transform3 with float" );
+    a_Transform3 = Transform3( 0.0f );
+    print( a_Transform3, "set elements to zero" );
+    a_Transform3 = Transform3::identity( );  // static factory functions from here through translation()
+    print( a_Transform3, "set to identity" );
+    a_Transform3 = Transform3::rotationX( randfloat() );
+    print( a_Transform3, "set to rotationX" );
+    a_Transform3 = Transform3::rotationY( randfloat() );
+    print( a_Transform3, "set to rotationY" );
+    a_Transform3 = Transform3::rotationZ( randfloat() );
+    print( a_Transform3, "set to rotationZ" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    a_Transform3 = Transform3::rotationZYX( Vector3( rndflt3, rndflt2, rndflt1 ) );  // note the reversed argument order: rndflt3, rndflt2, rndflt1
+    print( a_Transform3, "set to rotation from Z,Y,X angles" );
+    a_Transform3 = Transform3::rotation( randfloat(), normalize( a_Vector3 ) );  // second argument is a_Vector3 after normalize()
+    print( a_Transform3, "set to rotation from axis angle" );
+    a_Transform3 = Transform3::translation( a_Vector3 );
+    print( a_Transform3, "set to translation" );
+    a_Transform3.setCol0( a_Vector3 );  // named setColN / getColN accessors, all fed a_Vector3
+    print( a_Transform3, "Transform3 set col 0" );
+    a_Transform3.setCol1( a_Vector3 );
+    print( a_Transform3, "Transform3 set col 1" );
+    a_Transform3.setCol2( a_Vector3 );
+    print( a_Transform3, "Transform3 set col 2" );
+    a_Transform3.setCol3( a_Vector3 );
+    print( a_Transform3, "Transform3 set col 3" );
+    print( a_Transform3.getCol0( ), "Transform3 get col 0" );
+    print( a_Transform3.getCol1( ), "Transform3 get col 1" );
+    print( a_Transform3.getCol2( ), "Transform3 get col 2" );
+    print( a_Transform3.getCol3( ), "Transform3 get col 3" );
+    a_Transform3.setCol( 0, b_Vector3 );  // indexed setCol / getCol, fed b_Vector3; labels repeat those of the named accessors
+    print( a_Transform3, "Transform3 set col 0" );
+    a_Transform3.setCol( 1, b_Vector3 );
+    print( a_Transform3, "Transform3 set col 1" );
+    a_Transform3.setCol( 2, b_Vector3 );
+    print( a_Transform3, "Transform3 set col 2" );
+    a_Transform3.setCol( 3, b_Vector3 );
+    print( a_Transform3, "Transform3 set col 3" );
+    print( a_Transform3.getCol( 0 ), "Transform3 get col 0" );
+    print( a_Transform3.getCol( 1 ), "Transform3 get col 1" );
+    print( a_Transform3.getCol( 2 ), "Transform3 get col 2" );
+    print( a_Transform3.getCol( 3 ), "Transform3 get col 3" );
+    a_Transform3.setRow( 0, a_Vector4 );  // rows take a Vector4; only row indices 0..2 are exercised
+    print( a_Transform3, "Transform3 set row 0" );
+    a_Transform3.setRow( 1, a_Vector4 );
+    print( a_Transform3, "Transform3 set row 1" );
+    a_Transform3.setRow( 2, a_Vector4 );
+    print( a_Transform3, "Transform3 set row 2" );
+    print( a_Transform3.getRow( 0 ), "Transform3 get row 0" );
+    print( a_Transform3.getRow( 1 ), "Transform3 get row 1" );
+    print( a_Transform3.getRow( 2 ), "Transform3 get row 2" );
+    a_Transform3[0] = a_Vector3;  // operator[] as an assignment target, indices 0..3; whole transform printed each time
+    print( a_Transform3, "set Transform3[0]" );
+    a_Transform3[1] = a_Vector3;
+    print( a_Transform3, "set Transform3[1]" );
+    a_Transform3[2] = a_Vector3;
+    print( a_Transform3, "set Transform3[2]" );
+    a_Transform3[3] = a_Vector3;
+    print( a_Transform3, "set Transform3[3]" );
+    a_Transform3[0] = a_Vector3;  // assign again, then print only the value read back through operator[]
+    print( a_Transform3[0], "get Transform3[0]" );
+    a_Transform3[1] = a_Vector3;
+    print( a_Transform3[1], "get Transform3[1]" );
+    a_Transform3[2] = a_Vector3;
+    print( a_Transform3[2], "get Transform3[2]" );
+    a_Transform3[3] = a_Vector3;
+    print( a_Transform3[3], "get Transform3[3]" );
+    a_Transform3.setElem( 0, 0, randfloat() );  // 12 setElem calls: first index 0..3, second index 0..2 (presumably col, row - confirm against the Transform3 API)
+    a_Transform3.setElem( 0, 1, randfloat() );
+    a_Transform3.setElem( 0, 2, randfloat() );
+    a_Transform3.setElem( 1, 0, randfloat() );
+    a_Transform3.setElem( 1, 1, randfloat() );
+    a_Transform3.setElem( 1, 2, randfloat() );
+    a_Transform3.setElem( 2, 0, randfloat() );
+    a_Transform3.setElem( 2, 1, randfloat() );
+    a_Transform3.setElem( 2, 2, randfloat() );
+    a_Transform3.setElem( 3, 0, randfloat() );
+    a_Transform3.setElem( 3, 1, randfloat() );
+    a_Transform3.setElem( 3, 2, randfloat() );
+    print( a_Transform3, "Transform3 set elements" );
+    printf("%f\n", getfloat(a_Transform3.getElem( 0, 0 )) );  // read the same 12 elements back in the same index order, one value per line
+    printf("%f\n", getfloat(a_Transform3.getElem( 0, 1 )) );
+    printf("%f\n", getfloat(a_Transform3.getElem( 0, 2 )) );
+    printf("%f\n", getfloat(a_Transform3.getElem( 1, 0 )) );
+    printf("%f\n", getfloat(a_Transform3.getElem( 1, 1 )) );
+    printf("%f\n", getfloat(a_Transform3.getElem( 1, 2 )) );
+    printf("%f\n", getfloat(a_Transform3.getElem( 2, 0 )) );
+    printf("%f\n", getfloat(a_Transform3.getElem( 2, 1 )) );
+    printf("%f\n", getfloat(a_Transform3.getElem( 2, 2 )) );
+    printf("%f\n", getfloat(a_Transform3.getElem( 3, 0 )) );
+    printf("%f\n", getfloat(a_Transform3.getElem( 3, 1 )) );
+    printf("%f\n", getfloat(a_Transform3.getElem( 3, 2 )) );
+}
+
+int main()
+{
+    // Run the three method suites twice in a row, bracketed by begin/end marker lines.
+    printf("\n __begin__ \n");
+    for ( int passesLeft = 2; passesLeft > 0; --passesLeft ) {
+        Matrix3_methods_test();
+        Matrix4_methods_test();
+        Transform3_methods_test();
+    }
+    printf("\n __end__ \n");
+    return 0;
+}
diff --git a/Extras/vectormathlibrary/tests/test2_reference.txt b/Extras/vectormathlibrary/tests/test2_reference.txt
new file mode 100644
index 000000000..6f885a212
--- /dev/null
+++ b/Extras/vectormathlibrary/tests/test2_reference.txt
@@ -0,0 +1,1190 @@
+set Vector3 with floats: ( -0.658344 0.499804 -0.807257 )
+set Vector3 with floats: ( 0.740930 0.154607 0.571599 )
+set Vector3 with floats: ( 0.384388 -0.262467 0.747808 )
+set Vector3 with floats: ( 0.490190 -0.107908 -0.292544 )
+set Vector4 with floats: ( 0.465039 -0.479556 -0.211412 0.553580 )
+set Vector4 with floats: ( 0.690070 0.151576 0.431077 -0.833992 )
+set Vector4 with floats: ( -0.088350 -0.780106 0.090456 -0.218627 )
+set Vector4 with floats: ( 0.137171 0.918133 0.735438 -0.673621 )
+set Point3 with floats: ( -0.448982 -0.479278 0.848189 )
+set Point3 with floats: ( -0.128155 0.578922 -0.744766 )
+set Point3 with floats: ( -0.835589 0.881284 -0.948850 )
+set Point3 with floats: ( -0.691578 -0.235635 -0.690527 )
+set Quat with floats: ( 0.058667 0.753697 -0.138777 -0.472188 )
+set Quat with floats: ( -0.372811 0.540183 -0.785218 0.542085 )
+set Quat with floats: ( 0.410391 -0.562721 0.523588 -0.176574 )
+set Quat with floats: ( 0.297654 0.859913 0.004837 0.374881 )
+set Matrix3 columns:
+( -0.658344 0.740930 0.384388 )
+( 0.499804 0.154607 -0.262467 )
+( -0.807257 0.571599 0.747808 )
+set Matrix3 columns:
+( 0.490190 -0.658344 0.740930 )
+( -0.107908 0.499804 0.154607 )
+( -0.292544 -0.807257 0.571599 )
+set Matrix4 columns:
+( 0.465039 0.690070 -0.088350 0.137171 )
+( -0.479556 0.151576 -0.780106 0.918133 )
+( -0.211412 0.431077 0.090456 0.735438 )
+( 0.553580 -0.833992 -0.218627 -0.673621 )
+set Matrix4 columns:
+( 0.137171 0.465039 0.690070 -0.088350 )
+( 0.918133 -0.479556 0.151576 -0.780106 )
+( 0.735438 -0.211412 0.431077 0.090456 )
+( -0.673621 0.553580 -0.833992 -0.218627 )
+set Transform3 columns:
+( -0.658344 0.740930 0.384388 0.490190 )
+( 0.499804 0.154607 -0.262467 -0.107908 )
+( -0.807257 0.571599 0.747808 -0.292544 )
+set Transform3 columns:
+( 0.490190 -0.658344 0.740930 0.384388 )
+( -0.107908 0.499804 0.154607 -0.262467 )
+( -0.292544 -0.807257 0.571599 0.747808 )
+construct Matrix3 with Quat:
+( -0.443537 -0.052381 -0.894724 )
+( 0.269738 0.944205 -0.188994 )
+( 0.854702 -0.325167 -0.404660 )
+construct Quat with Matrix3: ( -0.065036 -0.835524 0.153844 0.523452 )
+assign to Matrix3 from Matrix3:
+( 0.490190 -0.658344 0.740930 )
+( -0.107908 0.499804 0.154607 )
+( -0.292544 -0.807257 0.571599 )
+set Matrix3 with float:
+( -0.127818 -0.127818 -0.127818 )
+( -0.127818 -0.127818 -0.127818 )
+( -0.127818 -0.127818 -0.127818 )
+set Matrix3 with float:
+( 0.216602 0.216602 0.216602 )
+( 0.216602 0.216602 0.216602 )
+( 0.216602 0.216602 0.216602 )
+set elements to zero:
+( 0.000000 0.000000 0.000000 )
+( 0.000000 0.000000 0.000000 )
+( 0.000000 0.000000 0.000000 )
+set to identity:
+( 1.000000 0.000000 0.000000 )
+( 0.000000 1.000000 0.000000 )
+( 0.000000 0.000000 1.000000 )
+set to rotationX:
+( 1.000000 0.000000 0.000000 )
+( 0.000000 0.988300 -0.152520 )
+( 0.000000 0.152520 0.988300 )
+set to rotationY:
+( 0.965029 0.000000 0.262144 )
+( 0.000000 1.000000 0.000000 )
+( -0.262144 0.000000 0.965029 )
+set to rotationZ:
+( 0.997326 0.073083 0.000000 )
+( -0.073083 0.997326 0.000000 )
+( 0.000000 0.000000 1.000000 )
+set to rotation from Z,Y,X angles:
+( 0.723488 -0.666973 -0.178077 )
+( 0.195944 0.445752 -0.873448 )
+( 0.661945 0.597036 0.453186 )
+set to rotation from axis angle:
+( 0.836267 -0.515861 -0.185860 )
+( 0.396334 0.802918 -0.445244 )
+( 0.378915 0.298680 0.875907 )
+Matrix3 set col 0:
+( -0.658344 -0.515861 -0.185860 )
+( 0.499804 0.802918 -0.445244 )
+( -0.807257 0.298680 0.875907 )
+Matrix3 set col 1:
+( -0.658344 -0.658344 -0.185860 )
+( 0.499804 0.499804 -0.445244 )
+( -0.807257 -0.807257 0.875907 )
+Matrix3 set col 2:
+( -0.658344 -0.658344 -0.658344 )
+( 0.499804 0.499804 0.499804 )
+( -0.807257 -0.807257 -0.807257 )
+Matrix3 get col 0: ( -0.658344 0.499804 -0.807257 )
+Matrix3 get col 1: ( -0.658344 0.499804 -0.807257 )
+Matrix3 get col 2: ( -0.658344 0.499804 -0.807257 )
+Matrix3 set col 0:
+( 0.740930 -0.658344 -0.658344 )
+( 0.154607 0.499804 0.499804 )
+( 0.571599 -0.807257 -0.807257 )
+Matrix3 set col 1:
+( 0.740930 0.740930 -0.658344 )
+( 0.154607 0.154607 0.499804 )
+( 0.571599 0.571599 -0.807257 )
+Matrix3 set col 2:
+( 0.740930 0.740930 0.740930 )
+( 0.154607 0.154607 0.154607 )
+( 0.571599 0.571599 0.571599 )
+Matrix3 get col 0: ( 0.740930 0.154607 0.571599 )
+Matrix3 get col 1: ( 0.740930 0.154607 0.571599 )
+Matrix3 get col 2: ( 0.740930 0.154607 0.571599 )
+Matrix3 set row 0:
+( -0.658344 0.499804 -0.807257 )
+( 0.154607 0.154607 0.154607 )
+( 0.571599 0.571599 0.571599 )
+Matrix3 set row 1:
+( -0.658344 0.499804 -0.807257 )
+( -0.658344 0.499804 -0.807257 )
+( 0.571599 0.571599 0.571599 )
+Matrix3 set row 2:
+( -0.658344 0.499804 -0.807257 )
+( -0.658344 0.499804 -0.807257 )
+( -0.658344 0.499804 -0.807257 )
+Matrix3 get row 0: ( -0.658344 0.499804 -0.807257 )
+Matrix3 get row 1: ( -0.658344 0.499804 -0.807257 )
+Matrix3 get row 2: ( -0.658344 0.499804 -0.807257 )
+set Matrix3[0]:
+( -0.658344 0.499804 -0.807257 )
+( 0.499804 0.499804 -0.807257 )
+( -0.807257 0.499804 -0.807257 )
+set Matrix3[1]:
+( -0.658344 -0.658344 -0.807257 )
+( 0.499804 0.499804 -0.807257 )
+( -0.807257 -0.807257 -0.807257 )
+set Matrix3[2]:
+( -0.658344 -0.658344 -0.658344 )
+( 0.499804 0.499804 0.499804 )
+( -0.807257 -0.807257 -0.807257 )
+get Matrix3[0]: ( -0.658344 0.499804 -0.807257 )
+get Matrix3[1]: ( -0.658344 0.499804 -0.807257 )
+get Matrix3[2]: ( -0.658344 0.499804 -0.807257 )
+Matrix3 set elements:
+( -0.106634 -0.283632 0.910171 )
+( -0.350831 -0.203584 0.969234 )
+( 0.905168 -0.797437 0.151940 )
+-0.106634
+-0.350831
+0.905168
+-0.283632
+-0.203584
+-0.797437
+0.910171
+0.969234
+0.151940
+set Vector3 with floats: ( 0.731827 -0.700248 0.818301 )
+set Vector3 with floats: ( 0.302505 -0.872278 0.909999 )
+set Vector3 with floats: ( 0.932526 0.571087 0.610330 )
+set Vector3 with floats: ( 0.142507 -0.434829 0.925102 )
+set Vector4 with floats: ( 0.158954 -0.126283 -0.249128 0.846815 )
+set Vector4 with floats: ( -0.942601 0.537720 0.446214 0.181939 )
+set Vector4 with floats: ( -0.148223 0.284286 0.493525 -0.861963 )
+set Vector4 with floats: ( -0.893410 0.548627 0.407007 -0.757467 )
+set Point3 with floats: ( -0.393126 -0.850984 0.375720 )
+set Point3 with floats: ( -0.270088 0.458888 -0.610828 )
+set Point3 with floats: ( -0.690816 -0.676415 0.664466 )
+set Point3 with floats: ( 0.101874 -0.365714 0.055473 )
+set Quat with floats: ( -0.133556 -0.572643 0.459209 -0.997261 )
+set Quat with floats: ( 0.172409 -0.045124 0.879716 0.524317 )
+set Quat with floats: ( -0.744532 -0.970444 -0.000013 0.689543 )
+set Quat with floats: ( 0.704297 -0.817983 0.715505 0.577868 )
+set Matrix3 columns:
+( 0.731827 0.302505 0.932526 )
+( -0.700248 -0.872278 0.571087 )
+( 0.818301 0.909999 0.610330 )
+set Matrix3 columns:
+( 0.142507 0.731827 0.302505 )
+( -0.434829 -0.700248 -0.872278 )
+( 0.925102 0.818301 0.909999 )
+set Matrix4 columns:
+( 0.158954 -0.942601 -0.148223 -0.893410 )
+( -0.126283 0.537720 0.284286 0.548627 )
+( -0.249128 0.446214 0.493525 0.407007 )
+( 0.846815 0.181939 -0.861963 -0.757467 )
+set Matrix4 columns:
+( -0.893410 0.158954 -0.942601 -0.148223 )
+( 0.548627 -0.126283 0.537720 0.284286 )
+( 0.407007 -0.249128 0.446214 0.493525 )
+( -0.757467 0.846815 0.181939 -0.861963 )
+set Transform3 columns:
+( 0.731827 0.302505 0.932526 0.142507 )
+( -0.700248 -0.872278 0.571087 -0.434829 )
+( 0.818301 0.909999 0.610330 0.925102 )
+set Transform3 columns:
+( 0.142507 0.731827 0.302505 0.932526 )
+( -0.434829 -0.700248 -0.872278 0.571087 )
+( 0.925102 0.818301 0.909999 0.610330 )
+construct Matrix4 with Transform3:
+( 0.731827 0.302505 0.932526 0.142507 )
+( -0.700248 -0.872278 0.571087 -0.434829 )
+( 0.818301 0.909999 0.610330 0.925102 )
+( 0.000000 0.000000 0.000000 1.000000 )
+construct Matrix4 with Matrix3 and Vector3:
+( 0.731827 0.302505 0.932526 0.731827 )
+( -0.700248 -0.872278 0.571087 -0.700248 )
+( 0.818301 0.909999 0.610330 0.818301 )
+( 0.000000 0.000000 0.000000 1.000000 )
+construct Matrix4 with Quat and Vector3:
+( -0.077586 1.068863 1.019489 0.731827 )
+( -0.762943 0.542579 -0.792307 -0.700248 )
+( -1.264810 -0.259545 0.308485 0.818301 )
+( 0.000000 0.000000 0.000000 1.000000 )
+assign to Matrix4 from Matrix4:
+( -0.893410 0.158954 -0.942601 -0.148223 )
+( 0.548627 -0.126283 0.537720 0.284286 )
+( 0.407007 -0.249128 0.446214 0.493525 )
+( -0.757467 0.846815 0.181939 -0.861963 )
+set Matrix4 with float:
+( 0.156952 0.156952 0.156952 0.156952 )
+( 0.156952 0.156952 0.156952 0.156952 )
+( 0.156952 0.156952 0.156952 0.156952 )
+( 0.156952 0.156952 0.156952 0.156952 )
+set Matrix4 with float:
+( -0.801022 -0.801022 -0.801022 -0.801022 )
+( -0.801022 -0.801022 -0.801022 -0.801022 )
+( -0.801022 -0.801022 -0.801022 -0.801022 )
+( -0.801022 -0.801022 -0.801022 -0.801022 )
+set elements to zero:
+( 0.000000 0.000000 0.000000 0.000000 )
+( 0.000000 0.000000 0.000000 0.000000 )
+( 0.000000 0.000000 0.000000 0.000000 )
+( 0.000000 0.000000 0.000000 0.000000 )
+set to identity:
+( 1.000000 0.000000 0.000000 0.000000 )
+( 0.000000 1.000000 0.000000 0.000000 )
+( 0.000000 0.000000 1.000000 0.000000 )
+( 0.000000 0.000000 0.000000 1.000000 )
+set to rotationX:
+( 1.000000 0.000000 0.000000 0.000000 )
+( 0.000000 0.792234 -0.610218 0.000000 )
+( 0.000000 0.610218 0.792234 0.000000 )
+( 0.000000 0.000000 0.000000 1.000000 )
+set to rotationY:
+( 0.880257 0.000000 0.474498 0.000000 )
+( 0.000000 1.000000 0.000000 0.000000 )
+( -0.474498 0.000000 0.880257 0.000000 )
+( 0.000000 0.000000 0.000000 1.000000 )
+set to rotationZ:
+( 0.684599 -0.728920 0.000000 0.000000 )
+( 0.728920 0.684599 0.000000 0.000000 )
+( 0.000000 0.000000 1.000000 0.000000 )
+( 0.000000 0.000000 0.000000 1.000000 )
+set to rotation from Z,Y,X angles:
+( 0.718303 0.540188 0.438449 0.000000 )
+( 0.017447 0.616016 -0.787541 0.000000 )
+( -0.695512 0.573343 0.433061 0.000000 )
+( 0.000000 0.000000 0.000000 1.000000 )
+set to rotation from axis angle:
+( 0.993956 -0.086019 -0.068204 0.000000 )
+( 0.080679 0.993721 -0.077527 0.000000 )
+( 0.074445 0.071556 0.994655 0.000000 )
+( 0.000000 0.000000 0.000000 1.000000 )
+set to translation:
+( 1.000000 0.000000 0.000000 0.731827 )
+( 0.000000 1.000000 0.000000 -0.700248 )
+( 0.000000 0.000000 1.000000 0.818301 )
+( 0.000000 0.000000 0.000000 1.000000 )
+set to perspective matrix:
+( 232.227814 0.000000 0.000000 0.000000 )
+( 0.000000 -38.291180 0.000000 0.000000 )
+( 0.000000 0.000000 -2.308166 -0.994733 )
+( 0.000000 0.000000 -1.000000 0.000000 )
+set to frustum matrix:
+( -1.076418 0.000000 0.527081 0.000000 )
+( 0.000000 -0.465615 -0.188684 0.000000 )
+( 0.000000 0.000000 -66.946648 -26.580370 )
+( 0.000000 0.000000 -1.000000 0.000000 )
+set to orthographic matrix:
+( -2.962014 0.000000 0.000000 1.880296 )
+( 0.000000 5.281968 0.000000 -2.636149 )
+( 0.000000 0.000000 -3.467666 -2.260634 )
+( 0.000000 0.000000 0.000000 1.000000 )
+set to look-at matrix:
+( 0.275461 -0.594716 -0.755271 -0.114033 )
+( 0.958396 0.108747 0.263914 0.370155 )
+( -0.074821 -0.796547 0.599929 -0.932668 )
+( 0.000000 0.000000 0.000000 1.000000 )
+Matrix4 set col 0:
+( 0.158954 -0.594716 -0.755271 -0.114033 )
+( -0.126283 0.108747 0.263914 0.370155 )
+( -0.249128 -0.796547 0.599929 -0.932668 )
+( 0.846815 0.000000 0.000000 1.000000 )
+Matrix4 set col 1:
+( 0.158954 0.158954 -0.755271 -0.114033 )
+( -0.126283 -0.126283 0.263914 0.370155 )
+( -0.249128 -0.249128 0.599929 -0.932668 )
+( 0.846815 0.846815 0.000000 1.000000 )
+Matrix4 set col 2:
+( 0.158954 0.158954 0.158954 -0.114033 )
+( -0.126283 -0.126283 -0.126283 0.370155 )
+( -0.249128 -0.249128 -0.249128 -0.932668 )
+( 0.846815 0.846815 0.846815 1.000000 )
+Matrix4 set col 3:
+( 0.158954 0.158954 0.158954 0.158954 )
+( -0.126283 -0.126283 -0.126283 -0.126283 )
+( -0.249128 -0.249128 -0.249128 -0.249128 )
+( 0.846815 0.846815 0.846815 0.846815 )
+Matrix4 get col 0: ( 0.158954 -0.126283 -0.249128 0.846815 )
+Matrix4 get col 1: ( 0.158954 -0.126283 -0.249128 0.846815 )
+Matrix4 get col 2: ( 0.158954 -0.126283 -0.249128 0.846815 )
+Matrix4 get col 3: ( 0.158954 -0.126283 -0.249128 0.846815 )
+Matrix4 set col 0:
+( -0.942601 0.158954 0.158954 0.158954 )
+( 0.537720 -0.126283 -0.126283 -0.126283 )
+( 0.446214 -0.249128 -0.249128 -0.249128 )
+( 0.181939 0.846815 0.846815 0.846815 )
+Matrix4 set col 1:
+( -0.942601 -0.942601 0.158954 0.158954 )
+( 0.537720 0.537720 -0.126283 -0.126283 )
+( 0.446214 0.446214 -0.249128 -0.249128 )
+( 0.181939 0.181939 0.846815 0.846815 )
+Matrix4 set col 2:
+( -0.942601 -0.942601 -0.942601 0.158954 )
+( 0.537720 0.537720 0.537720 -0.126283 )
+( 0.446214 0.446214 0.446214 -0.249128 )
+( 0.181939 0.181939 0.181939 0.846815 )
+Matrix4 set col 3:
+( -0.942601 -0.942601 -0.942601 -0.942601 )
+( 0.537720 0.537720 0.537720 0.537720 )
+( 0.446214 0.446214 0.446214 0.446214 )
+( 0.181939 0.181939 0.181939 0.181939 )
+Matrix4 get col 0: ( -0.942601 0.537720 0.446214 0.181939 )
+Matrix4 get col 1: ( -0.942601 0.537720 0.446214 0.181939 )
+Matrix4 get col 2: ( -0.942601 0.537720 0.446214 0.181939 )
+Matrix4 get col 3: ( -0.942601 0.537720 0.446214 0.181939 )
+Matrix4 set row 0:
+( 0.158954 -0.126283 -0.249128 0.846815 )
+( 0.537720 0.537720 0.537720 0.537720 )
+( 0.446214 0.446214 0.446214 0.446214 )
+( 0.181939 0.181939 0.181939 0.181939 )
+Matrix4 set row 1:
+( 0.158954 -0.126283 -0.249128 0.846815 )
+( 0.158954 -0.126283 -0.249128 0.846815 )
+( 0.446214 0.446214 0.446214 0.446214 )
+( 0.181939 0.181939 0.181939 0.181939 )
+Matrix4 set row 2:
+( 0.158954 -0.126283 -0.249128 0.846815 )
+( 0.158954 -0.126283 -0.249128 0.846815 )
+( 0.158954 -0.126283 -0.249128 0.846815 )
+( 0.181939 0.181939 0.181939 0.181939 )
+Matrix4 set row 3:
+( 0.158954 -0.126283 -0.249128 0.846815 )
+( 0.158954 -0.126283 -0.249128 0.846815 )
+( 0.158954 -0.126283 -0.249128 0.846815 )
+( 0.158954 -0.126283 -0.249128 0.846815 )
+Matrix4 get row 0: ( 0.158954 -0.126283 -0.249128 0.846815 )
+Matrix4 get row 1: ( 0.158954 -0.126283 -0.249128 0.846815 )
+Matrix4 get row 2: ( 0.158954 -0.126283 -0.249128 0.846815 )
+Matrix4 get row 3: ( 0.158954 -0.126283 -0.249128 0.846815 )
+set Matrix4[0]:
+( 0.158954 -0.126283 -0.249128 0.846815 )
+( -0.126283 -0.126283 -0.249128 0.846815 )
+( -0.249128 -0.126283 -0.249128 0.846815 )
+( 0.846815 -0.126283 -0.249128 0.846815 )
+set Matrix4[1]:
+( 0.158954 0.158954 -0.249128 0.846815 )
+( -0.126283 -0.126283 -0.249128 0.846815 )
+( -0.249128 -0.249128 -0.249128 0.846815 )
+( 0.846815 0.846815 -0.249128 0.846815 )
+set Matrix4[2]:
+( 0.158954 0.158954 0.158954 0.846815 )
+( -0.126283 -0.126283 -0.126283 0.846815 )
+( -0.249128 -0.249128 -0.249128 0.846815 )
+( 0.846815 0.846815 0.846815 0.846815 )
+set Matrix4[3]:
+( 0.158954 0.158954 0.158954 0.158954 )
+( -0.126283 -0.126283 -0.126283 -0.126283 )
+( -0.249128 -0.249128 -0.249128 -0.249128 )
+( 0.846815 0.846815 0.846815 0.846815 )
+get Matrix4[0]: ( 0.158954 -0.126283 -0.249128 0.846815 )
+get Matrix4[1]: ( 0.158954 -0.126283 -0.249128 0.846815 )
+get Matrix4[2]: ( 0.158954 -0.126283 -0.249128 0.846815 )
+get Matrix4[3]: ( 0.158954 -0.126283 -0.249128 0.846815 )
+Matrix4 set elements:
+( -0.336683 0.205513 -0.071926 0.473013 )
+( 0.600164 -0.160082 -0.506313 -0.735610 )
+( -0.681272 0.962714 0.689277 -0.046390 )
+( 0.726558 0.737794 0.686485 0.568674 )
+-0.336683
+0.600164
+-0.681272
+0.726558
+0.205513
+-0.160082
+0.962714
+0.737794
+-0.071926
+-0.506313
+0.689277
+0.686485
+0.473013
+-0.735610
+-0.046390
+0.568674
+set Vector3 with floats: ( -0.004815 0.137637 -0.111879 )
+set Vector3 with floats: ( -0.929543 -0.336303 -0.146740 )
+set Vector3 with floats: ( 0.165140 -0.823874 0.349776 )
+set Vector3 with floats: ( 0.174872 -0.528584 0.489292 )
+set Vector4 with floats: ( 0.916708 0.728511 -0.851140 0.079620 )
+set Vector4 with floats: ( -0.234370 -0.996308 0.433229 -0.892684 )
+set Vector4 with floats: ( -0.957911 0.517122 0.257921 0.862028 )
+set Vector4 with floats: ( 0.095881 -0.171933 -0.214078 -0.604841 )
+set Point3 with floats: ( -0.383831 -0.581500 0.222183 )
+set Point3 with floats: ( -0.256120 -0.678699 -0.079553 )
+set Point3 with floats: ( 0.605960 -0.633147 0.435875 )
+set Point3 with floats: ( -0.046627 -0.716491 0.267317 )
+set Quat with floats: ( -0.514874 -0.751700 0.742959 -0.793180 )
+set Quat with floats: ( 0.508814 -0.238839 0.113471 -0.843523 )
+set Quat with floats: ( -0.245250 0.250368 0.579243 -0.157280 )
+set Quat with floats: ( 0.648487 0.103833 0.456401 -0.022372 )
+set Matrix3 columns:
+( -0.004815 -0.929543 0.165140 )
+( 0.137637 -0.336303 -0.823874 )
+( -0.111879 -0.146740 0.349776 )
+set Matrix3 columns:
+( 0.174872 -0.004815 -0.929543 )
+( -0.528584 0.137637 -0.336303 )
+( 0.489292 -0.111879 -0.146740 )
+set Matrix4 columns:
+( 0.916708 -0.234370 -0.957911 0.095881 )
+( 0.728511 -0.996308 0.517122 -0.171933 )
+( -0.851140 0.433229 0.257921 -0.214078 )
+( 0.079620 -0.892684 0.862028 -0.604841 )
+set Matrix4 columns:
+( 0.095881 0.916708 -0.234370 -0.957911 )
+( -0.171933 0.728511 -0.996308 0.517122 )
+( -0.214078 -0.851140 0.433229 0.257921 )
+( -0.604841 0.079620 -0.892684 0.862028 )
+set Transform3 columns:
+( -0.004815 -0.929543 0.165140 0.174872 )
+( 0.137637 -0.336303 -0.823874 -0.528584 )
+( -0.111879 -0.146740 0.349776 0.489292 )
+set Transform3 columns:
+( 0.174872 -0.004815 -0.929543 0.165140 )
+( -0.528584 0.137637 -0.336303 -0.823874 )
+( 0.489292 -0.111879 -0.146740 0.349776 )
+construct Transform3 with Matrix3 and Vector3:
+( -0.004815 -0.929543 0.165140 -0.004815 )
+( 0.137637 -0.336303 -0.823874 0.137637 )
+( -0.111879 -0.146740 0.349776 -0.111879 )
+construct Transform3 with Quat and Vector3:
+( -1.234080 1.952660 0.427405 -0.004815 )
+( -0.404539 -0.634166 -1.933738 0.137637 )
+( -1.957525 -0.300189 -0.660294 -0.111879 )
+assign to Transform3 from Transform3:
+( 0.174872 -0.004815 -0.929543 0.165140 )
+( -0.528584 0.137637 -0.336303 -0.823874 )
+( 0.489292 -0.111879 -0.146740 0.349776 )
+set Transform3 with float:
+( -0.475631 -0.475631 -0.475631 -0.475631 )
+( -0.475631 -0.475631 -0.475631 -0.475631 )
+( -0.475631 -0.475631 -0.475631 -0.475631 )
+set Transform3 with float:
+( -0.004178 -0.004178 -0.004178 -0.004178 )
+( -0.004178 -0.004178 -0.004178 -0.004178 )
+( -0.004178 -0.004178 -0.004178 -0.004178 )
+set elements to zero:
+( 0.000000 0.000000 0.000000 0.000000 )
+( 0.000000 0.000000 0.000000 0.000000 )
+( 0.000000 0.000000 0.000000 0.000000 )
+set to identity:
+( 1.000000 0.000000 0.000000 0.000000 )
+( 0.000000 1.000000 0.000000 0.000000 )
+( 0.000000 0.000000 1.000000 0.000000 )
+set to rotationX:
+( 1.000000 0.000000 0.000000 0.000000 )
+( 0.000000 0.999782 0.020864 0.000000 )
+( 0.000000 -0.020864 0.999782 0.000000 )
+set to rotationY:
+( 0.999856 0.000000 -0.016996 0.000000 )
+( 0.000000 1.000000 0.000000 0.000000 )
+( 0.016996 0.000000 0.999856 0.000000 )
+set to rotationZ:
+( 0.765394 -0.643562 0.000000 0.000000 )
+( 0.643562 0.765394 0.000000 0.000000 )
+( 0.000000 0.000000 1.000000 0.000000 )
+set to rotation from Z,Y,X angles:
+( 0.643764 -0.756736 -0.113659 0.000000 )
+( 0.715027 0.647771 -0.262922 0.000000 )
+( 0.272588 0.087991 0.958099 0.000000 )
+set to rotation from axis angle:
+( 0.978242 0.130404 0.161363 0.000000 )
+( -0.131321 0.991327 -0.005018 0.000000 )
+( -0.160618 -0.016281 0.986882 0.000000 )
+set to translation:
+( 1.000000 0.000000 0.000000 -0.004815 )
+( 0.000000 1.000000 0.000000 0.137637 )
+( 0.000000 0.000000 1.000000 -0.111879 )
+Transform3 set col 0:
+( -0.004815 0.000000 0.000000 -0.004815 )
+( 0.137637 1.000000 0.000000 0.137637 )
+( -0.111879 0.000000 1.000000 -0.111879 )
+Transform3 set col 1:
+( -0.004815 -0.004815 0.000000 -0.004815 )
+( 0.137637 0.137637 0.000000 0.137637 )
+( -0.111879 -0.111879 1.000000 -0.111879 )
+Transform3 set col 2:
+( -0.004815 -0.004815 -0.004815 -0.004815 )
+( 0.137637 0.137637 0.137637 0.137637 )
+( -0.111879 -0.111879 -0.111879 -0.111879 )
+Transform3 set col 3:
+( -0.004815 -0.004815 -0.004815 -0.004815 )
+( 0.137637 0.137637 0.137637 0.137637 )
+( -0.111879 -0.111879 -0.111879 -0.111879 )
+Transform3 get col 0: ( -0.004815 0.137637 -0.111879 )
+Transform3 get col 1: ( -0.004815 0.137637 -0.111879 )
+Transform3 get col 2: ( -0.004815 0.137637 -0.111879 )
+Transform3 get col 3: ( -0.004815 0.137637 -0.111879 )
+Transform3 set col 0:
+( -0.929543 -0.004815 -0.004815 -0.004815 )
+( -0.336303 0.137637 0.137637 0.137637 )
+( -0.146740 -0.111879 -0.111879 -0.111879 )
+Transform3 set col 1:
+( -0.929543 -0.929543 -0.004815 -0.004815 )
+( -0.336303 -0.336303 0.137637 0.137637 )
+( -0.146740 -0.146740 -0.111879 -0.111879 )
+Transform3 set col 2:
+( -0.929543 -0.929543 -0.929543 -0.004815 )
+( -0.336303 -0.336303 -0.336303 0.137637 )
+( -0.146740 -0.146740 -0.146740 -0.111879 )
+Transform3 set col 3:
+( -0.929543 -0.929543 -0.929543 -0.929543 )
+( -0.336303 -0.336303 -0.336303 -0.336303 )
+( -0.146740 -0.146740 -0.146740 -0.146740 )
+Transform3 get col 0: ( -0.929543 -0.336303 -0.146740 )
+Transform3 get col 1: ( -0.929543 -0.336303 -0.146740 )
+Transform3 get col 2: ( -0.929543 -0.336303 -0.146740 )
+Transform3 get col 3: ( -0.929543 -0.336303 -0.146740 )
+Transform3 set row 0:
+( 0.916708 0.728511 -0.851140 0.079620 )
+( -0.336303 -0.336303 -0.336303 -0.336303 )
+( -0.146740 -0.146740 -0.146740 -0.146740 )
+Transform3 set row 1:
+( 0.916708 0.728511 -0.851140 0.079620 )
+( 0.916708 0.728511 -0.851140 0.079620 )
+( -0.146740 -0.146740 -0.146740 -0.146740 )
+Transform3 set row 2:
+( 0.916708 0.728511 -0.851140 0.079620 )
+( 0.916708 0.728511 -0.851140 0.079620 )
+( 0.916708 0.728511 -0.851140 0.079620 )
+Transform3 get row 0: ( 0.916708 0.728511 -0.851140 0.079620 )
+Transform3 get row 1: ( 0.916708 0.728511 -0.851140 0.079620 )
+Transform3 get row 2: ( 0.916708 0.728511 -0.851140 0.079620 )
+set Transform3[0]:
+( -0.004815 0.728511 -0.851140 0.079620 )
+( 0.137637 0.728511 -0.851140 0.079620 )
+( -0.111879 0.728511 -0.851140 0.079620 )
+set Transform3[1]:
+( -0.004815 -0.004815 -0.851140 0.079620 )
+( 0.137637 0.137637 -0.851140 0.079620 )
+( -0.111879 -0.111879 -0.851140 0.079620 )
+set Transform3[2]:
+( -0.004815 -0.004815 -0.004815 0.079620 )
+( 0.137637 0.137637 0.137637 0.079620 )
+( -0.111879 -0.111879 -0.111879 0.079620 )
+set Transform3[3]:
+( -0.004815 -0.004815 -0.004815 -0.004815 )
+( 0.137637 0.137637 0.137637 0.137637 )
+( -0.111879 -0.111879 -0.111879 -0.111879 )
+get Transform3[0]: ( -0.004815 0.137637 -0.111879 )
+get Transform3[1]: ( -0.004815 0.137637 -0.111879 )
+get Transform3[2]: ( -0.004815 0.137637 -0.111879 )
+get Transform3[3]: ( -0.004815 0.137637 -0.111879 )
+Transform3 set elements:
+( 0.219317 -0.567698 0.572490 0.867178 )
+( -0.118359 0.531358 -0.820417 0.934764 )
+( 0.413442 -0.387226 0.797191 0.237092 )
+0.219317
+-0.118359
+0.413442
+-0.567698
+0.531358
+-0.387226
+0.572490
+-0.820417
+0.797191
+0.867178
+0.934764
+0.237092
+set Vector3 with floats: ( -0.866162 -0.773939 0.261311 )
+set Vector3 with floats: ( -0.851570 0.114814 -0.531592 )
+set Vector3 with floats: ( 0.223925 0.869105 0.143405 )
+set Vector3 with floats: ( 0.148518 -0.071136 -0.758292 )
+set Vector4 with floats: ( -0.527633 0.997215 0.114440 0.727558 )
+set Vector4 with floats: ( -0.425760 0.459888 0.642516 -0.022534 )
+set Vector4 with floats: ( 0.186095 -0.775679 -0.683401 0.398134 )
+set Vector4 with floats: ( 0.189642 0.765986 -0.137795 -0.579844 )
+set Point3 with floats: ( -0.635647 0.374970 -0.563750 )
+set Point3 with floats: ( -0.471075 -0.553800 -0.014688 )
+set Point3 with floats: ( -0.464365 -0.107890 -0.527503 )
+set Point3 with floats: ( -0.406423 0.301261 0.499529 )
+set Quat with floats: ( 0.385180 -0.150218 0.519112 -0.203209 )
+set Quat with floats: ( -0.252017 0.282194 0.067637 0.798376 )
+set Quat with floats: ( 0.310782 0.861334 -0.980345 -0.655106 )
+set Quat with floats: ( 0.286765 0.532078 0.352671 0.540977 )
+set Matrix3 columns:
+( -0.866162 -0.851570 0.223925 )
+( -0.773939 0.114814 0.869105 )
+( 0.261311 -0.531592 0.143405 )
+set Matrix3 columns:
+( 0.148518 -0.866162 -0.851570 )
+( -0.071136 -0.773939 0.114814 )
+( -0.758292 0.261311 -0.531592 )
+set Matrix4 columns:
+( -0.527633 -0.425760 0.186095 0.189642 )
+( 0.997215 0.459888 -0.775679 0.765986 )
+( 0.114440 0.642516 -0.683401 -0.137795 )
+( 0.727558 -0.022534 0.398134 -0.579844 )
+set Matrix4 columns:
+( 0.189642 -0.527633 -0.425760 0.186095 )
+( 0.765986 0.997215 0.459888 -0.775679 )
+( -0.137795 0.114440 0.642516 -0.683401 )
+( -0.579844 0.727558 -0.022534 0.398134 )
+set Transform3 columns:
+( -0.866162 -0.851570 0.223925 0.148518 )
+( -0.773939 0.114814 0.869105 -0.071136 )
+( 0.261311 -0.531592 0.143405 -0.758292 )
+set Transform3 columns:
+( 0.148518 -0.866162 -0.851570 0.223925 )
+( -0.071136 -0.773939 0.114814 0.869105 )
+( -0.758292 0.261311 -0.531592 0.143405 )
+construct Matrix3 with Quat:
+( -0.212551 0.197745 0.956932 )
+( -0.678219 -0.734858 0.001211 )
+( 0.703449 -0.648753 0.290309 )
+construct Quat with Matrix3: ( 0.554977 -0.216439 0.747950 -0.292789 )
+assign to Matrix3 from Matrix3:
+( 0.148518 -0.866162 -0.851570 )
+( -0.071136 -0.773939 0.114814 )
+( -0.758292 0.261311 -0.531592 )
+set Matrix3 with float:
+( 0.510961 0.510961 0.510961 )
+( 0.510961 0.510961 0.510961 )
+( 0.510961 0.510961 0.510961 )
+set Matrix3 with float:
+( 0.791871 0.791871 0.791871 )
+( 0.791871 0.791871 0.791871 )
+( 0.791871 0.791871 0.791871 )
+set elements to zero:
+( 0.000000 0.000000 0.000000 )
+( 0.000000 0.000000 0.000000 )
+( 0.000000 0.000000 0.000000 )
+set to identity:
+( 1.000000 0.000000 0.000000 )
+( 0.000000 1.000000 0.000000 )
+( 0.000000 0.000000 1.000000 )
+set to rotationX:
+( 1.000000 0.000000 0.000000 )
+( 0.000000 0.844921 0.534891 )
+( 0.000000 -0.534891 0.844921 )
+set to rotationY:
+( 0.962913 0.000000 0.269813 )
+( 0.000000 1.000000 0.000000 )
+( -0.269813 0.000000 0.962913 )
+set to rotationZ:
+( 0.981168 -0.193156 0.000000 )
+( 0.193156 0.981168 0.000000 )
+( 0.000000 0.000000 1.000000 )
+set to rotation from Z,Y,X angles:
+( 0.935176 0.037174 -0.352226 )
+( 0.233454 0.683174 0.691934 )
+( 0.266354 -0.729309 0.630210 )
+set to rotation from axis angle:
+( 0.943492 0.161034 0.289639 )
+( -0.047495 0.930683 -0.362731 )
+( -0.327974 0.328477 0.885740 )
+Matrix3 set col 0:
+( -0.866162 0.161034 0.289639 )
+( -0.773939 0.930683 -0.362731 )
+( 0.261311 0.328477 0.885740 )
+Matrix3 set col 1:
+( -0.866162 -0.866162 0.289639 )
+( -0.773939 -0.773939 -0.362731 )
+( 0.261311 0.261311 0.885740 )
+Matrix3 set col 2:
+( -0.866162 -0.866162 -0.866162 )
+( -0.773939 -0.773939 -0.773939 )
+( 0.261311 0.261311 0.261311 )
+Matrix3 get col 0: ( -0.866162 -0.773939 0.261311 )
+Matrix3 get col 1: ( -0.866162 -0.773939 0.261311 )
+Matrix3 get col 2: ( -0.866162 -0.773939 0.261311 )
+Matrix3 set col 0:
+( -0.851570 -0.866162 -0.866162 )
+( 0.114814 -0.773939 -0.773939 )
+( -0.531592 0.261311 0.261311 )
+Matrix3 set col 1:
+( -0.851570 -0.851570 -0.866162 )
+( 0.114814 0.114814 -0.773939 )
+( -0.531592 -0.531592 0.261311 )
+Matrix3 set col 2:
+( -0.851570 -0.851570 -0.851570 )
+( 0.114814 0.114814 0.114814 )
+( -0.531592 -0.531592 -0.531592 )
+Matrix3 get col 0: ( -0.851570 0.114814 -0.531592 )
+Matrix3 get col 1: ( -0.851570 0.114814 -0.531592 )
+Matrix3 get col 2: ( -0.851570 0.114814 -0.531592 )
+Matrix3 set row 0:
+( -0.866162 -0.773939 0.261311 )
+( 0.114814 0.114814 0.114814 )
+( -0.531592 -0.531592 -0.531592 )
+Matrix3 set row 1:
+( -0.866162 -0.773939 0.261311 )
+( -0.866162 -0.773939 0.261311 )
+( -0.531592 -0.531592 -0.531592 )
+Matrix3 set row 2:
+( -0.866162 -0.773939 0.261311 )
+( -0.866162 -0.773939 0.261311 )
+( -0.866162 -0.773939 0.261311 )
+Matrix3 get row 0: ( -0.866162 -0.773939 0.261311 )
+Matrix3 get row 1: ( -0.866162 -0.773939 0.261311 )
+Matrix3 get row 2: ( -0.866162 -0.773939 0.261311 )
+set Matrix3[0]:
+( -0.866162 -0.773939 0.261311 )
+( -0.773939 -0.773939 0.261311 )
+( 0.261311 -0.773939 0.261311 )
+set Matrix3[1]:
+( -0.866162 -0.866162 0.261311 )
+( -0.773939 -0.773939 0.261311 )
+( 0.261311 0.261311 0.261311 )
+set Matrix3[2]:
+( -0.866162 -0.866162 -0.866162 )
+( -0.773939 -0.773939 -0.773939 )
+( 0.261311 0.261311 0.261311 )
+get Matrix3[0]: ( -0.866162 -0.773939 0.261311 )
+get Matrix3[1]: ( -0.866162 -0.773939 0.261311 )
+get Matrix3[2]: ( -0.866162 -0.773939 0.261311 )
+Matrix3 set elements:
+( -0.277798 -0.420178 0.795389 )
+( -0.032740 -0.522577 0.342900 )
+( 0.007412 0.324972 -0.913636 )
+-0.277798
+-0.032740
+0.007412
+-0.420178
+-0.522577
+0.324972
+0.795389
+0.342900
+-0.913636
+set Vector3 with floats: ( 0.675222 0.144053 -0.632329 )
+set Vector3 with floats: ( -0.947120 -0.049367 0.126333 )
+set Vector3 with floats: ( -0.664206 0.220879 0.284219 )
+set Vector3 with floats: ( -0.387216 0.913568 0.531906 )
+set Vector4 with floats: ( 0.271995 -0.862601 -0.738694 0.514248 )
+set Vector4 with floats: ( -0.039363 0.429390 -0.769469 0.281336 )
+set Vector4 with floats: ( -0.203301 0.412586 0.567926 0.410131 )
+set Vector4 with floats: ( -0.462918 0.560953 -0.731715 -0.446158 )
+set Point3 with floats: ( -0.837491 -0.573480 -0.607820 )
+set Point3 with floats: ( 0.238415 0.213445 0.098612 )
+set Point3 with floats: ( 0.135072 -0.749274 -0.855977 )
+set Point3 with floats: ( 0.765675 -0.693447 0.131554 )
+set Quat with floats: ( -0.366757 -0.330409 -0.588816 0.352533 )
+set Quat with floats: ( -0.920523 -0.915255 0.631924 -0.870739 )
+set Quat with floats: ( 0.415605 -0.180974 0.775698 0.879357 )
+set Quat with floats: ( -0.993958 -0.298452 0.876855 0.982846 )
+set Matrix3 columns:
+( 0.675222 -0.947120 -0.664206 )
+( 0.144053 -0.049367 0.220879 )
+( -0.632329 0.126333 0.284219 )
+set Matrix3 columns:
+( -0.387216 0.675222 -0.947120 )
+( 0.913568 0.144053 -0.049367 )
+( 0.531906 -0.632329 0.126333 )
+set Matrix4 columns:
+( 0.271995 -0.039363 -0.203301 -0.462918 )
+( -0.862601 0.429390 0.412586 0.560953 )
+( -0.738694 -0.769469 0.567926 -0.731715 )
+( 0.514248 0.281336 0.410131 -0.446158 )
+set Matrix4 columns:
+( -0.462918 0.271995 -0.039363 -0.203301 )
+( 0.560953 -0.862601 0.429390 0.412586 )
+( -0.731715 -0.738694 -0.769469 0.567926 )
+( -0.446158 0.514248 0.281336 0.410131 )
+set Transform3 columns:
+( 0.675222 -0.947120 -0.664206 -0.387216 )
+( 0.144053 -0.049367 0.220879 0.913568 )
+( -0.632329 0.126333 0.284219 0.531906 )
+set Transform3 columns:
+( -0.387216 0.675222 -0.947120 -0.664206 )
+( 0.913568 0.144053 -0.049367 0.220879 )
+( 0.531906 -0.632329 0.126333 0.284219 )
+construct Matrix4 with Transform3:
+( 0.675222 -0.947120 -0.664206 -0.387216 )
+( 0.144053 -0.049367 0.220879 0.913568 )
+( -0.632329 0.126333 0.284219 0.531906 )
+( 0.000000 0.000000 0.000000 1.000000 )
+construct Matrix4 with Matrix3 and Vector3:
+( 0.675222 -0.947120 -0.664206 0.675222 )
+( 0.144053 -0.049367 0.220879 0.144053 )
+( -0.632329 0.126333 0.284219 -0.632329 )
+( 0.000000 0.000000 0.000000 1.000000 )
+construct Matrix4 with Quat and Vector3:
+( 0.088252 0.657513 0.198944 0.675222 )
+( -0.172794 0.037572 0.647688 0.144053 )
+( 0.664864 0.130513 0.512639 -0.632329 )
+( 0.000000 0.000000 0.000000 1.000000 )
+assign to Matrix4 from Matrix4:
+( -0.462918 0.271995 -0.039363 -0.203301 )
+( 0.560953 -0.862601 0.429390 0.412586 )
+( -0.731715 -0.738694 -0.769469 0.567926 )
+( -0.446158 0.514248 0.281336 0.410131 )
+set Matrix4 with float:
+( -0.282564 -0.282564 -0.282564 -0.282564 )
+( -0.282564 -0.282564 -0.282564 -0.282564 )
+( -0.282564 -0.282564 -0.282564 -0.282564 )
+( -0.282564 -0.282564 -0.282564 -0.282564 )
+set Matrix4 with float:
+( 0.953469 0.953469 0.953469 0.953469 )
+( 0.953469 0.953469 0.953469 0.953469 )
+( 0.953469 0.953469 0.953469 0.953469 )
+( 0.953469 0.953469 0.953469 0.953469 )
+set elements to zero:
+( 0.000000 0.000000 0.000000 0.000000 )
+( 0.000000 0.000000 0.000000 0.000000 )
+( 0.000000 0.000000 0.000000 0.000000 )
+( 0.000000 0.000000 0.000000 0.000000 )
+set to identity:
+( 1.000000 0.000000 0.000000 0.000000 )
+( 0.000000 1.000000 0.000000 0.000000 )
+( 0.000000 0.000000 1.000000 0.000000 )
+( 0.000000 0.000000 0.000000 1.000000 )
+set to rotationX:
+( 1.000000 0.000000 0.000000 0.000000 )
+( 0.000000 0.583868 0.811849 0.000000 )
+( 0.000000 -0.811849 0.583868 0.000000 )
+( 0.000000 0.000000 0.000000 1.000000 )
+set to rotationY:
+( 0.949918 0.000000 -0.312499 0.000000 )
+( 0.000000 1.000000 0.000000 0.000000 )
+( 0.312499 0.000000 0.949918 0.000000 )
+( 0.000000 0.000000 0.000000 1.000000 )
+set to rotationZ:
+( 0.769721 -0.638380 0.000000 0.000000 )
+( 0.638380 0.769721 0.000000 0.000000 )
+( 0.000000 0.000000 1.000000 0.000000 )
+( 0.000000 0.000000 0.000000 1.000000 )
+set to rotation from Z,Y,X angles:
+( 0.911294 0.177317 0.371620 0.000000 )
+( -0.113182 0.975631 -0.187972 0.000000 )
+( -0.395894 0.129237 0.909156 0.000000 )
+( 0.000000 0.000000 0.000000 1.000000 )
+set to rotation from axis angle:
+( 0.856844 -0.448125 -0.254956 0.000000 )
+( 0.514339 0.708724 0.482872 0.000000 )
+( -0.035694 -0.544879 0.837754 0.000000 )
+( 0.000000 0.000000 0.000000 1.000000 )
+set to translation:
+( 1.000000 0.000000 0.000000 0.675222 )
+( 0.000000 1.000000 0.000000 0.144053 )
+( 0.000000 0.000000 1.000000 -0.632329 )
+( 0.000000 0.000000 0.000000 1.000000 )
+set to perspective matrix:
+( 5.824870 0.000000 0.000000 0.000000 )
+( 0.000000 -3.026335 0.000000 0.000000 )
+( 0.000000 0.000000 -2.224833 -1.188350 )
+( 0.000000 0.000000 -1.000000 0.000000 )
+set to frustum matrix:
+( -0.477514 0.000000 1.070274 0.000000 )
+( 0.000000 -24.527069 -7.801479 0.000000 )
+( 0.000000 0.000000 -0.473057 0.272420 )
+( 0.000000 0.000000 -1.000000 0.000000 )
+set to orthographic matrix:
+( 32.423248 0.000000 0.000000 23.964319 )
+( 0.000000 -2.278212 0.000000 -0.092664 )
+( 0.000000 0.000000 2.155617 -0.869770 )
+( 0.000000 0.000000 0.000000 1.000000 )
+set to look-at matrix:
+( -0.441820 0.853126 -0.277437 -0.049401 )
+( 0.544212 0.009028 -0.838899 -0.048950 )
+( -0.713182 -0.521627 -0.468270 -1.181050 )
+( 0.000000 0.000000 0.000000 1.000000 )
+Matrix4 set col 0:
+( 0.271995 0.853126 -0.277437 -0.049401 )
+( -0.862601 0.009028 -0.838899 -0.048950 )
+( -0.738694 -0.521627 -0.468270 -1.181050 )
+( 0.514248 0.000000 0.000000 1.000000 )
+Matrix4 set col 1:
+( 0.271995 0.271995 -0.277437 -0.049401 )
+( -0.862601 -0.862601 -0.838899 -0.048950 )
+( -0.738694 -0.738694 -0.468270 -1.181050 )
+( 0.514248 0.514248 0.000000 1.000000 )
+Matrix4 set col 2:
+( 0.271995 0.271995 0.271995 -0.049401 )
+( -0.862601 -0.862601 -0.862601 -0.048950 )
+( -0.738694 -0.738694 -0.738694 -1.181050 )
+( 0.514248 0.514248 0.514248 1.000000 )
+Matrix4 set col 3:
+( 0.271995 0.271995 0.271995 0.271995 )
+( -0.862601 -0.862601 -0.862601 -0.862601 )
+( -0.738694 -0.738694 -0.738694 -0.738694 )
+( 0.514248 0.514248 0.514248 0.514248 )
+Matrix4 get col 0: ( 0.271995 -0.862601 -0.738694 0.514248 )
+Matrix4 get col 1: ( 0.271995 -0.862601 -0.738694 0.514248 )
+Matrix4 get col 2: ( 0.271995 -0.862601 -0.738694 0.514248 )
+Matrix4 get col 3: ( 0.271995 -0.862601 -0.738694 0.514248 )
+Matrix4 set col 0:
+( -0.039363 0.271995 0.271995 0.271995 )
+( 0.429390 -0.862601 -0.862601 -0.862601 )
+( -0.769469 -0.738694 -0.738694 -0.738694 )
+( 0.281336 0.514248 0.514248 0.514248 )
+Matrix4 set col 1:
+( -0.039363 -0.039363 0.271995 0.271995 )
+( 0.429390 0.429390 -0.862601 -0.862601 )
+( -0.769469 -0.769469 -0.738694 -0.738694 )
+( 0.281336 0.281336 0.514248 0.514248 )
+Matrix4 set col 2:
+( -0.039363 -0.039363 -0.039363 0.271995 )
+( 0.429390 0.429390 0.429390 -0.862601 )
+( -0.769469 -0.769469 -0.769469 -0.738694 )
+( 0.281336 0.281336 0.281336 0.514248 )
+Matrix4 set col 3:
+( -0.039363 -0.039363 -0.039363 -0.039363 )
+( 0.429390 0.429390 0.429390 0.429390 )
+( -0.769469 -0.769469 -0.769469 -0.769469 )
+( 0.281336 0.281336 0.281336 0.281336 )
+Matrix4 get col 0: ( -0.039363 0.429390 -0.769469 0.281336 )
+Matrix4 get col 1: ( -0.039363 0.429390 -0.769469 0.281336 )
+Matrix4 get col 2: ( -0.039363 0.429390 -0.769469 0.281336 )
+Matrix4 get col 3: ( -0.039363 0.429390 -0.769469 0.281336 )
+Matrix4 set row 0:
+( 0.271995 -0.862601 -0.738694 0.514248 )
+( 0.429390 0.429390 0.429390 0.429390 )
+( -0.769469 -0.769469 -0.769469 -0.769469 )
+( 0.281336 0.281336 0.281336 0.281336 )
+Matrix4 set row 1:
+( 0.271995 -0.862601 -0.738694 0.514248 )
+( 0.271995 -0.862601 -0.738694 0.514248 )
+( -0.769469 -0.769469 -0.769469 -0.769469 )
+( 0.281336 0.281336 0.281336 0.281336 )
+Matrix4 set row 2:
+( 0.271995 -0.862601 -0.738694 0.514248 )
+( 0.271995 -0.862601 -0.738694 0.514248 )
+( 0.271995 -0.862601 -0.738694 0.514248 )
+( 0.281336 0.281336 0.281336 0.281336 )
+Matrix4 set row 3:
+( 0.271995 -0.862601 -0.738694 0.514248 )
+( 0.271995 -0.862601 -0.738694 0.514248 )
+( 0.271995 -0.862601 -0.738694 0.514248 )
+( 0.271995 -0.862601 -0.738694 0.514248 )
+Matrix4 get row 0: ( 0.271995 -0.862601 -0.738694 0.514248 )
+Matrix4 get row 1: ( 0.271995 -0.862601 -0.738694 0.514248 )
+Matrix4 get row 2: ( 0.271995 -0.862601 -0.738694 0.514248 )
+Matrix4 get row 3: ( 0.271995 -0.862601 -0.738694 0.514248 )
+set Matrix4[0]:
+( 0.271995 -0.862601 -0.738694 0.514248 )
+( -0.862601 -0.862601 -0.738694 0.514248 )
+( -0.738694 -0.862601 -0.738694 0.514248 )
+( 0.514248 -0.862601 -0.738694 0.514248 )
+set Matrix4[1]:
+( 0.271995 0.271995 -0.738694 0.514248 )
+( -0.862601 -0.862601 -0.738694 0.514248 )
+( -0.738694 -0.738694 -0.738694 0.514248 )
+( 0.514248 0.514248 -0.738694 0.514248 )
+set Matrix4[2]:
+( 0.271995 0.271995 0.271995 0.514248 )
+( -0.862601 -0.862601 -0.862601 0.514248 )
+( -0.738694 -0.738694 -0.738694 0.514248 )
+( 0.514248 0.514248 0.514248 0.514248 )
+set Matrix4[3]:
+( 0.271995 0.271995 0.271995 0.271995 )
+( -0.862601 -0.862601 -0.862601 -0.862601 )
+( -0.738694 -0.738694 -0.738694 -0.738694 )
+( 0.514248 0.514248 0.514248 0.514248 )
+get Matrix4[0]: ( 0.271995 -0.862601 -0.738694 0.514248 )
+get Matrix4[1]: ( 0.271995 -0.862601 -0.738694 0.514248 )
+get Matrix4[2]: ( 0.271995 -0.862601 -0.738694 0.514248 )
+get Matrix4[3]: ( 0.271995 -0.862601 -0.738694 0.514248 )
+Matrix4 set elements:
+( -0.702364 -0.967562 -0.470750 -0.712890 )
+( -0.182602 -0.520296 -0.846580 -0.064487 )
+( -0.832807 0.160191 -0.705751 0.444065 )
+( 0.278191 -0.677990 -0.825368 -0.045226 )
+-0.702364
+-0.182602
+-0.832807
+0.278191
+-0.967562
+-0.520296
+0.160191
+-0.677990
+-0.470750
+-0.846580
+-0.705751
+-0.825368
+-0.712890
+-0.064487
+0.444065
+-0.045226
+set Vector3 with floats: ( 0.116544 -0.007285 -0.838230 )
+set Vector3 with floats: ( -0.410767 -0.409299 -0.336683 )
+set Vector3 with floats: ( -0.830700 -0.801729 -0.595153 )
+set Vector3 with floats: ( -0.784672 -0.653655 0.670791 )
+set Vector4 with floats: ( 0.653571 0.850716 0.071433 -0.057715 )
+set Vector4 with floats: ( 0.401895 0.016861 0.535438 0.965901 )
+set Vector4 with floats: ( -0.072675 0.341689 0.781662 0.707322 )
+set Vector4 with floats: ( 0.505889 0.432790 -0.825793 0.597719 )
+set Point3 with floats: ( -0.250905 -0.085712 -0.272847 )
+set Point3 with floats: ( 0.552644 0.194203 -0.884427 )
+set Point3 with floats: ( -0.756791 0.121107 0.312483 )
+set Point3 with floats: ( -0.873317 0.045081 0.687444 )
+set Quat with floats: ( -0.684626 -0.994047 0.800366 -0.565640 )
+set Quat with floats: ( -0.625626 0.932585 0.636928 0.420250 )
+set Quat with floats: ( 0.175863 0.362158 0.547496 -0.534423 )
+set Quat with floats: ( -0.563030 -0.254488 0.647778 -0.674240 )
+set Matrix3 columns:
+( 0.116544 -0.410767 -0.830700 )
+( -0.007285 -0.409299 -0.801729 )
+( -0.838230 -0.336683 -0.595153 )
+set Matrix3 columns:
+( -0.784672 0.116544 -0.410767 )
+( -0.653655 -0.007285 -0.409299 )
+( 0.670791 -0.838230 -0.336683 )
+set Matrix4 columns:
+( 0.653571 0.401895 -0.072675 0.505889 )
+( 0.850716 0.016861 0.341689 0.432790 )
+( 0.071433 0.535438 0.781662 -0.825793 )
+( -0.057715 0.965901 0.707322 0.597719 )
+set Matrix4 columns:
+( 0.505889 0.653571 0.401895 -0.072675 )
+( 0.432790 0.850716 0.016861 0.341689 )
+( -0.825793 0.071433 0.535438 0.781662 )
+( 0.597719 -0.057715 0.965901 0.707322 )
+set Transform3 columns:
+( 0.116544 -0.410767 -0.830700 -0.784672 )
+( -0.007285 -0.409299 -0.801729 -0.653655 )
+( -0.838230 -0.336683 -0.595153 0.670791 )
+set Transform3 columns:
+( -0.784672 0.116544 -0.410767 -0.830700 )
+( -0.653655 -0.007285 -0.409299 -0.801729 )
+( 0.670791 -0.838230 -0.336683 -0.595153 )
+construct Transform3 with Matrix3 and Vector3:
+( 0.116544 -0.410767 -0.830700 0.116544 )
+( -0.007285 -0.409299 -0.801729 -0.007285 )
+( -0.838230 -0.336683 -0.595153 -0.838230 )
+construct Transform3 with Quat and Vector3:
+( -2.257427 2.266537 0.028644 0.116544 )
+( 0.455662 -1.218594 -2.365705 -0.007285 )
+( -2.220447 -0.816698 -1.913682 -0.838230 )
+assign to Transform3 from Transform3:
+( -0.784672 0.116544 -0.410767 -0.830700 )
+( -0.653655 -0.007285 -0.409299 -0.801729 )
+( 0.670791 -0.838230 -0.336683 -0.595153 )
+set Transform3 with float:
+( 0.224345 0.224345 0.224345 0.224345 )
+( 0.224345 0.224345 0.224345 0.224345 )
+( 0.224345 0.224345 0.224345 0.224345 )
+set Transform3 with float:
+( -0.754563 -0.754563 -0.754563 -0.754563 )
+( -0.754563 -0.754563 -0.754563 -0.754563 )
+( -0.754563 -0.754563 -0.754563 -0.754563 )
+set elements to zero:
+( 0.000000 0.000000 0.000000 0.000000 )
+( 0.000000 0.000000 0.000000 0.000000 )
+( 0.000000 0.000000 0.000000 0.000000 )
+set to identity:
+( 1.000000 0.000000 0.000000 0.000000 )
+( 0.000000 1.000000 0.000000 0.000000 )
+( 0.000000 0.000000 1.000000 0.000000 )
+set to rotationX:
+( 1.000000 0.000000 0.000000 0.000000 )
+( 0.000000 0.991715 0.128461 0.000000 )
+( 0.000000 -0.128461 0.991715 0.000000 )
+set to rotationY:
+( 0.542120 0.000000 -0.840301 0.000000 )
+( 0.000000 1.000000 0.000000 0.000000 )
+( 0.840301 0.000000 0.542120 0.000000 )
+set to rotationZ:
+( 0.999319 -0.036908 0.000000 0.000000 )
+( 0.036908 0.999319 0.000000 0.000000 )
+( 0.000000 0.000000 1.000000 0.000000 )
+set to rotation from Z,Y,X angles:
+( 0.927336 -0.248591 0.279733 0.000000 )
+( -0.072059 0.614889 0.785314 0.000000 )
+( -0.367227 -0.748408 0.552296 0.000000 )
+set to rotation from axis angle:
+( 0.982382 -0.186883 -0.000825 0.000000 )
+( 0.186841 0.982043 0.026134 0.000000 )
+( -0.004073 -0.025827 0.999658 0.000000 )
+set to translation:
+( 1.000000 0.000000 0.000000 0.116544 )
+( 0.000000 1.000000 0.000000 -0.007285 )
+( 0.000000 0.000000 1.000000 -0.838230 )
+Transform3 set col 0:
+( 0.116544 0.000000 0.000000 0.116544 )
+( -0.007285 1.000000 0.000000 -0.007285 )
+( -0.838230 0.000000 1.000000 -0.838230 )
+Transform3 set col 1:
+( 0.116544 0.116544 0.000000 0.116544 )
+( -0.007285 -0.007285 0.000000 -0.007285 )
+( -0.838230 -0.838230 1.000000 -0.838230 )
+Transform3 set col 2:
+( 0.116544 0.116544 0.116544 0.116544 )
+( -0.007285 -0.007285 -0.007285 -0.007285 )
+( -0.838230 -0.838230 -0.838230 -0.838230 )
+Transform3 set col 3:
+( 0.116544 0.116544 0.116544 0.116544 )
+( -0.007285 -0.007285 -0.007285 -0.007285 )
+( -0.838230 -0.838230 -0.838230 -0.838230 )
+Transform3 get col 0: ( 0.116544 -0.007285 -0.838230 )
+Transform3 get col 1: ( 0.116544 -0.007285 -0.838230 )
+Transform3 get col 2: ( 0.116544 -0.007285 -0.838230 )
+Transform3 get col 3: ( 0.116544 -0.007285 -0.838230 )
+Transform3 set col 0:
+( -0.410767 0.116544 0.116544 0.116544 )
+( -0.409299 -0.007285 -0.007285 -0.007285 )
+( -0.336683 -0.838230 -0.838230 -0.838230 )
+Transform3 set col 1:
+( -0.410767 -0.410767 0.116544 0.116544 )
+( -0.409299 -0.409299 -0.007285 -0.007285 )
+( -0.336683 -0.336683 -0.838230 -0.838230 )
+Transform3 set col 2:
+( -0.410767 -0.410767 -0.410767 0.116544 )
+( -0.409299 -0.409299 -0.409299 -0.007285 )
+( -0.336683 -0.336683 -0.336683 -0.838230 )
+Transform3 set col 3:
+( -0.410767 -0.410767 -0.410767 -0.410767 )
+( -0.409299 -0.409299 -0.409299 -0.409299 )
+( -0.336683 -0.336683 -0.336683 -0.336683 )
+Transform3 get col 0: ( -0.410767 -0.409299 -0.336683 )
+Transform3 get col 1: ( -0.410767 -0.409299 -0.336683 )
+Transform3 get col 2: ( -0.410767 -0.409299 -0.336683 )
+Transform3 get col 3: ( -0.410767 -0.409299 -0.336683 )
+Transform3 set row 0:
+( 0.653571 0.850716 0.071433 -0.057715 )
+( -0.409299 -0.409299 -0.409299 -0.409299 )
+( -0.336683 -0.336683 -0.336683 -0.336683 )
+Transform3 set row 1:
+( 0.653571 0.850716 0.071433 -0.057715 )
+( 0.653571 0.850716 0.071433 -0.057715 )
+( -0.336683 -0.336683 -0.336683 -0.336683 )
+Transform3 set row 2:
+( 0.653571 0.850716 0.071433 -0.057715 )
+( 0.653571 0.850716 0.071433 -0.057715 )
+( 0.653571 0.850716 0.071433 -0.057715 )
+Transform3 get row 0: ( 0.653571 0.850716 0.071433 -0.057715 )
+Transform3 get row 1: ( 0.653571 0.850716 0.071433 -0.057715 )
+Transform3 get row 2: ( 0.653571 0.850716 0.071433 -0.057715 )
+set Transform3[0]:
+( 0.116544 0.850716 0.071433 -0.057715 )
+( -0.007285 0.850716 0.071433 -0.057715 )
+( -0.838230 0.850716 0.071433 -0.057715 )
+set Transform3[1]:
+( 0.116544 0.116544 0.071433 -0.057715 )
+( -0.007285 -0.007285 0.071433 -0.057715 )
+( -0.838230 -0.838230 0.071433 -0.057715 )
+set Transform3[2]:
+( 0.116544 0.116544 0.116544 -0.057715 )
+( -0.007285 -0.007285 -0.007285 -0.057715 )
+( -0.838230 -0.838230 -0.838230 -0.057715 )
+set Transform3[3]:
+( 0.116544 0.116544 0.116544 0.116544 )
+( -0.007285 -0.007285 -0.007285 -0.007285 )
+( -0.838230 -0.838230 -0.838230 -0.838230 )
+get Transform3[0]: ( 0.116544 -0.007285 -0.838230 )
+get Transform3[1]: ( 0.116544 -0.007285 -0.838230 )
+get Transform3[2]: ( 0.116544 -0.007285 -0.838230 )
+get Transform3[3]: ( 0.116544 -0.007285 -0.838230 )
+Transform3 set elements:
+( 0.007649 -0.271484 -0.836501 -0.287826 )
+( -0.440298 -0.259070 0.229093 0.942655 )
+( 0.994796 0.902149 0.586393 -0.634432 )
+0.007649
+-0.440298
+0.994796
+-0.271484
+-0.259070
+0.902149
+-0.836501
+0.229093
+0.586393
+-0.287826
+0.942655
+-0.634432
+
+ __end__ 
diff --git a/Extras/vectormathlibrary/tests/test2_soa_c.c b/Extras/vectormathlibrary/tests/test2_soa_c.c
new file mode 100644
index 000000000..6c64bf187
--- /dev/null
+++ b/Extras/vectormathlibrary/tests/test2_soa_c.c
@@ -0,0 +1,760 @@
+/*
+  Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+  All rights reserved.
+
+  Redistribution and use in source and binary forms,
+  with or without modification, are permitted provided that the
+  following conditions are met:
+   * Redistributions of source code must retain the above copyright
+     notice, this list of conditions and the following disclaimer.
+   * Redistributions in binary form must reproduce the above copyright
+     notice, this list of conditions and the following disclaimer in the
+     documentation and/or other materials provided with the distribution.
+   * Neither the name of the Sony Computer Entertainment Inc nor the names
+     of its contributors may be used to endorse or promote products derived
+     from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+  POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#define _VECTORMATH_SOA_TEST  /* defined ahead of the includes below; NOTE(review): presumably switches test.h to its SoA helpers -- confirm */
+
+#include "vectormath_soa.h"
+#include "test.h"
+
+int iteration = 0;  /* file-scope counter, starts at 0; not referenced by Matrix3/Matrix4_methods_test -- NOTE(review): presumably consumed by test.h helpers or main(), confirm */
+
+void
+Matrix3_methods_test()  /* Exercises the SoA Matrix3 C API: builds operands, then prints the result of each constructor and accessor call. */
+{  /* NOTE(review): all operands come from successive randfloat() calls, so statement order likely fixes the printed values -- confirm before reordering anything. */
+    VmathSoaMatrix3 a_Matrix3, b_Matrix3;
+    VmathSoaMatrix4 a_Matrix4, b_Matrix4;
+    VmathSoaTransform3 a_Transform3, b_Transform3;
+    VmathSoaVector3 a_Vector3, b_Vector3, c_Vector3, d_Vector3;
+    VmathSoaVector4 a_Vector4, b_Vector4, c_Vector4, d_Vector4;
+    VmathSoaPoint3 a_Point3, b_Point3, c_Point3, d_Point3;
+    VmathSoaQuat a_Quat, b_Quat, c_Quat, d_Quat, tmpQ_0;
+    VmathSoaVector3 tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3, tmpV3_4, tmpV3_5, tmpV3_6, tmpV3_7, tmpV3_8, tmpV3_9, tmpV3_10, tmpV3_11, tmpV3_12, tmpV3_13;  /* one temporary per getter/helper result below */
+    vec_float4 rndflt1, rndflt2, rndflt3, rndflt4, rndflt5, rndflt6;
+    rndflt1 = randfloat();  /* ---- operand setup (same preamble as Matrix4_methods_test): four Vector3 values ---- */
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaV3MakeFromElems( &a_Vector3, rndflt1, rndflt2, rndflt3 );
+    rndflt4 = randfloat();  /* b_Vector3 is built from rndflt4..rndflt6 */
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathSoaV3MakeFromElems( &b_Vector3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaV3MakeFromElems( &c_Vector3, rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaV3MakeFromElems( &d_Vector3, rndflt1, rndflt2, rndflt3 );
+    vmathSoaV3Prints( &a_Vector3, "set Vector3 with floats" );  /* print the four Vector3 operands */
+    vmathSoaV3Prints( &b_Vector3, "set Vector3 with floats" );
+    vmathSoaV3Prints( &c_Vector3, "set Vector3 with floats" );
+    vmathSoaV3Prints( &d_Vector3, "set Vector3 with floats" );
+    rndflt1 = randfloat();  /* ---- four Vector4 operands ---- */
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaV4MakeFromElems( &a_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt3 = randfloat();  /* b_Vector4 is built from rndflt3..rndflt6 rather than rndflt1..rndflt4 */
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathSoaV4MakeFromElems( &b_Vector4, rndflt3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaV4MakeFromElems( &c_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaV4MakeFromElems( &d_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    vmathSoaV4Prints( &a_Vector4, "set Vector4 with floats" );  /* print the four Vector4 operands */
+    vmathSoaV4Prints( &b_Vector4, "set Vector4 with floats" );
+    vmathSoaV4Prints( &c_Vector4, "set Vector4 with floats" );
+    vmathSoaV4Prints( &d_Vector4, "set Vector4 with floats" );
+    rndflt1 = randfloat();  /* ---- four Point3 operands ---- */
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaP3MakeFromElems( &a_Point3, rndflt1, rndflt2, rndflt3 );
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathSoaP3MakeFromElems( &b_Point3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaP3MakeFromElems( &c_Point3, rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaP3MakeFromElems( &d_Point3, rndflt1, rndflt2, rndflt3 );
+    vmathSoaP3Prints( &a_Point3, "set Point3 with floats" );  /* print the four Point3 operands */
+    vmathSoaP3Prints( &b_Point3, "set Point3 with floats" );
+    vmathSoaP3Prints( &c_Point3, "set Point3 with floats" );
+    vmathSoaP3Prints( &d_Point3, "set Point3 with floats" );
+    rndflt1 = randfloat();  /* ---- four Quat operands ---- */
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaQMakeFromElems( &a_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt3 = randfloat();  /* b_Quat is built from rndflt3..rndflt6 */
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathSoaQMakeFromElems( &b_Quat, rndflt3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaQMakeFromElems( &c_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaQMakeFromElems( &d_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    vmathSoaQPrints( &a_Quat, "set Quat with floats" );  /* print the four Quat operands */
+    vmathSoaQPrints( &b_Quat, "set Quat with floats" );
+    vmathSoaQPrints( &c_Quat, "set Quat with floats" );
+    vmathSoaQPrints( &d_Quat, "set Quat with floats" );
+    vmathSoaM3MakeFromCols( &a_Matrix3, &a_Vector3, &b_Vector3, &c_Vector3 );  /* ---- matrices/transforms assembled from the vectors above ---- */
+    vmathSoaM3MakeFromCols( &b_Matrix3, &d_Vector3, &a_Vector3, &b_Vector3 );  /* b_ variants take the vectors in rotated order (d, a, b, ...) */
+    vmathSoaM3Prints( &a_Matrix3, "set Matrix3 columns" );
+    vmathSoaM3Prints( &b_Matrix3, "set Matrix3 columns" );
+    vmathSoaM4MakeFromCols( &a_Matrix4, &a_Vector4, &b_Vector4, &c_Vector4, &d_Vector4 );
+    vmathSoaM4MakeFromCols( &b_Matrix4, &d_Vector4, &a_Vector4, &b_Vector4, &c_Vector4 );
+    vmathSoaM4Prints( &a_Matrix4, "set Matrix4 columns" );
+    vmathSoaM4Prints( &b_Matrix4, "set Matrix4 columns" );
+    vmathSoaT3MakeFromCols( &a_Transform3, &a_Vector3, &b_Vector3, &c_Vector3, &d_Vector3 );
+    vmathSoaT3MakeFromCols( &b_Transform3, &d_Vector3, &a_Vector3, &b_Vector3, &c_Vector3 );
+    vmathSoaT3Prints( &a_Transform3, "set Transform3 columns" );
+    vmathSoaT3Prints( &b_Transform3, "set Transform3 columns" );
+    vmathSoaQNormalize( &tmpQ_0, &a_Quat );  /* ---- Matrix3 constructors; a_Quat is normalized into tmpQ_0 first (presumably the conversion expects a unit quaternion -- confirm) ---- */
+    vmathSoaM3MakeFromQ( &a_Matrix3, &tmpQ_0 );
+    vmathSoaM3Prints( &a_Matrix3, "construct Matrix3 with Quat" );
+    vmathSoaQMakeFromM3( &a_Quat, &a_Matrix3 );  /* convert back: overwrites a_Quat from the matrix just built */
+    vmathSoaQPrints( &a_Quat, "construct Quat with Matrix3" );
+    vmathSoaM3Copy( &a_Matrix3, &b_Matrix3 );
+    vmathSoaM3Prints( &a_Matrix3, "assign to Matrix3 from Matrix3" );
+    vmathSoaM3MakeFromScalar( &a_Matrix3, randfloat() );  /* scalar constructor is run twice, each with a fresh random value */
+    vmathSoaM3Prints( &a_Matrix3, "set Matrix3 with float" );
+    vmathSoaM3MakeFromScalar( &a_Matrix3, randfloat() );
+    vmathSoaM3Prints( &a_Matrix3, "set Matrix3 with float" );
+    vmathSoaM3MakeFromScalar( &a_Matrix3, (vec_float4){0.0f} );  /* {0.0f} names only the first element; assuming vec_float4 is a vector/aggregate type, C zero-initializes the rest */
+    vmathSoaM3Prints( &a_Matrix3, "set elements to zero" );
+    vmathSoaM3MakeIdentity( &a_Matrix3 );
+    vmathSoaM3Prints( &a_Matrix3, "set to identity" );
+    vmathSoaM3MakeRotationX( &a_Matrix3, randfloat() );  /* single-axis rotations, each with its own random argument */
+    vmathSoaM3Prints( &a_Matrix3, "set to rotationX" );
+    vmathSoaM3MakeRotationY( &a_Matrix3, randfloat() );
+    vmathSoaM3Prints( &a_Matrix3, "set to rotationY" );
+    vmathSoaM3MakeRotationZ( &a_Matrix3, randfloat() );
+    vmathSoaM3Prints( &a_Matrix3, "set to rotationZ" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaV3MakeFromElems( &tmpV3_0, rndflt3, rndflt2, rndflt1 );  /* note the reversed element order: (rndflt3, rndflt2, rndflt1) */
+    vmathSoaM3MakeRotationZYX( &a_Matrix3, &tmpV3_0 );
+    vmathSoaM3Prints( &a_Matrix3, "set to rotation from Z,Y,X angles" );
+    vmathSoaV3Normalize( &tmpV3_1, &a_Vector3 );  /* the axis argument is a_Vector3 normalized */
+    vmathSoaM3MakeRotationAxis( &a_Matrix3, randfloat(), &tmpV3_1 );
+    vmathSoaM3Prints( &a_Matrix3, "set to rotation from axis angle" );
+    vmathSoaM3SetCol0( &a_Matrix3, &a_Vector3 );  /* ---- named column setters (SetCol0/1/2) with a_Vector3, printing after each ---- */
+    vmathSoaM3Prints( &a_Matrix3, "Matrix3 set col 0" );
+    vmathSoaM3SetCol1( &a_Matrix3, &a_Vector3 );
+    vmathSoaM3Prints( &a_Matrix3, "Matrix3 set col 1" );
+    vmathSoaM3SetCol2( &a_Matrix3, &a_Vector3 );
+    vmathSoaM3Prints( &a_Matrix3, "Matrix3 set col 2" );
+    vmathSoaM3GetCol0( &tmpV3_2, &a_Matrix3 );  /* named column getters */
+    vmathSoaV3Prints( &tmpV3_2, "Matrix3 get col 0" );
+    vmathSoaM3GetCol1( &tmpV3_3, &a_Matrix3 );
+    vmathSoaV3Prints( &tmpV3_3, "Matrix3 get col 1" );
+    vmathSoaM3GetCol2( &tmpV3_4, &a_Matrix3 );
+    vmathSoaV3Prints( &tmpV3_4, "Matrix3 get col 2" );
+    vmathSoaM3SetCol( &a_Matrix3, 0, &b_Vector3 );  /* ---- indexed column setter, columns 0..2, now with b_Vector3 ---- */
+    vmathSoaM3Prints( &a_Matrix3, "Matrix3 set col 0" );
+    vmathSoaM3SetCol( &a_Matrix3, 1, &b_Vector3 );
+    vmathSoaM3Prints( &a_Matrix3, "Matrix3 set col 1" );
+    vmathSoaM3SetCol( &a_Matrix3, 2, &b_Vector3 );
+    vmathSoaM3Prints( &a_Matrix3, "Matrix3 set col 2" );
+    vmathSoaM3GetCol( &tmpV3_5, &a_Matrix3, 0 );  /* indexed column getter */
+    vmathSoaV3Prints( &tmpV3_5, "Matrix3 get col 0" );
+    vmathSoaM3GetCol( &tmpV3_6, &a_Matrix3, 1 );
+    vmathSoaV3Prints( &tmpV3_6, "Matrix3 get col 1" );
+    vmathSoaM3GetCol( &tmpV3_7, &a_Matrix3, 2 );
+    vmathSoaV3Prints( &tmpV3_7, "Matrix3 get col 2" );
+    vmathSoaM3SetRow( &a_Matrix3, 0, &a_Vector3 );  /* ---- indexed row setter, rows 0..2, with a_Vector3 ---- */
+    vmathSoaM3Prints( &a_Matrix3, "Matrix3 set row 0" );
+    vmathSoaM3SetRow( &a_Matrix3, 1, &a_Vector3 );
+    vmathSoaM3Prints( &a_Matrix3, "Matrix3 set row 1" );
+    vmathSoaM3SetRow( &a_Matrix3, 2, &a_Vector3 );
+    vmathSoaM3Prints( &a_Matrix3, "Matrix3 set row 2" );
+    vmathSoaM3GetRow( &tmpV3_8, &a_Matrix3, 0 );  /* indexed row getter */
+    vmathSoaV3Prints( &tmpV3_8, "Matrix3 get row 0" );
+    vmathSoaM3GetRow( &tmpV3_9, &a_Matrix3, 1 );
+    vmathSoaV3Prints( &tmpV3_9, "Matrix3 get row 1" );
+    vmathSoaM3GetRow( &tmpV3_10, &a_Matrix3, 2 );
+    vmathSoaV3Prints( &tmpV3_10, "Matrix3 get row 2" );
+    vmathSoaM3SetCol( &a_Matrix3, 0, &a_Vector3 );  /* ---- second SetCol/GetCol pass with a_Vector3; NOTE(review): the "set "/"get " labels look truncated -- check the reference output before touching them ---- */
+    vmathSoaM3Prints( &a_Matrix3, "set " );
+    vmathSoaM3SetCol( &a_Matrix3, 1, &a_Vector3 );
+    vmathSoaM3Prints( &a_Matrix3, "set " );
+    vmathSoaM3SetCol( &a_Matrix3, 2, &a_Vector3 );
+    vmathSoaM3Prints( &a_Matrix3, "set " );
+    vmathSoaM3GetCol( &tmpV3_11, &a_Matrix3, 0 );
+    vmathSoaV3Prints( &tmpV3_11, "get " );
+    vmathSoaM3GetCol( &tmpV3_12, &a_Matrix3, 1 );
+    vmathSoaV3Prints( &tmpV3_12, "get " );
+    vmathSoaM3GetCol( &tmpV3_13, &a_Matrix3, 2 );
+    vmathSoaV3Prints( &tmpV3_13, "get " );
+    vmathSoaM3SetElem( &a_Matrix3, 0, 0, randfloat() );  /* ---- element setter: index pairs (0,0)..(2,2), first index outermost, one randfloat() per element ---- */
+    vmathSoaM3SetElem( &a_Matrix3, 0, 1, randfloat() );
+    vmathSoaM3SetElem( &a_Matrix3, 0, 2, randfloat() );
+    vmathSoaM3SetElem( &a_Matrix3, 1, 0, randfloat() );
+    vmathSoaM3SetElem( &a_Matrix3, 1, 1, randfloat() );
+    vmathSoaM3SetElem( &a_Matrix3, 1, 2, randfloat() );
+    vmathSoaM3SetElem( &a_Matrix3, 2, 0, randfloat() );
+    vmathSoaM3SetElem( &a_Matrix3, 2, 1, randfloat() );
+    vmathSoaM3SetElem( &a_Matrix3, 2, 2, randfloat() );
+    vmathSoaM3Prints( &a_Matrix3, "Matrix3 set elements" );
+    printf("%f\n", getfloat(vmathSoaM3GetElem( &a_Matrix3, 0, 0 )) );  /* read every element back in the same index order; getfloat() presumably reduces the vec_float4 to one float for %f -- confirm in test.h */
+    printf("%f\n", getfloat(vmathSoaM3GetElem( &a_Matrix3, 0, 1 )) );
+    printf("%f\n", getfloat(vmathSoaM3GetElem( &a_Matrix3, 0, 2 )) );
+    printf("%f\n", getfloat(vmathSoaM3GetElem( &a_Matrix3, 1, 0 )) );
+    printf("%f\n", getfloat(vmathSoaM3GetElem( &a_Matrix3, 1, 1 )) );
+    printf("%f\n", getfloat(vmathSoaM3GetElem( &a_Matrix3, 1, 2 )) );
+    printf("%f\n", getfloat(vmathSoaM3GetElem( &a_Matrix3, 2, 0 )) );
+    printf("%f\n", getfloat(vmathSoaM3GetElem( &a_Matrix3, 2, 1 )) );
+    printf("%f\n", getfloat(vmathSoaM3GetElem( &a_Matrix3, 2, 2 )) );
+}
+
+void
+Matrix4_methods_test()  /* Exercises the SoA Matrix4 C API: builds operands, then prints the result of each constructor and accessor call. */
+{  /* NOTE(review): all operands come from successive randfloat() calls, so statement order likely fixes the printed values -- confirm before reordering anything. */
+    VmathSoaMatrix3 a_Matrix3, b_Matrix3;
+    VmathSoaMatrix4 a_Matrix4, b_Matrix4;
+    VmathSoaTransform3 a_Transform3, b_Transform3;
+    VmathSoaVector3 a_Vector3, b_Vector3, c_Vector3, d_Vector3;
+    VmathSoaVector4 a_Vector4, b_Vector4, c_Vector4, d_Vector4;
+    VmathSoaPoint3 a_Point3, b_Point3, c_Point3, d_Point3;
+    VmathSoaQuat a_Quat, b_Quat, c_Quat, d_Quat;
+    VmathSoaVector3 tmpV3_0, tmpV3_1;  /* temporaries for the ZYX-angles vector and the normalized rotation axis */
+    VmathSoaVector4 tmpV4_0, tmpV4_1, tmpV4_2, tmpV4_3, tmpV4_4, tmpV4_5, tmpV4_6, tmpV4_7, tmpV4_8, tmpV4_9, tmpV4_10, tmpV4_11, tmpV4_12, tmpV4_13, tmpV4_14, tmpV4_15;  /* one temporary per column/row getter result below */
+    vec_float4 rndflt1, rndflt2, rndflt3, rndflt4, rndflt5, rndflt6;
+    rndflt1 = randfloat();  /* ---- operand setup (same preamble as Matrix3_methods_test): four Vector3 values ---- */
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaV3MakeFromElems( &a_Vector3, rndflt1, rndflt2, rndflt3 );
+    rndflt4 = randfloat();  /* b_Vector3 is built from rndflt4..rndflt6 */
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathSoaV3MakeFromElems( &b_Vector3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaV3MakeFromElems( &c_Vector3, rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaV3MakeFromElems( &d_Vector3, rndflt1, rndflt2, rndflt3 );
+    vmathSoaV3Prints( &a_Vector3, "set Vector3 with floats" );  /* print the four Vector3 operands */
+    vmathSoaV3Prints( &b_Vector3, "set Vector3 with floats" );
+    vmathSoaV3Prints( &c_Vector3, "set Vector3 with floats" );
+    vmathSoaV3Prints( &d_Vector3, "set Vector3 with floats" );
+    rndflt1 = randfloat();  /* ---- four Vector4 operands ---- */
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaV4MakeFromElems( &a_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt3 = randfloat();  /* b_Vector4 is built from rndflt3..rndflt6 rather than rndflt1..rndflt4 */
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathSoaV4MakeFromElems( &b_Vector4, rndflt3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaV4MakeFromElems( &c_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaV4MakeFromElems( &d_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    vmathSoaV4Prints( &a_Vector4, "set Vector4 with floats" );  /* print the four Vector4 operands */
+    vmathSoaV4Prints( &b_Vector4, "set Vector4 with floats" );
+    vmathSoaV4Prints( &c_Vector4, "set Vector4 with floats" );
+    vmathSoaV4Prints( &d_Vector4, "set Vector4 with floats" );
+    rndflt1 = randfloat();  /* ---- four Point3 operands ---- */
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaP3MakeFromElems( &a_Point3, rndflt1, rndflt2, rndflt3 );
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathSoaP3MakeFromElems( &b_Point3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaP3MakeFromElems( &c_Point3, rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaP3MakeFromElems( &d_Point3, rndflt1, rndflt2, rndflt3 );
+    vmathSoaP3Prints( &a_Point3, "set Point3 with floats" );  /* print the four Point3 operands */
+    vmathSoaP3Prints( &b_Point3, "set Point3 with floats" );
+    vmathSoaP3Prints( &c_Point3, "set Point3 with floats" );
+    vmathSoaP3Prints( &d_Point3, "set Point3 with floats" );
+    rndflt1 = randfloat();  /* ---- four Quat operands ---- */
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaQMakeFromElems( &a_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt3 = randfloat();  /* b_Quat is built from rndflt3..rndflt6 */
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathSoaQMakeFromElems( &b_Quat, rndflt3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaQMakeFromElems( &c_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaQMakeFromElems( &d_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    vmathSoaQPrints( &a_Quat, "set Quat with floats" );  /* print the four Quat operands */
+    vmathSoaQPrints( &b_Quat, "set Quat with floats" );
+    vmathSoaQPrints( &c_Quat, "set Quat with floats" );
+    vmathSoaQPrints( &d_Quat, "set Quat with floats" );
+    vmathSoaM3MakeFromCols( &a_Matrix3, &a_Vector3, &b_Vector3, &c_Vector3 );  /* ---- matrices/transforms assembled from the vectors above ---- */
+    vmathSoaM3MakeFromCols( &b_Matrix3, &d_Vector3, &a_Vector3, &b_Vector3 );  /* b_ variants take the vectors in rotated order (d, a, b, ...) */
+    vmathSoaM3Prints( &a_Matrix3, "set Matrix3 columns" );
+    vmathSoaM3Prints( &b_Matrix3, "set Matrix3 columns" );
+    vmathSoaM4MakeFromCols( &a_Matrix4, &a_Vector4, &b_Vector4, &c_Vector4, &d_Vector4 );
+    vmathSoaM4MakeFromCols( &b_Matrix4, &d_Vector4, &a_Vector4, &b_Vector4, &c_Vector4 );
+    vmathSoaM4Prints( &a_Matrix4, "set Matrix4 columns" );
+    vmathSoaM4Prints( &b_Matrix4, "set Matrix4 columns" );
+    vmathSoaT3MakeFromCols( &a_Transform3, &a_Vector3, &b_Vector3, &c_Vector3, &d_Vector3 );
+    vmathSoaT3MakeFromCols( &b_Transform3, &d_Vector3, &a_Vector3, &b_Vector3, &c_Vector3 );
+    vmathSoaT3Prints( &a_Transform3, "set Transform3 columns" );
+    vmathSoaT3Prints( &b_Transform3, "set Transform3 columns" );
+    vmathSoaM4MakeFromT3( &a_Matrix4, &a_Transform3 );  /* ---- Matrix4 constructors from other types; each overwrites a_Matrix4 and is printed ---- */
+    vmathSoaM4Prints( &a_Matrix4, "construct Matrix4 with Transform3" );
+    vmathSoaM4MakeFromM3V3( &a_Matrix4, &a_Matrix3, &a_Vector3 );
+    vmathSoaM4Prints( &a_Matrix4, "construct Matrix4 with Matrix3 and Vector3" );
+    vmathSoaM4MakeFromQV3( &a_Matrix4, &a_Quat, &a_Vector3 );  /* NOTE(review): a_Quat is passed as built from random elements, with no normalize step in this function -- confirm that is intended */
+    vmathSoaM4Prints( &a_Matrix4, "construct Matrix4 with Quat and Vector3" );
+    vmathSoaM4Copy( &a_Matrix4, &b_Matrix4 );
+    vmathSoaM4Prints( &a_Matrix4, "assign to Matrix4 from Matrix4" );
+    vmathSoaM4MakeFromScalar( &a_Matrix4, randfloat() );  /* scalar constructor is run twice, each with a fresh random value */
+    vmathSoaM4Prints( &a_Matrix4, "set Matrix4 with float" );
+    vmathSoaM4MakeFromScalar( &a_Matrix4, randfloat() );
+    vmathSoaM4Prints( &a_Matrix4, "set Matrix4 with float" );
+    vmathSoaM4MakeFromScalar( &a_Matrix4, (vec_float4){0.0f} );  /* {0.0f} names only the first element; assuming vec_float4 is a vector/aggregate type, C zero-initializes the rest */
+    vmathSoaM4Prints( &a_Matrix4, "set elements to zero" );
+    vmathSoaM4MakeIdentity( &a_Matrix4 );
+    vmathSoaM4Prints( &a_Matrix4, "set to identity" );
+    vmathSoaM4MakeRotationX( &a_Matrix4, randfloat() );  /* single-axis rotations, each with its own random argument */
+    vmathSoaM4Prints( &a_Matrix4, "set to rotationX" );
+    vmathSoaM4MakeRotationY( &a_Matrix4, randfloat() );
+    vmathSoaM4Prints( &a_Matrix4, "set to rotationY" );
+    vmathSoaM4MakeRotationZ( &a_Matrix4, randfloat() );
+    vmathSoaM4Prints( &a_Matrix4, "set to rotationZ" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaV3MakeFromElems( &tmpV3_0, rndflt3, rndflt2, rndflt1 );  /* note the reversed element order: (rndflt3, rndflt2, rndflt1) */
+    vmathSoaM4MakeRotationZYX( &a_Matrix4, &tmpV3_0 );
+    vmathSoaM4Prints( &a_Matrix4, "set to rotation from Z,Y,X angles" );
+    vmathSoaV3Normalize( &tmpV3_1, &a_Vector3 );  /* the axis argument is a_Vector3 normalized */
+    vmathSoaM4MakeRotationAxis( &a_Matrix4, randfloat(), &tmpV3_1 );
+    vmathSoaM4Prints( &a_Matrix4, "set to rotation from axis angle" );
+    vmathSoaM4MakeTranslation( &a_Matrix4, &a_Vector3 );
+    vmathSoaM4Prints( &a_Matrix4, "set to translation" );
+    rndflt1 = randfloat();  /* ---- projection-style constructors, fed unconstrained random arguments ---- */
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaM4MakePerspective( &a_Matrix4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    vmathSoaM4Prints( &a_Matrix4, "set to perspective matrix" );
+    rndflt1 = randfloat();  /* six fresh random arguments for the frustum constructor */
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathSoaM4MakeFrustum( &a_Matrix4, rndflt1, rndflt2, rndflt3, rndflt4, rndflt5, rndflt6 );
+    vmathSoaM4Prints( &a_Matrix4, "set to frustum matrix" );
+    rndflt1 = randfloat();  /* six fresh random arguments for the orthographic constructor */
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathSoaM4MakeOrthographic( &a_Matrix4, rndflt1, rndflt2, rndflt3, rndflt4, rndflt5, rndflt6 );
+    vmathSoaM4Prints( &a_Matrix4, "set to orthographic matrix" );
+    vmathSoaM4MakeLookAt( &a_Matrix4, &a_Point3, &b_Point3, &a_Vector3 );  /* look-at built from two of the random points plus a_Vector3 */
+    vmathSoaM4Prints( &a_Matrix4, "set to look-at matrix" );
+    vmathSoaM4SetCol0( &a_Matrix4, &a_Vector4 );  /* ---- named column setters (SetCol0..3) with a_Vector4, printing after each ---- */
+    vmathSoaM4Prints( &a_Matrix4, "Matrix4 set col 0" );
+    vmathSoaM4SetCol1( &a_Matrix4, &a_Vector4 );
+    vmathSoaM4Prints( &a_Matrix4, "Matrix4 set col 1" );
+    vmathSoaM4SetCol2( &a_Matrix4, &a_Vector4 );
+    vmathSoaM4Prints( &a_Matrix4, "Matrix4 set col 2" );
+    vmathSoaM4SetCol3( &a_Matrix4, &a_Vector4 );
+    vmathSoaM4Prints( &a_Matrix4, "Matrix4 set col 3" );
+    vmathSoaM4GetCol0( &tmpV4_0, &a_Matrix4 );  /* named column getters */
+    vmathSoaV4Prints( &tmpV4_0, "Matrix4 get col 0" );
+    vmathSoaM4GetCol1( &tmpV4_1, &a_Matrix4 );
+    vmathSoaV4Prints( &tmpV4_1, "Matrix4 get col 1" );
+    vmathSoaM4GetCol2( &tmpV4_2, &a_Matrix4 );
+    vmathSoaV4Prints( &tmpV4_2, "Matrix4 get col 2" );
+    vmathSoaM4GetCol3( &tmpV4_3, &a_Matrix4 );
+    vmathSoaV4Prints( &tmpV4_3, "Matrix4 get col 3" );
+    vmathSoaM4SetCol( &a_Matrix4, 0, &b_Vector4 );  /* ---- indexed column setter, columns 0..3, now with b_Vector4 ---- */
+    vmathSoaM4Prints( &a_Matrix4, "Matrix4 set col 0" );
+    vmathSoaM4SetCol( &a_Matrix4, 1, &b_Vector4 );
+    vmathSoaM4Prints( &a_Matrix4, "Matrix4 set col 1" );
+    vmathSoaM4SetCol( &a_Matrix4, 2, &b_Vector4 );
+    vmathSoaM4Prints( &a_Matrix4, "Matrix4 set col 2" );
+    vmathSoaM4SetCol( &a_Matrix4, 3, &b_Vector4 );
+    vmathSoaM4Prints( &a_Matrix4, "Matrix4 set col 3" );
+    vmathSoaM4GetCol( &tmpV4_4, &a_Matrix4, 0 );  /* indexed column getter */
+    vmathSoaV4Prints( &tmpV4_4, "Matrix4 get col 0" );
+    vmathSoaM4GetCol( &tmpV4_5, &a_Matrix4, 1 );
+    vmathSoaV4Prints( &tmpV4_5, "Matrix4 get col 1" );
+    vmathSoaM4GetCol( &tmpV4_6, &a_Matrix4, 2 );
+    vmathSoaV4Prints( &tmpV4_6, "Matrix4 get col 2" );
+    vmathSoaM4GetCol( &tmpV4_7, &a_Matrix4, 3 );
+    vmathSoaV4Prints( &tmpV4_7, "Matrix4 get col 3" );
+    vmathSoaM4SetRow( &a_Matrix4, 0, &a_Vector4 );  /* ---- indexed row setter, rows 0..3, with a_Vector4 ---- */
+    vmathSoaM4Prints( &a_Matrix4, "Matrix4 set row 0" );
+    vmathSoaM4SetRow( &a_Matrix4, 1, &a_Vector4 );
+    vmathSoaM4Prints( &a_Matrix4, "Matrix4 set row 1" );
+    vmathSoaM4SetRow( &a_Matrix4, 2, &a_Vector4 );
+    vmathSoaM4Prints( &a_Matrix4, "Matrix4 set row 2" );
+    vmathSoaM4SetRow( &a_Matrix4, 3, &a_Vector4 );
+    vmathSoaM4Prints( &a_Matrix4, "Matrix4 set row 3" );
+    vmathSoaM4GetRow( &tmpV4_8, &a_Matrix4, 0 );  /* indexed row getter */
+    vmathSoaV4Prints( &tmpV4_8, "Matrix4 get row 0" );
+    vmathSoaM4GetRow( &tmpV4_9, &a_Matrix4, 1 );
+    vmathSoaV4Prints( &tmpV4_9, "Matrix4 get row 1" );
+    vmathSoaM4GetRow( &tmpV4_10, &a_Matrix4, 2 );
+    vmathSoaV4Prints( &tmpV4_10, "Matrix4 get row 2" );
+    vmathSoaM4GetRow( &tmpV4_11, &a_Matrix4, 3 );
+    vmathSoaV4Prints( &tmpV4_11, "Matrix4 get row 3" );
+    vmathSoaM4SetCol( &a_Matrix4, 0, &a_Vector4 );  /* ---- second SetCol/GetCol pass with a_Vector4; NOTE(review): the "set "/"get " labels look truncated -- check the reference output before touching them ---- */
+    vmathSoaM4Prints( &a_Matrix4, "set " );
+    vmathSoaM4SetCol( &a_Matrix4, 1, &a_Vector4 );
+    vmathSoaM4Prints( &a_Matrix4, "set " );
+    vmathSoaM4SetCol( &a_Matrix4, 2, &a_Vector4 );
+    vmathSoaM4Prints( &a_Matrix4, "set " );
+    vmathSoaM4SetCol( &a_Matrix4, 3, &a_Vector4 );
+    vmathSoaM4Prints( &a_Matrix4, "set " );
+    vmathSoaM4GetCol( &tmpV4_12, &a_Matrix4, 0 );
+    vmathSoaV4Prints( &tmpV4_12, "get " );
+    vmathSoaM4GetCol( &tmpV4_13, &a_Matrix4, 1 );
+    vmathSoaV4Prints( &tmpV4_13, "get " );
+    vmathSoaM4GetCol( &tmpV4_14, &a_Matrix4, 2 );
+    vmathSoaV4Prints( &tmpV4_14, "get " );
+    vmathSoaM4GetCol( &tmpV4_15, &a_Matrix4, 3 );
+    vmathSoaV4Prints( &tmpV4_15, "get " );
+    vmathSoaM4SetElem( &a_Matrix4, 0, 0, randfloat() );  /* ---- element setter: index pairs (0,0)..(3,3), first index outermost, one randfloat() per element ---- */
+    vmathSoaM4SetElem( &a_Matrix4, 0, 1, randfloat() );
+    vmathSoaM4SetElem( &a_Matrix4, 0, 2, randfloat() );
+    vmathSoaM4SetElem( &a_Matrix4, 0, 3, randfloat() );
+    vmathSoaM4SetElem( &a_Matrix4, 1, 0, randfloat() );
+    vmathSoaM4SetElem( &a_Matrix4, 1, 1, randfloat() );
+    vmathSoaM4SetElem( &a_Matrix4, 1, 2, randfloat() );
+    vmathSoaM4SetElem( &a_Matrix4, 1, 3, randfloat() );
+    vmathSoaM4SetElem( &a_Matrix4, 2, 0, randfloat() );
+    vmathSoaM4SetElem( &a_Matrix4, 2, 1, randfloat() );
+    vmathSoaM4SetElem( &a_Matrix4, 2, 2, randfloat() );
+    vmathSoaM4SetElem( &a_Matrix4, 2, 3, randfloat() );
+    vmathSoaM4SetElem( &a_Matrix4, 3, 0, randfloat() );
+    vmathSoaM4SetElem( &a_Matrix4, 3, 1, randfloat() );
+    vmathSoaM4SetElem( &a_Matrix4, 3, 2, randfloat() );
+    vmathSoaM4SetElem( &a_Matrix4, 3, 3, randfloat() );
+    vmathSoaM4Prints( &a_Matrix4, "Matrix4 set elements" );
+    printf("%f\n", getfloat(vmathSoaM4GetElem( &a_Matrix4, 0, 0 )) );  /* read every element back in the same index order; getfloat() presumably reduces the vec_float4 to one float for %f -- confirm in test.h */
+    printf("%f\n", getfloat(vmathSoaM4GetElem( &a_Matrix4, 0, 1 )) );
+    printf("%f\n", getfloat(vmathSoaM4GetElem( &a_Matrix4, 0, 2 )) );
+    printf("%f\n", getfloat(vmathSoaM4GetElem( &a_Matrix4, 0, 3 )) );
+    printf("%f\n", getfloat(vmathSoaM4GetElem( &a_Matrix4, 1, 0 )) );
+    printf("%f\n", getfloat(vmathSoaM4GetElem( &a_Matrix4, 1, 1 )) );
+    printf("%f\n", getfloat(vmathSoaM4GetElem( &a_Matrix4, 1, 2 )) );
+    printf("%f\n", getfloat(vmathSoaM4GetElem( &a_Matrix4, 1, 3 )) );
+    printf("%f\n", getfloat(vmathSoaM4GetElem( &a_Matrix4, 2, 0 )) );
+    printf("%f\n", getfloat(vmathSoaM4GetElem( &a_Matrix4, 2, 1 )) );
+    printf("%f\n", getfloat(vmathSoaM4GetElem( &a_Matrix4, 2, 2 )) );
+    printf("%f\n", getfloat(vmathSoaM4GetElem( &a_Matrix4, 2, 3 )) );
+    printf("%f\n", getfloat(vmathSoaM4GetElem( &a_Matrix4, 3, 0 )) );
+    printf("%f\n", getfloat(vmathSoaM4GetElem( &a_Matrix4, 3, 1 )) );
+    printf("%f\n", getfloat(vmathSoaM4GetElem( &a_Matrix4, 3, 2 )) );
+    printf("%f\n", getfloat(vmathSoaM4GetElem( &a_Matrix4, 3, 3 )) );
+}
+
+void
+Transform3_methods_test()
+{
+    VmathSoaMatrix3 a_Matrix3, b_Matrix3;
+    VmathSoaMatrix4 a_Matrix4, b_Matrix4;
+    VmathSoaTransform3 a_Transform3, b_Transform3;
+    VmathSoaVector3 a_Vector3, b_Vector3, c_Vector3, d_Vector3;
+    VmathSoaVector4 a_Vector4, b_Vector4, c_Vector4, d_Vector4;
+    VmathSoaPoint3 a_Point3, b_Point3, c_Point3, d_Point3;
+    VmathSoaQuat a_Quat, b_Quat, c_Quat, d_Quat;
+    VmathSoaVector3 tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3, tmpV3_4, tmpV3_5, tmpV3_6, tmpV3_7, tmpV3_8, tmpV3_9;
+    VmathSoaVector4 tmpV4_0, tmpV4_1, tmpV4_2;
+    VmathSoaVector3 tmpV3_10, tmpV3_11, tmpV3_12, tmpV3_13;
+    vec_float4 rndflt1, rndflt2, rndflt3, rndflt4, rndflt5, rndflt6;
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaV3MakeFromElems( &a_Vector3, rndflt1, rndflt2, rndflt3 );
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathSoaV3MakeFromElems( &b_Vector3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaV3MakeFromElems( &c_Vector3, rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaV3MakeFromElems( &d_Vector3, rndflt1, rndflt2, rndflt3 );
+    vmathSoaV3Prints( &a_Vector3, "set Vector3 with floats" );
+    vmathSoaV3Prints( &b_Vector3, "set Vector3 with floats" );
+    vmathSoaV3Prints( &c_Vector3, "set Vector3 with floats" );
+    vmathSoaV3Prints( &d_Vector3, "set Vector3 with floats" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaV4MakeFromElems( &a_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathSoaV4MakeFromElems( &b_Vector4, rndflt3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaV4MakeFromElems( &c_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaV4MakeFromElems( &d_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    vmathSoaV4Prints( &a_Vector4, "set Vector4 with floats" );
+    vmathSoaV4Prints( &b_Vector4, "set Vector4 with floats" );
+    vmathSoaV4Prints( &c_Vector4, "set Vector4 with floats" );
+    vmathSoaV4Prints( &d_Vector4, "set Vector4 with floats" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaP3MakeFromElems( &a_Point3, rndflt1, rndflt2, rndflt3 );
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathSoaP3MakeFromElems( &b_Point3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaP3MakeFromElems( &c_Point3, rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaP3MakeFromElems( &d_Point3, rndflt1, rndflt2, rndflt3 );
+    vmathSoaP3Prints( &a_Point3, "set Point3 with floats" );
+    vmathSoaP3Prints( &b_Point3, "set Point3 with floats" );
+    vmathSoaP3Prints( &c_Point3, "set Point3 with floats" );
+    vmathSoaP3Prints( &d_Point3, "set Point3 with floats" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaQMakeFromElems( &a_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathSoaQMakeFromElems( &b_Quat, rndflt3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaQMakeFromElems( &c_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaQMakeFromElems( &d_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    vmathSoaQPrints( &a_Quat, "set Quat with floats" );
+    vmathSoaQPrints( &b_Quat, "set Quat with floats" );
+    vmathSoaQPrints( &c_Quat, "set Quat with floats" );
+    vmathSoaQPrints( &d_Quat, "set Quat with floats" );
+    vmathSoaM3MakeFromCols( &a_Matrix3, &a_Vector3, &b_Vector3, &c_Vector3 );
+    vmathSoaM3MakeFromCols( &b_Matrix3, &d_Vector3, &a_Vector3, &b_Vector3 );
+    vmathSoaM3Prints( &a_Matrix3, "set Matrix3 columns" );
+    vmathSoaM3Prints( &b_Matrix3, "set Matrix3 columns" );
+    vmathSoaM4MakeFromCols( &a_Matrix4, &a_Vector4, &b_Vector4, &c_Vector4, &d_Vector4 );
+    vmathSoaM4MakeFromCols( &b_Matrix4, &d_Vector4, &a_Vector4, &b_Vector4, &c_Vector4 );
+    vmathSoaM4Prints( &a_Matrix4, "set Matrix4 columns" );
+    vmathSoaM4Prints( &b_Matrix4, "set Matrix4 columns" );
+    vmathSoaT3MakeFromCols( &a_Transform3, &a_Vector3, &b_Vector3, &c_Vector3, &d_Vector3 );
+    vmathSoaT3MakeFromCols( &b_Transform3, &d_Vector3, &a_Vector3, &b_Vector3, &c_Vector3 );
+    vmathSoaT3Prints( &a_Transform3, "set Transform3 columns" );
+    vmathSoaT3Prints( &b_Transform3, "set Transform3 columns" );
+    vmathSoaT3MakeFromM3V3( &a_Transform3, &a_Matrix3, &a_Vector3 );
+    vmathSoaT3Prints( &a_Transform3, "construct Transform3 with Matrix3 and Vector3" );
+    vmathSoaT3MakeFromQV3( &a_Transform3, &a_Quat, &a_Vector3 );
+    vmathSoaT3Prints( &a_Transform3, "construct Transform3 with Quat and Vector3" );
+    vmathSoaT3Copy( &a_Transform3, &b_Transform3 );
+    vmathSoaT3Prints( &a_Transform3, "assign to Transform3 from Transform3" );
+    vmathSoaT3MakeFromScalar( &a_Transform3, randfloat() );
+    vmathSoaT3Prints( &a_Transform3, "set Transform3 with float" );
+    vmathSoaT3MakeFromScalar( &a_Transform3, randfloat() );
+    vmathSoaT3Prints( &a_Transform3, "set Transform3 with float" );
+    vmathSoaT3MakeFromScalar( &a_Transform3, (vec_float4){0.0f} );
+    vmathSoaT3Prints( &a_Transform3, "set elements to zero" );
+    vmathSoaT3MakeIdentity( &a_Transform3 );
+    vmathSoaT3Prints( &a_Transform3, "set to identity" );
+    vmathSoaT3MakeRotationX( &a_Transform3, randfloat() );
+    vmathSoaT3Prints( &a_Transform3, "set to rotationX" );
+    vmathSoaT3MakeRotationY( &a_Transform3, randfloat() );
+    vmathSoaT3Prints( &a_Transform3, "set to rotationY" );
+    vmathSoaT3MakeRotationZ( &a_Transform3, randfloat() );
+    vmathSoaT3Prints( &a_Transform3, "set to rotationZ" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaV3MakeFromElems( &tmpV3_0, rndflt3, rndflt2, rndflt1 );
+    vmathSoaT3MakeRotationZYX( &a_Transform3, &tmpV3_0 );
+    vmathSoaT3Prints( &a_Transform3, "set to rotation from Z,Y,X angles" );
+    vmathSoaV3Normalize( &tmpV3_1, &a_Vector3 );
+    vmathSoaT3MakeRotationAxis( &a_Transform3, randfloat(), &tmpV3_1 );
+    vmathSoaT3Prints( &a_Transform3, "set to rotation from axis angle" );
+    vmathSoaT3MakeTranslation( &a_Transform3, &a_Vector3 );
+    vmathSoaT3Prints( &a_Transform3, "set to translation" );
+    vmathSoaT3SetCol0( &a_Transform3, &a_Vector3 );
+    vmathSoaT3Prints( &a_Transform3, "Transform3 set col 0" );
+    vmathSoaT3SetCol1( &a_Transform3, &a_Vector3 );
+    vmathSoaT3Prints( &a_Transform3, "Transform3 set col 1" );
+    vmathSoaT3SetCol2( &a_Transform3, &a_Vector3 );
+    vmathSoaT3Prints( &a_Transform3, "Transform3 set col 2" );
+    vmathSoaT3SetCol3( &a_Transform3, &a_Vector3 );
+    vmathSoaT3Prints( &a_Transform3, "Transform3 set col 3" );
+    vmathSoaT3GetCol0( &tmpV3_2, &a_Transform3 );
+    vmathSoaV3Prints( &tmpV3_2, "Transform3 get col 0" );
+    vmathSoaT3GetCol1( &tmpV3_3, &a_Transform3 );
+    vmathSoaV3Prints( &tmpV3_3, "Transform3 get col 1" );
+    vmathSoaT3GetCol2( &tmpV3_4, &a_Transform3 );
+    vmathSoaV3Prints( &tmpV3_4, "Transform3 get col 2" );
+    vmathSoaT3GetCol3( &tmpV3_5, &a_Transform3 );
+    vmathSoaV3Prints( &tmpV3_5, "Transform3 get col 3" );
+    vmathSoaT3SetCol( &a_Transform3, 0, &b_Vector3 );
+    vmathSoaT3Prints( &a_Transform3, "Transform3 set col 0" );
+    vmathSoaT3SetCol( &a_Transform3, 1, &b_Vector3 );
+    vmathSoaT3Prints( &a_Transform3, "Transform3 set col 1" );
+    vmathSoaT3SetCol( &a_Transform3, 2, &b_Vector3 );
+    vmathSoaT3Prints( &a_Transform3, "Transform3 set col 2" );
+    vmathSoaT3SetCol( &a_Transform3, 3, &b_Vector3 );
+    vmathSoaT3Prints( &a_Transform3, "Transform3 set col 3" );
+    vmathSoaT3GetCol( &tmpV3_6, &a_Transform3, 0 );
+    vmathSoaV3Prints( &tmpV3_6, "Transform3 get col 0" );
+    vmathSoaT3GetCol( &tmpV3_7, &a_Transform3, 1 );
+    vmathSoaV3Prints( &tmpV3_7, "Transform3 get col 1" );
+    vmathSoaT3GetCol( &tmpV3_8, &a_Transform3, 2 );
+    vmathSoaV3Prints( &tmpV3_8, "Transform3 get col 2" );
+    vmathSoaT3GetCol( &tmpV3_9, &a_Transform3, 3 );
+    vmathSoaV3Prints( &tmpV3_9, "Transform3 get col 3" );
+    vmathSoaT3SetRow( &a_Transform3, 0, &a_Vector4 );
+    vmathSoaT3Prints( &a_Transform3, "Transform3 set row 0" );
+    vmathSoaT3SetRow( &a_Transform3, 1, &a_Vector4 );
+    vmathSoaT3Prints( &a_Transform3, "Transform3 set row 1" );
+    vmathSoaT3SetRow( &a_Transform3, 2, &a_Vector4 );
+    vmathSoaT3Prints( &a_Transform3, "Transform3 set row 2" );
+    vmathSoaT3GetRow( &tmpV4_0, &a_Transform3, 0 );
+    vmathSoaV4Prints( &tmpV4_0, "Transform3 get row 0" );
+    vmathSoaT3GetRow( &tmpV4_1, &a_Transform3, 1 );
+    vmathSoaV4Prints( &tmpV4_1, "Transform3 get row 1" );
+    vmathSoaT3GetRow( &tmpV4_2, &a_Transform3, 2 );
+    vmathSoaV4Prints( &tmpV4_2, "Transform3 get row 2" );
+    vmathSoaT3SetCol( &a_Transform3, 0, &a_Vector3 );
+    vmathSoaT3Prints( &a_Transform3, "set " );
+    vmathSoaT3SetCol( &a_Transform3, 1, &a_Vector3 );
+    vmathSoaT3Prints( &a_Transform3, "set " );
+    vmathSoaT3SetCol( &a_Transform3, 2, &a_Vector3 );
+    vmathSoaT3Prints( &a_Transform3, "set " );
+    vmathSoaT3SetCol( &a_Transform3, 3, &a_Vector3 );
+    vmathSoaT3Prints( &a_Transform3, "set " );
+    vmathSoaT3GetCol( &tmpV3_10, &a_Transform3, 0 );
+    vmathSoaV3Prints( &tmpV3_10, "get " );
+    vmathSoaT3GetCol( &tmpV3_11, &a_Transform3, 1 );
+    vmathSoaV3Prints( &tmpV3_11, "get " );
+    vmathSoaT3GetCol( &tmpV3_12, &a_Transform3, 2 );
+    vmathSoaV3Prints( &tmpV3_12, "get " );
+    vmathSoaT3GetCol( &tmpV3_13, &a_Transform3, 3 );
+    vmathSoaV3Prints( &tmpV3_13, "get " );
+    vmathSoaT3SetElem( &a_Transform3, 0, 0, randfloat() );
+    vmathSoaT3SetElem( &a_Transform3, 0, 1, randfloat() );
+    vmathSoaT3SetElem( &a_Transform3, 0, 2, randfloat() );
+    vmathSoaT3SetElem( &a_Transform3, 1, 0, randfloat() );
+    vmathSoaT3SetElem( &a_Transform3, 1, 1, randfloat() );
+    vmathSoaT3SetElem( &a_Transform3, 1, 2, randfloat() );
+    vmathSoaT3SetElem( &a_Transform3, 2, 0, randfloat() );
+    vmathSoaT3SetElem( &a_Transform3, 2, 1, randfloat() );
+    vmathSoaT3SetElem( &a_Transform3, 2, 2, randfloat() );
+    vmathSoaT3SetElem( &a_Transform3, 3, 0, randfloat() );
+    vmathSoaT3SetElem( &a_Transform3, 3, 1, randfloat() );
+    vmathSoaT3SetElem( &a_Transform3, 3, 2, randfloat() );
+    vmathSoaT3Prints( &a_Transform3, "Transform3 set elements" );
+    printf("%f\n", getfloat(vmathSoaT3GetElem( &a_Transform3, 0, 0 )) );
+    printf("%f\n", getfloat(vmathSoaT3GetElem( &a_Transform3, 0, 1 )) );
+    printf("%f\n", getfloat(vmathSoaT3GetElem( &a_Transform3, 0, 2 )) );
+    printf("%f\n", getfloat(vmathSoaT3GetElem( &a_Transform3, 1, 0 )) );
+    printf("%f\n", getfloat(vmathSoaT3GetElem( &a_Transform3, 1, 1 )) );
+    printf("%f\n", getfloat(vmathSoaT3GetElem( &a_Transform3, 1, 2 )) );
+    printf("%f\n", getfloat(vmathSoaT3GetElem( &a_Transform3, 2, 0 )) );
+    printf("%f\n", getfloat(vmathSoaT3GetElem( &a_Transform3, 2, 1 )) );
+    printf("%f\n", getfloat(vmathSoaT3GetElem( &a_Transform3, 2, 2 )) );
+    printf("%f\n", getfloat(vmathSoaT3GetElem( &a_Transform3, 3, 0 )) );
+    printf("%f\n", getfloat(vmathSoaT3GetElem( &a_Transform3, 3, 1 )) );
+    printf("%f\n", getfloat(vmathSoaT3GetElem( &a_Transform3, 3, 2 )) );
+}
+
+int main()  /* Test driver: prints a begin marker, runs the three method suites twice, prints an end marker. */
+{
+    int passes_left = 2;  /* number of complete suite runs still to perform */
+    printf("\n __begin__ \n");
+    while ( passes_left-- > 0 ) {
+        Matrix3_methods_test();
+        Matrix4_methods_test();
+        Transform3_methods_test();
+    }
+    printf("\n __end__ \n");
+    return 0;  /* always reports success; results are only printed, never checked here */
+}
diff --git a/Extras/vectormathlibrary/tests/test2_soa_cpp.cpp b/Extras/vectormathlibrary/tests/test2_soa_cpp.cpp
new file mode 100644
index 000000000..6ec58159b
--- /dev/null
+++ b/Extras/vectormathlibrary/tests/test2_soa_cpp.cpp
@@ -0,0 +1,718 @@
+/*
+  Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+  All rights reserved.
+
+  Redistribution and use in source and binary forms,
+  with or without modification, are permitted provided that the
+  following conditions are met:
+   * Redistributions of source code must retain the above copyright
+     notice, this list of conditions and the following disclaimer.
+   * Redistributions in binary form must reproduce the above copyright
+     notice, this list of conditions and the following disclaimer in the
+     documentation and/or other materials provided with the distribution.
+   * Neither the name of the Sony Computer Entertainment Inc nor the names
+     of its contributors may be used to endorse or promote products derived
+     from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+  POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#define _VECTORMATH_SOA_TEST
+
+#include "vectormath_soa.h"
+#include "test.h"
+
+int iteration = 0;
+
+using namespace Vectormath;
+using namespace Vectormath::Soa;
+
+void
+Matrix3_methods_test()  /* Smoke test: fills SoA operands with randfloat() values, then prints the result of each Matrix3 constructor, static factory and accessor. */
+{
+    Matrix3 a_Matrix3, b_Matrix3;  /* every operand type below is filled and printed; only Matrix3 is exercised in depth */
+    Matrix4 a_Matrix4, b_Matrix4;
+    Transform3 a_Transform3, b_Transform3;
+    Vector3 a_Vector3, b_Vector3, c_Vector3, d_Vector3;
+    Vector4 a_Vector4, b_Vector4, c_Vector4, d_Vector4;
+    Point3 a_Point3, b_Point3, c_Point3, d_Point3;
+    Quat a_Quat, b_Quat, c_Quat, d_Quat;
+    vec_float4 rndflt1, rndflt2, rndflt3, rndflt4, rndflt5, rndflt6;  /* scratch slots for randfloat() results */
+    rndflt1 = randfloat();  /* values are drawn one statement at a time before use -- presumably to pin the randfloat() call order, since argument evaluation order is unspecified */
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    a_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 );
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    b_Vector3 = Vector3( rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    c_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    d_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 );
+    print( a_Vector3, "set Vector3 with floats" );
+    print( b_Vector3, "set Vector3 with floats" );
+    print( c_Vector3, "set Vector3 with floats" );
+    print( d_Vector3, "set Vector3 with floats" );
+    rndflt1 = randfloat();  /* same pattern for the four Vector4 operands */
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    a_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt3 = randfloat();  /* overwrites rndflt3/rndflt4, which were just consumed by a_Vector4 */
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    b_Vector4 = Vector4( rndflt3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    c_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    d_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 );
+    print( a_Vector4, "set Vector4 with floats" );
+    print( b_Vector4, "set Vector4 with floats" );
+    print( c_Vector4, "set Vector4 with floats" );
+    print( d_Vector4, "set Vector4 with floats" );
+    rndflt1 = randfloat();  /* same pattern for the four Point3 operands */
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    a_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    b_Point3 = Point3( rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    c_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    d_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    print( a_Point3, "set Point3 with floats" );
+    print( b_Point3, "set Point3 with floats" );
+    print( c_Point3, "set Point3 with floats" );
+    print( d_Point3, "set Point3 with floats" );
+    rndflt1 = randfloat();  /* same pattern for the four Quat operands */
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    a_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    b_Quat = Quat( rndflt3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    c_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    d_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    print( a_Quat, "set Quat with floats" );
+    print( b_Quat, "set Quat with floats" );
+    print( c_Quat, "set Quat with floats" );
+    print( d_Quat, "set Quat with floats" );
+    a_Matrix3 = Matrix3( a_Vector3, b_Vector3, c_Vector3 );  /* matrix operands are assembled from the vectors built above */
+    b_Matrix3 = Matrix3( d_Vector3, a_Vector3, b_Vector3 );
+    print( a_Matrix3, "set Matrix3 columns" );
+    print( b_Matrix3, "set Matrix3 columns" );
+    a_Matrix4 = Matrix4( a_Vector4, b_Vector4, c_Vector4, d_Vector4 );
+    b_Matrix4 = Matrix4( d_Vector4, a_Vector4, b_Vector4, c_Vector4 );
+    print( a_Matrix4, "set Matrix4 columns" );
+    print( b_Matrix4, "set Matrix4 columns" );
+    a_Transform3 = Transform3( a_Vector3, b_Vector3, c_Vector3, d_Vector3 );
+    b_Transform3 = Transform3( d_Vector3, a_Vector3, b_Vector3, c_Vector3 );
+    print( a_Transform3, "set Transform3 columns" );
+    print( b_Transform3, "set Transform3 columns" );
+    a_Matrix3 = Matrix3( normalize( a_Quat ) );  /* the quaternion is normalized before the conversion */
+    print( a_Matrix3, "construct Matrix3 with Quat" );
+    a_Quat = Quat( a_Matrix3 );  /* converts the matrix just built back into a_Quat */
+    print( a_Quat, "construct Quat with Matrix3" );
+    a_Matrix3 = b_Matrix3;
+    print( a_Matrix3, "assign to Matrix3 from Matrix3" );
+    a_Matrix3 = Matrix3( randfloat() );  /* single-argument constructor, exercised twice with fresh random values */
+    print( a_Matrix3, "set Matrix3 with float" );
+    a_Matrix3 = Matrix3( randfloat() );
+    print( a_Matrix3, "set Matrix3 with float" );
+    a_Matrix3 = Matrix3( (vec_float4){0.0f} );  /* compound-literal syntax; NOTE(review): a compiler extension in C++ -- confirm the supported toolchains accept it */
+    print( a_Matrix3, "set elements to zero" );
+    a_Matrix3 = Matrix3::identity( );  /* static factory functions follow */
+    print( a_Matrix3, "set to identity" );
+    a_Matrix3 = Matrix3::rotationX( randfloat() );
+    print( a_Matrix3, "set to rotationX" );
+    a_Matrix3 = Matrix3::rotationY( randfloat() );
+    print( a_Matrix3, "set to rotationY" );
+    a_Matrix3 = Matrix3::rotationZ( randfloat() );
+    print( a_Matrix3, "set to rotationZ" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    a_Matrix3 = Matrix3::rotationZYX( Vector3( rndflt3, rndflt2, rndflt1 ) );  /* note the reversed slot order: rndflt3, rndflt2, rndflt1 */
+    print( a_Matrix3, "set to rotation from Z,Y,X angles" );
+    a_Matrix3 = Matrix3::rotation( randfloat(), normalize( a_Vector3 ) );  /* random first argument, normalized a_Vector3 as second */
+    print( a_Matrix3, "set to rotation from axis angle" );
+    a_Matrix3.setCol0( a_Vector3 );  /* named column setters, printing the whole matrix after each */
+    print( a_Matrix3, "Matrix3 set col 0" );
+    a_Matrix3.setCol1( a_Vector3 );
+    print( a_Matrix3, "Matrix3 set col 1" );
+    a_Matrix3.setCol2( a_Vector3 );
+    print( a_Matrix3, "Matrix3 set col 2" );
+    print( a_Matrix3.getCol0( ), "Matrix3 get col 0" );  /* named column getters */
+    print( a_Matrix3.getCol1( ), "Matrix3 get col 1" );
+    print( a_Matrix3.getCol2( ), "Matrix3 get col 2" );
+    a_Matrix3.setCol( 0, b_Vector3 );  /* indexed column setters, this time with b_Vector3 */
+    print( a_Matrix3, "Matrix3 set col 0" );
+    a_Matrix3.setCol( 1, b_Vector3 );
+    print( a_Matrix3, "Matrix3 set col 1" );
+    a_Matrix3.setCol( 2, b_Vector3 );
+    print( a_Matrix3, "Matrix3 set col 2" );
+    print( a_Matrix3.getCol( 0 ), "Matrix3 get col 0" );  /* indexed column getters */
+    print( a_Matrix3.getCol( 1 ), "Matrix3 get col 1" );
+    print( a_Matrix3.getCol( 2 ), "Matrix3 get col 2" );
+    a_Matrix3.setRow( 0, a_Vector3 );  /* indexed row setters */
+    print( a_Matrix3, "Matrix3 set row 0" );
+    a_Matrix3.setRow( 1, a_Vector3 );
+    print( a_Matrix3, "Matrix3 set row 1" );
+    a_Matrix3.setRow( 2, a_Vector3 );
+    print( a_Matrix3, "Matrix3 set row 2" );
+    print( a_Matrix3.getRow( 0 ), "Matrix3 get row 0" );  /* indexed row getters */
+    print( a_Matrix3.getRow( 1 ), "Matrix3 get row 1" );
+    print( a_Matrix3.getRow( 2 ), "Matrix3 get row 2" );
+    a_Matrix3[0] = a_Vector3;  /* writes through operator[], printing the whole matrix after each */
+    print( a_Matrix3, "set Matrix3[0]" );
+    a_Matrix3[1] = a_Vector3;
+    print( a_Matrix3, "set Matrix3[1]" );
+    a_Matrix3[2] = a_Vector3;
+    print( a_Matrix3, "set Matrix3[2]" );
+    a_Matrix3[0] = a_Vector3;  /* each operator[] read below is preceded by a fresh write of the same value */
+    print( a_Matrix3[0], "get Matrix3[0]" );
+    a_Matrix3[1] = a_Vector3;
+    print( a_Matrix3[1], "get Matrix3[1]" );
+    a_Matrix3[2] = a_Vector3;
+    print( a_Matrix3[2], "get Matrix3[2]" );
+    a_Matrix3.setElem( 0, 0, randfloat() );  /* all nine index pairs (0..2, 0..2) receive a fresh random value */
+    a_Matrix3.setElem( 0, 1, randfloat() );
+    a_Matrix3.setElem( 0, 2, randfloat() );
+    a_Matrix3.setElem( 1, 0, randfloat() );
+    a_Matrix3.setElem( 1, 1, randfloat() );
+    a_Matrix3.setElem( 1, 2, randfloat() );
+    a_Matrix3.setElem( 2, 0, randfloat() );
+    a_Matrix3.setElem( 2, 1, randfloat() );
+    a_Matrix3.setElem( 2, 2, randfloat() );
+    print( a_Matrix3, "Matrix3 set elements" );
+    printf("%f\n", getfloat(a_Matrix3.getElem( 0, 0 )) );  /* reads the elements back in the same order; NOTE(review): getfloat is defined outside this chunk -- presumably reduces the element to a value printable with %f */
+    printf("%f\n", getfloat(a_Matrix3.getElem( 0, 1 )) );
+    printf("%f\n", getfloat(a_Matrix3.getElem( 0, 2 )) );
+    printf("%f\n", getfloat(a_Matrix3.getElem( 1, 0 )) );
+    printf("%f\n", getfloat(a_Matrix3.getElem( 1, 1 )) );
+    printf("%f\n", getfloat(a_Matrix3.getElem( 1, 2 )) );
+    printf("%f\n", getfloat(a_Matrix3.getElem( 2, 0 )) );
+    printf("%f\n", getfloat(a_Matrix3.getElem( 2, 1 )) );
+    printf("%f\n", getfloat(a_Matrix3.getElem( 2, 2 )) );
+}
+
+void
+Matrix4_methods_test()  /* Smoke test: fills SoA operands with randfloat() values, then prints the result of each Matrix4 constructor, static factory and accessor. */
+{
+    Matrix3 a_Matrix3, b_Matrix3;  /* every operand type below is filled and printed; only Matrix4 is exercised in depth */
+    Matrix4 a_Matrix4, b_Matrix4;
+    Transform3 a_Transform3, b_Transform3;
+    Vector3 a_Vector3, b_Vector3, c_Vector3, d_Vector3;
+    Vector4 a_Vector4, b_Vector4, c_Vector4, d_Vector4;
+    Point3 a_Point3, b_Point3, c_Point3, d_Point3;
+    Quat a_Quat, b_Quat, c_Quat, d_Quat;
+    vec_float4 rndflt1, rndflt2, rndflt3, rndflt4, rndflt5, rndflt6;  /* scratch slots for randfloat() results */
+    rndflt1 = randfloat();  /* values are drawn one statement at a time before use -- presumably to pin the randfloat() call order, since argument evaluation order is unspecified */
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    a_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 );
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    b_Vector3 = Vector3( rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    c_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    d_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 );
+    print( a_Vector3, "set Vector3 with floats" );
+    print( b_Vector3, "set Vector3 with floats" );
+    print( c_Vector3, "set Vector3 with floats" );
+    print( d_Vector3, "set Vector3 with floats" );
+    rndflt1 = randfloat();  /* same pattern for the four Vector4 operands */
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    a_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt3 = randfloat();  /* overwrites rndflt3/rndflt4, which were just consumed by a_Vector4 */
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    b_Vector4 = Vector4( rndflt3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    c_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    d_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 );
+    print( a_Vector4, "set Vector4 with floats" );
+    print( b_Vector4, "set Vector4 with floats" );
+    print( c_Vector4, "set Vector4 with floats" );
+    print( d_Vector4, "set Vector4 with floats" );
+    rndflt1 = randfloat();  /* same pattern for the four Point3 operands */
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    a_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    b_Point3 = Point3( rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    c_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    d_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    print( a_Point3, "set Point3 with floats" );
+    print( b_Point3, "set Point3 with floats" );
+    print( c_Point3, "set Point3 with floats" );
+    print( d_Point3, "set Point3 with floats" );
+    rndflt1 = randfloat();  /* same pattern for the four Quat operands */
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    a_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    b_Quat = Quat( rndflt3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    c_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    d_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    print( a_Quat, "set Quat with floats" );
+    print( b_Quat, "set Quat with floats" );
+    print( c_Quat, "set Quat with floats" );
+    print( d_Quat, "set Quat with floats" );
+    a_Matrix3 = Matrix3( a_Vector3, b_Vector3, c_Vector3 );  /* matrix operands are assembled from the vectors built above */
+    b_Matrix3 = Matrix3( d_Vector3, a_Vector3, b_Vector3 );
+    print( a_Matrix3, "set Matrix3 columns" );
+    print( b_Matrix3, "set Matrix3 columns" );
+    a_Matrix4 = Matrix4( a_Vector4, b_Vector4, c_Vector4, d_Vector4 );
+    b_Matrix4 = Matrix4( d_Vector4, a_Vector4, b_Vector4, c_Vector4 );
+    print( a_Matrix4, "set Matrix4 columns" );
+    print( b_Matrix4, "set Matrix4 columns" );
+    a_Transform3 = Transform3( a_Vector3, b_Vector3, c_Vector3, d_Vector3 );
+    b_Transform3 = Transform3( d_Vector3, a_Vector3, b_Vector3, c_Vector3 );
+    print( a_Transform3, "set Transform3 columns" );
+    print( b_Transform3, "set Transform3 columns" );
+    a_Matrix4 = Matrix4( a_Transform3 );  /* converting constructors follow */
+    print( a_Matrix4, "construct Matrix4 with Transform3" );
+    a_Matrix4 = Matrix4( a_Matrix3, a_Vector3 );
+    print( a_Matrix4, "construct Matrix4 with Matrix3 and Vector3" );
+    a_Matrix4 = Matrix4( a_Quat, a_Vector3 );  /* NOTE(review): a_Quat is passed as generated, without a normalize() call -- confirm this is intended */
+    print( a_Matrix4, "construct Matrix4 with Quat and Vector3" );
+    a_Matrix4 = b_Matrix4;
+    print( a_Matrix4, "assign to Matrix4 from Matrix4" );
+    a_Matrix4 = Matrix4( randfloat() );  /* single-argument constructor, exercised twice with fresh random values */
+    print( a_Matrix4, "set Matrix4 with float" );
+    a_Matrix4 = Matrix4( randfloat() );
+    print( a_Matrix4, "set Matrix4 with float" );
+    a_Matrix4 = Matrix4( (vec_float4){0.0f} );  /* compound-literal syntax; NOTE(review): a compiler extension in C++ -- confirm the supported toolchains accept it */
+    print( a_Matrix4, "set elements to zero" );
+    a_Matrix4 = Matrix4::identity( );  /* static factory functions follow */
+    print( a_Matrix4, "set to identity" );
+    a_Matrix4 = Matrix4::rotationX( randfloat() );
+    print( a_Matrix4, "set to rotationX" );
+    a_Matrix4 = Matrix4::rotationY( randfloat() );
+    print( a_Matrix4, "set to rotationY" );
+    a_Matrix4 = Matrix4::rotationZ( randfloat() );
+    print( a_Matrix4, "set to rotationZ" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    a_Matrix4 = Matrix4::rotationZYX( Vector3( rndflt3, rndflt2, rndflt1 ) );  /* note the reversed slot order: rndflt3, rndflt2, rndflt1 */
+    print( a_Matrix4, "set to rotation from Z,Y,X angles" );
+    a_Matrix4 = Matrix4::rotation( randfloat(), normalize( a_Vector3 ) );  /* random first argument, normalized a_Vector3 as second */
+    print( a_Matrix4, "set to rotation from axis angle" );
+    a_Matrix4 = Matrix4::translation( a_Vector3 );
+    print( a_Matrix4, "set to translation" );
+    rndflt1 = randfloat();  /* four unconstrained random arguments for perspective() */
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    a_Matrix4 = Matrix4::perspective( rndflt1, rndflt2, rndflt3, rndflt4 );
+    print( a_Matrix4, "set to perspective matrix" );
+    rndflt1 = randfloat();  /* six unconstrained random arguments for frustum() */
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    a_Matrix4 = Matrix4::frustum( rndflt1, rndflt2, rndflt3, rndflt4, rndflt5, rndflt6 );
+    print( a_Matrix4, "set to frustum matrix" );
+    rndflt1 = randfloat();  /* six unconstrained random arguments for orthographic() */
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    a_Matrix4 = Matrix4::orthographic( rndflt1, rndflt2, rndflt3, rndflt4, rndflt5, rndflt6 );
+    print( a_Matrix4, "set to orthographic matrix" );
+    a_Matrix4 = Matrix4::lookAt( a_Point3, b_Point3, a_Vector3 );  /* two random points plus a_Vector3 as the third argument */
+    print( a_Matrix4, "set to look-at matrix" );
+    a_Matrix4.setCol0( a_Vector4 );  /* named column setters, printing the whole matrix after each */
+    print( a_Matrix4, "Matrix4 set col 0" );
+    a_Matrix4.setCol1( a_Vector4 );
+    print( a_Matrix4, "Matrix4 set col 1" );
+    a_Matrix4.setCol2( a_Vector4 );
+    print( a_Matrix4, "Matrix4 set col 2" );
+    a_Matrix4.setCol3( a_Vector4 );
+    print( a_Matrix4, "Matrix4 set col 3" );
+    print( a_Matrix4.getCol0( ), "Matrix4 get col 0" );  /* named column getters */
+    print( a_Matrix4.getCol1( ), "Matrix4 get col 1" );
+    print( a_Matrix4.getCol2( ), "Matrix4 get col 2" );
+    print( a_Matrix4.getCol3( ), "Matrix4 get col 3" );
+    a_Matrix4.setCol( 0, b_Vector4 );  /* indexed column setters, this time with b_Vector4 */
+    print( a_Matrix4, "Matrix4 set col 0" );
+    a_Matrix4.setCol( 1, b_Vector4 );
+    print( a_Matrix4, "Matrix4 set col 1" );
+    a_Matrix4.setCol( 2, b_Vector4 );
+    print( a_Matrix4, "Matrix4 set col 2" );
+    a_Matrix4.setCol( 3, b_Vector4 );
+    print( a_Matrix4, "Matrix4 set col 3" );
+    print( a_Matrix4.getCol( 0 ), "Matrix4 get col 0" );  /* indexed column getters */
+    print( a_Matrix4.getCol( 1 ), "Matrix4 get col 1" );
+    print( a_Matrix4.getCol( 2 ), "Matrix4 get col 2" );
+    print( a_Matrix4.getCol( 3 ), "Matrix4 get col 3" );
+    a_Matrix4.setRow( 0, a_Vector4 );  /* indexed row setters */
+    print( a_Matrix4, "Matrix4 set row 0" );
+    a_Matrix4.setRow( 1, a_Vector4 );
+    print( a_Matrix4, "Matrix4 set row 1" );
+    a_Matrix4.setRow( 2, a_Vector4 );
+    print( a_Matrix4, "Matrix4 set row 2" );
+    a_Matrix4.setRow( 3, a_Vector4 );
+    print( a_Matrix4, "Matrix4 set row 3" );
+    print( a_Matrix4.getRow( 0 ), "Matrix4 get row 0" );  /* indexed row getters */
+    print( a_Matrix4.getRow( 1 ), "Matrix4 get row 1" );
+    print( a_Matrix4.getRow( 2 ), "Matrix4 get row 2" );
+    print( a_Matrix4.getRow( 3 ), "Matrix4 get row 3" );
+    a_Matrix4[0] = a_Vector4;  /* writes through operator[], printing the whole matrix after each */
+    print( a_Matrix4, "set Matrix4[0]" );
+    a_Matrix4[1] = a_Vector4;
+    print( a_Matrix4, "set Matrix4[1]" );
+    a_Matrix4[2] = a_Vector4;
+    print( a_Matrix4, "set Matrix4[2]" );
+    a_Matrix4[3] = a_Vector4;
+    print( a_Matrix4, "set Matrix4[3]" );
+    a_Matrix4[0] = a_Vector4;  /* each operator[] read below is preceded by a fresh write of the same value */
+    print( a_Matrix4[0], "get Matrix4[0]" );
+    a_Matrix4[1] = a_Vector4;
+    print( a_Matrix4[1], "get Matrix4[1]" );
+    a_Matrix4[2] = a_Vector4;
+    print( a_Matrix4[2], "get Matrix4[2]" );
+    a_Matrix4[3] = a_Vector4;
+    print( a_Matrix4[3], "get Matrix4[3]" );
+    a_Matrix4.setElem( 0, 0, randfloat() );  /* all sixteen index pairs (0..3, 0..3) receive a fresh random value */
+    a_Matrix4.setElem( 0, 1, randfloat() );
+    a_Matrix4.setElem( 0, 2, randfloat() );
+    a_Matrix4.setElem( 0, 3, randfloat() );
+    a_Matrix4.setElem( 1, 0, randfloat() );
+    a_Matrix4.setElem( 1, 1, randfloat() );
+    a_Matrix4.setElem( 1, 2, randfloat() );
+    a_Matrix4.setElem( 1, 3, randfloat() );
+    a_Matrix4.setElem( 2, 0, randfloat() );
+    a_Matrix4.setElem( 2, 1, randfloat() );
+    a_Matrix4.setElem( 2, 2, randfloat() );
+    a_Matrix4.setElem( 2, 3, randfloat() );
+    a_Matrix4.setElem( 3, 0, randfloat() );
+    a_Matrix4.setElem( 3, 1, randfloat() );
+    a_Matrix4.setElem( 3, 2, randfloat() );
+    a_Matrix4.setElem( 3, 3, randfloat() );
+    print( a_Matrix4, "Matrix4 set elements" );
+    printf("%f\n", getfloat(a_Matrix4.getElem( 0, 0 )) );  /* reads the elements back in the same order; NOTE(review): getfloat is defined outside this chunk -- presumably reduces the element to a value printable with %f */
+    printf("%f\n", getfloat(a_Matrix4.getElem( 0, 1 )) );
+    printf("%f\n", getfloat(a_Matrix4.getElem( 0, 2 )) );
+    printf("%f\n", getfloat(a_Matrix4.getElem( 0, 3 )) );
+    printf("%f\n", getfloat(a_Matrix4.getElem( 1, 0 )) );
+    printf("%f\n", getfloat(a_Matrix4.getElem( 1, 1 )) );
+    printf("%f\n", getfloat(a_Matrix4.getElem( 1, 2 )) );
+    printf("%f\n", getfloat(a_Matrix4.getElem( 1, 3 )) );
+    printf("%f\n", getfloat(a_Matrix4.getElem( 2, 0 )) );
+    printf("%f\n", getfloat(a_Matrix4.getElem( 2, 1 )) );
+    printf("%f\n", getfloat(a_Matrix4.getElem( 2, 2 )) );
+    printf("%f\n", getfloat(a_Matrix4.getElem( 2, 3 )) );
+    printf("%f\n", getfloat(a_Matrix4.getElem( 3, 0 )) );
+    printf("%f\n", getfloat(a_Matrix4.getElem( 3, 1 )) );
+    printf("%f\n", getfloat(a_Matrix4.getElem( 3, 2 )) );
+    printf("%f\n", getfloat(a_Matrix4.getElem( 3, 3 )) );
+}
+
+void
+Transform3_methods_test()
+{
+    Matrix3 a_Matrix3, b_Matrix3;
+    Matrix4 a_Matrix4, b_Matrix4;
+    Transform3 a_Transform3, b_Transform3;
+    Vector3 a_Vector3, b_Vector3, c_Vector3, d_Vector3;
+    Vector4 a_Vector4, b_Vector4, c_Vector4, d_Vector4;
+    Point3 a_Point3, b_Point3, c_Point3, d_Point3;
+    Quat a_Quat, b_Quat, c_Quat, d_Quat;
+    vec_float4 rndflt1, rndflt2, rndflt3, rndflt4, rndflt5, rndflt6;
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    a_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 );
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    b_Vector3 = Vector3( rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    c_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    d_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 );
+    print( a_Vector3, "set Vector3 with floats" );
+    print( b_Vector3, "set Vector3 with floats" );
+    print( c_Vector3, "set Vector3 with floats" );
+    print( d_Vector3, "set Vector3 with floats" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    a_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    b_Vector4 = Vector4( rndflt3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    c_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    d_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 );
+    print( a_Vector4, "set Vector4 with floats" );
+    print( b_Vector4, "set Vector4 with floats" );
+    print( c_Vector4, "set Vector4 with floats" );
+    print( d_Vector4, "set Vector4 with floats" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    a_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    b_Point3 = Point3( rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    c_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    d_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    print( a_Point3, "set Point3 with floats" );
+    print( b_Point3, "set Point3 with floats" );
+    print( c_Point3, "set Point3 with floats" );
+    print( d_Point3, "set Point3 with floats" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    a_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    b_Quat = Quat( rndflt3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    c_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    d_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    print( a_Quat, "set Quat with floats" );
+    print( b_Quat, "set Quat with floats" );
+    print( c_Quat, "set Quat with floats" );
+    print( d_Quat, "set Quat with floats" );
+    a_Matrix3 = Matrix3( a_Vector3, b_Vector3, c_Vector3 );
+    b_Matrix3 = Matrix3( d_Vector3, a_Vector3, b_Vector3 );
+    print( a_Matrix3, "set Matrix3 columns" );
+    print( b_Matrix3, "set Matrix3 columns" );
+    a_Matrix4 = Matrix4( a_Vector4, b_Vector4, c_Vector4, d_Vector4 );
+    b_Matrix4 = Matrix4( d_Vector4, a_Vector4, b_Vector4, c_Vector4 );
+    print( a_Matrix4, "set Matrix4 columns" );
+    print( b_Matrix4, "set Matrix4 columns" );
+    a_Transform3 = Transform3( a_Vector3, b_Vector3, c_Vector3, d_Vector3 );
+    b_Transform3 = Transform3( d_Vector3, a_Vector3, b_Vector3, c_Vector3 );
+    print( a_Transform3, "set Transform3 columns" );
+    print( b_Transform3, "set Transform3 columns" );
+    a_Transform3 = Transform3( a_Matrix3, a_Vector3 );
+    print( a_Transform3, "construct Transform3 with Matrix3 and Vector3" );
+    a_Transform3 = Transform3( a_Quat, a_Vector3 );
+    print( a_Transform3, "construct Transform3 with Quat and Vector3" );
+    a_Transform3 = b_Transform3;
+    print( a_Transform3, "assign to Transform3 from Transform3" );
+    a_Transform3 = Transform3( randfloat() );
+    print( a_Transform3, "set Transform3 with float" );
+    a_Transform3 = Transform3( randfloat() );
+    print( a_Transform3, "set Transform3 with float" );
+    a_Transform3 = Transform3( (vec_float4){0.0f} );
+    print( a_Transform3, "set elements to zero" );
+    a_Transform3 = Transform3::identity( );
+    print( a_Transform3, "set to identity" );
+    a_Transform3 = Transform3::rotationX( randfloat() );
+    print( a_Transform3, "set to rotationX" );
+    a_Transform3 = Transform3::rotationY( randfloat() );
+    print( a_Transform3, "set to rotationY" );
+    a_Transform3 = Transform3::rotationZ( randfloat() );
+    print( a_Transform3, "set to rotationZ" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    a_Transform3 = Transform3::rotationZYX( Vector3( rndflt3, rndflt2, rndflt1 ) );
+    print( a_Transform3, "set to rotation from Z,Y,X angles" );
+    a_Transform3 = Transform3::rotation( randfloat(), normalize( a_Vector3 ) );
+    print( a_Transform3, "set to rotation from axis angle" );
+    a_Transform3 = Transform3::translation( a_Vector3 );
+    print( a_Transform3, "set to translation" );
+    a_Transform3.setCol0( a_Vector3 );
+    print( a_Transform3, "Transform3 set col 0" );
+    a_Transform3.setCol1( a_Vector3 );
+    print( a_Transform3, "Transform3 set col 1" );
+    a_Transform3.setCol2( a_Vector3 );
+    print( a_Transform3, "Transform3 set col 2" );
+    a_Transform3.setCol3( a_Vector3 );
+    print( a_Transform3, "Transform3 set col 3" );
+    print( a_Transform3.getCol0( ), "Transform3 get col 0" );
+    print( a_Transform3.getCol1( ), "Transform3 get col 1" );
+    print( a_Transform3.getCol2( ), "Transform3 get col 2" );
+    print( a_Transform3.getCol3( ), "Transform3 get col 3" );
+    a_Transform3.setCol( 0, b_Vector3 );
+    print( a_Transform3, "Transform3 set col 0" );
+    a_Transform3.setCol( 1, b_Vector3 );
+    print( a_Transform3, "Transform3 set col 1" );
+    a_Transform3.setCol( 2, b_Vector3 );
+    print( a_Transform3, "Transform3 set col 2" );
+    a_Transform3.setCol( 3, b_Vector3 );
+    print( a_Transform3, "Transform3 set col 3" );
+    print( a_Transform3.getCol( 0 ), "Transform3 get col 0" );
+    print( a_Transform3.getCol( 1 ), "Transform3 get col 1" );
+    print( a_Transform3.getCol( 2 ), "Transform3 get col 2" );
+    print( a_Transform3.getCol( 3 ), "Transform3 get col 3" );
+    a_Transform3.setRow( 0, a_Vector4 );
+    print( a_Transform3, "Transform3 set row 0" );
+    a_Transform3.setRow( 1, a_Vector4 );
+    print( a_Transform3, "Transform3 set row 1" );
+    a_Transform3.setRow( 2, a_Vector4 );
+    print( a_Transform3, "Transform3 set row 2" );
+    print( a_Transform3.getRow( 0 ), "Transform3 get row 0" );
+    print( a_Transform3.getRow( 1 ), "Transform3 get row 1" );
+    print( a_Transform3.getRow( 2 ), "Transform3 get row 2" );
+    a_Transform3[0] = a_Vector3;
+    print( a_Transform3, "set Transform3[0]" );
+    a_Transform3[1] = a_Vector3;
+    print( a_Transform3, "set Transform3[1]" );
+    a_Transform3[2] = a_Vector3;
+    print( a_Transform3, "set Transform3[2]" );
+    a_Transform3[3] = a_Vector3;
+    print( a_Transform3, "set Transform3[3]" );
+    a_Transform3[0] = a_Vector3;
+    print( a_Transform3[0], "get Transform3[0]" );
+    a_Transform3[1] = a_Vector3;
+    print( a_Transform3[1], "get Transform3[1]" );
+    a_Transform3[2] = a_Vector3;
+    print( a_Transform3[2], "get Transform3[2]" );
+    a_Transform3[3] = a_Vector3;
+    print( a_Transform3[3], "get Transform3[3]" );
+    a_Transform3.setElem( 0, 0, randfloat() );
+    a_Transform3.setElem( 0, 1, randfloat() );
+    a_Transform3.setElem( 0, 2, randfloat() );
+    a_Transform3.setElem( 1, 0, randfloat() );
+    a_Transform3.setElem( 1, 1, randfloat() );
+    a_Transform3.setElem( 1, 2, randfloat() );
+    a_Transform3.setElem( 2, 0, randfloat() );
+    a_Transform3.setElem( 2, 1, randfloat() );
+    a_Transform3.setElem( 2, 2, randfloat() );
+    a_Transform3.setElem( 3, 0, randfloat() );
+    a_Transform3.setElem( 3, 1, randfloat() );
+    a_Transform3.setElem( 3, 2, randfloat() );
+    print( a_Transform3, "Transform3 set elements" );
+    printf("%f\n", getfloat(a_Transform3.getElem( 0, 0 )) );
+    printf("%f\n", getfloat(a_Transform3.getElem( 0, 1 )) );
+    printf("%f\n", getfloat(a_Transform3.getElem( 0, 2 )) );
+    printf("%f\n", getfloat(a_Transform3.getElem( 1, 0 )) );
+    printf("%f\n", getfloat(a_Transform3.getElem( 1, 1 )) );
+    printf("%f\n", getfloat(a_Transform3.getElem( 1, 2 )) );
+    printf("%f\n", getfloat(a_Transform3.getElem( 2, 0 )) );
+    printf("%f\n", getfloat(a_Transform3.getElem( 2, 1 )) );
+    printf("%f\n", getfloat(a_Transform3.getElem( 2, 2 )) );
+    printf("%f\n", getfloat(a_Transform3.getElem( 3, 0 )) );
+    printf("%f\n", getfloat(a_Transform3.getElem( 3, 1 )) );
+    printf("%f\n", getfloat(a_Transform3.getElem( 3, 2 )) );
+}
+
+int main()  // Test driver: runs the three method-test routines twice between begin/end marker lines.
+{
+    int remaining = 2;  // number of passes still to run
+    printf("\n __begin__ \n");
+    while ( remaining-- > 0 ) {
+        Matrix3_methods_test();
+        Matrix4_methods_test();
+        Transform3_methods_test();
+    }
+    printf("\n __end__ \n");
+    return 0;
+}
diff --git a/Extras/vectormathlibrary/tests/test3_aos_c.c b/Extras/vectormathlibrary/tests/test3_aos_c.c
new file mode 100644
index 000000000..22afb694f
--- /dev/null
+++ b/Extras/vectormathlibrary/tests/test3_aos_c.c
@@ -0,0 +1,524 @@
+/*
+  Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+  All rights reserved.
+
+  Redistribution and use in source and binary forms,
+  with or without modification, are permitted provided that the
+  following conditions are met:
+   * Redistributions of source code must retain the above copyright
+     notice, this list of conditions and the following disclaimer.
+   * Redistributions in binary form must reproduce the above copyright
+     notice, this list of conditions and the following disclaimer in the
+     documentation and/or other materials provided with the distribution.
+   * Neither the name of the Sony Computer Entertainment Inc nor the names
+     of its contributors may be used to endorse or promote products derived
+     from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+  POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#define _VECTORMATH_AOS_TEST
+
+#include "vectormath_aos.h"
+#include "test.h"
+
+int iteration = 0;
+
+void
+Matrix3_methods_test()  // Fills operands from randfloat(), prints them, then prints the result of each Matrix3 arithmetic call.
+{
+    VmathMatrix3 a_Matrix3, b_Matrix3;
+    VmathMatrix4 a_Matrix4, b_Matrix4;
+    VmathTransform3 a_Transform3, b_Transform3;
+    VmathMatrix3 tmpM3_0, tmpM3_1, tmpM3_2, tmpM3_3, tmpM3_4, tmpM3_5;  // one result slot per Matrix3 operation below
+    VmathVector3 a_Vector3, b_Vector3, c_Vector3, d_Vector3;
+    VmathVector4 a_Vector4, b_Vector4, c_Vector4, d_Vector4;
+    VmathPoint3 a_Point3, b_Point3, c_Point3, d_Point3;
+    VmathQuat a_Quat, b_Quat, c_Quat, d_Quat;
+    VmathVector4 tmpV4;  // scratch Vector4 used for the pad round-trips
+    VmathVector3 tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3, tmpV3_4, tmpV3_5, tmpV3_6, tmpV3_7, tmpV3_8;
+    float rndflt1, rndflt2, rndflt3, rndflt4, rndflt5, rndflt6, pad;
+    // set a pad value to detect invalid use of padding.
+    // this will be nan for scalar/ppu implementations, max. float for spu
+    union { float f; unsigned int u; } tmp;  // reinterprets an integer bit pattern as a float
+    tmp.u = 0x7fffffff;
+    pad = tmp.f;
+    rndflt1 = randfloat();  // NOTE(review): randfloat() presumably advances shared generator state, so call order affects all later output - confirm in test.h
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathV3MakeFromElems( &a_Vector3, rndflt1, rndflt2, rndflt3 );
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathV3MakeFromElems( &b_Vector3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathV3MakeFromElems( &c_Vector3, rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathV3MakeFromElems( &d_Vector3, rndflt1, rndflt2, rndflt3 );
+    vmathV4MakeFromV3Scalar( &tmpV4, &a_Vector3, pad );  // round-trip each Vector3 through a Vector4 built with pad as the scalar
+    vmathV4GetXYZ( &a_Vector3, &tmpV4 );  // presumably leaves pad in the Vector3's padding slot on SIMD builds - confirm
+    vmathV4MakeFromV3Scalar( &tmpV4, &b_Vector3, pad );
+    vmathV4GetXYZ( &b_Vector3, &tmpV4 );
+    vmathV4MakeFromV3Scalar( &tmpV4, &c_Vector3, pad );
+    vmathV4GetXYZ( &c_Vector3, &tmpV4 );
+    vmathV4MakeFromV3Scalar( &tmpV4, &d_Vector3, pad );
+    vmathV4GetXYZ( &d_Vector3, &tmpV4 );
+    vmathV3Prints( &a_Vector3, "set Vector3 with floats" );
+    vmathV3Prints( &b_Vector3, "set Vector3 with floats" );
+    vmathV3Prints( &c_Vector3, "set Vector3 with floats" );
+    vmathV3Prints( &d_Vector3, "set Vector3 with floats" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathV4MakeFromElems( &a_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathV4MakeFromElems( &b_Vector4, rndflt3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathV4MakeFromElems( &c_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathV4MakeFromElems( &d_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    vmathV4Prints( &a_Vector4, "set Vector4 with floats" );
+    vmathV4Prints( &b_Vector4, "set Vector4 with floats" );
+    vmathV4Prints( &c_Vector4, "set Vector4 with floats" );
+    vmathV4Prints( &d_Vector4, "set Vector4 with floats" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathP3MakeFromElems( &a_Point3, rndflt1, rndflt2, rndflt3 );
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathP3MakeFromElems( &b_Point3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathP3MakeFromElems( &c_Point3, rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathP3MakeFromElems( &d_Point3, rndflt1, rndflt2, rndflt3 );
+    vmathV3MakeFromP3( &tmpV3_0, &a_Point3 );  // same pad round-trip for points: Point3 -> Vector3 -> Vector4(pad) -> xyz -> Point3
+    vmathV4MakeFromV3Scalar( &tmpV4, &tmpV3_0, pad );
+    vmathV4GetXYZ( &tmpV3_1, &tmpV4 );
+    vmathP3MakeFromV3( &a_Point3, &tmpV3_1 );
+    vmathV3MakeFromP3( &tmpV3_2, &b_Point3 );
+    vmathV4MakeFromV3Scalar( &tmpV4, &tmpV3_2, pad );
+    vmathV4GetXYZ( &tmpV3_3, &tmpV4 );
+    vmathP3MakeFromV3( &b_Point3, &tmpV3_3 );
+    vmathV3MakeFromP3( &tmpV3_4, &c_Point3 );
+    vmathV4MakeFromV3Scalar( &tmpV4, &tmpV3_4, pad );
+    vmathV4GetXYZ( &tmpV3_5, &tmpV4 );
+    vmathP3MakeFromV3( &c_Point3, &tmpV3_5 );
+    vmathV3MakeFromP3( &tmpV3_6, &d_Point3 );
+    vmathV4MakeFromV3Scalar( &tmpV4, &tmpV3_6, pad );
+    vmathV4GetXYZ( &tmpV3_7, &tmpV4 );
+    vmathP3MakeFromV3( &d_Point3, &tmpV3_7 );
+    vmathP3Prints( &a_Point3, "set Point3 with floats" );
+    vmathP3Prints( &b_Point3, "set Point3 with floats" );
+    vmathP3Prints( &c_Point3, "set Point3 with floats" );
+    vmathP3Prints( &d_Point3, "set Point3 with floats" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathQMakeFromElems( &a_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathQMakeFromElems( &b_Quat, rndflt3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathQMakeFromElems( &c_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathQMakeFromElems( &d_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    vmathQPrints( &a_Quat, "set Quat with floats" );
+    vmathQPrints( &b_Quat, "set Quat with floats" );
+    vmathQPrints( &c_Quat, "set Quat with floats" );
+    vmathQPrints( &d_Quat, "set Quat with floats" );
+    vmathM3MakeFromCols( &a_Matrix3, &a_Vector3, &b_Vector3, &c_Vector3 );  // matrices are assembled from the vectors created above
+    vmathM3MakeFromCols( &b_Matrix3, &d_Vector3, &a_Vector3, &b_Vector3 );
+    vmathM3Prints( &a_Matrix3, "set Matrix3 columns" );
+    vmathM3Prints( &b_Matrix3, "set Matrix3 columns" );
+    vmathM4MakeFromCols( &a_Matrix4, &a_Vector4, &b_Vector4, &c_Vector4, &d_Vector4 );
+    vmathM4MakeFromCols( &b_Matrix4, &d_Vector4, &a_Vector4, &b_Vector4, &c_Vector4 );
+    vmathM4Prints( &a_Matrix4, "set Matrix4 columns" );
+    vmathM4Prints( &b_Matrix4, "set Matrix4 columns" );
+    vmathT3MakeFromCols( &a_Transform3, &a_Vector3, &b_Vector3, &c_Vector3, &d_Vector3 );
+    vmathT3MakeFromCols( &b_Transform3, &d_Vector3, &a_Vector3, &b_Vector3, &c_Vector3 );
+    vmathT3Prints( &a_Transform3, "set Transform3 columns" );
+    vmathT3Prints( &b_Transform3, "set Transform3 columns" );
+    vmathM3Add( &tmpM3_0, &a_Matrix3, &b_Matrix3 );  // operations under test start here; each result is printed with a label
+    vmathM3Prints( &tmpM3_0, "Matrix3 + Matrix3" );
+    vmathM3Sub( &tmpM3_1, &a_Matrix3, &b_Matrix3 );
+    vmathM3Prints( &tmpM3_1, "Matrix3 - Matrix3" );
+    vmathM3Neg( &tmpM3_2, &a_Matrix3 );
+    vmathM3Prints( &tmpM3_2, "-Matrix3" );
+    vmathM3ScalarMul( &tmpM3_3, &a_Matrix3, randfloat() );
+    vmathM3Prints( &tmpM3_3, "Matrix3 * float" );
+    vmathM3ScalarMul( &tmpM3_4, &a_Matrix3, randfloat() );  // same call as above with a fresh scalar; only the printed label differs
+    vmathM3Prints( &tmpM3_4, "float * Matrix3" );
+    vmathM3MulV3( &tmpV3_8, &a_Matrix3, &a_Vector3 );
+    vmathV3Prints( &tmpV3_8, "Matrix3 * Vector3" );
+    vmathM3Mul( &tmpM3_5, &a_Matrix3, &b_Matrix3 );
+    vmathM3Prints( &tmpM3_5, "Matrix3 * Matrix3" );
+}
+
+void
+Matrix4_methods_test()  // Fills operands from randfloat(), prints them, then prints the result of each Matrix4 arithmetic call.
+{
+    VmathMatrix3 a_Matrix3, b_Matrix3;
+    VmathMatrix4 a_Matrix4, b_Matrix4;
+    VmathTransform3 a_Transform3, b_Transform3;
+    VmathMatrix4 tmpM4_0, tmpM4_1, tmpM4_2, tmpM4_3, tmpM4_4, tmpM4_5, tmpM4_6;  // one result slot per Matrix4-valued operation below
+    VmathVector3 a_Vector3, b_Vector3, c_Vector3, d_Vector3;
+    VmathVector4 a_Vector4, b_Vector4, c_Vector4, d_Vector4;
+    VmathPoint3 a_Point3, b_Point3, c_Point3, d_Point3;
+    VmathQuat a_Quat, b_Quat, c_Quat, d_Quat;
+    VmathVector4 tmpV4;  // scratch Vector4 used for the pad round-trips
+    VmathVector3 tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3, tmpV3_4, tmpV3_5, tmpV3_6, tmpV3_7;
+    VmathVector4 tmpV4_0, tmpV4_1, tmpV4_2;  // results of the Matrix4 * vector/point products
+    float rndflt1, rndflt2, rndflt3, rndflt4, rndflt5, rndflt6, pad;
+    // set a pad value to detect invalid use of padding.
+    // this will be nan for scalar/ppu implementations, max. float for spu
+    union { float f; unsigned int u; } tmp;  // reinterprets an integer bit pattern as a float
+    tmp.u = 0x7fffffff;
+    pad = tmp.f;
+    rndflt1 = randfloat();  // NOTE(review): randfloat() presumably advances shared generator state, so call order affects all later output - confirm in test.h
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathV3MakeFromElems( &a_Vector3, rndflt1, rndflt2, rndflt3 );
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathV3MakeFromElems( &b_Vector3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathV3MakeFromElems( &c_Vector3, rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathV3MakeFromElems( &d_Vector3, rndflt1, rndflt2, rndflt3 );
+    vmathV4MakeFromV3Scalar( &tmpV4, &a_Vector3, pad );  // round-trip each Vector3 through a Vector4 built with pad as the scalar
+    vmathV4GetXYZ( &a_Vector3, &tmpV4 );  // presumably leaves pad in the Vector3's padding slot on SIMD builds - confirm
+    vmathV4MakeFromV3Scalar( &tmpV4, &b_Vector3, pad );
+    vmathV4GetXYZ( &b_Vector3, &tmpV4 );
+    vmathV4MakeFromV3Scalar( &tmpV4, &c_Vector3, pad );
+    vmathV4GetXYZ( &c_Vector3, &tmpV4 );
+    vmathV4MakeFromV3Scalar( &tmpV4, &d_Vector3, pad );
+    vmathV4GetXYZ( &d_Vector3, &tmpV4 );
+    vmathV3Prints( &a_Vector3, "set Vector3 with floats" );
+    vmathV3Prints( &b_Vector3, "set Vector3 with floats" );
+    vmathV3Prints( &c_Vector3, "set Vector3 with floats" );
+    vmathV3Prints( &d_Vector3, "set Vector3 with floats" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathV4MakeFromElems( &a_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathV4MakeFromElems( &b_Vector4, rndflt3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathV4MakeFromElems( &c_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathV4MakeFromElems( &d_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    vmathV4Prints( &a_Vector4, "set Vector4 with floats" );
+    vmathV4Prints( &b_Vector4, "set Vector4 with floats" );
+    vmathV4Prints( &c_Vector4, "set Vector4 with floats" );
+    vmathV4Prints( &d_Vector4, "set Vector4 with floats" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathP3MakeFromElems( &a_Point3, rndflt1, rndflt2, rndflt3 );
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathP3MakeFromElems( &b_Point3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathP3MakeFromElems( &c_Point3, rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathP3MakeFromElems( &d_Point3, rndflt1, rndflt2, rndflt3 );
+    vmathV3MakeFromP3( &tmpV3_0, &a_Point3 );  // same pad round-trip for points: Point3 -> Vector3 -> Vector4(pad) -> xyz -> Point3
+    vmathV4MakeFromV3Scalar( &tmpV4, &tmpV3_0, pad );
+    vmathV4GetXYZ( &tmpV3_1, &tmpV4 );
+    vmathP3MakeFromV3( &a_Point3, &tmpV3_1 );
+    vmathV3MakeFromP3( &tmpV3_2, &b_Point3 );
+    vmathV4MakeFromV3Scalar( &tmpV4, &tmpV3_2, pad );
+    vmathV4GetXYZ( &tmpV3_3, &tmpV4 );
+    vmathP3MakeFromV3( &b_Point3, &tmpV3_3 );
+    vmathV3MakeFromP3( &tmpV3_4, &c_Point3 );
+    vmathV4MakeFromV3Scalar( &tmpV4, &tmpV3_4, pad );
+    vmathV4GetXYZ( &tmpV3_5, &tmpV4 );
+    vmathP3MakeFromV3( &c_Point3, &tmpV3_5 );
+    vmathV3MakeFromP3( &tmpV3_6, &d_Point3 );
+    vmathV4MakeFromV3Scalar( &tmpV4, &tmpV3_6, pad );
+    vmathV4GetXYZ( &tmpV3_7, &tmpV4 );
+    vmathP3MakeFromV3( &d_Point3, &tmpV3_7 );
+    vmathP3Prints( &a_Point3, "set Point3 with floats" );
+    vmathP3Prints( &b_Point3, "set Point3 with floats" );
+    vmathP3Prints( &c_Point3, "set Point3 with floats" );
+    vmathP3Prints( &d_Point3, "set Point3 with floats" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathQMakeFromElems( &a_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathQMakeFromElems( &b_Quat, rndflt3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathQMakeFromElems( &c_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathQMakeFromElems( &d_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    vmathQPrints( &a_Quat, "set Quat with floats" );
+    vmathQPrints( &b_Quat, "set Quat with floats" );
+    vmathQPrints( &c_Quat, "set Quat with floats" );
+    vmathQPrints( &d_Quat, "set Quat with floats" );
+    vmathM3MakeFromCols( &a_Matrix3, &a_Vector3, &b_Vector3, &c_Vector3 );  // matrices are assembled from the vectors created above
+    vmathM3MakeFromCols( &b_Matrix3, &d_Vector3, &a_Vector3, &b_Vector3 );
+    vmathM3Prints( &a_Matrix3, "set Matrix3 columns" );
+    vmathM3Prints( &b_Matrix3, "set Matrix3 columns" );
+    vmathM4MakeFromCols( &a_Matrix4, &a_Vector4, &b_Vector4, &c_Vector4, &d_Vector4 );
+    vmathM4MakeFromCols( &b_Matrix4, &d_Vector4, &a_Vector4, &b_Vector4, &c_Vector4 );
+    vmathM4Prints( &a_Matrix4, "set Matrix4 columns" );
+    vmathM4Prints( &b_Matrix4, "set Matrix4 columns" );
+    vmathT3MakeFromCols( &a_Transform3, &a_Vector3, &b_Vector3, &c_Vector3, &d_Vector3 );
+    vmathT3MakeFromCols( &b_Transform3, &d_Vector3, &a_Vector3, &b_Vector3, &c_Vector3 );
+    vmathT3Prints( &a_Transform3, "set Transform3 columns" );
+    vmathT3Prints( &b_Transform3, "set Transform3 columns" );
+    vmathM4Add( &tmpM4_0, &a_Matrix4, &b_Matrix4 );  // operations under test start here; each result is printed with a label
+    vmathM4Prints( &tmpM4_0, "Matrix4 + Matrix4" );
+    vmathM4Sub( &tmpM4_1, &a_Matrix4, &b_Matrix4 );
+    vmathM4Prints( &tmpM4_1, "Matrix4 - Matrix4" );
+    vmathM4Neg( &tmpM4_2, &a_Matrix4 );
+    vmathM4Prints( &tmpM4_2, "-Matrix4" );
+    vmathM4ScalarMul( &tmpM4_3, &a_Matrix4, randfloat() );
+    vmathM4Prints( &tmpM4_3, "Matrix4 * float" );
+    vmathM4ScalarMul( &tmpM4_4, &a_Matrix4, randfloat() );  // same call as above with a fresh scalar; only the printed label differs
+    vmathM4Prints( &tmpM4_4, "float * Matrix4" );
+    vmathM4MulV4( &tmpV4_0, &a_Matrix4, &a_Vector4 );
+    vmathV4Prints( &tmpV4_0, "Matrix4 * Vector4" );
+    vmathM4MulV3( &tmpV4_1, &a_Matrix4, &a_Vector3 );  // Vector3 and Point3 products both write a Vector4 result
+    vmathV4Prints( &tmpV4_1, "Matrix4 * Vector3" );
+    vmathM4MulP3( &tmpV4_2, &a_Matrix4, &a_Point3 );
+    vmathV4Prints( &tmpV4_2, "Matrix4 * Point3" );
+    vmathM4Mul( &tmpM4_5, &a_Matrix4, &b_Matrix4 );
+    vmathM4Prints( &tmpM4_5, "Matrix4 * Matrix4" );
+    vmathM4MulT3( &tmpM4_6, &a_Matrix4, &b_Transform3 );
+    vmathM4Prints( &tmpM4_6, "Matrix4 * Transform3" );
+}
+
+void
+Transform3_methods_test()  // Fills operands from randfloat(), prints them, then prints the result of each Transform3 product.
+{
+    VmathMatrix3 a_Matrix3, b_Matrix3;
+    VmathMatrix4 a_Matrix4, b_Matrix4;
+    VmathTransform3 a_Transform3, b_Transform3, tmpT3_0;  // tmpT3_0 receives the Transform3 * Transform3 result
+    VmathVector3 a_Vector3, b_Vector3, c_Vector3, d_Vector3;
+    VmathVector4 a_Vector4, b_Vector4, c_Vector4, d_Vector4;
+    VmathPoint3 a_Point3, b_Point3, c_Point3, d_Point3;
+    VmathQuat a_Quat, b_Quat, c_Quat, d_Quat;
+    VmathVector4 tmpV4;  // scratch Vector4 used for the pad round-trips
+    VmathVector3 tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3, tmpV3_4, tmpV3_5, tmpV3_6, tmpV3_7, tmpV3_8;
+    VmathPoint3 tmpP3_0;  // receives the Transform3 * Point3 result
+    float rndflt1, rndflt2, rndflt3, rndflt4, rndflt5, rndflt6, pad;
+    // set a pad value to detect invalid use of padding.
+    // this will be nan for scalar/ppu implementations, max. float for spu
+    union { float f; unsigned int u; } tmp;  // reinterprets an integer bit pattern as a float
+    tmp.u = 0x7fffffff;
+    pad = tmp.f;
+    rndflt1 = randfloat();  // NOTE(review): randfloat() presumably advances shared generator state, so call order affects all later output - confirm in test.h
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathV3MakeFromElems( &a_Vector3, rndflt1, rndflt2, rndflt3 );
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathV3MakeFromElems( &b_Vector3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathV3MakeFromElems( &c_Vector3, rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathV3MakeFromElems( &d_Vector3, rndflt1, rndflt2, rndflt3 );
+    vmathV4MakeFromV3Scalar( &tmpV4, &a_Vector3, pad );  // round-trip each Vector3 through a Vector4 built with pad as the scalar
+    vmathV4GetXYZ( &a_Vector3, &tmpV4 );  // presumably leaves pad in the Vector3's padding slot on SIMD builds - confirm
+    vmathV4MakeFromV3Scalar( &tmpV4, &b_Vector3, pad );
+    vmathV4GetXYZ( &b_Vector3, &tmpV4 );
+    vmathV4MakeFromV3Scalar( &tmpV4, &c_Vector3, pad );
+    vmathV4GetXYZ( &c_Vector3, &tmpV4 );
+    vmathV4MakeFromV3Scalar( &tmpV4, &d_Vector3, pad );
+    vmathV4GetXYZ( &d_Vector3, &tmpV4 );
+    vmathV3Prints( &a_Vector3, "set Vector3 with floats" );
+    vmathV3Prints( &b_Vector3, "set Vector3 with floats" );
+    vmathV3Prints( &c_Vector3, "set Vector3 with floats" );
+    vmathV3Prints( &d_Vector3, "set Vector3 with floats" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathV4MakeFromElems( &a_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathV4MakeFromElems( &b_Vector4, rndflt3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathV4MakeFromElems( &c_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathV4MakeFromElems( &d_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    vmathV4Prints( &a_Vector4, "set Vector4 with floats" );
+    vmathV4Prints( &b_Vector4, "set Vector4 with floats" );
+    vmathV4Prints( &c_Vector4, "set Vector4 with floats" );
+    vmathV4Prints( &d_Vector4, "set Vector4 with floats" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathP3MakeFromElems( &a_Point3, rndflt1, rndflt2, rndflt3 );
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathP3MakeFromElems( &b_Point3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathP3MakeFromElems( &c_Point3, rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathP3MakeFromElems( &d_Point3, rndflt1, rndflt2, rndflt3 );
+    vmathV3MakeFromP3( &tmpV3_0, &a_Point3 );  // same pad round-trip for points: Point3 -> Vector3 -> Vector4(pad) -> xyz -> Point3
+    vmathV4MakeFromV3Scalar( &tmpV4, &tmpV3_0, pad );
+    vmathV4GetXYZ( &tmpV3_1, &tmpV4 );
+    vmathP3MakeFromV3( &a_Point3, &tmpV3_1 );
+    vmathV3MakeFromP3( &tmpV3_2, &b_Point3 );
+    vmathV4MakeFromV3Scalar( &tmpV4, &tmpV3_2, pad );
+    vmathV4GetXYZ( &tmpV3_3, &tmpV4 );
+    vmathP3MakeFromV3( &b_Point3, &tmpV3_3 );
+    vmathV3MakeFromP3( &tmpV3_4, &c_Point3 );
+    vmathV4MakeFromV3Scalar( &tmpV4, &tmpV3_4, pad );
+    vmathV4GetXYZ( &tmpV3_5, &tmpV4 );
+    vmathP3MakeFromV3( &c_Point3, &tmpV3_5 );
+    vmathV3MakeFromP3( &tmpV3_6, &d_Point3 );
+    vmathV4MakeFromV3Scalar( &tmpV4, &tmpV3_6, pad );
+    vmathV4GetXYZ( &tmpV3_7, &tmpV4 );
+    vmathP3MakeFromV3( &d_Point3, &tmpV3_7 );
+    vmathP3Prints( &a_Point3, "set Point3 with floats" );
+    vmathP3Prints( &b_Point3, "set Point3 with floats" );
+    vmathP3Prints( &c_Point3, "set Point3 with floats" );
+    vmathP3Prints( &d_Point3, "set Point3 with floats" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathQMakeFromElems( &a_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathQMakeFromElems( &b_Quat, rndflt3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathQMakeFromElems( &c_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathQMakeFromElems( &d_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    vmathQPrints( &a_Quat, "set Quat with floats" );
+    vmathQPrints( &b_Quat, "set Quat with floats" );
+    vmathQPrints( &c_Quat, "set Quat with floats" );
+    vmathQPrints( &d_Quat, "set Quat with floats" );
+    vmathM3MakeFromCols( &a_Matrix3, &a_Vector3, &b_Vector3, &c_Vector3 );  // matrices are assembled from the vectors created above
+    vmathM3MakeFromCols( &b_Matrix3, &d_Vector3, &a_Vector3, &b_Vector3 );
+    vmathM3Prints( &a_Matrix3, "set Matrix3 columns" );
+    vmathM3Prints( &b_Matrix3, "set Matrix3 columns" );
+    vmathM4MakeFromCols( &a_Matrix4, &a_Vector4, &b_Vector4, &c_Vector4, &d_Vector4 );
+    vmathM4MakeFromCols( &b_Matrix4, &d_Vector4, &a_Vector4, &b_Vector4, &c_Vector4 );
+    vmathM4Prints( &a_Matrix4, "set Matrix4 columns" );
+    vmathM4Prints( &b_Matrix4, "set Matrix4 columns" );
+    vmathT3MakeFromCols( &a_Transform3, &a_Vector3, &b_Vector3, &c_Vector3, &d_Vector3 );
+    vmathT3MakeFromCols( &b_Transform3, &d_Vector3, &a_Vector3, &b_Vector3, &c_Vector3 );
+    vmathT3Prints( &a_Transform3, "set Transform3 columns" );
+    vmathT3Prints( &b_Transform3, "set Transform3 columns" );
+    vmathT3MulV3( &tmpV3_8, &a_Transform3, &a_Vector3 );  // operations under test start here; each result is printed with a label
+    vmathV3Prints( &tmpV3_8, "Transform3 * Vector3" );
+    vmathT3MulP3( &tmpP3_0, &a_Transform3, &a_Point3 );
+    vmathP3Prints( &tmpP3_0, "Transform3 * Point3" );
+    vmathT3Mul( &tmpT3_0, &a_Transform3, &b_Transform3 );
+    vmathT3Prints( &tmpT3_0, "Transform3 * Transform3" );
+}
+
+int main()  // Test driver: prints a begin marker, runs the three method tests twice, prints an end marker.
+{
+    int pass;  // index of the current pass over the test routines
+    printf("\n __begin__ \n");
+    for ( pass = 0; pass != 2; ++pass ) {
+        Matrix3_methods_test();
+        Matrix4_methods_test();
+        Transform3_methods_test();
+    }
+    printf("\n __end__ \n");
+    return 0;
+}
diff --git a/Extras/vectormathlibrary/tests/test3_aos_cpp.cpp b/Extras/vectormathlibrary/tests/test3_aos_cpp.cpp
new file mode 100644
index 000000000..09427d421
--- /dev/null
+++ b/Extras/vectormathlibrary/tests/test3_aos_cpp.cpp
@@ -0,0 +1,476 @@
+/*
+  Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+  All rights reserved.
+
+  Redistribution and use in source and binary forms,
+  with or without modification, are permitted provided that the
+  following conditions are met:
+   * Redistributions of source code must retain the above copyright
+     notice, this list of conditions and the following disclaimer.
+   * Redistributions in binary form must reproduce the above copyright
+     notice, this list of conditions and the following disclaimer in the
+     documentation and/or other materials provided with the distribution.
+   * Neither the name of the Sony Computer Entertainment Inc nor the names
+     of its contributors may be used to endorse or promote products derived
+     from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+  POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#define _VECTORMATH_AOS_TEST
+
+#include "vectormath_aos.h"
+#include "test.h"
+
+int iteration = 0; // not referenced anywhere else in this file; presumably read by test.h -- confirm
+
+using namespace Vectormath;
+using namespace Vectormath::Aos;
+
+void
+Matrix3_methods_test() // Fills operands from randfloat(), prints them, then prints Matrix3 +, -, negate, scale and multiply results.
+{
+    Matrix3 a_Matrix3, b_Matrix3;
+    Matrix4 a_Matrix4, b_Matrix4; // only constructed and printed in this test
+    Transform3 a_Transform3, b_Transform3; // only constructed and printed in this test
+    Vector3 a_Vector3, b_Vector3, c_Vector3, d_Vector3;
+    Vector4 a_Vector4, b_Vector4, c_Vector4, d_Vector4;
+    Point3 a_Point3, b_Point3, c_Point3, d_Point3; // only constructed and printed in this test
+    Quat a_Quat, b_Quat, c_Quat, d_Quat; // only constructed and printed in this test
+    Vector4 tmpV4; // scratch for the pad round-trips below
+    float rndflt1, rndflt2, rndflt3, rndflt4, rndflt5, rndflt6, pad;
+    // Build a pad value (bit pattern 0x7fffffff) used to detect invalid use of padding.
+    // It will be NaN for scalar/ppu implementations, max. float for spu.
+    union { float f; unsigned int u; } tmp; // lets the integer bit pattern be read back as a float
+    tmp.u = 0x7fffffff;
+    pad = tmp.f;
+    rndflt1 = randfloat(); // each Vector3 below is built from three randfloat() results, drawn in this order
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    a_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 );
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    b_Vector3 = Vector3( rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    c_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    d_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 );
+    tmpV4 = Vector4( a_Vector3, pad ); // round-trip through a Vector4 whose w is pad; presumably plants pad in the unused lane -- confirm
+    a_Vector3 = tmpV4.getXYZ( );
+    tmpV4 = Vector4( b_Vector3, pad );
+    b_Vector3 = tmpV4.getXYZ( );
+    tmpV4 = Vector4( c_Vector3, pad );
+    c_Vector3 = tmpV4.getXYZ( );
+    tmpV4 = Vector4( d_Vector3, pad );
+    d_Vector3 = tmpV4.getXYZ( );
+    print( a_Vector3, "set Vector3 with floats" );
+    print( b_Vector3, "set Vector3 with floats" );
+    print( c_Vector3, "set Vector3 with floats" );
+    print( d_Vector3, "set Vector3 with floats" );
+    rndflt1 = randfloat(); // Vector4 operands: four randfloat() results each
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    a_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt3 = randfloat(); // b_Vector4 is built from rndflt3..rndflt6, unlike a/c/d which use rndflt1..rndflt4
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    b_Vector4 = Vector4( rndflt3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    c_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    d_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 );
+    print( a_Vector4, "set Vector4 with floats" );
+    print( b_Vector4, "set Vector4 with floats" );
+    print( c_Vector4, "set Vector4 with floats" );
+    print( d_Vector4, "set Vector4 with floats" );
+    rndflt1 = randfloat(); // Point3 operands: three randfloat() results each
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    a_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    b_Point3 = Point3( rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    c_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    d_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    tmpV4 = Vector4( Vector3( a_Point3 ), pad ); // same pad round-trip as for the vectors, converting Point3 -> Vector3 -> Vector4 and back
+    a_Point3 = Point3( tmpV4.getXYZ( ) );
+    tmpV4 = Vector4( Vector3( b_Point3 ), pad );
+    b_Point3 = Point3( tmpV4.getXYZ( ) );
+    tmpV4 = Vector4( Vector3( c_Point3 ), pad );
+    c_Point3 = Point3( tmpV4.getXYZ( ) );
+    tmpV4 = Vector4( Vector3( d_Point3 ), pad );
+    d_Point3 = Point3( tmpV4.getXYZ( ) );
+    print( a_Point3, "set Point3 with floats" );
+    print( b_Point3, "set Point3 with floats" );
+    print( c_Point3, "set Point3 with floats" );
+    print( d_Point3, "set Point3 with floats" );
+    rndflt1 = randfloat(); // Quat operands: four randfloat() results each
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    a_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt3 = randfloat(); // b_Quat is built from rndflt3..rndflt6, mirroring b_Vector4
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    b_Quat = Quat( rndflt3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    c_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    d_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    print( a_Quat, "set Quat with floats" );
+    print( b_Quat, "set Quat with floats" );
+    print( c_Quat, "set Quat with floats" );
+    print( d_Quat, "set Quat with floats" );
+    a_Matrix3 = Matrix3( a_Vector3, b_Vector3, c_Vector3 ); // columns a, b, c
+    b_Matrix3 = Matrix3( d_Vector3, a_Vector3, b_Vector3 ); // columns d, a, b (same vectors shifted by one, led by d)
+    print( a_Matrix3, "set Matrix3 columns" );
+    print( b_Matrix3, "set Matrix3 columns" );
+    a_Matrix4 = Matrix4( a_Vector4, b_Vector4, c_Vector4, d_Vector4 );
+    b_Matrix4 = Matrix4( d_Vector4, a_Vector4, b_Vector4, c_Vector4 );
+    print( a_Matrix4, "set Matrix4 columns" );
+    print( b_Matrix4, "set Matrix4 columns" );
+    a_Transform3 = Transform3( a_Vector3, b_Vector3, c_Vector3, d_Vector3 );
+    b_Transform3 = Transform3( d_Vector3, a_Vector3, b_Vector3, c_Vector3 );
+    print( a_Transform3, "set Transform3 columns" );
+    print( b_Transform3, "set Transform3 columns" );
+    print( ( a_Matrix3 + b_Matrix3 ), "Matrix3 + Matrix3" ); // operations under test start here; everything above is setup
+    print( ( a_Matrix3 - b_Matrix3 ), "Matrix3 - Matrix3" );
+    print( ( -a_Matrix3 ), "-Matrix3" );
+    print( ( a_Matrix3 * randfloat() ), "Matrix3 * float" ); // the scalar is one more randfloat() result
+    print( ( randfloat() * a_Matrix3 ), "float * Matrix3" ); // and another one for the float-on-the-left form
+    print( ( a_Matrix3 * a_Vector3 ), "Matrix3 * Vector3" );
+    print( ( a_Matrix3 * b_Matrix3 ), "Matrix3 * Matrix3" );
+}
+
+void
+Matrix4_methods_test() // Fills operands from randfloat(), prints them, then prints Matrix4 +, -, negate, scale and multiply results.
+{
+    Matrix3 a_Matrix3, b_Matrix3; // only constructed and printed in this test
+    Matrix4 a_Matrix4, b_Matrix4;
+    Transform3 a_Transform3, b_Transform3; // a_Transform3 is only printed; b_Transform3 is the right operand of the last product
+    Vector3 a_Vector3, b_Vector3, c_Vector3, d_Vector3;
+    Vector4 a_Vector4, b_Vector4, c_Vector4, d_Vector4;
+    Point3 a_Point3, b_Point3, c_Point3, d_Point3; // only a_Point3 takes part in an operation below
+    Quat a_Quat, b_Quat, c_Quat, d_Quat; // only constructed and printed in this test
+    Vector4 tmpV4; // scratch for the pad round-trips below
+    float rndflt1, rndflt2, rndflt3, rndflt4, rndflt5, rndflt6, pad;
+    // Build a pad value (bit pattern 0x7fffffff) used to detect invalid use of padding.
+    // It will be NaN for scalar/ppu implementations, max. float for spu.
+    union { float f; unsigned int u; } tmp; // lets the integer bit pattern be read back as a float
+    tmp.u = 0x7fffffff;
+    pad = tmp.f;
+    rndflt1 = randfloat(); // each Vector3 below is built from three randfloat() results, drawn in this order
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    a_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 );
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    b_Vector3 = Vector3( rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    c_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    d_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 );
+    tmpV4 = Vector4( a_Vector3, pad ); // round-trip through a Vector4 whose w is pad; presumably plants pad in the unused lane -- confirm
+    a_Vector3 = tmpV4.getXYZ( );
+    tmpV4 = Vector4( b_Vector3, pad );
+    b_Vector3 = tmpV4.getXYZ( );
+    tmpV4 = Vector4( c_Vector3, pad );
+    c_Vector3 = tmpV4.getXYZ( );
+    tmpV4 = Vector4( d_Vector3, pad );
+    d_Vector3 = tmpV4.getXYZ( );
+    print( a_Vector3, "set Vector3 with floats" );
+    print( b_Vector3, "set Vector3 with floats" );
+    print( c_Vector3, "set Vector3 with floats" );
+    print( d_Vector3, "set Vector3 with floats" );
+    rndflt1 = randfloat(); // Vector4 operands: four randfloat() results each
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    a_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt3 = randfloat(); // b_Vector4 is built from rndflt3..rndflt6, unlike a/c/d which use rndflt1..rndflt4
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    b_Vector4 = Vector4( rndflt3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    c_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    d_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 );
+    print( a_Vector4, "set Vector4 with floats" );
+    print( b_Vector4, "set Vector4 with floats" );
+    print( c_Vector4, "set Vector4 with floats" );
+    print( d_Vector4, "set Vector4 with floats" );
+    rndflt1 = randfloat(); // Point3 operands: three randfloat() results each
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    a_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    b_Point3 = Point3( rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    c_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    d_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    tmpV4 = Vector4( Vector3( a_Point3 ), pad ); // same pad round-trip as for the vectors, converting Point3 -> Vector3 -> Vector4 and back
+    a_Point3 = Point3( tmpV4.getXYZ( ) );
+    tmpV4 = Vector4( Vector3( b_Point3 ), pad );
+    b_Point3 = Point3( tmpV4.getXYZ( ) );
+    tmpV4 = Vector4( Vector3( c_Point3 ), pad );
+    c_Point3 = Point3( tmpV4.getXYZ( ) );
+    tmpV4 = Vector4( Vector3( d_Point3 ), pad );
+    d_Point3 = Point3( tmpV4.getXYZ( ) );
+    print( a_Point3, "set Point3 with floats" );
+    print( b_Point3, "set Point3 with floats" );
+    print( c_Point3, "set Point3 with floats" );
+    print( d_Point3, "set Point3 with floats" );
+    rndflt1 = randfloat(); // Quat operands: four randfloat() results each
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    a_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt3 = randfloat(); // b_Quat is built from rndflt3..rndflt6, mirroring b_Vector4
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    b_Quat = Quat( rndflt3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    c_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    d_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    print( a_Quat, "set Quat with floats" );
+    print( b_Quat, "set Quat with floats" );
+    print( c_Quat, "set Quat with floats" );
+    print( d_Quat, "set Quat with floats" );
+    a_Matrix3 = Matrix3( a_Vector3, b_Vector3, c_Vector3 );
+    b_Matrix3 = Matrix3( d_Vector3, a_Vector3, b_Vector3 );
+    print( a_Matrix3, "set Matrix3 columns" );
+    print( b_Matrix3, "set Matrix3 columns" );
+    a_Matrix4 = Matrix4( a_Vector4, b_Vector4, c_Vector4, d_Vector4 ); // columns a, b, c, d
+    b_Matrix4 = Matrix4( d_Vector4, a_Vector4, b_Vector4, c_Vector4 ); // columns d, a, b, c (same vectors rotated by one)
+    print( a_Matrix4, "set Matrix4 columns" );
+    print( b_Matrix4, "set Matrix4 columns" );
+    a_Transform3 = Transform3( a_Vector3, b_Vector3, c_Vector3, d_Vector3 );
+    b_Transform3 = Transform3( d_Vector3, a_Vector3, b_Vector3, c_Vector3 );
+    print( a_Transform3, "set Transform3 columns" );
+    print( b_Transform3, "set Transform3 columns" );
+    print( ( a_Matrix4 + b_Matrix4 ), "Matrix4 + Matrix4" ); // operations under test start here; everything above is setup
+    print( ( a_Matrix4 - b_Matrix4 ), "Matrix4 - Matrix4" );
+    print( ( -a_Matrix4 ), "-Matrix4" );
+    print( ( a_Matrix4 * randfloat() ), "Matrix4 * float" ); // the scalar is one more randfloat() result
+    print( ( randfloat() * a_Matrix4 ), "float * Matrix4" ); // and another one for the float-on-the-left form
+    print( ( a_Matrix4 * a_Vector4 ), "Matrix4 * Vector4" );
+    print( ( a_Matrix4 * a_Vector3 ), "Matrix4 * Vector3" );
+    print( ( a_Matrix4 * a_Point3 ), "Matrix4 * Point3" );
+    print( ( a_Matrix4 * b_Matrix4 ), "Matrix4 * Matrix4" );
+    print( ( a_Matrix4 * b_Transform3 ), "Matrix4 * Transform3" );
+}
+
+void
+Transform3_methods_test() // Fills operands from randfloat(), prints them, then prints Transform3 products with a Vector3, a Point3 and a Transform3.
+{
+    Matrix3 a_Matrix3, b_Matrix3; // only constructed and printed in this test
+    Matrix4 a_Matrix4, b_Matrix4; // only constructed and printed in this test
+    Transform3 a_Transform3, b_Transform3;
+    Vector3 a_Vector3, b_Vector3, c_Vector3, d_Vector3;
+    Vector4 a_Vector4, b_Vector4, c_Vector4, d_Vector4;
+    Point3 a_Point3, b_Point3, c_Point3, d_Point3; // only a_Point3 takes part in an operation below
+    Quat a_Quat, b_Quat, c_Quat, d_Quat; // only constructed and printed in this test
+    Vector4 tmpV4; // scratch for the pad round-trips below
+    float rndflt1, rndflt2, rndflt3, rndflt4, rndflt5, rndflt6, pad;
+    // Build a pad value (bit pattern 0x7fffffff) used to detect invalid use of padding.
+    // It will be NaN for scalar/ppu implementations, max. float for spu.
+    union { float f; unsigned int u; } tmp; // lets the integer bit pattern be read back as a float
+    tmp.u = 0x7fffffff;
+    pad = tmp.f;
+    rndflt1 = randfloat(); // each Vector3 below is built from three randfloat() results, drawn in this order
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    a_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 );
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    b_Vector3 = Vector3( rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    c_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    d_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 );
+    tmpV4 = Vector4( a_Vector3, pad ); // round-trip through a Vector4 whose w is pad; presumably plants pad in the unused lane -- confirm
+    a_Vector3 = tmpV4.getXYZ( );
+    tmpV4 = Vector4( b_Vector3, pad );
+    b_Vector3 = tmpV4.getXYZ( );
+    tmpV4 = Vector4( c_Vector3, pad );
+    c_Vector3 = tmpV4.getXYZ( );
+    tmpV4 = Vector4( d_Vector3, pad );
+    d_Vector3 = tmpV4.getXYZ( );
+    print( a_Vector3, "set Vector3 with floats" );
+    print( b_Vector3, "set Vector3 with floats" );
+    print( c_Vector3, "set Vector3 with floats" );
+    print( d_Vector3, "set Vector3 with floats" );
+    rndflt1 = randfloat(); // Vector4 operands: four randfloat() results each
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    a_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt3 = randfloat(); // b_Vector4 is built from rndflt3..rndflt6, unlike a/c/d which use rndflt1..rndflt4
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    b_Vector4 = Vector4( rndflt3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    c_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    d_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 );
+    print( a_Vector4, "set Vector4 with floats" );
+    print( b_Vector4, "set Vector4 with floats" );
+    print( c_Vector4, "set Vector4 with floats" );
+    print( d_Vector4, "set Vector4 with floats" );
+    rndflt1 = randfloat(); // Point3 operands: three randfloat() results each
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    a_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    b_Point3 = Point3( rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    c_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    d_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    tmpV4 = Vector4( Vector3( a_Point3 ), pad ); // same pad round-trip as for the vectors, converting Point3 -> Vector3 -> Vector4 and back
+    a_Point3 = Point3( tmpV4.getXYZ( ) );
+    tmpV4 = Vector4( Vector3( b_Point3 ), pad );
+    b_Point3 = Point3( tmpV4.getXYZ( ) );
+    tmpV4 = Vector4( Vector3( c_Point3 ), pad );
+    c_Point3 = Point3( tmpV4.getXYZ( ) );
+    tmpV4 = Vector4( Vector3( d_Point3 ), pad );
+    d_Point3 = Point3( tmpV4.getXYZ( ) );
+    print( a_Point3, "set Point3 with floats" );
+    print( b_Point3, "set Point3 with floats" );
+    print( c_Point3, "set Point3 with floats" );
+    print( d_Point3, "set Point3 with floats" );
+    rndflt1 = randfloat(); // Quat operands: four randfloat() results each
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    a_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt3 = randfloat(); // b_Quat is built from rndflt3..rndflt6, mirroring b_Vector4
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    b_Quat = Quat( rndflt3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    c_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    d_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    print( a_Quat, "set Quat with floats" );
+    print( b_Quat, "set Quat with floats" );
+    print( c_Quat, "set Quat with floats" );
+    print( d_Quat, "set Quat with floats" );
+    a_Matrix3 = Matrix3( a_Vector3, b_Vector3, c_Vector3 );
+    b_Matrix3 = Matrix3( d_Vector3, a_Vector3, b_Vector3 );
+    print( a_Matrix3, "set Matrix3 columns" );
+    print( b_Matrix3, "set Matrix3 columns" );
+    a_Matrix4 = Matrix4( a_Vector4, b_Vector4, c_Vector4, d_Vector4 );
+    b_Matrix4 = Matrix4( d_Vector4, a_Vector4, b_Vector4, c_Vector4 );
+    print( a_Matrix4, "set Matrix4 columns" );
+    print( b_Matrix4, "set Matrix4 columns" );
+    a_Transform3 = Transform3( a_Vector3, b_Vector3, c_Vector3, d_Vector3 ); // columns a, b, c, d
+    b_Transform3 = Transform3( d_Vector3, a_Vector3, b_Vector3, c_Vector3 ); // columns d, a, b, c (same vectors rotated by one)
+    print( a_Transform3, "set Transform3 columns" );
+    print( b_Transform3, "set Transform3 columns" );
+    print( ( a_Transform3 * a_Vector3 ), "Transform3 * Vector3" ); // operations under test start here; everything above is setup
+    print( ( a_Transform3 * a_Point3 ), "Transform3 * Point3" );
+    print( ( a_Transform3 * b_Transform3 ), "Transform3 * Transform3" );
+}
+
+int main()
+{
+    // Run the three test groups twice, between the __begin__ and __end__ lines.
+    printf("\n __begin__ \n");
+    for ( int pass = 0; pass < 2; ++pass ) {
+        Matrix3_methods_test();
+        Matrix4_methods_test();
+        Transform3_methods_test();
+    }
+    printf("\n __end__ \n");
+    return 0;
+}
diff --git a/Extras/vectormathlibrary/tests/test3_reference.txt b/Extras/vectormathlibrary/tests/test3_reference.txt
new file mode 100644
index 000000000..834b0fe20
--- /dev/null
+++ b/Extras/vectormathlibrary/tests/test3_reference.txt
@@ -0,0 +1,392 @@
+set Vector3 with floats: ( -0.658344 0.499804 -0.807257 )
+set Vector3 with floats: ( 0.740930 0.154607 0.571599 )
+set Vector3 with floats: ( 0.384388 -0.262467 0.747808 )
+set Vector3 with floats: ( 0.490190 -0.107908 -0.292544 )
+set Vector4 with floats: ( 0.465039 -0.479556 -0.211412 0.553580 )
+set Vector4 with floats: ( 0.690070 0.151576 0.431077 -0.833992 )
+set Vector4 with floats: ( -0.088350 -0.780106 0.090456 -0.218627 )
+set Vector4 with floats: ( 0.137171 0.918133 0.735438 -0.673621 )
+set Point3 with floats: ( -0.448982 -0.479278 0.848189 )
+set Point3 with floats: ( -0.128155 0.578922 -0.744766 )
+set Point3 with floats: ( -0.835589 0.881284 -0.948850 )
+set Point3 with floats: ( -0.691578 -0.235635 -0.690527 )
+set Quat with floats: ( 0.058667 0.753697 -0.138777 -0.472188 )
+set Quat with floats: ( -0.372811 0.540183 -0.785218 0.542085 )
+set Quat with floats: ( 0.410391 -0.562721 0.523588 -0.176574 )
+set Quat with floats: ( 0.297654 0.859913 0.004837 0.374881 )
+set Matrix3 columns:
+( -0.658344 0.740930 0.384388 )
+( 0.499804 0.154607 -0.262467 )
+( -0.807257 0.571599 0.747808 )
+set Matrix3 columns:
+( 0.490190 -0.658344 0.740930 )
+( -0.107908 0.499804 0.154607 )
+( -0.292544 -0.807257 0.571599 )
+set Matrix4 columns:
+( 0.465039 0.690070 -0.088350 0.137171 )
+( -0.479556 0.151576 -0.780106 0.918133 )
+( -0.211412 0.431077 0.090456 0.735438 )
+( 0.553580 -0.833992 -0.218627 -0.673621 )
+set Matrix4 columns:
+( 0.137171 0.465039 0.690070 -0.088350 )
+( 0.918133 -0.479556 0.151576 -0.780106 )
+( 0.735438 -0.211412 0.431077 0.090456 )
+( -0.673621 0.553580 -0.833992 -0.218627 )
+set Transform3 columns:
+( -0.658344 0.740930 0.384388 0.490190 )
+( 0.499804 0.154607 -0.262467 -0.107908 )
+( -0.807257 0.571599 0.747808 -0.292544 )
+set Transform3 columns:
+( 0.490190 -0.658344 0.740930 0.384388 )
+( -0.107908 0.499804 0.154607 -0.262467 )
+( -0.292544 -0.807257 0.571599 0.747808 )
+Matrix3 + Matrix3:
+( -0.168154 0.082587 1.125319 )
+( 0.391896 0.654411 -0.107860 )
+( -1.099800 -0.235658 1.319407 )
+Matrix3 - Matrix3:
+( -1.148534 1.399274 -0.356542 )
+( 0.607712 -0.345197 -0.417075 )
+( -0.514713 1.378855 0.176210 )
+-Matrix3:
+( 0.658344 -0.740930 -0.384388 )
+( -0.499804 -0.154607 0.262467 )
+( 0.807257 -0.571599 -0.747808 )
+Matrix3 * float:
+( 0.084148 -0.094704 -0.049132 )
+( -0.063884 -0.019762 0.033548 )
+( 0.103182 -0.073061 -0.095583 )
+float * Matrix3:
+( -0.142598 0.160487 0.083259 )
+( 0.108258 0.033488 -0.056851 )
+( -0.174853 0.123809 0.161977 )
+Matrix3 * Vector3: ( 0.493437 -0.039891 0.213467 )
+Matrix3 * Matrix3:
+( -0.515117 0.493437 -0.153518 )
+( 0.305099 -0.039891 0.244197 )
+( -0.676156 0.213467 -0.082302 )
+set Vector3 with floats: ( 0.153117 0.265243 -0.073149 )
+set Vector3 with floats: ( 0.264488 -0.723410 0.921523 )
+set Vector3 with floats: ( -0.711250 -0.106634 -0.350831 )
+set Vector3 with floats: ( 0.905168 -0.283632 -0.203584 )
+set Vector4 with floats: ( -0.797437 0.910171 0.969234 0.151940 )
+set Vector4 with floats: ( 0.731827 -0.700248 0.818301 0.302505 )
+set Vector4 with floats: ( -0.872278 0.909999 0.932526 0.571087 )
+set Vector4 with floats: ( 0.610330 0.142507 -0.434829 0.925102 )
+set Point3 with floats: ( 0.158954 -0.126283 -0.249128 )
+set Point3 with floats: ( 0.846815 -0.942601 0.537720 )
+set Point3 with floats: ( 0.446214 0.181939 -0.148223 )
+set Point3 with floats: ( 0.284286 0.493525 -0.861963 )
+set Quat with floats: ( -0.893410 0.548627 0.407007 -0.757467 )
+set Quat with floats: ( -0.393126 -0.850984 0.375720 -0.270088 )
+set Quat with floats: ( 0.458888 -0.610828 -0.690816 -0.676415 )
+set Quat with floats: ( 0.664466 0.101874 -0.365714 0.055473 )
+set Matrix3 columns:
+( 0.153117 0.264488 -0.711250 )
+( 0.265243 -0.723410 -0.106634 )
+( -0.073149 0.921523 -0.350831 )
+set Matrix3 columns:
+( 0.905168 0.153117 0.264488 )
+( -0.283632 0.265243 -0.723410 )
+( -0.203584 -0.073149 0.921523 )
+set Matrix4 columns:
+( -0.797437 0.731827 -0.872278 0.610330 )
+( 0.910171 -0.700248 0.909999 0.142507 )
+( 0.969234 0.818301 0.932526 -0.434829 )
+( 0.151940 0.302505 0.571087 0.925102 )
+set Matrix4 columns:
+( 0.610330 -0.797437 0.731827 -0.872278 )
+( 0.142507 0.910171 -0.700248 0.909999 )
+( -0.434829 0.969234 0.818301 0.932526 )
+( 0.925102 0.151940 0.302505 0.571087 )
+set Transform3 columns:
+( 0.153117 0.264488 -0.711250 0.905168 )
+( 0.265243 -0.723410 -0.106634 -0.283632 )
+( -0.073149 0.921523 -0.350831 -0.203584 )
+set Transform3 columns:
+( 0.905168 0.153117 0.264488 -0.711250 )
+( -0.283632 0.265243 -0.723410 -0.106634 )
+( -0.203584 -0.073149 0.921523 -0.350831 )
+Matrix4 + Matrix4:
+( -0.187107 -0.065609 -0.140451 -0.261949 )
+( 1.052679 0.209923 0.209751 1.052506 )
+( 0.534405 1.787535 1.750826 0.497697 )
+( 1.077042 0.454445 0.873592 1.496189 )
+Matrix4 - Matrix4:
+( -1.407767 1.529264 -1.604106 1.482608 )
+( 0.767664 -1.610420 1.610247 -0.767491 )
+( 1.404062 -0.150933 0.114225 -1.367354 )
+( -0.773162 0.150565 0.268582 0.354015 )
+-Matrix4:
+( 0.797437 -0.731827 0.872278 -0.610330 )
+( -0.910171 0.700248 -0.909999 -0.142507 )
+( -0.969234 -0.818301 -0.932526 0.434829 )
+( -0.151940 -0.302505 -0.571087 -0.925102 )
+Matrix4 * float:
+( 0.106503 -0.097740 0.116498 -0.081513 )
+( -0.121559 0.093522 -0.121536 -0.019033 )
+( -0.129447 -0.109289 -0.124544 0.058074 )
+( -0.020293 -0.040401 -0.076272 -0.123553 )
+float * Matrix4:
+( 0.456647 -0.419076 0.499504 -0.349501 )
+( -0.521203 0.400992 -0.521105 -0.081606 )
+( -0.555025 -0.468594 -0.534004 0.249002 )
+( -0.087007 -0.173227 -0.327029 -0.529753 )
+Matrix4 * Vector4: ( 0.549286 -0.459496 0.809659 0.848246 )
+Matrix4 * Vector3: ( 0.135817 -0.112939 0.297242 0.061728 )
+Matrix4 * Point3: ( 0.608465 0.148906 -0.616420 0.768779 )
+Matrix4 * Matrix4:
+( 0.561500 0.549286 -1.625205 0.896678 )
+( 0.191855 -0.459496 1.944198 -0.501167 )
+( -0.099583 0.809659 0.767847 0.520490 )
+( 0.743332 0.848246 0.646534 1.203612 )
+Matrix4 * Transform3:
+( -0.751803 0.135817 -1.544148 1.405491 )
+( 0.837210 -0.112939 1.585880 -0.749438 )
+( 0.455376 0.297242 0.523727 -1.538614 )
+( -0.064533 0.061728 0.347621 0.584422 )
+set Vector3 with floats: ( 0.459209 -0.997261 0.172409 )
+set Vector3 with floats: ( -0.045124 0.879716 0.524317 )
+set Vector3 with floats: ( -0.744532 -0.970444 -0.000013 )
+set Vector3 with floats: ( 0.689543 0.704297 -0.817983 )
+set Vector4 with floats: ( 0.715505 0.577868 0.156952 -0.801022 )
+set Vector4 with floats: ( 0.656335 0.494393 0.816743 0.024285 )
+set Vector4 with floats: ( 0.769132 0.923895 0.133022 -0.052219 )
+set Vector4 with floats: ( -0.164886 0.300690 0.760403 0.171869 )
+set Point3 with floats: ( -0.554976 0.998693 -0.681641 )
+set Point3 with floats: ( 0.391195 0.403059 0.972411 )
+set Point3 with floats: ( 0.297195 0.309761 0.688408 )
+set Point3 with floats: ( 0.363540 0.940297 -0.336683 )
+set Quat with floats: ( 0.600164 -0.681272 0.726558 0.205513 )
+set Quat with floats: ( -0.160082 0.962714 0.737794 -0.071926 )
+set Quat with floats: ( -0.506313 0.689277 0.686485 0.473013 )
+set Quat with floats: ( -0.735610 -0.046390 0.568674 -0.004815 )
+set Matrix3 columns:
+( 0.459209 -0.045124 -0.744532 )
+( -0.997261 0.879716 -0.970444 )
+( 0.172409 0.524317 -0.000013 )
+set Matrix3 columns:
+( 0.689543 0.459209 -0.045124 )
+( 0.704297 -0.997261 0.879716 )
+( -0.817983 0.172409 0.524317 )
+set Matrix4 columns:
+( 0.715505 0.656335 0.769132 -0.164886 )
+( 0.577868 0.494393 0.923895 0.300690 )
+( 0.156952 0.816743 0.133022 0.760403 )
+( -0.801022 0.024285 -0.052219 0.171869 )
+set Matrix4 columns:
+( -0.164886 0.715505 0.656335 0.769132 )
+( 0.300690 0.577868 0.494393 0.923895 )
+( 0.760403 0.156952 0.816743 0.133022 )
+( 0.171869 -0.801022 0.024285 -0.052219 )
+set Transform3 columns:
+( 0.459209 -0.045124 -0.744532 0.689543 )
+( -0.997261 0.879716 -0.970444 0.704297 )
+( 0.172409 0.524317 -0.000013 -0.817983 )
+set Transform3 columns:
+( 0.689543 0.459209 -0.045124 -0.744532 )
+( 0.704297 -0.997261 0.879716 -0.970444 )
+( -0.817983 0.172409 0.524317 -0.000013 )
+Transform3 * Vector3: ( 0.127510 -1.502572 -0.443712 )
+Transform3 * Point3: ( 0.897132 2.797814 -0.390025 )
+Transform3 * Transform3:
+( 0.893878 0.127510 -0.450789 0.391447 )
+( 0.725733 -1.502572 0.310080 0.593088 )
+( 0.488169 -0.443712 0.453463 -1.455167 )
+set Vector3 with floats: ( 0.137637 -0.111879 -0.929543 )
+set Vector3 with floats: ( -0.336303 -0.146740 0.165140 )
+set Vector3 with floats: ( -0.823874 0.349776 0.174872 )
+set Vector3 with floats: ( -0.528584 0.489292 0.916708 )
+set Vector4 with floats: ( 0.728511 -0.851140 0.079620 -0.234370 )
+set Vector4 with floats: ( -0.996308 0.433229 -0.892684 -0.957911 )
+set Vector4 with floats: ( 0.517122 0.257921 0.862028 0.095881 )
+set Vector4 with floats: ( -0.171933 -0.214078 -0.604841 -0.383831 )
+set Point3 with floats: ( -0.581500 0.222183 -0.256120 )
+set Point3 with floats: ( -0.678699 -0.079553 0.605960 )
+set Point3 with floats: ( -0.633147 0.435875 -0.046627 )
+set Point3 with floats: ( -0.716491 0.267317 -0.514874 )
+set Quat with floats: ( -0.751700 0.742959 -0.793180 0.508814 )
+set Quat with floats: ( -0.238839 0.113471 -0.843523 -0.245250 )
+set Quat with floats: ( 0.250368 0.579243 -0.157280 0.648487 )
+set Quat with floats: ( 0.103833 0.456401 -0.022372 -0.475631 )
+set Matrix3 columns:
+( 0.137637 -0.336303 -0.823874 )
+( -0.111879 -0.146740 0.349776 )
+( -0.929543 0.165140 0.174872 )
+set Matrix3 columns:
+( -0.528584 0.137637 -0.336303 )
+( 0.489292 -0.111879 -0.146740 )
+( 0.916708 -0.929543 0.165140 )
+set Matrix4 columns:
+( 0.728511 -0.996308 0.517122 -0.171933 )
+( -0.851140 0.433229 0.257921 -0.214078 )
+( 0.079620 -0.892684 0.862028 -0.604841 )
+( -0.234370 -0.957911 0.095881 -0.383831 )
+set Matrix4 columns:
+( -0.171933 0.728511 -0.996308 0.517122 )
+( -0.214078 -0.851140 0.433229 0.257921 )
+( -0.604841 0.079620 -0.892684 0.862028 )
+( -0.383831 -0.234370 -0.957911 0.095881 )
+set Transform3 columns:
+( 0.137637 -0.336303 -0.823874 -0.528584 )
+( -0.111879 -0.146740 0.349776 0.489292 )
+( -0.929543 0.165140 0.174872 0.916708 )
+set Transform3 columns:
+( -0.528584 0.137637 -0.336303 -0.823874 )
+( 0.489292 -0.111879 -0.146740 0.349776 )
+( 0.916708 -0.929543 0.165140 0.174872 )
+Matrix3 + Matrix3:
+( -0.390948 -0.198667 -1.160178 )
+( 0.377413 -0.258619 0.203036 )
+( -0.012835 -0.764402 0.340013 )
+Matrix3 - Matrix3:
+( 0.666221 -0.473940 -0.487571 )
+( -0.601171 -0.034861 0.496517 )
+( -1.846250 1.094683 0.009732 )
+-Matrix3:
+( -0.137637 0.336303 0.823874 )
+( 0.111879 0.146740 -0.349776 )
+( 0.929543 -0.165140 -0.174872 )
+Matrix3 * float:
+( -0.000575 0.001405 0.003442 )
+( 0.000467 0.000613 -0.001461 )
+( 0.003884 -0.000690 -0.000731 )
+float * Matrix3:
+( -0.002872 0.007017 0.017190 )
+( 0.002334 0.003062 -0.007298 )
+( 0.019395 -0.003446 -0.003649 )
+Matrix3 * Vector3: ( 0.822395 -0.324114 -0.308966 )
+Matrix3 * Matrix3:
+( -0.992555 0.822395 -0.132993 )
+( 0.307981 -0.324114 0.116920 )
+( 0.732450 -0.308966 0.317254 )
+set Vector3 with floats: ( -0.016997 0.699144 0.837796 )
+set Vector3 with floats: ( -0.276082 0.091582 0.209064 )
+set Vector3 with floats: ( 0.219317 -0.118359 0.413442 )
+set Vector3 with floats: ( -0.567698 0.531358 -0.387226 )
+set Vector4 with floats: ( 0.572490 -0.820417 0.797191 0.867178 )
+set Vector4 with floats: ( 0.934764 0.237092 -0.866162 -0.773939 )
+set Vector4 with floats: ( 0.261311 -0.851570 0.114814 -0.531592 )
+set Vector4 with floats: ( 0.223925 0.869105 0.143405 0.148518 )
+set Point3 with floats: ( -0.071136 -0.758292 -0.527633 )
+set Point3 with floats: ( 0.997215 0.114440 0.727558 )
+set Point3 with floats: ( -0.425760 0.459888 0.642516 )
+set Point3 with floats: ( -0.022534 0.186095 -0.775679 )
+set Quat with floats: ( -0.683401 0.398134 0.189642 0.765986 )
+set Quat with floats: ( -0.137795 -0.579844 -0.635647 0.374970 )
+set Quat with floats: ( -0.563750 -0.471075 -0.553800 -0.014688 )
+set Quat with floats: ( -0.464365 -0.107890 -0.527503 -0.406423 )
+set Matrix3 columns:
+( -0.016997 -0.276082 0.219317 )
+( 0.699144 0.091582 -0.118359 )
+( 0.837796 0.209064 0.413442 )
+set Matrix3 columns:
+( -0.567698 -0.016997 -0.276082 )
+( 0.531358 0.699144 0.091582 )
+( -0.387226 0.837796 0.209064 )
+set Matrix4 columns:
+( 0.572490 0.934764 0.261311 0.223925 )
+( -0.820417 0.237092 -0.851570 0.869105 )
+( 0.797191 -0.866162 0.114814 0.143405 )
+( 0.867178 -0.773939 -0.531592 0.148518 )
+set Matrix4 columns:
+( 0.223925 0.572490 0.934764 0.261311 )
+( 0.869105 -0.820417 0.237092 -0.851570 )
+( 0.143405 0.797191 -0.866162 0.114814 )
+( 0.148518 0.867178 -0.773939 -0.531592 )
+set Transform3 columns:
+( -0.016997 -0.276082 0.219317 -0.567698 )
+( 0.699144 0.091582 -0.118359 0.531358 )
+( 0.837796 0.209064 0.413442 -0.387226 )
+set Transform3 columns:
+( -0.567698 -0.016997 -0.276082 0.219317 )
+( 0.531358 0.699144 0.091582 -0.118359 )
+( -0.387226 0.837796 0.209064 0.413442 )
+Matrix4 + Matrix4:
+( 0.796414 1.507254 1.196075 0.485235 )
+( 0.048687 -0.583325 -0.614477 0.017535 )
+( 0.940596 -0.068971 -0.751347 0.258219 )
+( 1.015695 0.093239 -1.305531 -0.383075 )
+Matrix4 - Matrix4:
+( 0.348565 0.362275 -0.673454 -0.037386 )
+( -1.689522 1.057509 -1.088662 1.720674 )
+( 0.653787 -1.663353 0.980976 0.028590 )
+( 0.718660 -1.641117 0.242347 0.680110 )
+-Matrix4:
+( -0.572490 -0.934764 -0.261311 -0.223925 )
+( 0.820417 -0.237092 0.851570 -0.869105 )
+( -0.797191 0.866162 -0.114814 -0.143405 )
+( -0.867178 0.773939 0.531592 -0.148518 )
+Matrix4 * float:
+( 0.172469 0.281608 0.078723 0.067460 )
+( -0.247160 0.071427 -0.256545 0.261827 )
+( 0.240163 -0.260941 0.034589 0.043202 )
+( 0.261247 -0.233158 -0.160148 0.044743 )
+float * Matrix4:
+( 0.285975 0.466942 0.130532 0.111857 )
+( -0.409822 0.118434 -0.425383 0.434143 )
+( 0.398220 -0.432673 0.057353 0.071635 )
+( 0.433180 -0.386605 -0.265545 0.074189 )
+Matrix4 * Vector4: ( -0.036655 -0.589390 1.382884 0.836413 )
+Matrix4 * Vector3: ( 0.862729 -0.533736 -0.522930 -1.001200 )
+Matrix4 * Point3: ( -0.663500 1.196998 0.682919 0.954187 )
+Matrix4 * Matrix4:
+( 1.011332 -0.036655 0.357127 -0.735454 )
+( 0.029304 -0.589390 -0.645721 -0.976066 )
+( -0.536511 1.382884 0.329392 0.882861 )
+( -0.532626 0.836413 0.972614 0.745680 )
+Matrix4 * Transform3:
+( 0.070508 0.862729 -0.017816 0.346880 )
+( 0.921479 -0.533736 0.070183 0.309037 )
+( -0.957265 -0.522930 -0.275411 0.468230 )
+( -0.697687 -1.001200 -0.421428 0.210525 )
+set Vector3 with floats: ( 0.385180 -0.150218 0.519112 )
+set Vector3 with floats: ( -0.203209 -0.252017 0.282194 )
+set Vector3 with floats: ( 0.067637 0.798376 0.310782 )
+set Vector3 with floats: ( 0.861334 -0.980345 -0.655106 )
+set Vector4 with floats: ( 0.286765 0.532078 0.352671 0.540977 )
+set Vector4 with floats: ( 0.510961 0.791871 -0.564379 0.273199 )
+set Vector4 with floats: ( 0.194378 0.244636 -0.269608 -0.858162 )
+set Vector4 with floats: ( -0.495023 -0.277798 -0.032740 0.007412 )
+set Point3 with floats: ( -0.420178 -0.522577 0.324972 )
+set Point3 with floats: ( 0.795389 0.342900 -0.913636 )
+set Point3 with floats: ( 0.675222 0.144053 -0.632329 )
+set Point3 with floats: ( -0.947120 -0.049367 0.126333 )
+set Quat with floats: ( -0.664206 0.220879 0.284219 -0.387216 )
+set Quat with floats: ( 0.913568 0.531906 0.271995 -0.862601 )
+set Quat with floats: ( -0.738694 0.514248 -0.039363 0.429390 )
+set Quat with floats: ( -0.769469 0.281336 -0.203301 0.412586 )
+set Matrix3 columns:
+( 0.385180 -0.203209 0.067637 )
+( -0.150218 -0.252017 0.798376 )
+( 0.519112 0.282194 0.310782 )
+set Matrix3 columns:
+( 0.861334 0.385180 -0.203209 )
+( -0.980345 -0.150218 -0.252017 )
+( -0.655106 0.519112 0.282194 )
+set Matrix4 columns:
+( 0.286765 0.510961 0.194378 -0.495023 )
+( 0.532078 0.791871 0.244636 -0.277798 )
+( 0.352671 -0.564379 -0.269608 -0.032740 )
+( 0.540977 0.273199 -0.858162 0.007412 )
+set Matrix4 columns:
+( -0.495023 0.286765 0.510961 0.194378 )
+( -0.277798 0.532078 0.791871 0.244636 )
+( -0.032740 0.352671 -0.564379 -0.269608 )
+( 0.007412 0.540977 0.273199 -0.858162 )
+set Transform3 columns:
+( 0.385180 -0.203209 0.067637 0.861334 )
+( -0.150218 -0.252017 0.798376 -0.980345 )
+( 0.519112 0.282194 0.310782 -0.655106 )
+set Transform3 columns:
+( 0.861334 0.385180 -0.203209 0.067637 )
+( -0.980345 -0.150218 -0.252017 0.798376 )
+( -0.655106 0.519112 0.282194 0.310782 )
+Transform3 * Vector3: ( 0.214000 0.394443 0.318891 )
+Transform3 * Point3: ( 0.827662 -0.526078 -0.919697 )
+Transform3 * Transform3:
+( 0.486673 0.214000 -0.007973 0.746170 )
+( -0.405345 0.394443 0.319335 -0.943589 )
+( -0.033113 0.318891 -0.088905 -0.298112 )
+
+ __end__ 
diff --git a/Extras/vectormathlibrary/tests/test3_soa_c.c b/Extras/vectormathlibrary/tests/test3_soa_c.c
new file mode 100644
index 000000000..36cda98e2
--- /dev/null
+++ b/Extras/vectormathlibrary/tests/test3_soa_c.c
@@ -0,0 +1,433 @@
+/*
+  Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+  All rights reserved.
+
+  Redistribution and use in source and binary forms,
+  with or without modification, are permitted provided that the
+  following conditions are met:
+   * Redistributions of source code must retain the above copyright
+     notice, this list of conditions and the following disclaimer.
+   * Redistributions in binary form must reproduce the above copyright
+     notice, this list of conditions and the following disclaimer in the
+     documentation and/or other materials provided with the distribution.
+   * Neither the name of the Sony Computer Entertainment Inc nor the names
+     of its contributors may be used to endorse or promote products derived
+     from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+  POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#define _VECTORMATH_SOA_TEST
+
+#include "vectormath_soa.h"
+#include "test.h"
+
+int iteration = 0;
+
+void
+Matrix3_methods_test()  /* Builds random SoA operands, prints them, then prints the result of each Matrix3 operation. */
+{  /* NOTE(review): output is presumably compared against a recorded log, so the randfloat() call order likely must not change - confirm */
+    VmathSoaMatrix3 a_Matrix3, b_Matrix3;
+    VmathSoaMatrix4 a_Matrix4, b_Matrix4;  /* only constructed and printed in this function */
+    VmathSoaTransform3 a_Transform3, b_Transform3;  /* only constructed and printed in this function */
+    VmathSoaMatrix3 tmpM3_0, tmpM3_1, tmpM3_2, tmpM3_3, tmpM3_4, tmpM3_5;  /* one result slot per Matrix3-valued operation below */
+    VmathSoaVector3 a_Vector3, b_Vector3, c_Vector3, d_Vector3;
+    VmathSoaVector4 a_Vector4, b_Vector4, c_Vector4, d_Vector4;
+    VmathSoaPoint3 a_Point3, b_Point3, c_Point3, d_Point3;  /* only constructed and printed in this function */
+    VmathSoaQuat a_Quat, b_Quat, c_Quat, d_Quat;  /* only constructed and printed in this function */
+    VmathSoaVector3 tmpV3_0;  /* receives Matrix3 * Vector3 */
+    vec_float4 rndflt1, rndflt2, rndflt3, rndflt4, rndflt5, rndflt6;  /* scratch values, refilled from randfloat() before each constructor call */
+    rndflt1 = randfloat();  /* every element of every operand gets its own randfloat() call */
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaV3MakeFromElems( &a_Vector3, rndflt1, rndflt2, rndflt3 );
+    rndflt4 = randfloat();  /* b_Vector3 is built from the second scratch triple rndflt4..6 */
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathSoaV3MakeFromElems( &b_Vector3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();  /* rndflt1..3 are overwritten and reused for c_Vector3 */
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaV3MakeFromElems( &c_Vector3, rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();  /* ...and overwritten again for d_Vector3 */
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaV3MakeFromElems( &d_Vector3, rndflt1, rndflt2, rndflt3 );
+    vmathSoaV3Prints( &a_Vector3, "set Vector3 with floats" );  /* all four prints share one label; only the values differ */
+    vmathSoaV3Prints( &b_Vector3, "set Vector3 with floats" );
+    vmathSoaV3Prints( &c_Vector3, "set Vector3 with floats" );
+    vmathSoaV3Prints( &d_Vector3, "set Vector3 with floats" );
+    rndflt1 = randfloat();  /* Vector4 operands: four fresh values per vector */
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaV4MakeFromElems( &a_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt3 = randfloat();  /* b_Vector4 uses rndflt3..6, overwriting the rndflt3/rndflt4 already consumed by a_Vector4 */
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathSoaV4MakeFromElems( &b_Vector4, rndflt3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaV4MakeFromElems( &c_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaV4MakeFromElems( &d_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    vmathSoaV4Prints( &a_Vector4, "set Vector4 with floats" );
+    vmathSoaV4Prints( &b_Vector4, "set Vector4 with floats" );
+    vmathSoaV4Prints( &c_Vector4, "set Vector4 with floats" );
+    vmathSoaV4Prints( &d_Vector4, "set Vector4 with floats" );
+    rndflt1 = randfloat();  /* Point3 operands follow the same pattern as the Vector3 ones */
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaP3MakeFromElems( &a_Point3, rndflt1, rndflt2, rndflt3 );
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathSoaP3MakeFromElems( &b_Point3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaP3MakeFromElems( &c_Point3, rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaP3MakeFromElems( &d_Point3, rndflt1, rndflt2, rndflt3 );
+    vmathSoaP3Prints( &a_Point3, "set Point3 with floats" );
+    vmathSoaP3Prints( &b_Point3, "set Point3 with floats" );
+    vmathSoaP3Prints( &c_Point3, "set Point3 with floats" );
+    vmathSoaP3Prints( &d_Point3, "set Point3 with floats" );
+    rndflt1 = randfloat();  /* Quat operands follow the same pattern as the Vector4 ones */
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaQMakeFromElems( &a_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathSoaQMakeFromElems( &b_Quat, rndflt3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaQMakeFromElems( &c_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaQMakeFromElems( &d_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    vmathSoaQPrints( &a_Quat, "set Quat with floats" );
+    vmathSoaQPrints( &b_Quat, "set Quat with floats" );
+    vmathSoaQPrints( &c_Quat, "set Quat with floats" );
+    vmathSoaQPrints( &d_Quat, "set Quat with floats" );
+    vmathSoaM3MakeFromCols( &a_Matrix3, &a_Vector3, &b_Vector3, &c_Vector3 );  /* a_Matrix3 is built from vectors a, b, c */
+    vmathSoaM3MakeFromCols( &b_Matrix3, &d_Vector3, &a_Vector3, &b_Vector3 );  /* b_Matrix3 reuses the same vectors, passed as d, a, b */
+    vmathSoaM3Prints( &a_Matrix3, "set Matrix3 columns" );
+    vmathSoaM3Prints( &b_Matrix3, "set Matrix3 columns" );
+    vmathSoaM4MakeFromCols( &a_Matrix4, &a_Vector4, &b_Vector4, &c_Vector4, &d_Vector4 );  /* built from vectors a, b, c, d */
+    vmathSoaM4MakeFromCols( &b_Matrix4, &d_Vector4, &a_Vector4, &b_Vector4, &c_Vector4 );  /* built from vectors d, a, b, c */
+    vmathSoaM4Prints( &a_Matrix4, "set Matrix4 columns" );
+    vmathSoaM4Prints( &b_Matrix4, "set Matrix4 columns" );
+    vmathSoaT3MakeFromCols( &a_Transform3, &a_Vector3, &b_Vector3, &c_Vector3, &d_Vector3 );  /* four Vector3 arguments: a, b, c, d */
+    vmathSoaT3MakeFromCols( &b_Transform3, &d_Vector3, &a_Vector3, &b_Vector3, &c_Vector3 );  /* four Vector3 arguments: d, a, b, c */
+    vmathSoaT3Prints( &a_Transform3, "set Transform3 columns" );
+    vmathSoaT3Prints( &b_Transform3, "set Transform3 columns" );
+    vmathSoaM3Add( &tmpM3_0, &a_Matrix3, &b_Matrix3 );  /* operations under test start here; each result is printed right after it is computed */
+    vmathSoaM3Prints( &tmpM3_0, "Matrix3 + Matrix3" );
+    vmathSoaM3Sub( &tmpM3_1, &a_Matrix3, &b_Matrix3 );
+    vmathSoaM3Prints( &tmpM3_1, "Matrix3 - Matrix3" );
+    vmathSoaM3Neg( &tmpM3_2, &a_Matrix3 );
+    vmathSoaM3Prints( &tmpM3_2, "-Matrix3" );
+    vmathSoaM3ScalarMul( &tmpM3_3, &a_Matrix3, randfloat() );  /* the scalar is a fresh randfloat() drawn inline */
+    vmathSoaM3Prints( &tmpM3_3, "Matrix3 * float" );
+    vmathSoaM3ScalarMul( &tmpM3_4, &a_Matrix3, randfloat() );  /* same call and argument order as above with a new random scalar; only the label differs */
+    vmathSoaM3Prints( &tmpM3_4, "float * Matrix3" );
+    vmathSoaM3MulV3( &tmpV3_0, &a_Matrix3, &a_Vector3 );
+    vmathSoaV3Prints( &tmpV3_0, "Matrix3 * Vector3" );
+    vmathSoaM3Mul( &tmpM3_5, &a_Matrix3, &b_Matrix3 );
+    vmathSoaM3Prints( &tmpM3_5, "Matrix3 * Matrix3" );
+}
+
+void
+Matrix4_methods_test()  /* Builds random SoA operands, prints them, then prints the result of each Matrix4 operation. */
+{  /* NOTE(review): output is presumably compared against a recorded log, so the randfloat() call order likely must not change - confirm */
+    VmathSoaMatrix3 a_Matrix3, b_Matrix3;  /* only constructed and printed in this function */
+    VmathSoaMatrix4 a_Matrix4, b_Matrix4;
+    VmathSoaTransform3 a_Transform3, b_Transform3;  /* only b_Transform3 is used as an operand (Matrix4 * Transform3) */
+    VmathSoaMatrix4 tmpM4_0, tmpM4_1, tmpM4_2, tmpM4_3, tmpM4_4, tmpM4_5, tmpM4_6;  /* one result slot per Matrix4-valued operation below */
+    VmathSoaVector3 a_Vector3, b_Vector3, c_Vector3, d_Vector3;
+    VmathSoaVector4 a_Vector4, b_Vector4, c_Vector4, d_Vector4;
+    VmathSoaPoint3 a_Point3, b_Point3, c_Point3, d_Point3;  /* only a_Point3 is used as an operand; the rest are just printed */
+    VmathSoaQuat a_Quat, b_Quat, c_Quat, d_Quat;  /* only constructed and printed in this function */
+    VmathSoaVector4 tmpV4_0, tmpV4_1, tmpV4_2;  /* results of Matrix4 * Vector4, * Vector3 and * Point3 */
+    vec_float4 rndflt1, rndflt2, rndflt3, rndflt4, rndflt5, rndflt6;  /* scratch values, refilled from randfloat() before each constructor call */
+    rndflt1 = randfloat();  /* every element of every operand gets its own randfloat() call */
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaV3MakeFromElems( &a_Vector3, rndflt1, rndflt2, rndflt3 );
+    rndflt4 = randfloat();  /* b_Vector3 is built from the second scratch triple rndflt4..6 */
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathSoaV3MakeFromElems( &b_Vector3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();  /* rndflt1..3 are overwritten and reused for c_Vector3 */
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaV3MakeFromElems( &c_Vector3, rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();  /* ...and overwritten again for d_Vector3 */
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaV3MakeFromElems( &d_Vector3, rndflt1, rndflt2, rndflt3 );
+    vmathSoaV3Prints( &a_Vector3, "set Vector3 with floats" );  /* all four prints share one label; only the values differ */
+    vmathSoaV3Prints( &b_Vector3, "set Vector3 with floats" );
+    vmathSoaV3Prints( &c_Vector3, "set Vector3 with floats" );
+    vmathSoaV3Prints( &d_Vector3, "set Vector3 with floats" );
+    rndflt1 = randfloat();  /* Vector4 operands: four fresh values per vector */
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaV4MakeFromElems( &a_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt3 = randfloat();  /* b_Vector4 uses rndflt3..6, overwriting the rndflt3/rndflt4 already consumed by a_Vector4 */
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathSoaV4MakeFromElems( &b_Vector4, rndflt3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaV4MakeFromElems( &c_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaV4MakeFromElems( &d_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    vmathSoaV4Prints( &a_Vector4, "set Vector4 with floats" );
+    vmathSoaV4Prints( &b_Vector4, "set Vector4 with floats" );
+    vmathSoaV4Prints( &c_Vector4, "set Vector4 with floats" );
+    vmathSoaV4Prints( &d_Vector4, "set Vector4 with floats" );
+    rndflt1 = randfloat();  /* Point3 operands follow the same pattern as the Vector3 ones */
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaP3MakeFromElems( &a_Point3, rndflt1, rndflt2, rndflt3 );
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathSoaP3MakeFromElems( &b_Point3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaP3MakeFromElems( &c_Point3, rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaP3MakeFromElems( &d_Point3, rndflt1, rndflt2, rndflt3 );
+    vmathSoaP3Prints( &a_Point3, "set Point3 with floats" );
+    vmathSoaP3Prints( &b_Point3, "set Point3 with floats" );
+    vmathSoaP3Prints( &c_Point3, "set Point3 with floats" );
+    vmathSoaP3Prints( &d_Point3, "set Point3 with floats" );
+    rndflt1 = randfloat();  /* Quat operands follow the same pattern as the Vector4 ones */
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaQMakeFromElems( &a_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathSoaQMakeFromElems( &b_Quat, rndflt3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaQMakeFromElems( &c_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaQMakeFromElems( &d_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    vmathSoaQPrints( &a_Quat, "set Quat with floats" );
+    vmathSoaQPrints( &b_Quat, "set Quat with floats" );
+    vmathSoaQPrints( &c_Quat, "set Quat with floats" );
+    vmathSoaQPrints( &d_Quat, "set Quat with floats" );
+    vmathSoaM3MakeFromCols( &a_Matrix3, &a_Vector3, &b_Vector3, &c_Vector3 );  /* built from vectors a, b, c */
+    vmathSoaM3MakeFromCols( &b_Matrix3, &d_Vector3, &a_Vector3, &b_Vector3 );  /* built from vectors d, a, b */
+    vmathSoaM3Prints( &a_Matrix3, "set Matrix3 columns" );
+    vmathSoaM3Prints( &b_Matrix3, "set Matrix3 columns" );
+    vmathSoaM4MakeFromCols( &a_Matrix4, &a_Vector4, &b_Vector4, &c_Vector4, &d_Vector4 );  /* a_Matrix4 is built from vectors a, b, c, d */
+    vmathSoaM4MakeFromCols( &b_Matrix4, &d_Vector4, &a_Vector4, &b_Vector4, &c_Vector4 );  /* b_Matrix4 reuses the same vectors, passed as d, a, b, c */
+    vmathSoaM4Prints( &a_Matrix4, "set Matrix4 columns" );
+    vmathSoaM4Prints( &b_Matrix4, "set Matrix4 columns" );
+    vmathSoaT3MakeFromCols( &a_Transform3, &a_Vector3, &b_Vector3, &c_Vector3, &d_Vector3 );  /* four Vector3 arguments: a, b, c, d */
+    vmathSoaT3MakeFromCols( &b_Transform3, &d_Vector3, &a_Vector3, &b_Vector3, &c_Vector3 );  /* four Vector3 arguments: d, a, b, c */
+    vmathSoaT3Prints( &a_Transform3, "set Transform3 columns" );
+    vmathSoaT3Prints( &b_Transform3, "set Transform3 columns" );
+    vmathSoaM4Add( &tmpM4_0, &a_Matrix4, &b_Matrix4 );  /* operations under test start here; each result is printed right after it is computed */
+    vmathSoaM4Prints( &tmpM4_0, "Matrix4 + Matrix4" );
+    vmathSoaM4Sub( &tmpM4_1, &a_Matrix4, &b_Matrix4 );
+    vmathSoaM4Prints( &tmpM4_1, "Matrix4 - Matrix4" );
+    vmathSoaM4Neg( &tmpM4_2, &a_Matrix4 );
+    vmathSoaM4Prints( &tmpM4_2, "-Matrix4" );
+    vmathSoaM4ScalarMul( &tmpM4_3, &a_Matrix4, randfloat() );  /* the scalar is a fresh randfloat() drawn inline */
+    vmathSoaM4Prints( &tmpM4_3, "Matrix4 * float" );
+    vmathSoaM4ScalarMul( &tmpM4_4, &a_Matrix4, randfloat() );  /* same call and argument order as above with a new random scalar; only the label differs */
+    vmathSoaM4Prints( &tmpM4_4, "float * Matrix4" );
+    vmathSoaM4MulV4( &tmpV4_0, &a_Matrix4, &a_Vector4 );
+    vmathSoaV4Prints( &tmpV4_0, "Matrix4 * Vector4" );
+    vmathSoaM4MulV3( &tmpV4_1, &a_Matrix4, &a_Vector3 );  /* result is stored in a Vector4 even though the operand is a Vector3 */
+    vmathSoaV4Prints( &tmpV4_1, "Matrix4 * Vector3" );
+    vmathSoaM4MulP3( &tmpV4_2, &a_Matrix4, &a_Point3 );  /* result is stored in a Vector4 even though the operand is a Point3 */
+    vmathSoaV4Prints( &tmpV4_2, "Matrix4 * Point3" );
+    vmathSoaM4Mul( &tmpM4_5, &a_Matrix4, &b_Matrix4 );
+    vmathSoaM4Prints( &tmpM4_5, "Matrix4 * Matrix4" );
+    vmathSoaM4MulT3( &tmpM4_6, &a_Matrix4, &b_Transform3 );  /* right-hand operand is b_Transform3, not a_Transform3 */
+    vmathSoaM4Prints( &tmpM4_6, "Matrix4 * Transform3" );
+}
+
+void
+Transform3_methods_test()  /* Builds random SoA operands, prints them, then prints the result of each Transform3 operation. */
+{  /* NOTE(review): output is presumably compared against a recorded log, so the randfloat() call order likely must not change - confirm */
+    VmathSoaMatrix3 a_Matrix3, b_Matrix3;  /* only constructed and printed in this function */
+    VmathSoaMatrix4 a_Matrix4, b_Matrix4;  /* only constructed and printed in this function */
+    VmathSoaTransform3 a_Transform3, b_Transform3, tmpT3_0;  /* tmpT3_0 receives Transform3 * Transform3 */
+    VmathSoaVector3 a_Vector3, b_Vector3, c_Vector3, d_Vector3;
+    VmathSoaVector4 a_Vector4, b_Vector4, c_Vector4, d_Vector4;  /* used here only as Matrix4 columns */
+    VmathSoaPoint3 a_Point3, b_Point3, c_Point3, d_Point3;  /* only a_Point3 is used as an operand; the rest are just printed */
+    VmathSoaQuat a_Quat, b_Quat, c_Quat, d_Quat;  /* only constructed and printed in this function */
+    VmathSoaVector3 tmpV3_0;  /* receives Transform3 * Vector3 */
+    VmathSoaPoint3 tmpP3_0;  /* receives Transform3 * Point3 */
+    vec_float4 rndflt1, rndflt2, rndflt3, rndflt4, rndflt5, rndflt6;  /* scratch values, refilled from randfloat() before each constructor call */
+    rndflt1 = randfloat();  /* every element of every operand gets its own randfloat() call */
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaV3MakeFromElems( &a_Vector3, rndflt1, rndflt2, rndflt3 );
+    rndflt4 = randfloat();  /* b_Vector3 is built from the second scratch triple rndflt4..6 */
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathSoaV3MakeFromElems( &b_Vector3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();  /* rndflt1..3 are overwritten and reused for c_Vector3 */
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaV3MakeFromElems( &c_Vector3, rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();  /* ...and overwritten again for d_Vector3 */
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaV3MakeFromElems( &d_Vector3, rndflt1, rndflt2, rndflt3 );
+    vmathSoaV3Prints( &a_Vector3, "set Vector3 with floats" );  /* all four prints share one label; only the values differ */
+    vmathSoaV3Prints( &b_Vector3, "set Vector3 with floats" );
+    vmathSoaV3Prints( &c_Vector3, "set Vector3 with floats" );
+    vmathSoaV3Prints( &d_Vector3, "set Vector3 with floats" );
+    rndflt1 = randfloat();  /* Vector4 operands: four fresh values per vector */
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaV4MakeFromElems( &a_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt3 = randfloat();  /* b_Vector4 uses rndflt3..6, overwriting the rndflt3/rndflt4 already consumed by a_Vector4 */
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathSoaV4MakeFromElems( &b_Vector4, rndflt3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaV4MakeFromElems( &c_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaV4MakeFromElems( &d_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    vmathSoaV4Prints( &a_Vector4, "set Vector4 with floats" );
+    vmathSoaV4Prints( &b_Vector4, "set Vector4 with floats" );
+    vmathSoaV4Prints( &c_Vector4, "set Vector4 with floats" );
+    vmathSoaV4Prints( &d_Vector4, "set Vector4 with floats" );
+    rndflt1 = randfloat();  /* Point3 operands follow the same pattern as the Vector3 ones */
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaP3MakeFromElems( &a_Point3, rndflt1, rndflt2, rndflt3 );
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathSoaP3MakeFromElems( &b_Point3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaP3MakeFromElems( &c_Point3, rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaP3MakeFromElems( &d_Point3, rndflt1, rndflt2, rndflt3 );
+    vmathSoaP3Prints( &a_Point3, "set Point3 with floats" );
+    vmathSoaP3Prints( &b_Point3, "set Point3 with floats" );
+    vmathSoaP3Prints( &c_Point3, "set Point3 with floats" );
+    vmathSoaP3Prints( &d_Point3, "set Point3 with floats" );
+    rndflt1 = randfloat();  /* Quat operands follow the same pattern as the Vector4 ones */
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaQMakeFromElems( &a_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathSoaQMakeFromElems( &b_Quat, rndflt3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaQMakeFromElems( &c_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaQMakeFromElems( &d_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    vmathSoaQPrints( &a_Quat, "set Quat with floats" );
+    vmathSoaQPrints( &b_Quat, "set Quat with floats" );
+    vmathSoaQPrints( &c_Quat, "set Quat with floats" );
+    vmathSoaQPrints( &d_Quat, "set Quat with floats" );
+    vmathSoaM3MakeFromCols( &a_Matrix3, &a_Vector3, &b_Vector3, &c_Vector3 );  /* built from vectors a, b, c */
+    vmathSoaM3MakeFromCols( &b_Matrix3, &d_Vector3, &a_Vector3, &b_Vector3 );  /* built from vectors d, a, b */
+    vmathSoaM3Prints( &a_Matrix3, "set Matrix3 columns" );
+    vmathSoaM3Prints( &b_Matrix3, "set Matrix3 columns" );
+    vmathSoaM4MakeFromCols( &a_Matrix4, &a_Vector4, &b_Vector4, &c_Vector4, &d_Vector4 );  /* built from vectors a, b, c, d */
+    vmathSoaM4MakeFromCols( &b_Matrix4, &d_Vector4, &a_Vector4, &b_Vector4, &c_Vector4 );  /* built from vectors d, a, b, c */
+    vmathSoaM4Prints( &a_Matrix4, "set Matrix4 columns" );
+    vmathSoaM4Prints( &b_Matrix4, "set Matrix4 columns" );
+    vmathSoaT3MakeFromCols( &a_Transform3, &a_Vector3, &b_Vector3, &c_Vector3, &d_Vector3 );  /* a_Transform3 takes four Vector3 arguments: a, b, c, d */
+    vmathSoaT3MakeFromCols( &b_Transform3, &d_Vector3, &a_Vector3, &b_Vector3, &c_Vector3 );  /* b_Transform3 reuses the same vectors, passed as d, a, b, c */
+    vmathSoaT3Prints( &a_Transform3, "set Transform3 columns" );
+    vmathSoaT3Prints( &b_Transform3, "set Transform3 columns" );
+    vmathSoaT3MulV3( &tmpV3_0, &a_Transform3, &a_Vector3 );  /* operations under test start here; each result is printed right after it is computed */
+    vmathSoaV3Prints( &tmpV3_0, "Transform3 * Vector3" );
+    vmathSoaT3MulP3( &tmpP3_0, &a_Transform3, &a_Point3 );  /* the Point3 product is stored in a Point3, unlike the Vector3 product above */
+    vmathSoaP3Prints( &tmpP3_0, "Transform3 * Point3" );
+    vmathSoaT3Mul( &tmpT3_0, &a_Transform3, &b_Transform3 );
+    vmathSoaT3Prints( &tmpT3_0, "Transform3 * Transform3" );
+}
+
+int main()  /* Runs the three method tests twice, bracketed by the __begin__ and __end__ marker lines. */
+{
+    int rounds_left = 2;  /* number of full test rounds still to run */
+    printf("\n __begin__ \n");
+    do {  /* the count starts at 2, so the body executes exactly twice */
+        Matrix3_methods_test();
+        Matrix4_methods_test();
+        Transform3_methods_test();
+    } while ( --rounds_left != 0 );
+    printf("\n __end__ \n");
+    return 0;
+}
diff --git a/Extras/vectormathlibrary/tests/test3_soa_cpp.cpp b/Extras/vectormathlibrary/tests/test3_soa_cpp.cpp
new file mode 100644
index 000000000..dafcad16c
--- /dev/null
+++ b/Extras/vectormathlibrary/tests/test3_soa_cpp.cpp
@@ -0,0 +1,410 @@
+/*
+  Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+  All rights reserved.
+
+  Redistribution and use in source and binary forms,
+  with or without modification, are permitted provided that the
+  following conditions are met:
+   * Redistributions of source code must retain the above copyright
+     notice, this list of conditions and the following disclaimer.
+   * Redistributions in binary form must reproduce the above copyright
+     notice, this list of conditions and the following disclaimer in the
+     documentation and/or other materials provided with the distribution.
+   * Neither the name of the Sony Computer Entertainment Inc nor the names
+     of its contributors may be used to endorse or promote products derived
+     from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+  POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#define _VECTORMATH_SOA_TEST
+
+#include "vectormath_soa.h"
+#include "test.h"
+
+int iteration = 0;   // NOTE(review): never read or written in this file; presumably consumed by helpers in test.h - confirm
+
+using namespace Vectormath;
+using namespace Vectormath::Soa;
+
+void   // Test driver for the SoA Matrix3 operators: takes no arguments and returns nothing.
+Matrix3_methods_test()   // Builds operands from randfloat() values and hands every operand and result to print() with a label.
+{
+    Matrix3 a_Matrix3, b_Matrix3;
+    Matrix4 a_Matrix4, b_Matrix4;   // constructed and printed below, but not used by the Matrix3 operator section
+    Transform3 a_Transform3, b_Transform3;   // likewise constructed and printed only
+    Vector3 a_Vector3, b_Vector3, c_Vector3, d_Vector3;
+    Vector4 a_Vector4, b_Vector4, c_Vector4, d_Vector4;
+    Point3 a_Point3, b_Point3, c_Point3, d_Point3;
+    Quat a_Quat, b_Quat, c_Quat, d_Quat;
+    vec_float4 rndflt1, rndflt2, rndflt3, rndflt4, rndflt5, rndflt6;   // scratch slots: each randfloat() result is stored here before use
+    rndflt1 = randfloat();   // one call per statement, so the order of randfloat() calls is exactly the statement order
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    a_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 );
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    b_Vector3 = Vector3( rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();   // rndflt1..rndflt3 are overwritten with new values for c_Vector3
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    c_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();   // and overwritten again for d_Vector3
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    d_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 );
+    print( a_Vector3, "set Vector3 with floats" );
+    print( b_Vector3, "set Vector3 with floats" );
+    print( c_Vector3, "set Vector3 with floats" );
+    print( d_Vector3, "set Vector3 with floats" );
+    rndflt1 = randfloat();   // Vector4 operands: four values each
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    a_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt3 = randfloat();   // b_Vector4 takes rndflt3..rndflt6, all four reassigned here first
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    b_Vector4 = Vector4( rndflt3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    c_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    d_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 );
+    print( a_Vector4, "set Vector4 with floats" );
+    print( b_Vector4, "set Vector4 with floats" );
+    print( c_Vector4, "set Vector4 with floats" );
+    print( d_Vector4, "set Vector4 with floats" );
+    rndflt1 = randfloat();   // Point3 operands: built and printed, not used by the operator section of this function
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    a_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    b_Point3 = Point3( rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    c_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    d_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    print( a_Point3, "set Point3 with floats" );
+    print( b_Point3, "set Point3 with floats" );
+    print( c_Point3, "set Point3 with floats" );
+    print( d_Point3, "set Point3 with floats" );
+    rndflt1 = randfloat();   // Quat operands: built and printed, not used by the operator section of this function
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    a_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt3 = randfloat();   // b_Quat takes rndflt3..rndflt6, all four reassigned here first
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    b_Quat = Quat( rndflt3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    c_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    d_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    print( a_Quat, "set Quat with floats" );
+    print( b_Quat, "set Quat with floats" );
+    print( c_Quat, "set Quat with floats" );
+    print( d_Quat, "set Quat with floats" );
+    a_Matrix3 = Matrix3( a_Vector3, b_Vector3, c_Vector3 );   // built from Vector3 a, b, c
+    b_Matrix3 = Matrix3( d_Vector3, a_Vector3, b_Vector3 );   // built from Vector3 d, a, b (shares a and b with a_Matrix3)
+    print( a_Matrix3, "set Matrix3 columns" );
+    print( b_Matrix3, "set Matrix3 columns" );
+    a_Matrix4 = Matrix4( a_Vector4, b_Vector4, c_Vector4, d_Vector4 );
+    b_Matrix4 = Matrix4( d_Vector4, a_Vector4, b_Vector4, c_Vector4 );   // same four vectors, rotated by one position
+    print( a_Matrix4, "set Matrix4 columns" );
+    print( b_Matrix4, "set Matrix4 columns" );
+    a_Transform3 = Transform3( a_Vector3, b_Vector3, c_Vector3, d_Vector3 );
+    b_Transform3 = Transform3( d_Vector3, a_Vector3, b_Vector3, c_Vector3 );   // same four vectors, rotated by one position
+    print( a_Transform3, "set Transform3 columns" );
+    print( b_Transform3, "set Transform3 columns" );
+    print( ( a_Matrix3 + b_Matrix3 ), "Matrix3 + Matrix3" );   // operator section: each result is passed straight to print()
+    print( ( a_Matrix3 - b_Matrix3 ), "Matrix3 - Matrix3" );
+    print( ( -a_Matrix3 ), "-Matrix3" );
+    print( ( a_Matrix3 * randfloat() ), "Matrix3 * float" );   // scalar operand comes from a further randfloat() call
+    print( ( randfloat() * a_Matrix3 ), "float * Matrix3" );   // separate randfloat() call, so not the same scalar as the line above
+    print( ( a_Matrix3 * a_Vector3 ), "Matrix3 * Vector3" );
+    print( ( a_Matrix3 * b_Matrix3 ), "Matrix3 * Matrix3" );
+}
+
+void   // Test driver for the SoA Matrix4 operators: takes no arguments and returns nothing.
+Matrix4_methods_test()   // Builds operands from randfloat() values and hands every operand and result to print() with a label.
+{
+    Matrix3 a_Matrix3, b_Matrix3;   // constructed and printed below, but not used by the Matrix4 operator section
+    Matrix4 a_Matrix4, b_Matrix4;
+    Transform3 a_Transform3, b_Transform3;
+    Vector3 a_Vector3, b_Vector3, c_Vector3, d_Vector3;
+    Vector4 a_Vector4, b_Vector4, c_Vector4, d_Vector4;
+    Point3 a_Point3, b_Point3, c_Point3, d_Point3;
+    Quat a_Quat, b_Quat, c_Quat, d_Quat;   // constructed and printed only
+    vec_float4 rndflt1, rndflt2, rndflt3, rndflt4, rndflt5, rndflt6;   // scratch slots: each randfloat() result is stored here before use
+    rndflt1 = randfloat();   // one call per statement, so the order of randfloat() calls is exactly the statement order
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    a_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 );
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    b_Vector3 = Vector3( rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();   // rndflt1..rndflt3 are overwritten with new values for c_Vector3
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    c_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();   // and overwritten again for d_Vector3
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    d_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 );
+    print( a_Vector3, "set Vector3 with floats" );
+    print( b_Vector3, "set Vector3 with floats" );
+    print( c_Vector3, "set Vector3 with floats" );
+    print( d_Vector3, "set Vector3 with floats" );
+    rndflt1 = randfloat();   // Vector4 operands: four values each
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    a_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt3 = randfloat();   // b_Vector4 takes rndflt3..rndflt6, all four reassigned here first
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    b_Vector4 = Vector4( rndflt3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    c_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    d_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 );
+    print( a_Vector4, "set Vector4 with floats" );
+    print( b_Vector4, "set Vector4 with floats" );
+    print( c_Vector4, "set Vector4 with floats" );
+    print( d_Vector4, "set Vector4 with floats" );
+    rndflt1 = randfloat();   // Point3 operands; only a_Point3 is used by the operator section
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    a_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    b_Point3 = Point3( rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    c_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    d_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    print( a_Point3, "set Point3 with floats" );
+    print( b_Point3, "set Point3 with floats" );
+    print( c_Point3, "set Point3 with floats" );
+    print( d_Point3, "set Point3 with floats" );
+    rndflt1 = randfloat();   // Quat operands: built and printed, not used by the operator section of this function
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    a_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt3 = randfloat();   // b_Quat takes rndflt3..rndflt6, all four reassigned here first
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    b_Quat = Quat( rndflt3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    c_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    d_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    print( a_Quat, "set Quat with floats" );
+    print( b_Quat, "set Quat with floats" );
+    print( c_Quat, "set Quat with floats" );
+    print( d_Quat, "set Quat with floats" );
+    a_Matrix3 = Matrix3( a_Vector3, b_Vector3, c_Vector3 );
+    b_Matrix3 = Matrix3( d_Vector3, a_Vector3, b_Vector3 );
+    print( a_Matrix3, "set Matrix3 columns" );
+    print( b_Matrix3, "set Matrix3 columns" );
+    a_Matrix4 = Matrix4( a_Vector4, b_Vector4, c_Vector4, d_Vector4 );   // built from Vector4 a, b, c, d
+    b_Matrix4 = Matrix4( d_Vector4, a_Vector4, b_Vector4, c_Vector4 );   // same four vectors, rotated by one position
+    print( a_Matrix4, "set Matrix4 columns" );
+    print( b_Matrix4, "set Matrix4 columns" );
+    a_Transform3 = Transform3( a_Vector3, b_Vector3, c_Vector3, d_Vector3 );
+    b_Transform3 = Transform3( d_Vector3, a_Vector3, b_Vector3, c_Vector3 );   // same four vectors, rotated by one position
+    print( a_Transform3, "set Transform3 columns" );
+    print( b_Transform3, "set Transform3 columns" );
+    print( ( a_Matrix4 + b_Matrix4 ), "Matrix4 + Matrix4" );   // operator section: each result is passed straight to print()
+    print( ( a_Matrix4 - b_Matrix4 ), "Matrix4 - Matrix4" );
+    print( ( -a_Matrix4 ), "-Matrix4" );
+    print( ( a_Matrix4 * randfloat() ), "Matrix4 * float" );   // scalar operand comes from a further randfloat() call
+    print( ( randfloat() * a_Matrix4 ), "float * Matrix4" );   // separate randfloat() call, so not the same scalar as the line above
+    print( ( a_Matrix4 * a_Vector4 ), "Matrix4 * Vector4" );
+    print( ( a_Matrix4 * a_Vector3 ), "Matrix4 * Vector3" );   // Matrix4 is also multiplied by a Vector3 ...
+    print( ( a_Matrix4 * a_Point3 ), "Matrix4 * Point3" );   // ... and by a Point3
+    print( ( a_Matrix4 * b_Matrix4 ), "Matrix4 * Matrix4" );
+    print( ( a_Matrix4 * b_Transform3 ), "Matrix4 * Transform3" );   // the only use of a Transform3 operand in this function
+}
+
+void   // Test driver for the SoA Transform3 operators: takes no arguments and returns nothing.
+Transform3_methods_test()   // Builds operands from randfloat() values and hands every operand and result to print() with a label.
+{
+    Matrix3 a_Matrix3, b_Matrix3;   // constructed and printed below, but not used by the Transform3 operator section
+    Matrix4 a_Matrix4, b_Matrix4;   // likewise constructed and printed only
+    Transform3 a_Transform3, b_Transform3;
+    Vector3 a_Vector3, b_Vector3, c_Vector3, d_Vector3;
+    Vector4 a_Vector4, b_Vector4, c_Vector4, d_Vector4;
+    Point3 a_Point3, b_Point3, c_Point3, d_Point3;
+    Quat a_Quat, b_Quat, c_Quat, d_Quat;   // constructed and printed only
+    vec_float4 rndflt1, rndflt2, rndflt3, rndflt4, rndflt5, rndflt6;   // scratch slots: each randfloat() result is stored here before use
+    rndflt1 = randfloat();   // one call per statement, so the order of randfloat() calls is exactly the statement order
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    a_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 );
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    b_Vector3 = Vector3( rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();   // rndflt1..rndflt3 are overwritten with new values for c_Vector3
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    c_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();   // and overwritten again for d_Vector3
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    d_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 );
+    print( a_Vector3, "set Vector3 with floats" );
+    print( b_Vector3, "set Vector3 with floats" );
+    print( c_Vector3, "set Vector3 with floats" );
+    print( d_Vector3, "set Vector3 with floats" );
+    rndflt1 = randfloat();   // Vector4 operands: four values each; used here only to build and print the Matrix4 operands
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    a_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt3 = randfloat();   // b_Vector4 takes rndflt3..rndflt6, all four reassigned here first
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    b_Vector4 = Vector4( rndflt3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    c_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    d_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 );
+    print( a_Vector4, "set Vector4 with floats" );
+    print( b_Vector4, "set Vector4 with floats" );
+    print( c_Vector4, "set Vector4 with floats" );
+    print( d_Vector4, "set Vector4 with floats" );
+    rndflt1 = randfloat();   // Point3 operands; only a_Point3 is used by the operator section
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    a_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    b_Point3 = Point3( rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    c_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    d_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    print( a_Point3, "set Point3 with floats" );
+    print( b_Point3, "set Point3 with floats" );
+    print( c_Point3, "set Point3 with floats" );
+    print( d_Point3, "set Point3 with floats" );
+    rndflt1 = randfloat();   // Quat operands: built and printed, not used by the operator section of this function
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    a_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt3 = randfloat();   // b_Quat takes rndflt3..rndflt6, all four reassigned here first
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    b_Quat = Quat( rndflt3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    c_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    d_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    print( a_Quat, "set Quat with floats" );
+    print( b_Quat, "set Quat with floats" );
+    print( c_Quat, "set Quat with floats" );
+    print( d_Quat, "set Quat with floats" );
+    a_Matrix3 = Matrix3( a_Vector3, b_Vector3, c_Vector3 );
+    b_Matrix3 = Matrix3( d_Vector3, a_Vector3, b_Vector3 );
+    print( a_Matrix3, "set Matrix3 columns" );
+    print( b_Matrix3, "set Matrix3 columns" );
+    a_Matrix4 = Matrix4( a_Vector4, b_Vector4, c_Vector4, d_Vector4 );
+    b_Matrix4 = Matrix4( d_Vector4, a_Vector4, b_Vector4, c_Vector4 );
+    print( a_Matrix4, "set Matrix4 columns" );
+    print( b_Matrix4, "set Matrix4 columns" );
+    a_Transform3 = Transform3( a_Vector3, b_Vector3, c_Vector3, d_Vector3 );   // built from Vector3 a, b, c, d
+    b_Transform3 = Transform3( d_Vector3, a_Vector3, b_Vector3, c_Vector3 );   // same four vectors, rotated by one position
+    print( a_Transform3, "set Transform3 columns" );
+    print( b_Transform3, "set Transform3 columns" );
+    print( ( a_Transform3 * a_Vector3 ), "Transform3 * Vector3" );   // operator section: each result is passed straight to print()
+    print( ( a_Transform3 * a_Point3 ), "Transform3 * Point3" );
+    print( ( a_Transform3 * b_Transform3 ), "Transform3 * Transform3" );
+}
+
+int main()  // Entry point: runs the three test groups twice between two marker lines; always returns 0.
+{
+    int passesLeft = 2;  // number of passes over the test groups still to run
+    printf("\n __begin__ \n");  // marker written before any test output
+    while ( passesLeft-- > 0 ) {
+        Matrix3_methods_test();
+        Matrix4_methods_test();
+        Transform3_methods_test();
+    }
+    printf("\n __end__ \n");  // marker written after the last pass
+    return 0;
+}
diff --git a/Extras/vectormathlibrary/tests/test4_aos_c.c b/Extras/vectormathlibrary/tests/test4_aos_c.c
new file mode 100644
index 000000000..94b47f8e5
--- /dev/null
+++ b/Extras/vectormathlibrary/tests/test4_aos_c.c
@@ -0,0 +1,567 @@
+/*
+  Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+  All rights reserved.
+
+  Redistribution and use in source and binary forms,
+  with or without modification, are permitted provided that the
+  following conditions are met:
+   * Redistributions of source code must retain the above copyright
+     notice, this list of conditions and the following disclaimer.
+   * Redistributions in binary form must reproduce the above copyright
+     notice, this list of conditions and the following disclaimer in the
+     documentation and/or other materials provided with the distribution.
+   * Neither the name of the Sony Computer Entertainment Inc nor the names
+     of its contributors may be used to endorse or promote products derived
+     from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+  POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#define _VECTORMATH_AOS_TEST
+
+#include "vectormath_aos.h"
+#include "test.h"
+
+int iteration = 0;   // NOTE(review): not used by Matrix3_methods_test below; presumably read by helpers in test.h - confirm
+
+void   // Test driver for the AoS C Matrix3 functions: takes no arguments and returns nothing.
+Matrix3_methods_test()   // Builds operands from randfloat() values, prints them, then prints the result of each Matrix3 operation.
+{
+    VmathMatrix3 a_Matrix3, b_Matrix3;
+    VmathMatrix4 a_Matrix4, b_Matrix4;   // constructed and printed below, but not used by the Matrix3 operations
+    VmathTransform3 a_Transform3, b_Transform3;   // likewise constructed and printed only
+    VmathMatrix3 tmpM3_0, tmpM3_1, tmpM3_2, tmpM3_3, tmpM3_4, tmpM3_5, tmpM3_6, tmpM3_7, tmpM3_8, tmpM3_9, tmpM3_10;   // one result slot per matrix-valued operation
+    VmathVector3 a_Vector3, b_Vector3, c_Vector3, d_Vector3;
+    VmathVector4 a_Vector4, b_Vector4, c_Vector4, d_Vector4;
+    VmathPoint3 a_Point3, b_Point3, c_Point3, d_Point3;
+    VmathQuat a_Quat, b_Quat, c_Quat, d_Quat;
+    VmathVector4 tmpV4;   // scratch Vector4 used only for the pad round-trips
+    VmathVector3 tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3, tmpV3_4, tmpV3_5, tmpV3_6, tmpV3_7, tmpV3_8;   // _0.._7: Point3 pad round-trips, _8: rowMul result
+    float rndflt1, rndflt2, rndflt3, rndflt4, rndflt5, rndflt6, pad;   // rndflt*: scratch slots holding randfloat() results before use
+    // set a pad value to detect invalid use of padding.
+    // this will be nan for scalar/ppu implementations, max. float for spu
+    union { float f; unsigned int u; } tmp;   // lets the float be given an exact bit pattern
+    tmp.u = 0x7fffffff;   // sign bit clear, all remaining bits set (assumes 32-bit unsigned int and float)
+    pad = tmp.f;
+    rndflt1 = randfloat();   // one call per statement, so the order of randfloat() calls is exactly the statement order
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathV3MakeFromElems( &a_Vector3, rndflt1, rndflt2, rndflt3 );
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathV3MakeFromElems( &b_Vector3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();   // rndflt1..rndflt3 are overwritten with new values for c_Vector3
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathV3MakeFromElems( &c_Vector3, rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();   // and overwritten again for d_Vector3
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathV3MakeFromElems( &d_Vector3, rndflt1, rndflt2, rndflt3 );
+    vmathV4MakeFromV3Scalar( &tmpV4, &a_Vector3, pad );   // pad round-trip: widen to a Vector4 carrying pad as the scalar ...
+    vmathV4GetXYZ( &a_Vector3, &tmpV4 );   // ... then read xyz back; NOTE(review): presumably leaves pad in the unused lane on SIMD builds - confirm
+    vmathV4MakeFromV3Scalar( &tmpV4, &b_Vector3, pad );
+    vmathV4GetXYZ( &b_Vector3, &tmpV4 );
+    vmathV4MakeFromV3Scalar( &tmpV4, &c_Vector3, pad );
+    vmathV4GetXYZ( &c_Vector3, &tmpV4 );
+    vmathV4MakeFromV3Scalar( &tmpV4, &d_Vector3, pad );
+    vmathV4GetXYZ( &d_Vector3, &tmpV4 );
+    vmathV3Prints( &a_Vector3, "set Vector3 with floats" );
+    vmathV3Prints( &b_Vector3, "set Vector3 with floats" );
+    vmathV3Prints( &c_Vector3, "set Vector3 with floats" );
+    vmathV3Prints( &d_Vector3, "set Vector3 with floats" );
+    rndflt1 = randfloat();   // Vector4 operands: four values each, no pad round-trip
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathV4MakeFromElems( &a_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt3 = randfloat();   // b_Vector4 takes rndflt3..rndflt6, all four reassigned here first
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathV4MakeFromElems( &b_Vector4, rndflt3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathV4MakeFromElems( &c_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathV4MakeFromElems( &d_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    vmathV4Prints( &a_Vector4, "set Vector4 with floats" );
+    vmathV4Prints( &b_Vector4, "set Vector4 with floats" );
+    vmathV4Prints( &c_Vector4, "set Vector4 with floats" );
+    vmathV4Prints( &d_Vector4, "set Vector4 with floats" );
+    rndflt1 = randfloat();   // Point3 operands: built and printed, not used by the Matrix3 operations
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathP3MakeFromElems( &a_Point3, rndflt1, rndflt2, rndflt3 );
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathP3MakeFromElems( &b_Point3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathP3MakeFromElems( &c_Point3, rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathP3MakeFromElems( &d_Point3, rndflt1, rndflt2, rndflt3 );
+    vmathV3MakeFromP3( &tmpV3_0, &a_Point3 );   // same pad round-trip for each point: Point3 -> Vector3 -> Vector4 with pad ...
+    vmathV4MakeFromV3Scalar( &tmpV4, &tmpV3_0, pad );
+    vmathV4GetXYZ( &tmpV3_1, &tmpV4 );
+    vmathP3MakeFromV3( &a_Point3, &tmpV3_1 );   // ... -> xyz Vector3 -> back into the Point3
+    vmathV3MakeFromP3( &tmpV3_2, &b_Point3 );
+    vmathV4MakeFromV3Scalar( &tmpV4, &tmpV3_2, pad );
+    vmathV4GetXYZ( &tmpV3_3, &tmpV4 );
+    vmathP3MakeFromV3( &b_Point3, &tmpV3_3 );
+    vmathV3MakeFromP3( &tmpV3_4, &c_Point3 );
+    vmathV4MakeFromV3Scalar( &tmpV4, &tmpV3_4, pad );
+    vmathV4GetXYZ( &tmpV3_5, &tmpV4 );
+    vmathP3MakeFromV3( &c_Point3, &tmpV3_5 );
+    vmathV3MakeFromP3( &tmpV3_6, &d_Point3 );
+    vmathV4MakeFromV3Scalar( &tmpV4, &tmpV3_6, pad );
+    vmathV4GetXYZ( &tmpV3_7, &tmpV4 );
+    vmathP3MakeFromV3( &d_Point3, &tmpV3_7 );
+    vmathP3Prints( &a_Point3, "set Point3 with floats" );
+    vmathP3Prints( &b_Point3, "set Point3 with floats" );
+    vmathP3Prints( &c_Point3, "set Point3 with floats" );
+    vmathP3Prints( &d_Point3, "set Point3 with floats" );
+    rndflt1 = randfloat();   // Quat operands: built and printed, not used by the Matrix3 operations
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathQMakeFromElems( &a_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt3 = randfloat();   // b_Quat takes rndflt3..rndflt6, all four reassigned here first
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathQMakeFromElems( &b_Quat, rndflt3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathQMakeFromElems( &c_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathQMakeFromElems( &d_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    vmathQPrints( &a_Quat, "set Quat with floats" );
+    vmathQPrints( &b_Quat, "set Quat with floats" );
+    vmathQPrints( &c_Quat, "set Quat with floats" );
+    vmathQPrints( &d_Quat, "set Quat with floats" );
+    vmathM3MakeFromCols( &a_Matrix3, &a_Vector3, &b_Vector3, &c_Vector3 );   // columns a, b, c
+    vmathM3MakeFromCols( &b_Matrix3, &d_Vector3, &a_Vector3, &b_Vector3 );   // columns d, a, b (shares a and b with a_Matrix3)
+    vmathM3Prints( &a_Matrix3, "set Matrix3 columns" );
+    vmathM3Prints( &b_Matrix3, "set Matrix3 columns" );
+    vmathM4MakeFromCols( &a_Matrix4, &a_Vector4, &b_Vector4, &c_Vector4, &d_Vector4 );
+    vmathM4MakeFromCols( &b_Matrix4, &d_Vector4, &a_Vector4, &b_Vector4, &c_Vector4 );   // same four vectors, rotated by one position
+    vmathM4Prints( &a_Matrix4, "set Matrix4 columns" );
+    vmathM4Prints( &b_Matrix4, "set Matrix4 columns" );
+    vmathT3MakeFromCols( &a_Transform3, &a_Vector3, &b_Vector3, &c_Vector3, &d_Vector3 );
+    vmathT3MakeFromCols( &b_Transform3, &d_Vector3, &a_Vector3, &b_Vector3, &c_Vector3 );   // same four vectors, rotated by one position
+    vmathT3Prints( &a_Transform3, "set Transform3 columns" );
+    vmathT3Prints( &b_Transform3, "set Transform3 columns" );
+    vmathM3AppendScale( &tmpM3_0, &a_Matrix3, &a_Vector3 );   // Matrix3 operations start here; each writes its own temporary, which is then printed
+    vmathM3Prints( &tmpM3_0, "appendScale Matrix3 Vector3" );
+    vmathM3PrependScale( &tmpM3_1, &a_Vector3, &a_Matrix3 );   // note the argument order: scale vector first, matrix second
+    vmathM3Prints( &tmpM3_1, "prependScale Vector3 Matrix3" );
+    vmathM3MulPerElem( &tmpM3_2, &a_Matrix3, &b_Matrix3 );
+    vmathM3Prints( &tmpM3_2, "mulPerElem Matrix3" );
+    vmathM3AbsPerElem( &tmpM3_3, &a_Matrix3 );
+    vmathM3Prints( &tmpM3_3, "absPerElem Matrix3" );
+    vmathM3Transpose( &tmpM3_4, &a_Matrix3 );
+    vmathM3Prints( &tmpM3_4, "transpose Matrix3" );
+    vmathM3Inverse( &tmpM3_5, &a_Matrix3 );
+    vmathM3Prints( &tmpM3_5, "inverse Matrix3" );
+    vmathM3Inverse( &tmpM3_6, &a_Matrix3 );   // inverse computed a second time into its own temporary for the product below
+    vmathM3Mul( &tmpM3_7, &tmpM3_6, &a_Matrix3 );   // inverse(a_Matrix3) * a_Matrix3; the product is only printed, nothing is asserted
+    vmathM3Prints( &tmpM3_7, "inverse(Matrix3) * Matrix3" );
+    printf("%f\n", getfloat(vmathM3Determinant( &a_Matrix3 )) );   // determinant is printed without a label, unlike the other results
+    vmathV3Outer( &tmpM3_8, &a_Vector3, &b_Vector3 );
+    vmathM3Prints( &tmpM3_8, "outer Vector3" );
+    vmathV3RowMul( &tmpV3_8, &a_Vector3, &a_Matrix3 );   // the one vector-valued result in this section
+    vmathV3Prints( &tmpV3_8, "rowMul Vector3" );
+    vmathV3CrossMatrix( &tmpM3_9, &a_Vector3 );
+    vmathM3Prints( &tmpM3_9, "crossMatrix" );
+    vmathV3CrossMatrixMul( &tmpM3_10, &a_Vector3, &a_Matrix3 );
+    vmathM3Prints( &tmpM3_10, "crossMatrixMul" );
+}
+
+void
+Matrix4_methods_test()
+{
+    VmathMatrix3 a_Matrix3, b_Matrix3;
+    VmathMatrix4 a_Matrix4, b_Matrix4;
+    VmathTransform3 a_Transform3, b_Transform3;
+    VmathMatrix4 tmpM4_0, tmpM4_1, tmpM4_2, tmpM4_3, tmpM4_4, tmpM4_5, tmpM4_6, tmpM4_7;
+    VmathMatrix3 tmpM3_0;
+    VmathMatrix4 tmpM4_8, tmpM4_9, tmpM4_10, tmpM4_11, tmpM4_12, tmpM4_13, tmpM4_14;
+    VmathVector3 a_Vector3, b_Vector3, c_Vector3, d_Vector3;
+    VmathVector4 a_Vector4, b_Vector4, c_Vector4, d_Vector4;
+    VmathPoint3 a_Point3, b_Point3, c_Point3, d_Point3;
+    VmathQuat a_Quat, b_Quat, c_Quat, d_Quat;
+    VmathVector4 tmpV4;
+    VmathVector3 tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3, tmpV3_4, tmpV3_5, tmpV3_6, tmpV3_7;
+    VmathVector4 tmpV4_0;
+    VmathQuat tmpQ_0;
+    float rndflt1, rndflt2, rndflt3, rndflt4, rndflt5, rndflt6, pad;
+    // set a pad value to detect invalid use of padding.
+    // this will be nan for scalar/ppu implementations, max. float for spu
+    union { float f; unsigned int u; } tmp;
+    tmp.u = 0x7fffffff;
+    pad = tmp.f;
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathV3MakeFromElems( &a_Vector3, rndflt1, rndflt2, rndflt3 );
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathV3MakeFromElems( &b_Vector3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathV3MakeFromElems( &c_Vector3, rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathV3MakeFromElems( &d_Vector3, rndflt1, rndflt2, rndflt3 );
+    vmathV4MakeFromV3Scalar( &tmpV4, &a_Vector3, pad );
+    vmathV4GetXYZ( &a_Vector3, &tmpV4 );
+    vmathV4MakeFromV3Scalar( &tmpV4, &b_Vector3, pad );
+    vmathV4GetXYZ( &b_Vector3, &tmpV4 );
+    vmathV4MakeFromV3Scalar( &tmpV4, &c_Vector3, pad );
+    vmathV4GetXYZ( &c_Vector3, &tmpV4 );
+    vmathV4MakeFromV3Scalar( &tmpV4, &d_Vector3, pad );
+    vmathV4GetXYZ( &d_Vector3, &tmpV4 );
+    vmathV3Prints( &a_Vector3, "set Vector3 with floats" );
+    vmathV3Prints( &b_Vector3, "set Vector3 with floats" );
+    vmathV3Prints( &c_Vector3, "set Vector3 with floats" );
+    vmathV3Prints( &d_Vector3, "set Vector3 with floats" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathV4MakeFromElems( &a_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathV4MakeFromElems( &b_Vector4, rndflt3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathV4MakeFromElems( &c_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathV4MakeFromElems( &d_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    vmathV4Prints( &a_Vector4, "set Vector4 with floats" );
+    vmathV4Prints( &b_Vector4, "set Vector4 with floats" );
+    vmathV4Prints( &c_Vector4, "set Vector4 with floats" );
+    vmathV4Prints( &d_Vector4, "set Vector4 with floats" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathP3MakeFromElems( &a_Point3, rndflt1, rndflt2, rndflt3 );
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathP3MakeFromElems( &b_Point3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathP3MakeFromElems( &c_Point3, rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathP3MakeFromElems( &d_Point3, rndflt1, rndflt2, rndflt3 );
+    vmathV3MakeFromP3( &tmpV3_0, &a_Point3 );
+    vmathV4MakeFromV3Scalar( &tmpV4, &tmpV3_0, pad );
+    vmathV4GetXYZ( &tmpV3_1, &tmpV4 );
+    vmathP3MakeFromV3( &a_Point3, &tmpV3_1 );
+    vmathV3MakeFromP3( &tmpV3_2, &b_Point3 );
+    vmathV4MakeFromV3Scalar( &tmpV4, &tmpV3_2, pad );
+    vmathV4GetXYZ( &tmpV3_3, &tmpV4 );
+    vmathP3MakeFromV3( &b_Point3, &tmpV3_3 );
+    vmathV3MakeFromP3( &tmpV3_4, &c_Point3 );
+    vmathV4MakeFromV3Scalar( &tmpV4, &tmpV3_4, pad );
+    vmathV4GetXYZ( &tmpV3_5, &tmpV4 );
+    vmathP3MakeFromV3( &c_Point3, &tmpV3_5 );
+    vmathV3MakeFromP3( &tmpV3_6, &d_Point3 );
+    vmathV4MakeFromV3Scalar( &tmpV4, &tmpV3_6, pad );
+    vmathV4GetXYZ( &tmpV3_7, &tmpV4 );
+    vmathP3MakeFromV3( &d_Point3, &tmpV3_7 );
+    vmathP3Prints( &a_Point3, "set Point3 with floats" );
+    vmathP3Prints( &b_Point3, "set Point3 with floats" );
+    vmathP3Prints( &c_Point3, "set Point3 with floats" );
+    vmathP3Prints( &d_Point3, "set Point3 with floats" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathQMakeFromElems( &a_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathQMakeFromElems( &b_Quat, rndflt3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathQMakeFromElems( &c_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathQMakeFromElems( &d_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    vmathQPrints( &a_Quat, "set Quat with floats" );
+    vmathQPrints( &b_Quat, "set Quat with floats" );
+    vmathQPrints( &c_Quat, "set Quat with floats" );
+    vmathQPrints( &d_Quat, "set Quat with floats" );
+    vmathM3MakeFromCols( &a_Matrix3, &a_Vector3, &b_Vector3, &c_Vector3 );
+    vmathM3MakeFromCols( &b_Matrix3, &d_Vector3, &a_Vector3, &b_Vector3 );
+    vmathM3Prints( &a_Matrix3, "set Matrix3 columns" );
+    vmathM3Prints( &b_Matrix3, "set Matrix3 columns" );
+    vmathM4MakeFromCols( &a_Matrix4, &a_Vector4, &b_Vector4, &c_Vector4, &d_Vector4 );
+    vmathM4MakeFromCols( &b_Matrix4, &d_Vector4, &a_Vector4, &b_Vector4, &c_Vector4 );
+    vmathM4Prints( &a_Matrix4, "set Matrix4 columns" );
+    vmathM4Prints( &b_Matrix4, "set Matrix4 columns" );
+    vmathT3MakeFromCols( &a_Transform3, &a_Vector3, &b_Vector3, &c_Vector3, &d_Vector3 );
+    vmathT3MakeFromCols( &b_Transform3, &d_Vector3, &a_Vector3, &b_Vector3, &c_Vector3 );
+    vmathT3Prints( &a_Transform3, "set Transform3 columns" );
+    vmathT3Prints( &b_Transform3, "set Transform3 columns" );
+    vmathM4AppendScale( &tmpM4_0, &a_Matrix4, &a_Vector3 );
+    vmathM4Prints( &tmpM4_0, "appendScale Matrix4 Vector3" );
+    vmathM4PrependScale( &tmpM4_1, &a_Vector3, &a_Matrix4 );
+    vmathM4Prints( &tmpM4_1, "prependScale Vector3 Matrix4" );
+    vmathM4MulPerElem( &tmpM4_2, &a_Matrix4, &b_Matrix4 );
+    vmathM4Prints( &tmpM4_2, "mulPerElem Matrix4" );
+    vmathM4AbsPerElem( &tmpM4_3, &a_Matrix4 );
+    vmathM4Prints( &tmpM4_3, "absPerElem Matrix4" );
+    vmathM4Transpose( &tmpM4_4, &a_Matrix4 );
+    vmathM4Prints( &tmpM4_4, "transpose Matrix4" );
+    vmathM4Inverse( &tmpM4_5, &a_Matrix4 );
+    vmathM4Prints( &tmpM4_5, "inverse Matrix4" );
+    vmathM4Inverse( &tmpM4_6, &a_Matrix4 );
+    vmathM4Mul( &tmpM4_7, &tmpM4_6, &a_Matrix4 );
+    vmathM4Prints( &tmpM4_7, "inverse(Matrix4) * Matrix4" );
+    vmathV4MakeFromElems( &tmpV4_0, 0.0f, 0.0f, 0.0f, 1.0f );
+    vmathM4SetRow( &a_Matrix4, 3, &tmpV4_0 );
+    vmathQNormalize( &tmpQ_0, &a_Quat );
+    vmathM3MakeFromQ( &tmpM3_0, &tmpQ_0 );
+    vmathM4SetUpper3x3( &a_Matrix4, &tmpM3_0 );
+    vmathM4AffineInverse( &tmpM4_8, &a_Matrix4 );
+    vmathM4Prints( &tmpM4_8, "affineInverse Matrix4" );
+    vmathM4AffineInverse( &tmpM4_9, &a_Matrix4 );
+    vmathM4Mul( &tmpM4_10, &tmpM4_9, &a_Matrix4 );
+    vmathM4Prints( &tmpM4_10, "affineInverse(Matrix4) * Matrix4" );
+    vmathM4OrthoInverse( &tmpM4_11, &a_Matrix4 );
+    vmathM4Prints( &tmpM4_11, "orthoInverse Matrix4" );
+    vmathM4OrthoInverse( &tmpM4_12, &a_Matrix4 );
+    vmathM4Mul( &tmpM4_13, &tmpM4_12, &a_Matrix4 );
+    vmathM4Prints( &tmpM4_13, "orthoInverse(Matrix4) * Matrix4" );
+    printf("%f\n", getfloat(vmathM4Determinant( &a_Matrix4 )) );
+    vmathV4Outer( &tmpM4_14, &a_Vector4, &b_Vector4 );
+    vmathM4Prints( &tmpM4_14, "outer Vector4" );
+}
+
+void
+Transform3_methods_test() // Fills operands with randfloat() values, prints them, then prints the result of each Transform3 operation exercised below.
+{
+    VmathMatrix3 a_Matrix3, b_Matrix3;
+    VmathMatrix4 a_Matrix4, b_Matrix4;
+    VmathTransform3 a_Transform3, b_Transform3, tmpT3_0, tmpT3_1, tmpT3_2, tmpT3_3, tmpT3_4, tmpT3_5, tmpT3_6; // each tmpT3_N is written by exactly one call below
+    VmathMatrix3 tmpM3_0;
+    VmathTransform3 tmpT3_7, tmpT3_8, tmpT3_9;
+    VmathVector3 a_Vector3, b_Vector3, c_Vector3, d_Vector3;
+    VmathVector4 a_Vector4, b_Vector4, c_Vector4, d_Vector4;
+    VmathPoint3 a_Point3, b_Point3, c_Point3, d_Point3;
+    VmathQuat a_Quat, b_Quat, c_Quat, d_Quat;
+    VmathVector4 tmpV4; // scratch Vector4 reused for every pad round-trip
+    VmathVector3 tmpV3_0, tmpV3_1, tmpV3_2, tmpV3_3, tmpV3_4, tmpV3_5, tmpV3_6, tmpV3_7;
+    VmathQuat tmpQ_0;
+    float rndflt1, rndflt2, rndflt3, rndflt4, rndflt5, rndflt6, pad;
+    // Set a pad value to detect invalid use of padding.
+    // This will be NaN for scalar/ppu implementations, max. float for spu.
+    union { float f; unsigned int u; } tmp;
+    tmp.u = 0x7fffffff; // every bit set except the most significant one (for a 32-bit unsigned int)
+    pad = tmp.f; // read the same bits back as a float
+    rndflt1 = randfloat(); // NOTE(review): presumably a sequential generator, so reordering randfloat() calls would change the printed values; verify in test.h
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathV3MakeFromElems( &a_Vector3, rndflt1, rndflt2, rndflt3 );
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathV3MakeFromElems( &b_Vector3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathV3MakeFromElems( &c_Vector3, rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathV3MakeFromElems( &d_Vector3, rndflt1, rndflt2, rndflt3 );
+    vmathV4MakeFromV3Scalar( &tmpV4, &a_Vector3, pad ); // round-trip a_Vector3 through tmpV4 with pad as the scalar argument
+    vmathV4GetXYZ( &a_Vector3, &tmpV4 ); // presumably leaves pad in any unused 4th slot on SIMD builds; TODO confirm
+    vmathV4MakeFromV3Scalar( &tmpV4, &b_Vector3, pad ); // same round-trip for b, c and d
+    vmathV4GetXYZ( &b_Vector3, &tmpV4 );
+    vmathV4MakeFromV3Scalar( &tmpV4, &c_Vector3, pad );
+    vmathV4GetXYZ( &c_Vector3, &tmpV4 );
+    vmathV4MakeFromV3Scalar( &tmpV4, &d_Vector3, pad );
+    vmathV4GetXYZ( &d_Vector3, &tmpV4 );
+    vmathV3Prints( &a_Vector3, "set Vector3 with floats" );
+    vmathV3Prints( &b_Vector3, "set Vector3 with floats" );
+    vmathV3Prints( &c_Vector3, "set Vector3 with floats" );
+    vmathV3Prints( &d_Vector3, "set Vector3 with floats" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathV4MakeFromElems( &a_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathV4MakeFromElems( &b_Vector4, rndflt3, rndflt4, rndflt5, rndflt6 ); // b_Vector4 draws into rndflt3..rndflt6, unlike a/c/d
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathV4MakeFromElems( &c_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathV4MakeFromElems( &d_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    vmathV4Prints( &a_Vector4, "set Vector4 with floats" );
+    vmathV4Prints( &b_Vector4, "set Vector4 with floats" );
+    vmathV4Prints( &c_Vector4, "set Vector4 with floats" );
+    vmathV4Prints( &d_Vector4, "set Vector4 with floats" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathP3MakeFromElems( &a_Point3, rndflt1, rndflt2, rndflt3 );
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathP3MakeFromElems( &b_Point3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathP3MakeFromElems( &c_Point3, rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathP3MakeFromElems( &d_Point3, rndflt1, rndflt2, rndflt3 );
+    vmathV3MakeFromP3( &tmpV3_0, &a_Point3 ); // pad round-trip for points: Point3 -> Vector3 -> Vector4(pad) -> Vector3 -> Point3
+    vmathV4MakeFromV3Scalar( &tmpV4, &tmpV3_0, pad );
+    vmathV4GetXYZ( &tmpV3_1, &tmpV4 );
+    vmathP3MakeFromV3( &a_Point3, &tmpV3_1 );
+    vmathV3MakeFromP3( &tmpV3_2, &b_Point3 );
+    vmathV4MakeFromV3Scalar( &tmpV4, &tmpV3_2, pad );
+    vmathV4GetXYZ( &tmpV3_3, &tmpV4 );
+    vmathP3MakeFromV3( &b_Point3, &tmpV3_3 );
+    vmathV3MakeFromP3( &tmpV3_4, &c_Point3 );
+    vmathV4MakeFromV3Scalar( &tmpV4, &tmpV3_4, pad );
+    vmathV4GetXYZ( &tmpV3_5, &tmpV4 );
+    vmathP3MakeFromV3( &c_Point3, &tmpV3_5 );
+    vmathV3MakeFromP3( &tmpV3_6, &d_Point3 );
+    vmathV4MakeFromV3Scalar( &tmpV4, &tmpV3_6, pad );
+    vmathV4GetXYZ( &tmpV3_7, &tmpV4 );
+    vmathP3MakeFromV3( &d_Point3, &tmpV3_7 );
+    vmathP3Prints( &a_Point3, "set Point3 with floats" );
+    vmathP3Prints( &b_Point3, "set Point3 with floats" );
+    vmathP3Prints( &c_Point3, "set Point3 with floats" );
+    vmathP3Prints( &d_Point3, "set Point3 with floats" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathQMakeFromElems( &a_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathQMakeFromElems( &b_Quat, rndflt3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathQMakeFromElems( &c_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathQMakeFromElems( &d_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    vmathQPrints( &a_Quat, "set Quat with floats" );
+    vmathQPrints( &b_Quat, "set Quat with floats" );
+    vmathQPrints( &c_Quat, "set Quat with floats" );
+    vmathQPrints( &d_Quat, "set Quat with floats" );
+    vmathM3MakeFromCols( &a_Matrix3, &a_Vector3, &b_Vector3, &c_Vector3 );
+    vmathM3MakeFromCols( &b_Matrix3, &d_Vector3, &a_Vector3, &b_Vector3 ); // b_* operands reuse the same vectors in a different order (d, a, b)
+    vmathM3Prints( &a_Matrix3, "set Matrix3 columns" );
+    vmathM3Prints( &b_Matrix3, "set Matrix3 columns" );
+    vmathM4MakeFromCols( &a_Matrix4, &a_Vector4, &b_Vector4, &c_Vector4, &d_Vector4 ); // the Matrix3/Matrix4 operands are built and printed here but not used by the Transform3 operations below
+    vmathM4MakeFromCols( &b_Matrix4, &d_Vector4, &a_Vector4, &b_Vector4, &c_Vector4 );
+    vmathM4Prints( &a_Matrix4, "set Matrix4 columns" );
+    vmathM4Prints( &b_Matrix4, "set Matrix4 columns" );
+    vmathT3MakeFromCols( &a_Transform3, &a_Vector3, &b_Vector3, &c_Vector3, &d_Vector3 );
+    vmathT3MakeFromCols( &b_Transform3, &d_Vector3, &a_Vector3, &b_Vector3, &c_Vector3 );
+    vmathT3Prints( &a_Transform3, "set Transform3 columns" );
+    vmathT3Prints( &b_Transform3, "set Transform3 columns" );
+    vmathT3AppendScale( &tmpT3_0, &a_Transform3, &a_Vector3 ); // from here on: one Transform3 operation per temporary, each followed by a labelled print
+    vmathT3Prints( &tmpT3_0, "appendScale Transform3 Vector3" );
+    vmathT3PrependScale( &tmpT3_1, &a_Vector3, &a_Transform3 );
+    vmathT3Prints( &tmpT3_1, "prependScale Vector3 Transform3" );
+    vmathT3MulPerElem( &tmpT3_2, &a_Transform3, &b_Transform3 );
+    vmathT3Prints( &tmpT3_2, "mulPerElem Transform3" );
+    vmathT3AbsPerElem( &tmpT3_3, &a_Transform3 );
+    vmathT3Prints( &tmpT3_3, "absPerElem Transform3" );
+    vmathT3Inverse( &tmpT3_4, &a_Transform3 );
+    vmathT3Prints( &tmpT3_4, "inverse Transform3" );
+    vmathT3Inverse( &tmpT3_5, &a_Transform3 ); // the inverse is computed again into a fresh temporary for the product below
+    vmathT3Mul( &tmpT3_6, &tmpT3_5, &a_Transform3 );
+    vmathT3Prints( &tmpT3_6, "inverse(Transform3) * Transform3" ); // presumably close to identity when the inverse is correct; compare with the reference output
+    vmathQNormalize( &tmpQ_0, &a_Quat );
+    vmathM3MakeFromQ( &tmpM3_0, &tmpQ_0 );
+    vmathT3SetUpper3x3( &a_Transform3, &tmpM3_0 ); // overwrite a_Transform3's upper 3x3 with the matrix built from normalized a_Quat before the orthoInverse calls
+    vmathT3OrthoInverse( &tmpT3_7, &a_Transform3 );
+    vmathT3Prints( &tmpT3_7, "orthoInverse Transform3" );
+    vmathT3OrthoInverse( &tmpT3_8, &a_Transform3 );
+    vmathT3Mul( &tmpT3_9, &tmpT3_8, &a_Transform3 );
+    vmathT3Prints( &tmpT3_9, "orthoInverse(Transform3) * Transform3" );
+}
+
+int main()
+{
+    int pass = 0; // how many times the three test groups have been run so far
+    printf("\n __begin__ \n");
+    for ( ; pass != 2; ++pass ) {
+        Matrix3_methods_test();
+        Matrix4_methods_test();
+        Transform3_methods_test();
+    }
+    printf("\n __end__ \n");
+    return 0;
+}
diff --git a/Extras/vectormathlibrary/tests/test4_aos_cpp.cpp b/Extras/vectormathlibrary/tests/test4_aos_cpp.cpp
new file mode 100644
index 000000000..e55bee0e5
--- /dev/null
+++ b/Extras/vectormathlibrary/tests/test4_aos_cpp.cpp
@@ -0,0 +1,492 @@
+/*
+  Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+  All rights reserved.
+
+  Redistribution and use in source and binary forms,
+  with or without modification, are permitted provided that the
+  following conditions are met:
+   * Redistributions of source code must retain the above copyright
+     notice, this list of conditions and the following disclaimer.
+   * Redistributions in binary form must reproduce the above copyright
+     notice, this list of conditions and the following disclaimer in the
+     documentation and/or other materials provided with the distribution.
+   * Neither the name of the Sony Computer Entertainment Inc nor the names
+     of its contributors may be used to endorse or promote products derived
+     from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+  POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#define _VECTORMATH_AOS_TEST
+
+#include "vectormath_aos.h"
+#include "test.h"
+
+int iteration = 0;
+
+using namespace Vectormath;
+using namespace Vectormath::Aos;
+
+void
+Matrix3_methods_test() // Fills operands with randfloat() values, prints them, then prints the result of each Matrix3 operation exercised below.
+{
+    Matrix3 a_Matrix3, b_Matrix3;
+    Matrix4 a_Matrix4, b_Matrix4;
+    Transform3 a_Transform3, b_Transform3;
+    Vector3 a_Vector3, b_Vector3, c_Vector3, d_Vector3;
+    Vector4 a_Vector4, b_Vector4, c_Vector4, d_Vector4;
+    Point3 a_Point3, b_Point3, c_Point3, d_Point3;
+    Quat a_Quat, b_Quat, c_Quat, d_Quat;
+    Vector4 tmpV4; // scratch Vector4 reused for every pad round-trip
+    float rndflt1, rndflt2, rndflt3, rndflt4, rndflt5, rndflt6, pad;
+    // Set a pad value to detect invalid use of padding.
+    // This will be NaN for scalar/ppu implementations, max. float for spu.
+    union { float f; unsigned int u; } tmp;
+    tmp.u = 0x7fffffff; // every bit set except the most significant one (for a 32-bit unsigned int)
+    pad = tmp.f; // read the same bits back as a float
+    rndflt1 = randfloat(); // NOTE(review): presumably a sequential generator, so reordering randfloat() calls would change the printed values; verify in test.h
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    a_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 );
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    b_Vector3 = Vector3( rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    c_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    d_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 );
+    tmpV4 = Vector4( a_Vector3, pad ); // round-trip a_Vector3 through tmpV4 with pad as the scalar argument
+    a_Vector3 = tmpV4.getXYZ( ); // presumably leaves pad in any unused 4th slot on SIMD builds; TODO confirm
+    tmpV4 = Vector4( b_Vector3, pad ); // same round-trip for b, c and d
+    b_Vector3 = tmpV4.getXYZ( );
+    tmpV4 = Vector4( c_Vector3, pad );
+    c_Vector3 = tmpV4.getXYZ( );
+    tmpV4 = Vector4( d_Vector3, pad );
+    d_Vector3 = tmpV4.getXYZ( );
+    print( a_Vector3, "set Vector3 with floats" );
+    print( b_Vector3, "set Vector3 with floats" );
+    print( c_Vector3, "set Vector3 with floats" );
+    print( d_Vector3, "set Vector3 with floats" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    a_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    b_Vector4 = Vector4( rndflt3, rndflt4, rndflt5, rndflt6 ); // b_Vector4 draws into rndflt3..rndflt6, unlike a/c/d
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    c_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    d_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 );
+    print( a_Vector4, "set Vector4 with floats" );
+    print( b_Vector4, "set Vector4 with floats" );
+    print( c_Vector4, "set Vector4 with floats" );
+    print( d_Vector4, "set Vector4 with floats" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    a_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    b_Point3 = Point3( rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    c_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    d_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    tmpV4 = Vector4( Vector3( a_Point3 ), pad ); // pad round-trip for points: Point3 -> Vector3 -> Vector4(pad) -> Vector3 -> Point3
+    a_Point3 = Point3( tmpV4.getXYZ( ) );
+    tmpV4 = Vector4( Vector3( b_Point3 ), pad );
+    b_Point3 = Point3( tmpV4.getXYZ( ) );
+    tmpV4 = Vector4( Vector3( c_Point3 ), pad );
+    c_Point3 = Point3( tmpV4.getXYZ( ) );
+    tmpV4 = Vector4( Vector3( d_Point3 ), pad );
+    d_Point3 = Point3( tmpV4.getXYZ( ) );
+    print( a_Point3, "set Point3 with floats" );
+    print( b_Point3, "set Point3 with floats" );
+    print( c_Point3, "set Point3 with floats" );
+    print( d_Point3, "set Point3 with floats" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    a_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    b_Quat = Quat( rndflt3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    c_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    d_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    print( a_Quat, "set Quat with floats" );
+    print( b_Quat, "set Quat with floats" );
+    print( c_Quat, "set Quat with floats" );
+    print( d_Quat, "set Quat with floats" );
+    a_Matrix3 = Matrix3( a_Vector3, b_Vector3, c_Vector3 );
+    b_Matrix3 = Matrix3( d_Vector3, a_Vector3, b_Vector3 ); // b_* operands reuse the same vectors in a different order (d, a, b)
+    print( a_Matrix3, "set Matrix3 columns" );
+    print( b_Matrix3, "set Matrix3 columns" );
+    a_Matrix4 = Matrix4( a_Vector4, b_Vector4, c_Vector4, d_Vector4 ); // the Matrix4/Transform3 operands are built and printed here but not used by the Matrix3 operations below
+    b_Matrix4 = Matrix4( d_Vector4, a_Vector4, b_Vector4, c_Vector4 );
+    print( a_Matrix4, "set Matrix4 columns" );
+    print( b_Matrix4, "set Matrix4 columns" );
+    a_Transform3 = Transform3( a_Vector3, b_Vector3, c_Vector3, d_Vector3 );
+    b_Transform3 = Transform3( d_Vector3, a_Vector3, b_Vector3, c_Vector3 );
+    print( a_Transform3, "set Transform3 columns" );
+    print( b_Transform3, "set Transform3 columns" );
+    print( appendScale( a_Matrix3, a_Vector3 ), "appendScale Matrix3 Vector3" ); // from here on: one Matrix3 operation per line, printed with its label
+    print( prependScale( a_Vector3, a_Matrix3 ), "prependScale Vector3 Matrix3" );
+    print( mulPerElem( a_Matrix3, b_Matrix3 ), "mulPerElem Matrix3" );
+    print( absPerElem( a_Matrix3 ), "absPerElem Matrix3" );
+    print( transpose( a_Matrix3 ), "transpose Matrix3" );
+    print( inverse( a_Matrix3 ), "inverse Matrix3" );
+    print( ( inverse( a_Matrix3 ) * a_Matrix3 ), "inverse(Matrix3) * Matrix3" ); // presumably close to identity when the inverse is correct; compare with the reference output
+    printf("%f\n", getfloat(determinant( a_Matrix3 )) ); // the determinant is printed as a bare number, with no label
+    print( outer( a_Vector3, b_Vector3 ), "outer Vector3" );
+    print( rowMul( a_Vector3, a_Matrix3 ), "rowMul Vector3" );
+    print( crossMatrix( a_Vector3 ), "crossMatrix" );
+    print( crossMatrixMul( a_Vector3, a_Matrix3 ), "crossMatrixMul" );
+}
+
+void
+Matrix4_methods_test() // Fills operands with randfloat() values, prints them, then prints the result of each Matrix4 operation exercised below.
+{
+    Matrix3 a_Matrix3, b_Matrix3;
+    Matrix4 a_Matrix4, b_Matrix4;
+    Transform3 a_Transform3, b_Transform3;
+    Vector3 a_Vector3, b_Vector3, c_Vector3, d_Vector3;
+    Vector4 a_Vector4, b_Vector4, c_Vector4, d_Vector4;
+    Point3 a_Point3, b_Point3, c_Point3, d_Point3;
+    Quat a_Quat, b_Quat, c_Quat, d_Quat;
+    Vector4 tmpV4; // scratch Vector4 reused for every pad round-trip
+    float rndflt1, rndflt2, rndflt3, rndflt4, rndflt5, rndflt6, pad;
+    // Set a pad value to detect invalid use of padding.
+    // This will be NaN for scalar/ppu implementations, max. float for spu.
+    union { float f; unsigned int u; } tmp;
+    tmp.u = 0x7fffffff; // every bit set except the most significant one (for a 32-bit unsigned int)
+    pad = tmp.f; // read the same bits back as a float
+    rndflt1 = randfloat(); // NOTE(review): presumably a sequential generator, so reordering randfloat() calls would change the printed values; verify in test.h
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    a_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 );
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    b_Vector3 = Vector3( rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    c_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    d_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 );
+    tmpV4 = Vector4( a_Vector3, pad ); // round-trip a_Vector3 through tmpV4 with pad as the scalar argument
+    a_Vector3 = tmpV4.getXYZ( ); // presumably leaves pad in any unused 4th slot on SIMD builds; TODO confirm
+    tmpV4 = Vector4( b_Vector3, pad ); // same round-trip for b, c and d
+    b_Vector3 = tmpV4.getXYZ( );
+    tmpV4 = Vector4( c_Vector3, pad );
+    c_Vector3 = tmpV4.getXYZ( );
+    tmpV4 = Vector4( d_Vector3, pad );
+    d_Vector3 = tmpV4.getXYZ( );
+    print( a_Vector3, "set Vector3 with floats" );
+    print( b_Vector3, "set Vector3 with floats" );
+    print( c_Vector3, "set Vector3 with floats" );
+    print( d_Vector3, "set Vector3 with floats" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    a_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    b_Vector4 = Vector4( rndflt3, rndflt4, rndflt5, rndflt6 ); // b_Vector4 draws into rndflt3..rndflt6, unlike a/c/d
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    c_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    d_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 );
+    print( a_Vector4, "set Vector4 with floats" );
+    print( b_Vector4, "set Vector4 with floats" );
+    print( c_Vector4, "set Vector4 with floats" );
+    print( d_Vector4, "set Vector4 with floats" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    a_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    b_Point3 = Point3( rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    c_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    d_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    tmpV4 = Vector4( Vector3( a_Point3 ), pad ); // pad round-trip for points: Point3 -> Vector3 -> Vector4(pad) -> Vector3 -> Point3
+    a_Point3 = Point3( tmpV4.getXYZ( ) );
+    tmpV4 = Vector4( Vector3( b_Point3 ), pad );
+    b_Point3 = Point3( tmpV4.getXYZ( ) );
+    tmpV4 = Vector4( Vector3( c_Point3 ), pad );
+    c_Point3 = Point3( tmpV4.getXYZ( ) );
+    tmpV4 = Vector4( Vector3( d_Point3 ), pad );
+    d_Point3 = Point3( tmpV4.getXYZ( ) );
+    print( a_Point3, "set Point3 with floats" );
+    print( b_Point3, "set Point3 with floats" );
+    print( c_Point3, "set Point3 with floats" );
+    print( d_Point3, "set Point3 with floats" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    a_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    b_Quat = Quat( rndflt3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    c_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    d_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    print( a_Quat, "set Quat with floats" );
+    print( b_Quat, "set Quat with floats" );
+    print( c_Quat, "set Quat with floats" );
+    print( d_Quat, "set Quat with floats" );
+    a_Matrix3 = Matrix3( a_Vector3, b_Vector3, c_Vector3 ); // the Matrix3/Transform3 operands are built and printed here but not used by the Matrix4 operations below
+    b_Matrix3 = Matrix3( d_Vector3, a_Vector3, b_Vector3 );
+    print( a_Matrix3, "set Matrix3 columns" );
+    print( b_Matrix3, "set Matrix3 columns" );
+    a_Matrix4 = Matrix4( a_Vector4, b_Vector4, c_Vector4, d_Vector4 );
+    b_Matrix4 = Matrix4( d_Vector4, a_Vector4, b_Vector4, c_Vector4 ); // b_* operands reuse the same vectors in a different order (d, a, b, c)
+    print( a_Matrix4, "set Matrix4 columns" );
+    print( b_Matrix4, "set Matrix4 columns" );
+    a_Transform3 = Transform3( a_Vector3, b_Vector3, c_Vector3, d_Vector3 );
+    b_Transform3 = Transform3( d_Vector3, a_Vector3, b_Vector3, c_Vector3 );
+    print( a_Transform3, "set Transform3 columns" );
+    print( b_Transform3, "set Transform3 columns" );
+    print( appendScale( a_Matrix4, a_Vector3 ), "appendScale Matrix4 Vector3" ); // from here on: one Matrix4 operation per line, printed with its label
+    print( prependScale( a_Vector3, a_Matrix4 ), "prependScale Vector3 Matrix4" );
+    print( mulPerElem( a_Matrix4, b_Matrix4 ), "mulPerElem Matrix4" );
+    print( absPerElem( a_Matrix4 ), "absPerElem Matrix4" );
+    print( transpose( a_Matrix4 ), "transpose Matrix4" );
+    print( inverse( a_Matrix4 ), "inverse Matrix4" );
+    print( ( inverse( a_Matrix4 ) * a_Matrix4 ), "inverse(Matrix4) * Matrix4" ); // presumably close to identity when the inverse is correct; compare with the reference output
+    a_Matrix4.setRow( 3, Vector4( 0.0f, 0.0f, 0.0f, 1.0f ) ); // a_Matrix4 is modified in place from here: row 3 becomes (0, 0, 0, 1)
+    a_Matrix4.setUpper3x3( Matrix3( normalize( a_Quat ) ) ); // and its upper 3x3 becomes the matrix built from normalized a_Quat
+    print( affineInverse( a_Matrix4 ), "affineInverse Matrix4" );
+    print( ( affineInverse( a_Matrix4 ) * a_Matrix4 ), "affineInverse(Matrix4) * Matrix4" );
+    print( orthoInverse( a_Matrix4 ), "orthoInverse Matrix4" );
+    print( ( orthoInverse( a_Matrix4 ) * a_Matrix4 ), "orthoInverse(Matrix4) * Matrix4" );
+    printf("%f\n", getfloat(determinant( a_Matrix4 )) ); // determinant of the modified a_Matrix4, printed as a bare number with no label
+    print( outer( a_Vector4, b_Vector4 ), "outer Vector4" );
+}
+
+void
+Transform3_methods_test() // Fills operands with randfloat() values, prints them, then prints the result of each Transform3 operation exercised below.
+{
+    Matrix3 a_Matrix3, b_Matrix3;
+    Matrix4 a_Matrix4, b_Matrix4;
+    Transform3 a_Transform3, b_Transform3;
+    Vector3 a_Vector3, b_Vector3, c_Vector3, d_Vector3;
+    Vector4 a_Vector4, b_Vector4, c_Vector4, d_Vector4;
+    Point3 a_Point3, b_Point3, c_Point3, d_Point3;
+    Quat a_Quat, b_Quat, c_Quat, d_Quat;
+    Vector4 tmpV4; // scratch Vector4 reused for every pad round-trip
+    float rndflt1, rndflt2, rndflt3, rndflt4, rndflt5, rndflt6, pad;
+    // Set a pad value to detect invalid use of padding.
+    // This will be NaN for scalar/ppu implementations, max. float for spu.
+    union { float f; unsigned int u; } tmp;
+    tmp.u = 0x7fffffff; // every bit set except the most significant one (for a 32-bit unsigned int)
+    pad = tmp.f; // read the same bits back as a float
+    rndflt1 = randfloat(); // NOTE(review): presumably a sequential generator, so reordering randfloat() calls would change the printed values; verify in test.h
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    a_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 );
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    b_Vector3 = Vector3( rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    c_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    d_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 );
+    tmpV4 = Vector4( a_Vector3, pad ); // round-trip a_Vector3 through tmpV4 with pad as the scalar argument
+    a_Vector3 = tmpV4.getXYZ( ); // presumably leaves pad in any unused 4th slot on SIMD builds; TODO confirm
+    tmpV4 = Vector4( b_Vector3, pad ); // same round-trip for b, c and d
+    b_Vector3 = tmpV4.getXYZ( );
+    tmpV4 = Vector4( c_Vector3, pad );
+    c_Vector3 = tmpV4.getXYZ( );
+    tmpV4 = Vector4( d_Vector3, pad );
+    d_Vector3 = tmpV4.getXYZ( );
+    print( a_Vector3, "set Vector3 with floats" );
+    print( b_Vector3, "set Vector3 with floats" );
+    print( c_Vector3, "set Vector3 with floats" );
+    print( d_Vector3, "set Vector3 with floats" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    a_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    b_Vector4 = Vector4( rndflt3, rndflt4, rndflt5, rndflt6 ); // b_Vector4 draws into rndflt3..rndflt6, unlike a/c/d
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    c_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    d_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 );
+    print( a_Vector4, "set Vector4 with floats" );
+    print( b_Vector4, "set Vector4 with floats" );
+    print( c_Vector4, "set Vector4 with floats" );
+    print( d_Vector4, "set Vector4 with floats" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    a_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    b_Point3 = Point3( rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    c_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    d_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    tmpV4 = Vector4( Vector3( a_Point3 ), pad ); // pad round-trip for points: Point3 -> Vector3 -> Vector4(pad) -> Vector3 -> Point3
+    a_Point3 = Point3( tmpV4.getXYZ( ) );
+    tmpV4 = Vector4( Vector3( b_Point3 ), pad );
+    b_Point3 = Point3( tmpV4.getXYZ( ) );
+    tmpV4 = Vector4( Vector3( c_Point3 ), pad );
+    c_Point3 = Point3( tmpV4.getXYZ( ) );
+    tmpV4 = Vector4( Vector3( d_Point3 ), pad );
+    d_Point3 = Point3( tmpV4.getXYZ( ) );
+    print( a_Point3, "set Point3 with floats" );
+    print( b_Point3, "set Point3 with floats" );
+    print( c_Point3, "set Point3 with floats" );
+    print( d_Point3, "set Point3 with floats" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    a_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    b_Quat = Quat( rndflt3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    c_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    d_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    print( a_Quat, "set Quat with floats" );
+    print( b_Quat, "set Quat with floats" );
+    print( c_Quat, "set Quat with floats" );
+    print( d_Quat, "set Quat with floats" );
+    a_Matrix3 = Matrix3( a_Vector3, b_Vector3, c_Vector3 ); // the Matrix3/Matrix4 operands are built and printed here but not used by the Transform3 operations below
+    b_Matrix3 = Matrix3( d_Vector3, a_Vector3, b_Vector3 );
+    print( a_Matrix3, "set Matrix3 columns" );
+    print( b_Matrix3, "set Matrix3 columns" );
+    a_Matrix4 = Matrix4( a_Vector4, b_Vector4, c_Vector4, d_Vector4 );
+    b_Matrix4 = Matrix4( d_Vector4, a_Vector4, b_Vector4, c_Vector4 );
+    print( a_Matrix4, "set Matrix4 columns" );
+    print( b_Matrix4, "set Matrix4 columns" );
+    a_Transform3 = Transform3( a_Vector3, b_Vector3, c_Vector3, d_Vector3 );
+    b_Transform3 = Transform3( d_Vector3, a_Vector3, b_Vector3, c_Vector3 ); // b_* operands reuse the same vectors in a different order (d, a, b, c)
+    print( a_Transform3, "set Transform3 columns" );
+    print( b_Transform3, "set Transform3 columns" );
+    print( appendScale( a_Transform3, a_Vector3 ), "appendScale Transform3 Vector3" ); // from here on: one Transform3 operation per line, printed with its label
+    print( prependScale( a_Vector3, a_Transform3 ), "prependScale Vector3 Transform3" );
+    print( mulPerElem( a_Transform3, b_Transform3 ), "mulPerElem Transform3" );
+    print( absPerElem( a_Transform3 ), "absPerElem Transform3" );
+    print( inverse( a_Transform3 ), "inverse Transform3" );
+    print( ( inverse( a_Transform3 ) * a_Transform3 ), "inverse(Transform3) * Transform3" ); // presumably close to identity when the inverse is correct; compare with the reference output
+    a_Transform3.setUpper3x3( Matrix3( normalize( a_Quat ) ) ); // overwrite a_Transform3's upper 3x3 with the matrix built from normalized a_Quat before the orthoInverse calls
+    print( orthoInverse( a_Transform3 ), "orthoInverse Transform3" );
+    print( ( orthoInverse( a_Transform3 ) * a_Transform3 ), "orthoInverse(Transform3) * Transform3" );
+}
+
+int main()
+{
+    // Run the three method-test groups twice, bracketed by begin/end markers.
+    printf("\n __begin__ \n");
+    for ( int pass = 0; pass != 2; ++pass ) {
+        Matrix3_methods_test();
+        Matrix4_methods_test();
+        Transform3_methods_test();
+    }
+    printf("\n __end__ \n");
+    return 0;
+}
diff --git a/Extras/vectormathlibrary/tests/test4_reference.txt b/Extras/vectormathlibrary/tests/test4_reference.txt
new file mode 100644
index 000000000..23afd2d67
--- /dev/null
+++ b/Extras/vectormathlibrary/tests/test4_reference.txt
@@ -0,0 +1,524 @@
+set Vector3 with floats: ( -0.658344 0.499804 -0.807257 )
+set Vector3 with floats: ( 0.740930 0.154607 0.571599 )
+set Vector3 with floats: ( 0.384388 -0.262467 0.747808 )
+set Vector3 with floats: ( 0.490190 -0.107908 -0.292544 )
+set Vector4 with floats: ( 0.465039 -0.479556 -0.211412 0.553580 )
+set Vector4 with floats: ( 0.690070 0.151576 0.431077 -0.833992 )
+set Vector4 with floats: ( -0.088350 -0.780106 0.090456 -0.218627 )
+set Vector4 with floats: ( 0.137171 0.918133 0.735438 -0.673621 )
+set Point3 with floats: ( -0.448982 -0.479278 0.848189 )
+set Point3 with floats: ( -0.128155 0.578922 -0.744766 )
+set Point3 with floats: ( -0.835589 0.881284 -0.948850 )
+set Point3 with floats: ( -0.691578 -0.235635 -0.690527 )
+set Quat with floats: ( 0.058667 0.753697 -0.138777 -0.472188 )
+set Quat with floats: ( -0.372811 0.540183 -0.785218 0.542085 )
+set Quat with floats: ( 0.410391 -0.562721 0.523588 -0.176574 )
+set Quat with floats: ( 0.297654 0.859913 0.004837 0.374881 )
+set Matrix3 columns:
+( -0.658344 0.740930 0.384388 )
+( 0.499804 0.154607 -0.262467 )
+( -0.807257 0.571599 0.747808 )
+set Matrix3 columns:
+( 0.490190 -0.658344 0.740930 )
+( -0.107908 0.499804 0.154607 )
+( -0.292544 -0.807257 0.571599 )
+set Matrix4 columns:
+( 0.465039 0.690070 -0.088350 0.137171 )
+( -0.479556 0.151576 -0.780106 0.918133 )
+( -0.211412 0.431077 0.090456 0.735438 )
+( 0.553580 -0.833992 -0.218627 -0.673621 )
+set Matrix4 columns:
+( 0.137171 0.465039 0.690070 -0.088350 )
+( 0.918133 -0.479556 0.151576 -0.780106 )
+( 0.735438 -0.211412 0.431077 0.090456 )
+( -0.673621 0.553580 -0.833992 -0.218627 )
+set Transform3 columns:
+( -0.658344 0.740930 0.384388 0.490190 )
+( 0.499804 0.154607 -0.262467 -0.107908 )
+( -0.807257 0.571599 0.747808 -0.292544 )
+set Transform3 columns:
+( 0.490190 -0.658344 0.740930 0.384388 )
+( -0.107908 0.499804 0.154607 -0.262467 )
+( -0.292544 -0.807257 0.571599 0.747808 )
+appendScale Matrix3 Vector3:
+( 0.433417 0.370320 -0.310300 )
+( -0.329043 0.077273 0.211879 )
+( 0.531453 0.285687 -0.603673 )
+prependScale Vector3 Matrix3:
+( 0.433417 -0.487787 -0.253060 )
+( 0.249804 0.077273 -0.131182 )
+( 0.651663 -0.461427 -0.603673 )
+mulPerElem Matrix3:
+( -0.322714 -0.487787 0.284805 )
+( -0.053933 0.077273 -0.040579 )
+( 0.236158 -0.461427 0.427446 )
+absPerElem Matrix3:
+( 0.658344 0.740930 0.384388 )
+( 0.499804 0.154607 0.262467 )
+( 0.807257 0.571599 0.747808 )
+transpose Matrix3:
+( -0.658344 0.499804 -0.807257 )
+( 0.740930 0.154607 0.571599 )
+( 0.384388 -0.262467 0.747808 )
+inverse Matrix3:
+( -1.938491 2.439935 1.852797 )
+( 1.181290 1.328230 -0.141021 )
+( -2.995533 1.618649 3.445122 )
+inverse(Matrix3) * Matrix3:
+( 1.000000 0.000000 0.000000 )
+( 0.000000 1.000000 -0.000000 )
+( 0.000000 -0.000000 1.000000 )
+-0.137036
+outer Vector3:
+( -0.487787 -0.101785 -0.376308 )
+( 0.370320 0.077273 0.285687 )
+( -0.598121 -0.124808 -0.461427 )
+rowMul Vector3: ( 1.334884 -0.871941 -0.987915 )
+crossMatrix:
+( 0.000000 0.807257 0.499804 )
+( -0.807257 0.000000 0.658344 )
+( -0.499804 -0.658344 0.000000 )
+crossMatrixMul:
+( 0.000000 0.410495 0.161879 )
+( 0.000000 -0.221813 0.182015 )
+( 0.000000 -0.472105 -0.019325 )
+set Vector3 with floats: ( -0.127818 0.216602 0.153117 )
+set Vector3 with floats: ( 0.265243 -0.073149 0.264488 )
+set Vector3 with floats: ( -0.723410 0.921523 -0.711250 )
+set Vector3 with floats: ( -0.106634 -0.350831 0.905168 )
+set Vector4 with floats: ( -0.283632 -0.203584 -0.797437 0.910171 )
+set Vector4 with floats: ( 0.969234 0.151940 0.731827 -0.700248 )
+set Vector4 with floats: ( 0.818301 0.302505 -0.872278 0.909999 )
+set Vector4 with floats: ( 0.932526 0.571087 0.610330 0.142507 )
+set Point3 with floats: ( -0.434829 0.925102 0.158954 )
+set Point3 with floats: ( -0.126283 -0.249128 0.846815 )
+set Point3 with floats: ( -0.942601 0.537720 0.446214 )
+set Point3 with floats: ( 0.181939 -0.148223 0.284286 )
+set Quat with floats: ( 0.493525 -0.861963 -0.893410 0.548627 )
+set Quat with floats: ( 0.407007 -0.757467 -0.393126 -0.850984 )
+set Quat with floats: ( 0.375720 -0.270088 0.458888 -0.610828 )
+set Quat with floats: ( -0.690816 -0.676415 0.664466 0.101874 )
+set Matrix3 columns:
+( -0.127818 0.265243 -0.723410 )
+( 0.216602 -0.073149 0.921523 )
+( 0.153117 0.264488 -0.711250 )
+set Matrix3 columns:
+( -0.106634 -0.127818 0.265243 )
+( -0.350831 0.216602 -0.073149 )
+( 0.905168 0.153117 0.264488 )
+set Matrix4 columns:
+( -0.283632 0.969234 0.818301 0.932526 )
+( -0.203584 0.151940 0.302505 0.571087 )
+( -0.797437 0.731827 -0.872278 0.610330 )
+( 0.910171 -0.700248 0.909999 0.142507 )
+set Matrix4 columns:
+( 0.932526 -0.283632 0.969234 0.818301 )
+( 0.571087 -0.203584 0.151940 0.302505 )
+( 0.610330 -0.797437 0.731827 -0.872278 )
+( 0.142507 0.910171 -0.700248 0.909999 )
+set Transform3 columns:
+( -0.127818 0.265243 -0.723410 -0.106634 )
+( 0.216602 -0.073149 0.921523 -0.350831 )
+( 0.153117 0.264488 -0.711250 0.905168 )
+set Transform3 columns:
+( -0.106634 -0.127818 0.265243 -0.723410 )
+( -0.350831 0.216602 -0.073149 0.921523 )
+( 0.905168 0.153117 0.264488 -0.711250 )
+appendScale Matrix4 Vector3:
+( 0.036253 0.209938 0.125296 0.932526 )
+( 0.026022 0.032911 0.046319 0.571087 )
+( 0.101927 0.158515 -0.133561 0.610330 )
+( -0.116336 -0.151675 0.139337 0.142507 )
+prependScale Vector3 Matrix4:
+( 0.036253 -0.123886 -0.104594 -0.119194 )
+( -0.044097 0.032911 0.065523 0.123698 )
+( -0.122101 0.112055 -0.133561 0.093452 )
+( 0.910171 -0.700248 0.909999 0.142507 )
+mulPerElem Matrix4:
+( -0.264494 -0.274906 0.793125 0.763086 )
+( -0.116264 -0.030933 0.045963 0.172757 )
+( -0.486699 -0.583586 -0.638357 -0.532377 )
+( 0.129706 -0.637346 -0.637225 0.129682 )
+absPerElem Matrix4:
+( 0.283632 0.969234 0.818301 0.932526 )
+( 0.203584 0.151940 0.302505 0.571087 )
+( 0.797437 0.731827 0.872278 0.610330 )
+( 0.910171 0.700248 0.909999 0.142507 )
+transpose Matrix4:
+( -0.283632 -0.203584 -0.797437 0.910171 )
+( 0.969234 0.151940 0.731827 -0.700248 )
+( 0.818301 0.302505 -0.872278 0.909999 )
+( 0.932526 0.571087 0.610330 0.142507 )
+inverse Matrix4:
+( 0.756962 -3.392262 1.563321 1.945501 )
+( 1.235862 -2.616357 0.503558 0.241096 )
+( 0.221503 1.293015 -1.354804 -0.828755 )
+( -0.176291 0.552941 1.140966 1.068388 )
+inverse(Matrix4) * Matrix4:
+( 1.000000 0.000000 -0.000000 0.000000 )
+( -0.000000 1.000000 -0.000000 -0.000000 )
+( 0.000000 -0.000000 1.000000 -0.000000 )
+( -0.000000 0.000000 0.000000 1.000000 )
+affineInverse Matrix4:
+( -0.477821 -0.877922 0.030662 0.928236 )
+( 0.062087 0.001066 0.998070 -0.667659 )
+( -0.876260 0.478803 0.053999 0.510740 )
+( 0.000000 0.000000 0.000000 1.000000 )
+affineInverse(Matrix4) * Matrix4:
+( 1.000000 0.000000 -0.000000 0.000000 )
+( 0.000000 1.000000 0.000000 0.000000 )
+( -0.000000 0.000000 1.000000 0.000000 )
+( 0.000000 0.000000 0.000000 1.000000 )
+orthoInverse Matrix4:
+( -0.477822 -0.877922 0.030662 0.928237 )
+( 0.062087 0.001066 0.998070 -0.667659 )
+( -0.876260 0.478803 0.053999 0.510740 )
+( 0.000000 0.000000 0.000000 1.000000 )
+orthoInverse(Matrix4) * Matrix4:
+( 1.000000 0.000000 0.000000 0.000000 )
+( 0.000000 1.000000 -0.000000 -0.000000 )
+( 0.000000 -0.000000 1.000000 -0.000000 )
+( 0.000000 0.000000 0.000000 1.000000 )
+1.000000
+outer Vector4:
+( -0.274906 -0.043095 -0.207570 0.198613 )
+( -0.197320 -0.030933 -0.148988 0.142559 )
+( -0.772903 -0.121163 -0.583586 0.558404 )
+( 0.882169 0.138292 0.666089 -0.637346 )
+set Vector3 with floats: ( -0.365714 0.055473 -0.133556 )
+set Vector3 with floats: ( -0.572643 0.459209 -0.997261 )
+set Vector3 with floats: ( 0.172409 -0.045124 0.879716 )
+set Vector3 with floats: ( 0.524317 -0.744532 -0.970444 )
+set Vector4 with floats: ( -0.000013 0.689543 0.704297 -0.817983 )
+set Vector4 with floats: ( 0.715505 0.577868 0.156952 -0.801022 )
+set Vector4 with floats: ( 0.656335 0.494393 0.816743 0.024285 )
+set Vector4 with floats: ( 0.769132 0.923895 0.133022 -0.052219 )
+set Point3 with floats: ( -0.164886 0.300690 0.760403 )
+set Point3 with floats: ( 0.171869 -0.554976 0.998693 )
+set Point3 with floats: ( -0.681641 0.391195 0.403059 )
+set Point3 with floats: ( 0.972411 0.297195 0.309761 )
+set Quat with floats: ( 0.688408 0.363540 0.940297 -0.336683 )
+set Quat with floats: ( 0.600164 -0.681272 0.726558 0.205513 )
+set Quat with floats: ( -0.160082 0.962714 0.737794 -0.071926 )
+set Quat with floats: ( -0.506313 0.689277 0.686485 0.473013 )
+set Matrix3 columns:
+( -0.365714 -0.572643 0.172409 )
+( 0.055473 0.459209 -0.045124 )
+( -0.133556 -0.997261 0.879716 )
+set Matrix3 columns:
+( 0.524317 -0.365714 -0.572643 )
+( -0.744532 0.055473 0.459209 )
+( -0.970444 -0.133556 -0.997261 )
+set Matrix4 columns:
+( -0.000013 0.715505 0.656335 0.769132 )
+( 0.689543 0.577868 0.494393 0.923895 )
+( 0.704297 0.156952 0.816743 0.133022 )
+( -0.817983 -0.801022 0.024285 -0.052219 )
+set Matrix4 columns:
+( 0.769132 -0.000013 0.715505 0.656335 )
+( 0.923895 0.689543 0.577868 0.494393 )
+( 0.133022 0.704297 0.156952 0.816743 )
+( -0.052219 -0.817983 -0.801022 0.024285 )
+set Transform3 columns:
+( -0.365714 -0.572643 0.172409 0.524317 )
+( 0.055473 0.459209 -0.045124 -0.744532 )
+( -0.133556 -0.997261 0.879716 -0.970444 )
+set Transform3 columns:
+( 0.524317 -0.365714 -0.572643 0.172409 )
+( -0.744532 0.055473 0.459209 -0.045124 )
+( -0.970444 -0.133556 -0.997261 0.879716 )
+appendScale Transform3 Vector3:
+( 0.133747 -0.031766 -0.023026 0.524317 )
+( -0.020287 0.025474 0.006027 -0.744532 )
+( 0.048843 -0.055321 -0.117491 -0.970444 )
+prependScale Vector3 Transform3:
+( 0.133747 0.209424 -0.063052 -0.191750 )
+( 0.003077 0.025474 -0.002503 -0.041301 )
+( 0.017837 0.133190 -0.117491 0.129609 )
+mulPerElem Transform3:
+( -0.191750 0.209424 -0.098729 0.090397 )
+( -0.041301 0.025474 -0.020721 0.033596 )
+( 0.129609 0.133190 -0.877307 -0.853715 )
+absPerElem Transform3:
+( 0.365714 0.572643 0.172409 0.524317 )
+( 0.055473 0.459209 0.045124 0.744532 )
+( 0.133556 0.997261 0.879716 0.970444 )
+inverse Transform3:
+( -3.394501 -3.137797 0.504313 -0.066988 )
+( 0.404474 2.824531 0.065611 1.954553 )
+( -0.056825 2.725566 1.287672 3.308679 )
+inverse(Transform3) * Transform3:
+( 1.000000 -0.000000 0.000000 0.000000 )
+( 0.000000 1.000000 -0.000000 -0.000000 )
+( -0.000000 -0.000000 1.000000 0.000000 )
+orthoInverse Transform3:
+( -0.267562 -0.082713 0.959984 1.010315 )
+( 0.706975 -0.693789 0.137268 -0.754017 )
+( 0.654673 0.715412 0.244108 0.426284 )
+orthoInverse(Transform3) * Transform3:
+( 1.000000 0.000000 0.000000 -0.000000 )
+( 0.000000 1.000000 0.000000 -0.000000 )
+( 0.000000 0.000000 1.000000 -0.000000 )
+set Vector3 with floats: ( -0.735610 -0.046390 0.568674 )
+set Vector3 with floats: ( -0.004815 0.137637 -0.111879 )
+set Vector3 with floats: ( -0.929543 -0.336303 -0.146740 )
+set Vector3 with floats: ( 0.165140 -0.823874 0.349776 )
+set Vector4 with floats: ( 0.174872 -0.528584 0.489292 0.916708 )
+set Vector4 with floats: ( 0.728511 -0.851140 0.079620 -0.234370 )
+set Vector4 with floats: ( -0.996308 0.433229 -0.892684 -0.957911 )
+set Vector4 with floats: ( 0.517122 0.257921 0.862028 0.095881 )
+set Point3 with floats: ( -0.171933 -0.214078 -0.604841 )
+set Point3 with floats: ( -0.383831 -0.581500 0.222183 )
+set Point3 with floats: ( -0.256120 -0.678699 -0.079553 )
+set Point3 with floats: ( 0.605960 -0.633147 0.435875 )
+set Quat with floats: ( -0.046627 -0.716491 0.267317 -0.514874 )
+set Quat with floats: ( -0.751700 0.742959 -0.793180 0.508814 )
+set Quat with floats: ( -0.238839 0.113471 -0.843523 -0.245250 )
+set Quat with floats: ( 0.250368 0.579243 -0.157280 0.648487 )
+set Matrix3 columns:
+( -0.735610 -0.004815 -0.929543 )
+( -0.046390 0.137637 -0.336303 )
+( 0.568674 -0.111879 -0.146740 )
+set Matrix3 columns:
+( 0.165140 -0.735610 -0.004815 )
+( -0.823874 -0.046390 0.137637 )
+( 0.349776 0.568674 -0.111879 )
+set Matrix4 columns:
+( 0.174872 0.728511 -0.996308 0.517122 )
+( -0.528584 -0.851140 0.433229 0.257921 )
+( 0.489292 0.079620 -0.892684 0.862028 )
+( 0.916708 -0.234370 -0.957911 0.095881 )
+set Matrix4 columns:
+( 0.517122 0.174872 0.728511 -0.996308 )
+( 0.257921 -0.528584 -0.851140 0.433229 )
+( 0.862028 0.489292 0.079620 -0.892684 )
+( 0.095881 0.916708 -0.234370 -0.957911 )
+set Transform3 columns:
+( -0.735610 -0.004815 -0.929543 0.165140 )
+( -0.046390 0.137637 -0.336303 -0.823874 )
+( 0.568674 -0.111879 -0.146740 0.349776 )
+set Transform3 columns:
+( 0.165140 -0.735610 -0.004815 -0.929543 )
+( -0.823874 -0.046390 0.137637 -0.336303 )
+( 0.349776 0.568674 -0.111879 -0.146740 )
+appendScale Matrix3 Vector3:
+( 0.541123 0.000223 -0.528607 )
+( 0.034125 -0.006385 -0.191247 )
+( -0.418323 0.005190 -0.083447 )
+prependScale Vector3 Matrix3:
+( 0.541123 0.003542 0.683781 )
+( 0.002152 -0.006385 0.015601 )
+( 0.323390 -0.063623 -0.083447 )
+mulPerElem Matrix3:
+( -0.121479 0.003542 0.004475 )
+( 0.038220 -0.006385 -0.046288 )
+( 0.198909 -0.063623 0.016417 )
+absPerElem Matrix3:
+( 0.735610 0.004815 0.929543 )
+( 0.046390 0.137637 0.336303 )
+( 0.568674 0.111879 0.146740 )
+transpose Matrix3:
+( -0.735610 -0.046390 0.568674 )
+( -0.004815 0.137637 -0.111879 )
+( -0.929543 -0.336303 -0.146740 )
+inverse Matrix3:
+( -0.518959 0.927036 1.162799 )
+( -1.777555 5.713095 -1.833313 )
+( -0.655903 -0.763218 -0.910706 )
+inverse(Matrix3) * Matrix3:
+( 1.000000 0.000000 0.000000 )
+( -0.000000 1.000000 -0.000000 )
+( -0.000000 0.000000 1.000000 )
+0.111420
+outer Vector3:
+( 0.003542 -0.101247 0.082299 )
+( 0.000223 -0.006385 0.005190 )
+( -0.002738 0.078270 -0.063623 )
+rowMul Vector3: ( 0.866665 -0.066466 0.615935 )
+crossMatrix:
+( 0.000000 -0.568674 -0.046390 )
+( 0.568674 0.000000 0.735610 )
+( 0.046390 -0.735610 0.000000 )
+crossMatrixMul:
+( 0.000000 -0.073080 0.198054 )
+( 0.000000 -0.085037 -0.636550 )
+( 0.000000 -0.101470 0.204267 )
+set Vector3 with floats: ( 0.103833 0.456401 -0.022372 )
+set Vector3 with floats: ( -0.475631 -0.004178 -0.020865 )
+set Vector3 with floats: ( -0.016997 0.699144 0.837796 )
+set Vector3 with floats: ( -0.276082 0.091582 0.209064 )
+set Vector4 with floats: ( 0.219317 -0.118359 0.413442 -0.567698 )
+set Vector4 with floats: ( 0.531358 -0.387226 0.572490 -0.820417 )
+set Vector4 with floats: ( 0.797191 0.867178 0.934764 0.237092 )
+set Vector4 with floats: ( -0.866162 -0.773939 0.261311 -0.851570 )
+set Point3 with floats: ( 0.114814 -0.531592 0.223925 )
+set Point3 with floats: ( 0.869105 0.143405 0.148518 )
+set Point3 with floats: ( -0.071136 -0.758292 -0.527633 )
+set Point3 with floats: ( 0.997215 0.114440 0.727558 )
+set Quat with floats: ( -0.425760 0.459888 0.642516 -0.022534 )
+set Quat with floats: ( 0.186095 -0.775679 -0.683401 0.398134 )
+set Quat with floats: ( 0.189642 0.765986 -0.137795 -0.579844 )
+set Quat with floats: ( -0.635647 0.374970 -0.563750 -0.471075 )
+set Matrix3 columns:
+( 0.103833 -0.475631 -0.016997 )
+( 0.456401 -0.004178 0.699144 )
+( -0.022372 -0.020865 0.837796 )
+set Matrix3 columns:
+( -0.276082 0.103833 -0.475631 )
+( 0.091582 0.456401 -0.004178 )
+( 0.209064 -0.022372 -0.020865 )
+set Matrix4 columns:
+( 0.219317 0.531358 0.797191 -0.866162 )
+( -0.118359 -0.387226 0.867178 -0.773939 )
+( 0.413442 0.572490 0.934764 0.261311 )
+( -0.567698 -0.820417 0.237092 -0.851570 )
+set Matrix4 columns:
+( -0.866162 0.219317 0.531358 0.797191 )
+( -0.773939 -0.118359 -0.387226 0.867178 )
+( 0.261311 0.413442 0.572490 0.934764 )
+( -0.851570 -0.567698 -0.820417 0.237092 )
+set Transform3 columns:
+( 0.103833 -0.475631 -0.016997 -0.276082 )
+( 0.456401 -0.004178 0.699144 0.091582 )
+( -0.022372 -0.020865 0.837796 0.209064 )
+set Transform3 columns:
+( -0.276082 0.103833 -0.475631 -0.016997 )
+( 0.091582 0.456401 -0.004178 0.699144 )
+( 0.209064 -0.022372 -0.020865 0.837796 )
+appendScale Matrix4 Vector3:
+( 0.022772 0.242513 -0.017835 -0.866162 )
+( -0.012290 -0.176730 -0.019401 -0.773939 )
+( 0.042929 0.261285 -0.020913 0.261311 )
+( -0.058946 -0.374439 -0.005304 -0.851570 )
+prependScale Vector3 Matrix4:
+( 0.022772 0.055173 0.082775 -0.089936 )
+( -0.054019 -0.176730 0.395781 -0.353227 )
+( -0.009250 -0.012808 -0.020913 -0.005846 )
+( -0.567698 -0.820417 0.237092 -0.851570 )
+mulPerElem Matrix4:
+( -0.189964 0.116536 0.423594 -0.690497 )
+( 0.091603 0.045832 -0.335794 -0.671143 )
+( 0.108037 0.236691 0.535143 0.244264 )
+( 0.483434 0.465749 -0.194514 -0.201900 )
+absPerElem Matrix4:
+( 0.219317 0.531358 0.797191 0.866162 )
+( 0.118359 0.387226 0.867178 0.773939 )
+( 0.413442 0.572490 0.934764 0.261311 )
+( 0.567698 0.820417 0.237092 0.851570 )
+transpose Matrix4:
+( 0.219317 -0.118359 0.413442 -0.567698 )
+( 0.531358 -0.387226 0.572490 -0.820417 )
+( 0.797191 0.867178 0.934764 0.237092 )
+( -0.866162 -0.773939 0.261311 -0.851570 )
+inverse Matrix4:
+( -0.801304 6.311381 -3.640203 -6.038011 )
+( 1.289204 -3.528897 1.571125 2.378011 )
+( -0.220138 -0.375260 1.356762 0.981293 )
+( -0.769143 -0.912151 1.290834 0.833121 )
+inverse(Matrix4) * Matrix4:
+( 1.000000 -0.000000 -0.000000 0.000000 )
+( 0.000000 1.000000 -0.000000 0.000000 )
+( -0.000000 0.000000 1.000000 -0.000000 )
+( 0.000000 0.000000 0.000000 1.000000 )
+affineInverse Matrix4:
+( -0.548992 -0.521721 -0.653005 -0.708658 )
+( -0.449878 -0.474001 0.756923 -0.954307 )
+( -0.704428 0.709317 0.025512 -0.067847 )
+( 0.000000 0.000000 0.000000 1.000000 )
+affineInverse(Matrix4) * Matrix4:
+( 1.000000 0.000000 0.000000 0.000000 )
+( 0.000000 1.000000 -0.000000 0.000000 )
+( 0.000000 0.000000 1.000000 -0.000000 )
+( 0.000000 0.000000 0.000000 1.000000 )
+orthoInverse Matrix4:
+( -0.548992 -0.521721 -0.653005 -0.708658 )
+( -0.449878 -0.474001 0.756923 -0.954307 )
+( -0.704428 0.709317 0.025512 -0.067847 )
+( 0.000000 0.000000 0.000000 1.000000 )
+orthoInverse(Matrix4) * Matrix4:
+( 1.000000 0.000000 0.000000 -0.000000 )
+( 0.000000 1.000000 -0.000000 0.000000 )
+( 0.000000 -0.000000 1.000000 0.000000 )
+( 0.000000 0.000000 0.000000 1.000000 )
+1.000000
+outer Vector4:
+( 0.116536 -0.084925 0.125557 -0.179931 )
+( -0.062891 0.045832 -0.067760 0.097104 )
+( 0.219686 -0.160095 0.236691 -0.339195 )
+( -0.301651 0.219827 -0.325001 0.465749 )
+set Vector3 with floats: ( -0.553800 -0.014688 -0.464365 )
+set Vector3 with floats: ( -0.107890 -0.527503 -0.406423 )
+set Vector3 with floats: ( 0.301261 0.499529 0.385180 )
+set Vector3 with floats: ( -0.150218 0.519112 -0.203209 )
+set Vector4 with floats: ( -0.252017 0.282194 0.067637 0.798376 )
+set Vector4 with floats: ( 0.310782 0.861334 -0.980345 -0.655106 )
+set Vector4 with floats: ( 0.286765 0.532078 0.352671 0.540977 )
+set Vector4 with floats: ( 0.510961 0.791871 -0.564379 0.273199 )
+set Point3 with floats: ( 0.194378 0.244636 -0.269608 )
+set Point3 with floats: ( -0.858162 -0.495023 -0.277798 )
+set Point3 with floats: ( -0.032740 0.007412 -0.420178 )
+set Point3 with floats: ( -0.522577 0.324972 0.795389 )
+set Quat with floats: ( 0.342900 -0.913636 0.675222 0.144053 )
+set Quat with floats: ( -0.632329 -0.947120 -0.049367 0.126333 )
+set Quat with floats: ( -0.664206 0.220879 0.284219 -0.387216 )
+set Quat with floats: ( 0.913568 0.531906 0.271995 -0.862601 )
+set Matrix3 columns:
+( -0.553800 -0.107890 0.301261 )
+( -0.014688 -0.527503 0.499529 )
+( -0.464365 -0.406423 0.385180 )
+set Matrix3 columns:
+( -0.150218 -0.553800 -0.107890 )
+( 0.519112 -0.014688 -0.527503 )
+( -0.203209 -0.464365 -0.406423 )
+set Matrix4 columns:
+( -0.252017 0.310782 0.286765 0.510961 )
+( 0.282194 0.861334 0.532078 0.791871 )
+( 0.067637 -0.980345 0.352671 -0.564379 )
+( 0.798376 -0.655106 0.540977 0.273199 )
+set Matrix4 columns:
+( 0.510961 -0.252017 0.310782 0.286765 )
+( 0.791871 0.282194 0.861334 0.532078 )
+( -0.564379 0.067637 -0.980345 0.352671 )
+( 0.273199 0.798376 -0.655106 0.540977 )
+set Transform3 columns:
+( -0.553800 -0.107890 0.301261 -0.150218 )
+( -0.014688 -0.527503 0.499529 0.519112 )
+( -0.464365 -0.406423 0.385180 -0.203209 )
+set Transform3 columns:
+( -0.150218 -0.553800 -0.107890 0.301261 )
+( 0.519112 -0.014688 -0.527503 0.499529 )
+( -0.203209 -0.464365 -0.406423 0.385180 )
+appendScale Transform3 Vector3:
+( 0.306694 0.001585 -0.139895 -0.150218 )
+( 0.008134 0.007748 -0.231964 0.519112 )
+( 0.257165 0.005970 -0.178864 -0.203209 )
+prependScale Vector3 Transform3:
+( 0.306694 0.059749 -0.166838 0.083191 )
+( 0.000216 0.007748 -0.007337 -0.007625 )
+( 0.215635 0.188729 -0.178864 0.094363 )
+mulPerElem Transform3:
+( 0.083191 0.059749 -0.032503 -0.045255 )
+( -0.007625 0.007748 -0.263503 0.259311 )
+( 0.094363 0.188729 -0.156546 -0.078272 )
+absPerElem Transform3:
+( 0.553800 0.107890 0.301261 0.150218 )
+( 0.014688 0.527503 0.499529 0.519112 )
+( 0.464365 0.406423 0.385180 0.203209 )
+inverse Transform3:
+( 0.003445 1.703147 -2.211457 -1.332994 )
+( 4.765344 1.545948 -5.732023 -1.251475 )
+( 5.032312 3.684490 -6.118052 -2.399958 )
+inverse(Transform3) * Transform3:
+( 1.000000 0.000000 -0.000000 -0.000000 )
+( 0.000000 1.000000 -0.000000 0.000000 )
+( 0.000000 0.000000 1.000000 -0.000000 )
+orthoInverse Transform3:
+( -0.806392 -0.302338 0.508256 0.139094 )
+( -0.574608 0.197327 -0.794285 -0.350157 )
+( 0.139850 -0.932552 -0.332848 0.437469 )
+orthoInverse(Transform3) * Transform3:
+( 1.000000 0.000000 0.000000 -0.000000 )
+( 0.000000 1.000000 -0.000000 0.000000 )
+( 0.000000 -0.000000 1.000000 0.000000 )
+
+ __end__ 
diff --git a/Extras/vectormathlibrary/tests/test4_soa_c.c b/Extras/vectormathlibrary/tests/test4_soa_c.c
new file mode 100644
index 000000000..dbd01502a
--- /dev/null
+++ b/Extras/vectormathlibrary/tests/test4_soa_c.c
@@ -0,0 +1,474 @@
+/*
+  Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+  All rights reserved.
+
+  Redistribution and use in source and binary forms,
+  with or without modification, are permitted provided that the
+  following conditions are met:
+   * Redistributions of source code must retain the above copyright
+     notice, this list of conditions and the following disclaimer.
+   * Redistributions in binary form must reproduce the above copyright
+     notice, this list of conditions and the following disclaimer in the
+     documentation and/or other materials provided with the distribution.
+   * Neither the name of the Sony Computer Entertainment Inc nor the names
+     of its contributors may be used to endorse or promote products derived
+     from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+  POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#define _VECTORMATH_SOA_TEST
+
+#include "vectormath_soa.h"
+#include "test.h"
+
+int iteration = 0;
+
+void
+Matrix3_methods_test()
+{ /* Builds SoA test values from randfloat(), prints each one, then prints the labelled result of every Matrix3 operation. */
+    VmathSoaMatrix3 a_Matrix3, b_Matrix3;
+    VmathSoaMatrix4 a_Matrix4, b_Matrix4;
+    VmathSoaTransform3 a_Transform3, b_Transform3;
+    VmathSoaMatrix3 tmpM3_0, tmpM3_1, tmpM3_2, tmpM3_3, tmpM3_4, tmpM3_5, tmpM3_6, tmpM3_7, tmpM3_8, tmpM3_9, tmpM3_10;
+    VmathSoaVector3 a_Vector3, b_Vector3, c_Vector3, d_Vector3;
+    VmathSoaVector4 a_Vector4, b_Vector4, c_Vector4, d_Vector4;
+    VmathSoaPoint3 a_Point3, b_Point3, c_Point3, d_Point3;
+    VmathSoaQuat a_Quat, b_Quat, c_Quat, d_Quat;
+    VmathSoaVector3 tmpV3_0; /* receives the vmathSoaV3RowMul result below */
+    vec_float4 rndflt1, rndflt2, rndflt3, rndflt4, rndflt5, rndflt6; /* scratch slots for randfloat() draws */
+    rndflt1 = randfloat(); /* NOTE(review): printed values presumably depend on the order of randfloat() draws - keep the call sequence intact; confirm against test.h */
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaV3MakeFromElems( &a_Vector3, rndflt1, rndflt2, rndflt3 );
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathSoaV3MakeFromElems( &b_Vector3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaV3MakeFromElems( &c_Vector3, rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaV3MakeFromElems( &d_Vector3, rndflt1, rndflt2, rndflt3 );
+    vmathSoaV3Prints( &a_Vector3, "set Vector3 with floats" ); /* all four Vector3 inputs are printed under the same label */
+    vmathSoaV3Prints( &b_Vector3, "set Vector3 with floats" );
+    vmathSoaV3Prints( &c_Vector3, "set Vector3 with floats" );
+    vmathSoaV3Prints( &d_Vector3, "set Vector3 with floats" );
+    rndflt1 = randfloat(); /* four Vector4 inputs (a..d), later used as Matrix4 columns */
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaV4MakeFromElems( &a_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathSoaV4MakeFromElems( &b_Vector4, rndflt3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaV4MakeFromElems( &c_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaV4MakeFromElems( &d_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    vmathSoaV4Prints( &a_Vector4, "set Vector4 with floats" );
+    vmathSoaV4Prints( &b_Vector4, "set Vector4 with floats" );
+    vmathSoaV4Prints( &c_Vector4, "set Vector4 with floats" );
+    vmathSoaV4Prints( &d_Vector4, "set Vector4 with floats" );
+    rndflt1 = randfloat(); /* four Point3 inputs; in this function they are only constructed and printed */
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaP3MakeFromElems( &a_Point3, rndflt1, rndflt2, rndflt3 );
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathSoaP3MakeFromElems( &b_Point3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaP3MakeFromElems( &c_Point3, rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaP3MakeFromElems( &d_Point3, rndflt1, rndflt2, rndflt3 );
+    vmathSoaP3Prints( &a_Point3, "set Point3 with floats" );
+    vmathSoaP3Prints( &b_Point3, "set Point3 with floats" );
+    vmathSoaP3Prints( &c_Point3, "set Point3 with floats" );
+    vmathSoaP3Prints( &d_Point3, "set Point3 with floats" );
+    rndflt1 = randfloat(); /* four Quat inputs; in this function they are only constructed and printed */
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaQMakeFromElems( &a_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathSoaQMakeFromElems( &b_Quat, rndflt3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaQMakeFromElems( &c_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaQMakeFromElems( &d_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    vmathSoaQPrints( &a_Quat, "set Quat with floats" );
+    vmathSoaQPrints( &b_Quat, "set Quat with floats" );
+    vmathSoaQPrints( &c_Quat, "set Quat with floats" );
+    vmathSoaQPrints( &d_Quat, "set Quat with floats" );
+    vmathSoaM3MakeFromCols( &a_Matrix3, &a_Vector3, &b_Vector3, &c_Vector3 ); /* a_Matrix3 columns = (a, b, c) */
+    vmathSoaM3MakeFromCols( &b_Matrix3, &d_Vector3, &a_Vector3, &b_Vector3 ); /* b_Matrix3 columns = (d, a, b) */
+    vmathSoaM3Prints( &a_Matrix3, "set Matrix3 columns" );
+    vmathSoaM3Prints( &b_Matrix3, "set Matrix3 columns" );
+    vmathSoaM4MakeFromCols( &a_Matrix4, &a_Vector4, &b_Vector4, &c_Vector4, &d_Vector4 ); /* Matrix4 values are built and printed here but not operated on */
+    vmathSoaM4MakeFromCols( &b_Matrix4, &d_Vector4, &a_Vector4, &b_Vector4, &c_Vector4 );
+    vmathSoaM4Prints( &a_Matrix4, "set Matrix4 columns" );
+    vmathSoaM4Prints( &b_Matrix4, "set Matrix4 columns" );
+    vmathSoaT3MakeFromCols( &a_Transform3, &a_Vector3, &b_Vector3, &c_Vector3, &d_Vector3 ); /* Transform3 values are likewise only built and printed */
+    vmathSoaT3MakeFromCols( &b_Transform3, &d_Vector3, &a_Vector3, &b_Vector3, &c_Vector3 );
+    vmathSoaT3Prints( &a_Transform3, "set Transform3 columns" );
+    vmathSoaT3Prints( &b_Transform3, "set Transform3 columns" );
+    vmathSoaM3AppendScale( &tmpM3_0, &a_Matrix3, &a_Vector3 ); /* Matrix3 operations start here; each result goes to its own temporary and is printed immediately */
+    vmathSoaM3Prints( &tmpM3_0, "appendScale Matrix3 Vector3" );
+    vmathSoaM3PrependScale( &tmpM3_1, &a_Vector3, &a_Matrix3 );
+    vmathSoaM3Prints( &tmpM3_1, "prependScale Vector3 Matrix3" );
+    vmathSoaM3MulPerElem( &tmpM3_2, &a_Matrix3, &b_Matrix3 );
+    vmathSoaM3Prints( &tmpM3_2, "mulPerElem Matrix3" );
+    vmathSoaM3AbsPerElem( &tmpM3_3, &a_Matrix3 );
+    vmathSoaM3Prints( &tmpM3_3, "absPerElem Matrix3" );
+    vmathSoaM3Transpose( &tmpM3_4, &a_Matrix3 );
+    vmathSoaM3Prints( &tmpM3_4, "transpose Matrix3" );
+    vmathSoaM3Inverse( &tmpM3_5, &a_Matrix3 );
+    vmathSoaM3Prints( &tmpM3_5, "inverse Matrix3" );
+    vmathSoaM3Inverse( &tmpM3_6, &a_Matrix3 ); /* inverse is recomputed into a fresh temporary, then multiplied back against a_Matrix3 */
+    vmathSoaM3Mul( &tmpM3_7, &tmpM3_6, &a_Matrix3 );
+    vmathSoaM3Prints( &tmpM3_7, "inverse(Matrix3) * Matrix3" );
+    printf("%f\n", getfloat(vmathSoaM3Determinant( &a_Matrix3 )) ); /* determinant is printed without a label; getfloat() is defined outside this view - presumably yields a printable float, TODO confirm */
+    vmathSoaV3Outer( &tmpM3_8, &a_Vector3, &b_Vector3 ); /* remaining calls are Vector3 functions that produce or consume a Matrix3 */
+    vmathSoaM3Prints( &tmpM3_8, "outer Vector3" );
+    vmathSoaV3RowMul( &tmpV3_0, &a_Vector3, &a_Matrix3 );
+    vmathSoaV3Prints( &tmpV3_0, "rowMul Vector3" );
+    vmathSoaV3CrossMatrix( &tmpM3_9, &a_Vector3 );
+    vmathSoaM3Prints( &tmpM3_9, "crossMatrix" );
+    vmathSoaV3CrossMatrixMul( &tmpM3_10, &a_Vector3, &a_Matrix3 );
+    vmathSoaM3Prints( &tmpM3_10, "crossMatrixMul" );
+}
+
+void
+Matrix4_methods_test()
+{
+    VmathSoaMatrix3 a_Matrix3, b_Matrix3;
+    VmathSoaMatrix4 a_Matrix4, b_Matrix4;
+    VmathSoaTransform3 a_Transform3, b_Transform3;
+    VmathSoaMatrix4 tmpM4_0, tmpM4_1, tmpM4_2, tmpM4_3, tmpM4_4, tmpM4_5, tmpM4_6, tmpM4_7;
+    VmathSoaMatrix3 tmpM3_0;
+    VmathSoaMatrix4 tmpM4_8, tmpM4_9, tmpM4_10, tmpM4_11, tmpM4_12, tmpM4_13, tmpM4_14;
+    VmathSoaVector3 a_Vector3, b_Vector3, c_Vector3, d_Vector3;
+    VmathSoaVector4 a_Vector4, b_Vector4, c_Vector4, d_Vector4;
+    VmathSoaPoint3 a_Point3, b_Point3, c_Point3, d_Point3;
+    VmathSoaQuat a_Quat, b_Quat, c_Quat, d_Quat;
+    VmathSoaVector4 tmpV4_0;
+    VmathSoaQuat tmpQ_0;
+    vec_float4 rndflt1, rndflt2, rndflt3, rndflt4, rndflt5, rndflt6;
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaV3MakeFromElems( &a_Vector3, rndflt1, rndflt2, rndflt3 );
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathSoaV3MakeFromElems( &b_Vector3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaV3MakeFromElems( &c_Vector3, rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaV3MakeFromElems( &d_Vector3, rndflt1, rndflt2, rndflt3 );
+    vmathSoaV3Prints( &a_Vector3, "set Vector3 with floats" );
+    vmathSoaV3Prints( &b_Vector3, "set Vector3 with floats" );
+    vmathSoaV3Prints( &c_Vector3, "set Vector3 with floats" );
+    vmathSoaV3Prints( &d_Vector3, "set Vector3 with floats" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaV4MakeFromElems( &a_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathSoaV4MakeFromElems( &b_Vector4, rndflt3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaV4MakeFromElems( &c_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaV4MakeFromElems( &d_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    vmathSoaV4Prints( &a_Vector4, "set Vector4 with floats" );
+    vmathSoaV4Prints( &b_Vector4, "set Vector4 with floats" );
+    vmathSoaV4Prints( &c_Vector4, "set Vector4 with floats" );
+    vmathSoaV4Prints( &d_Vector4, "set Vector4 with floats" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaP3MakeFromElems( &a_Point3, rndflt1, rndflt2, rndflt3 );
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathSoaP3MakeFromElems( &b_Point3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaP3MakeFromElems( &c_Point3, rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaP3MakeFromElems( &d_Point3, rndflt1, rndflt2, rndflt3 );
+    vmathSoaP3Prints( &a_Point3, "set Point3 with floats" );
+    vmathSoaP3Prints( &b_Point3, "set Point3 with floats" );
+    vmathSoaP3Prints( &c_Point3, "set Point3 with floats" );
+    vmathSoaP3Prints( &d_Point3, "set Point3 with floats" );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaQMakeFromElems( &a_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathSoaQMakeFromElems( &b_Quat, rndflt3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaQMakeFromElems( &c_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaQMakeFromElems( &d_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    vmathSoaQPrints( &a_Quat, "set Quat with floats" );
+    vmathSoaQPrints( &b_Quat, "set Quat with floats" );
+    vmathSoaQPrints( &c_Quat, "set Quat with floats" );
+    vmathSoaQPrints( &d_Quat, "set Quat with floats" );
+    vmathSoaM3MakeFromCols( &a_Matrix3, &a_Vector3, &b_Vector3, &c_Vector3 );
+    vmathSoaM3MakeFromCols( &b_Matrix3, &d_Vector3, &a_Vector3, &b_Vector3 );
+    vmathSoaM3Prints( &a_Matrix3, "set Matrix3 columns" );
+    vmathSoaM3Prints( &b_Matrix3, "set Matrix3 columns" );
+    vmathSoaM4MakeFromCols( &a_Matrix4, &a_Vector4, &b_Vector4, &c_Vector4, &d_Vector4 );
+    vmathSoaM4MakeFromCols( &b_Matrix4, &d_Vector4, &a_Vector4, &b_Vector4, &c_Vector4 );
+    vmathSoaM4Prints( &a_Matrix4, "set Matrix4 columns" );
+    vmathSoaM4Prints( &b_Matrix4, "set Matrix4 columns" );
+    vmathSoaT3MakeFromCols( &a_Transform3, &a_Vector3, &b_Vector3, &c_Vector3, &d_Vector3 );
+    vmathSoaT3MakeFromCols( &b_Transform3, &d_Vector3, &a_Vector3, &b_Vector3, &c_Vector3 );
+    vmathSoaT3Prints( &a_Transform3, "set Transform3 columns" );
+    vmathSoaT3Prints( &b_Transform3, "set Transform3 columns" );
+    vmathSoaM4AppendScale( &tmpM4_0, &a_Matrix4, &a_Vector3 );
+    vmathSoaM4Prints( &tmpM4_0, "appendScale Matrix4 Vector3" );
+    vmathSoaM4PrependScale( &tmpM4_1, &a_Vector3, &a_Matrix4 );
+    vmathSoaM4Prints( &tmpM4_1, "prependScale Vector3 Matrix4" );
+    vmathSoaM4MulPerElem( &tmpM4_2, &a_Matrix4, &b_Matrix4 );
+    vmathSoaM4Prints( &tmpM4_2, "mulPerElem Matrix4" );
+    vmathSoaM4AbsPerElem( &tmpM4_3, &a_Matrix4 );
+    vmathSoaM4Prints( &tmpM4_3, "absPerElem Matrix4" );
+    vmathSoaM4Transpose( &tmpM4_4, &a_Matrix4 );
+    vmathSoaM4Prints( &tmpM4_4, "transpose Matrix4" );
+    vmathSoaM4Inverse( &tmpM4_5, &a_Matrix4 );
+    vmathSoaM4Prints( &tmpM4_5, "inverse Matrix4" );
+    vmathSoaM4Inverse( &tmpM4_6, &a_Matrix4 );
+    vmathSoaM4Mul( &tmpM4_7, &tmpM4_6, &a_Matrix4 );
+    vmathSoaM4Prints( &tmpM4_7, "inverse(Matrix4) * Matrix4" );
+    vmathSoaV4MakeFromElems( &tmpV4_0, ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){1.0f,1.0f,1.0f,1.0f}) );
+    vmathSoaM4SetRow( &a_Matrix4, 3, &tmpV4_0 );
+    vmathSoaQNormalize( &tmpQ_0, &a_Quat );
+    vmathSoaM3MakeFromQ( &tmpM3_0, &tmpQ_0 );
+    vmathSoaM4SetUpper3x3( &a_Matrix4, &tmpM3_0 );
+    vmathSoaM4AffineInverse( &tmpM4_8, &a_Matrix4 );
+    vmathSoaM4Prints( &tmpM4_8, "affineInverse Matrix4" );
+    vmathSoaM4AffineInverse( &tmpM4_9, &a_Matrix4 );
+    vmathSoaM4Mul( &tmpM4_10, &tmpM4_9, &a_Matrix4 );
+    vmathSoaM4Prints( &tmpM4_10, "affineInverse(Matrix4) * Matrix4" );
+    vmathSoaM4OrthoInverse( &tmpM4_11, &a_Matrix4 );
+    vmathSoaM4Prints( &tmpM4_11, "orthoInverse Matrix4" );
+    vmathSoaM4OrthoInverse( &tmpM4_12, &a_Matrix4 );
+    vmathSoaM4Mul( &tmpM4_13, &tmpM4_12, &a_Matrix4 );
+    vmathSoaM4Prints( &tmpM4_13, "orthoInverse(Matrix4) * Matrix4" );
+    printf("%f\n", getfloat(vmathSoaM4Determinant( &a_Matrix4 )) );
+    vmathSoaV4Outer( &tmpM4_14, &a_Vector4, &b_Vector4 );
+    vmathSoaM4Prints( &tmpM4_14, "outer Vector4" );
+}
+
+void  /* Exercises the SoA C Transform3 functions on random inputs and prints every result. */
+Transform3_methods_test()
+{
+    VmathSoaMatrix3 a_Matrix3, b_Matrix3;
+    VmathSoaMatrix4 a_Matrix4, b_Matrix4;
+    VmathSoaTransform3 a_Transform3, b_Transform3, tmpT3_0, tmpT3_1, tmpT3_2, tmpT3_3, tmpT3_4, tmpT3_5, tmpT3_6;  /* tmp* are written through the output-pointer first argument of each call */
+    VmathSoaMatrix3 tmpM3_0;
+    VmathSoaTransform3 tmpT3_7, tmpT3_8, tmpT3_9;
+    VmathSoaVector3 a_Vector3, b_Vector3, c_Vector3, d_Vector3;
+    VmathSoaVector4 a_Vector4, b_Vector4, c_Vector4, d_Vector4;
+    VmathSoaPoint3 a_Point3, b_Point3, c_Point3, d_Point3;
+    VmathSoaQuat a_Quat, b_Quat, c_Quat, d_Quat, tmpQ_0;
+    vec_float4 rndflt1, rndflt2, rndflt3, rndflt4, rndflt5, rndflt6;
+    rndflt1 = randfloat();  /* Vector3 inputs a..d: three fresh randfloat() values each */
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaV3MakeFromElems( &a_Vector3, rndflt1, rndflt2, rndflt3 );
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathSoaV3MakeFromElems( &b_Vector3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaV3MakeFromElems( &c_Vector3, rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaV3MakeFromElems( &d_Vector3, rndflt1, rndflt2, rndflt3 );
+    vmathSoaV3Prints( &a_Vector3, "set Vector3 with floats" );
+    vmathSoaV3Prints( &b_Vector3, "set Vector3 with floats" );
+    vmathSoaV3Prints( &c_Vector3, "set Vector3 with floats" );
+    vmathSoaV3Prints( &d_Vector3, "set Vector3 with floats" );
+    rndflt1 = randfloat();  /* Vector4 inputs a..d: four fresh randfloat() values each */
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaV4MakeFromElems( &a_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathSoaV4MakeFromElems( &b_Vector4, rndflt3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaV4MakeFromElems( &c_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaV4MakeFromElems( &d_Vector4, rndflt1, rndflt2, rndflt3, rndflt4 );
+    vmathSoaV4Prints( &a_Vector4, "set Vector4 with floats" );
+    vmathSoaV4Prints( &b_Vector4, "set Vector4 with floats" );
+    vmathSoaV4Prints( &c_Vector4, "set Vector4 with floats" );
+    vmathSoaV4Prints( &d_Vector4, "set Vector4 with floats" );
+    rndflt1 = randfloat();  /* Point3 inputs a..d; only constructed and printed in this function */
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaP3MakeFromElems( &a_Point3, rndflt1, rndflt2, rndflt3 );
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathSoaP3MakeFromElems( &b_Point3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaP3MakeFromElems( &c_Point3, rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    vmathSoaP3MakeFromElems( &d_Point3, rndflt1, rndflt2, rndflt3 );
+    vmathSoaP3Prints( &a_Point3, "set Point3 with floats" );
+    vmathSoaP3Prints( &b_Point3, "set Point3 with floats" );
+    vmathSoaP3Prints( &c_Point3, "set Point3 with floats" );
+    vmathSoaP3Prints( &d_Point3, "set Point3 with floats" );
+    rndflt1 = randfloat();  /* Quat inputs a..d; only a_Quat is used later (for the orthoInverse setup) */
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaQMakeFromElems( &a_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    vmathSoaQMakeFromElems( &b_Quat, rndflt3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaQMakeFromElems( &c_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    vmathSoaQMakeFromElems( &d_Quat, rndflt1, rndflt2, rndflt3, rndflt4 );
+    vmathSoaQPrints( &a_Quat, "set Quat with floats" );
+    vmathSoaQPrints( &b_Quat, "set Quat with floats" );
+    vmathSoaQPrints( &c_Quat, "set Quat with floats" );
+    vmathSoaQPrints( &d_Quat, "set Quat with floats" );
+    vmathSoaM3MakeFromCols( &a_Matrix3, &a_Vector3, &b_Vector3, &c_Vector3 );  /* matrices and transforms are assembled column by column from the vectors above */
+    vmathSoaM3MakeFromCols( &b_Matrix3, &d_Vector3, &a_Vector3, &b_Vector3 );
+    vmathSoaM3Prints( &a_Matrix3, "set Matrix3 columns" );
+    vmathSoaM3Prints( &b_Matrix3, "set Matrix3 columns" );
+    vmathSoaM4MakeFromCols( &a_Matrix4, &a_Vector4, &b_Vector4, &c_Vector4, &d_Vector4 );
+    vmathSoaM4MakeFromCols( &b_Matrix4, &d_Vector4, &a_Vector4, &b_Vector4, &c_Vector4 );
+    vmathSoaM4Prints( &a_Matrix4, "set Matrix4 columns" );
+    vmathSoaM4Prints( &b_Matrix4, "set Matrix4 columns" );
+    vmathSoaT3MakeFromCols( &a_Transform3, &a_Vector3, &b_Vector3, &c_Vector3, &d_Vector3 );
+    vmathSoaT3MakeFromCols( &b_Transform3, &d_Vector3, &a_Vector3, &b_Vector3, &c_Vector3 );
+    vmathSoaT3Prints( &a_Transform3, "set Transform3 columns" );
+    vmathSoaT3Prints( &b_Transform3, "set Transform3 columns" );
+    vmathSoaT3AppendScale( &tmpT3_0, &a_Transform3, &a_Vector3 );  /* Transform3 operations under test start here; each result is printed with a label */
+    vmathSoaT3Prints( &tmpT3_0, "appendScale Transform3 Vector3" );
+    vmathSoaT3PrependScale( &tmpT3_1, &a_Vector3, &a_Transform3 );
+    vmathSoaT3Prints( &tmpT3_1, "prependScale Vector3 Transform3" );
+    vmathSoaT3MulPerElem( &tmpT3_2, &a_Transform3, &b_Transform3 );
+    vmathSoaT3Prints( &tmpT3_2, "mulPerElem Transform3" );
+    vmathSoaT3AbsPerElem( &tmpT3_3, &a_Transform3 );
+    vmathSoaT3Prints( &tmpT3_3, "absPerElem Transform3" );
+    vmathSoaT3Inverse( &tmpT3_4, &a_Transform3 );
+    vmathSoaT3Prints( &tmpT3_4, "inverse Transform3" );
+    vmathSoaT3Inverse( &tmpT3_5, &a_Transform3 );  /* the inverse is computed again into its own temporary for the product below */
+    vmathSoaT3Mul( &tmpT3_6, &tmpT3_5, &a_Transform3 );  /* inverse(T) * T; the product is only printed, not checked here */
+    vmathSoaT3Prints( &tmpT3_6, "inverse(Transform3) * Transform3" );
+    vmathSoaQNormalize( &tmpQ_0, &a_Quat );
+    vmathSoaM3MakeFromQ( &tmpM3_0, &tmpQ_0 );
+    vmathSoaT3SetUpper3x3( &a_Transform3, &tmpM3_0 );  /* NOTE(review): presumably makes the upper 3x3 orthonormal so orthoInverse applies - confirm */
+    vmathSoaT3OrthoInverse( &tmpT3_7, &a_Transform3 );
+    vmathSoaT3Prints( &tmpT3_7, "orthoInverse Transform3" );
+    vmathSoaT3OrthoInverse( &tmpT3_8, &a_Transform3 );  /* recomputed into its own temporary, as with inverse above */
+    vmathSoaT3Mul( &tmpT3_9, &tmpT3_8, &a_Transform3 );
+    vmathSoaT3Prints( &tmpT3_9, "orthoInverse(Transform3) * Transform3" );
+}
+
+int main()  /* Test driver: runs the three method suites twice between begin/end markers. */
+{
+    int remaining = 2;  /* number of passes still to run */
+    printf("\n __begin__ \n");
+    while ( remaining-- > 0 ) {
+        Matrix3_methods_test();
+        Matrix4_methods_test();
+        Transform3_methods_test();
+    }
+    printf("\n __end__ \n");
+    return 0;
+}
diff --git a/Extras/vectormathlibrary/tests/test4_soa_cpp.cpp b/Extras/vectormathlibrary/tests/test4_soa_cpp.cpp
new file mode 100644
index 000000000..3a71aef2e
--- /dev/null
+++ b/Extras/vectormathlibrary/tests/test4_soa_cpp.cpp
@@ -0,0 +1,426 @@
+/*
+  Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+  All rights reserved.
+
+  Redistribution and use in source and binary forms,
+  with or without modification, are permitted provided that the
+  following conditions are met:
+   * Redistributions of source code must retain the above copyright
+     notice, this list of conditions and the following disclaimer.
+   * Redistributions in binary form must reproduce the above copyright
+     notice, this list of conditions and the following disclaimer in the
+     documentation and/or other materials provided with the distribution.
+   * Neither the name of the Sony Computer Entertainment Inc nor the names
+     of its contributors may be used to endorse or promote products derived
+     from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+  POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#define _VECTORMATH_SOA_TEST  // defined ahead of the includes; presumably consumed by the headers below - confirm
+
+#include "vectormath_soa.h"
+#include "test.h"
+
+int iteration = 0;  // not referenced in this file; NOTE(review): presumably used by test.h - confirm
+
+using namespace Vectormath;
+using namespace Vectormath::Soa;  // lets the tests name Matrix3, Vector3, etc. unqualified
+
+void  // Exercises the SoA C++ Matrix3 operations on random inputs and prints every result.
+Matrix3_methods_test()
+{
+    Matrix3 a_Matrix3, b_Matrix3;
+    Matrix4 a_Matrix4, b_Matrix4;
+    Transform3 a_Transform3, b_Transform3;
+    Vector3 a_Vector3, b_Vector3, c_Vector3, d_Vector3;
+    Vector4 a_Vector4, b_Vector4, c_Vector4, d_Vector4;
+    Point3 a_Point3, b_Point3, c_Point3, d_Point3;
+    Quat a_Quat, b_Quat, c_Quat, d_Quat;
+    vec_float4 rndflt1, rndflt2, rndflt3, rndflt4, rndflt5, rndflt6;
+    rndflt1 = randfloat();  // Vector3 inputs a..d: three fresh randfloat() values each
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    a_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 );
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    b_Vector3 = Vector3( rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    c_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    d_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 );
+    print( a_Vector3, "set Vector3 with floats" );
+    print( b_Vector3, "set Vector3 with floats" );
+    print( c_Vector3, "set Vector3 with floats" );
+    print( d_Vector3, "set Vector3 with floats" );
+    rndflt1 = randfloat();  // Vector4 inputs a..d: four fresh randfloat() values each
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    a_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    b_Vector4 = Vector4( rndflt3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    c_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    d_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 );
+    print( a_Vector4, "set Vector4 with floats" );
+    print( b_Vector4, "set Vector4 with floats" );
+    print( c_Vector4, "set Vector4 with floats" );
+    print( d_Vector4, "set Vector4 with floats" );
+    rndflt1 = randfloat();  // Point3 inputs a..d; only constructed and printed in this function
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    a_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    b_Point3 = Point3( rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    c_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    d_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    print( a_Point3, "set Point3 with floats" );
+    print( b_Point3, "set Point3 with floats" );
+    print( c_Point3, "set Point3 with floats" );
+    print( d_Point3, "set Point3 with floats" );
+    rndflt1 = randfloat();  // Quat inputs a..d; only constructed and printed in this function
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    a_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    b_Quat = Quat( rndflt3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    c_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    d_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    print( a_Quat, "set Quat with floats" );
+    print( b_Quat, "set Quat with floats" );
+    print( c_Quat, "set Quat with floats" );
+    print( d_Quat, "set Quat with floats" );
+    a_Matrix3 = Matrix3( a_Vector3, b_Vector3, c_Vector3 );  // matrices and transforms are assembled from the vectors above, passed as columns
+    b_Matrix3 = Matrix3( d_Vector3, a_Vector3, b_Vector3 );
+    print( a_Matrix3, "set Matrix3 columns" );
+    print( b_Matrix3, "set Matrix3 columns" );
+    a_Matrix4 = Matrix4( a_Vector4, b_Vector4, c_Vector4, d_Vector4 );
+    b_Matrix4 = Matrix4( d_Vector4, a_Vector4, b_Vector4, c_Vector4 );
+    print( a_Matrix4, "set Matrix4 columns" );
+    print( b_Matrix4, "set Matrix4 columns" );
+    a_Transform3 = Transform3( a_Vector3, b_Vector3, c_Vector3, d_Vector3 );
+    b_Transform3 = Transform3( d_Vector3, a_Vector3, b_Vector3, c_Vector3 );
+    print( a_Transform3, "set Transform3 columns" );
+    print( b_Transform3, "set Transform3 columns" );
+    print( appendScale( a_Matrix3, a_Vector3 ), "appendScale Matrix3 Vector3" );  // Matrix3 operations under test start here; each result is printed with a label
+    print( prependScale( a_Vector3, a_Matrix3 ), "prependScale Vector3 Matrix3" );
+    print( mulPerElem( a_Matrix3, b_Matrix3 ), "mulPerElem Matrix3" );
+    print( absPerElem( a_Matrix3 ), "absPerElem Matrix3" );
+    print( transpose( a_Matrix3 ), "transpose Matrix3" );
+    print( inverse( a_Matrix3 ), "inverse Matrix3" );
+    print( ( inverse( a_Matrix3 ) * a_Matrix3 ), "inverse(Matrix3) * Matrix3" );  // the product is only printed, not checked here
+    printf("%f\n", getfloat(determinant( a_Matrix3 )) );  // determinant goes through getfloat() to fit the %f format
+    print( outer( a_Vector3, b_Vector3 ), "outer Vector3" );
+    print( rowMul( a_Vector3, a_Matrix3 ), "rowMul Vector3" );
+    print( crossMatrix( a_Vector3 ), "crossMatrix" );
+    print( crossMatrixMul( a_Vector3, a_Matrix3 ), "crossMatrixMul" );
+}
+
+void  // Exercises the SoA C++ Matrix4 operations on random inputs and prints every result.
+Matrix4_methods_test()
+{
+    Matrix3 a_Matrix3, b_Matrix3;
+    Matrix4 a_Matrix4, b_Matrix4;
+    Transform3 a_Transform3, b_Transform3;
+    Vector3 a_Vector3, b_Vector3, c_Vector3, d_Vector3;
+    Vector4 a_Vector4, b_Vector4, c_Vector4, d_Vector4;
+    Point3 a_Point3, b_Point3, c_Point3, d_Point3;
+    Quat a_Quat, b_Quat, c_Quat, d_Quat;
+    vec_float4 rndflt1, rndflt2, rndflt3, rndflt4, rndflt5, rndflt6;
+    rndflt1 = randfloat();  // Vector3 inputs a..d: three fresh randfloat() values each
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    a_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 );
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    b_Vector3 = Vector3( rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    c_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    d_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 );
+    print( a_Vector3, "set Vector3 with floats" );
+    print( b_Vector3, "set Vector3 with floats" );
+    print( c_Vector3, "set Vector3 with floats" );
+    print( d_Vector3, "set Vector3 with floats" );
+    rndflt1 = randfloat();  // Vector4 inputs a..d: four fresh randfloat() values each
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    a_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    b_Vector4 = Vector4( rndflt3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    c_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    d_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 );
+    print( a_Vector4, "set Vector4 with floats" );
+    print( b_Vector4, "set Vector4 with floats" );
+    print( c_Vector4, "set Vector4 with floats" );
+    print( d_Vector4, "set Vector4 with floats" );
+    rndflt1 = randfloat();  // Point3 inputs a..d; only constructed and printed in this function
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    a_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    b_Point3 = Point3( rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    c_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    d_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    print( a_Point3, "set Point3 with floats" );
+    print( b_Point3, "set Point3 with floats" );
+    print( c_Point3, "set Point3 with floats" );
+    print( d_Point3, "set Point3 with floats" );
+    rndflt1 = randfloat();  // Quat inputs a..d; only a_Quat is used later (for the affine/ortho setup)
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    a_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    b_Quat = Quat( rndflt3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    c_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    d_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    print( a_Quat, "set Quat with floats" );
+    print( b_Quat, "set Quat with floats" );
+    print( c_Quat, "set Quat with floats" );
+    print( d_Quat, "set Quat with floats" );
+    a_Matrix3 = Matrix3( a_Vector3, b_Vector3, c_Vector3 );  // matrices and transforms are assembled from the vectors above, passed as columns
+    b_Matrix3 = Matrix3( d_Vector3, a_Vector3, b_Vector3 );
+    print( a_Matrix3, "set Matrix3 columns" );
+    print( b_Matrix3, "set Matrix3 columns" );
+    a_Matrix4 = Matrix4( a_Vector4, b_Vector4, c_Vector4, d_Vector4 );
+    b_Matrix4 = Matrix4( d_Vector4, a_Vector4, b_Vector4, c_Vector4 );
+    print( a_Matrix4, "set Matrix4 columns" );
+    print( b_Matrix4, "set Matrix4 columns" );
+    a_Transform3 = Transform3( a_Vector3, b_Vector3, c_Vector3, d_Vector3 );
+    b_Transform3 = Transform3( d_Vector3, a_Vector3, b_Vector3, c_Vector3 );
+    print( a_Transform3, "set Transform3 columns" );
+    print( b_Transform3, "set Transform3 columns" );
+    print( appendScale( a_Matrix4, a_Vector3 ), "appendScale Matrix4 Vector3" );  // Matrix4 operations under test start here; each result is printed with a label
+    print( prependScale( a_Vector3, a_Matrix4 ), "prependScale Vector3 Matrix4" );
+    print( mulPerElem( a_Matrix4, b_Matrix4 ), "mulPerElem Matrix4" );
+    print( absPerElem( a_Matrix4 ), "absPerElem Matrix4" );
+    print( transpose( a_Matrix4 ), "transpose Matrix4" );
+    print( inverse( a_Matrix4 ), "inverse Matrix4" );
+    print( ( inverse( a_Matrix4 ) * a_Matrix4 ), "inverse(Matrix4) * Matrix4" );  // the product is only printed, not checked here
+    a_Matrix4.setRow( 3, Vector4( ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){0.0f,0.0f,0.0f,0.0f}), ((vec_float4){1.0f,1.0f,1.0f,1.0f}) ) );  // row 3 := (0,0,0,1), same value in all four vec_float4 slots
+    a_Matrix4.setUpper3x3( Matrix3( normalize( a_Quat ) ) );  // NOTE(review): presumably shapes a_Matrix4 to meet affineInverse/orthoInverse preconditions - confirm
+    print( affineInverse( a_Matrix4 ), "affineInverse Matrix4" );
+    print( ( affineInverse( a_Matrix4 ) * a_Matrix4 ), "affineInverse(Matrix4) * Matrix4" );
+    print( orthoInverse( a_Matrix4 ), "orthoInverse Matrix4" );
+    print( ( orthoInverse( a_Matrix4 ) * a_Matrix4 ), "orthoInverse(Matrix4) * Matrix4" );
+    printf("%f\n", getfloat(determinant( a_Matrix4 )) );  // determinant of the modified a_Matrix4, passed through getfloat() for %f
+    print( outer( a_Vector4, b_Vector4 ), "outer Vector4" );
+}
+
+void  // Exercises the SoA C++ Transform3 operations on random inputs and prints every result.
+Transform3_methods_test()
+{
+    Matrix3 a_Matrix3, b_Matrix3;
+    Matrix4 a_Matrix4, b_Matrix4;
+    Transform3 a_Transform3, b_Transform3;
+    Vector3 a_Vector3, b_Vector3, c_Vector3, d_Vector3;
+    Vector4 a_Vector4, b_Vector4, c_Vector4, d_Vector4;
+    Point3 a_Point3, b_Point3, c_Point3, d_Point3;
+    Quat a_Quat, b_Quat, c_Quat, d_Quat;
+    vec_float4 rndflt1, rndflt2, rndflt3, rndflt4, rndflt5, rndflt6;
+    rndflt1 = randfloat();  // Vector3 inputs a..d: three fresh randfloat() values each
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    a_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 );
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    b_Vector3 = Vector3( rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    c_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    d_Vector3 = Vector3( rndflt1, rndflt2, rndflt3 );
+    print( a_Vector3, "set Vector3 with floats" );
+    print( b_Vector3, "set Vector3 with floats" );
+    print( c_Vector3, "set Vector3 with floats" );
+    print( d_Vector3, "set Vector3 with floats" );
+    rndflt1 = randfloat();  // Vector4 inputs a..d: four fresh randfloat() values each
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    a_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    b_Vector4 = Vector4( rndflt3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    c_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    d_Vector4 = Vector4( rndflt1, rndflt2, rndflt3, rndflt4 );
+    print( a_Vector4, "set Vector4 with floats" );
+    print( b_Vector4, "set Vector4 with floats" );
+    print( c_Vector4, "set Vector4 with floats" );
+    print( d_Vector4, "set Vector4 with floats" );
+    rndflt1 = randfloat();  // Point3 inputs a..d; only constructed and printed in this function
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    a_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    b_Point3 = Point3( rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    c_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    d_Point3 = Point3( rndflt1, rndflt2, rndflt3 );
+    print( a_Point3, "set Point3 with floats" );
+    print( b_Point3, "set Point3 with floats" );
+    print( c_Point3, "set Point3 with floats" );
+    print( d_Point3, "set Point3 with floats" );
+    rndflt1 = randfloat();  // Quat inputs a..d; only a_Quat is used later (for the orthoInverse setup)
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    a_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    rndflt5 = randfloat();
+    rndflt6 = randfloat();
+    b_Quat = Quat( rndflt3, rndflt4, rndflt5, rndflt6 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    c_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    rndflt1 = randfloat();
+    rndflt2 = randfloat();
+    rndflt3 = randfloat();
+    rndflt4 = randfloat();
+    d_Quat = Quat( rndflt1, rndflt2, rndflt3, rndflt4 );
+    print( a_Quat, "set Quat with floats" );
+    print( b_Quat, "set Quat with floats" );
+    print( c_Quat, "set Quat with floats" );
+    print( d_Quat, "set Quat with floats" );
+    a_Matrix3 = Matrix3( a_Vector3, b_Vector3, c_Vector3 );  // matrices and transforms are assembled from the vectors above, passed as columns
+    b_Matrix3 = Matrix3( d_Vector3, a_Vector3, b_Vector3 );
+    print( a_Matrix3, "set Matrix3 columns" );
+    print( b_Matrix3, "set Matrix3 columns" );
+    a_Matrix4 = Matrix4( a_Vector4, b_Vector4, c_Vector4, d_Vector4 );
+    b_Matrix4 = Matrix4( d_Vector4, a_Vector4, b_Vector4, c_Vector4 );
+    print( a_Matrix4, "set Matrix4 columns" );
+    print( b_Matrix4, "set Matrix4 columns" );
+    a_Transform3 = Transform3( a_Vector3, b_Vector3, c_Vector3, d_Vector3 );
+    b_Transform3 = Transform3( d_Vector3, a_Vector3, b_Vector3, c_Vector3 );
+    print( a_Transform3, "set Transform3 columns" );
+    print( b_Transform3, "set Transform3 columns" );
+    print( appendScale( a_Transform3, a_Vector3 ), "appendScale Transform3 Vector3" );  // Transform3 operations under test start here; each result is printed with a label
+    print( prependScale( a_Vector3, a_Transform3 ), "prependScale Vector3 Transform3" );
+    print( mulPerElem( a_Transform3, b_Transform3 ), "mulPerElem Transform3" );
+    print( absPerElem( a_Transform3 ), "absPerElem Transform3" );
+    print( inverse( a_Transform3 ), "inverse Transform3" );
+    print( ( inverse( a_Transform3 ) * a_Transform3 ), "inverse(Transform3) * Transform3" );  // the product is only printed, not checked here
+    a_Transform3.setUpper3x3( Matrix3( normalize( a_Quat ) ) );  // NOTE(review): presumably makes the upper 3x3 orthonormal so orthoInverse applies - confirm
+    print( orthoInverse( a_Transform3 ), "orthoInverse Transform3" );
+    print( ( orthoInverse( a_Transform3 ) * a_Transform3 ), "orthoInverse(Transform3) * Transform3" );
+}
+
+int main()  // Test driver: runs the three method suites twice between begin/end markers.
+{
+    const int kPasses = 2;  // how many times the whole suite is repeated
+    printf("\n __begin__ \n");
+    for ( int pass = 0; pass != kPasses; ++pass ) {
+        Matrix3_methods_test();
+        Matrix4_methods_test();
+        Transform3_methods_test();
+    }
+    printf("\n __end__ \n");
+    return 0;
+}
diff --git a/Extras/vectormathlibrary/vectormath.spec b/Extras/vectormathlibrary/vectormath.spec
new file mode 100644
index 000000000..2d739c317
--- /dev/null
+++ b/Extras/vectormathlibrary/vectormath.spec
@@ -0,0 +1,87 @@
+# RPM packaging for the vectormath library.  Only the -devel subpackages
+# declared below carry a files list.
+%define lib_version 1.0.1
+
+Summary: Vector math library
+Name: vectormath
+Version: %{lib_version}
+Release: 1
+License: BSD
+Group: Development/Libraries
+Source0: %{name}-%{version}.tar.gz
+BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root
+
+%description
+Vector math library.
+
+%ifarch ppc ppc64
+%define _lib_arch ppu
+%endif
+
+%ifarch %{ix86} x86_64
+%define _lib_arch SSE
+%endif
+
+%if %{undefined _lib_arch}
+%define _lib_arch scalar
+%endif
+
+%package -n %{name}-devel
+Summary: Vector math library
+Group: Development/Libraries
+%ifarch ppc ppc64
+Requires: simdmath-devel
+%endif
+
+%description -n %{name}-devel
+Vector math library.
+
+%ifarch ppc ppc64
+%package -n spu-%{name}-devel
+Summary: Vector math library
+Group: Development/Libraries
+Requires: spu-simdmath-devel
+
+%description -n spu-%{name}-devel
+Vector math library.
+%endif
+
+%prep
+%setup -q
+
+%build
+# No build step: everything is done by "make install" in the install section.
+
+%install
+rm -rf %{buildroot}
+
+# _lib_arch was selected by the ifarch blocks near the top of this file:
+# ppu on ppc/ppc64, SSE on every ix86 variant and x86_64, scalar otherwise.
+make ARCH=%{_lib_arch} DESTDIR=%{buildroot} install
+
+# PowerPC builds additionally install the spu flavour.
+%ifarch ppc ppc64
+make ARCH=spu DESTDIR=%{buildroot} install
+%endif
+
+mkdir -p %{buildroot}/%{_docdir}/%{name}-%{version}
+cp README LICENSE doc/*.pdf %{buildroot}/%{_docdir}/%{name}-%{version}/
+
+
+%clean
+rm -rf %{buildroot}
+
+%files -n %{name}-devel
+%defattr(-,root,root,-)
+%{_includedir}/*
+%{_docdir}/*
+
+%ifarch ppc ppc64
+%files -n spu-%{name}-devel
+%defattr(-,root,root,-)
+%{_prefix}/spu/include/*
+%endif
+
+%changelog
+* Wed Aug  8 2007 Kazunori Asayama <asayama@sm.sony.co.jp> - 1.0.1-1
+- Initial build.